[08/11] net: allow to dump and restore more than one network namespace

Submitted by Andrei Vagin on Feb. 2, 2017, 12:04 a.m.

Details

Message ID 1485993871-3990-9-git-send-email-avagin@openvz.org
State New
Series "Dump and restore nested network namespaces"
Headers show

Commit Message

Andrei Vagin Feb. 2, 2017, 12:04 a.m.
From: Andrei Vagin <avagin@virtuozzo.com>

Restore all network namespaces from the root task and then set
a proper namespace for each task after restoring sockets, because
we need to switch network namespaces to restore sockets.

Each socket has to be created in a proper network namespace.

Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
---
 criu/cr-restore.c         |  21 ++++++----
 criu/include/namespaces.h |   4 +-
 criu/include/net.h        |   6 ++-
 criu/namespaces.c         |   6 +--
 criu/net.c                | 104 +++++++++++++++++++++++++++++++++++++++++++++-
 criu/pstree.c             |   4 ++
 6 files changed, 131 insertions(+), 14 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 681655d..288add3 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -713,6 +713,13 @@  static int restore_one_alive_task(int pid, CoreEntry *core)
 	if (prepare_vmas(current, ta))
 		return -1;
 
+	/*
+	 * Sockets have to be restored in their network namespaces,
+	 * so a task namespace has to be restored after sockets.
+	 */
+	if (restore_task_net_ns(current))
+		return -1;
+
 	if (setup_uffd(pid, ta))
 		return -1;
 
@@ -1388,14 +1395,6 @@  static int restore_task_with_children(void *_arg)
 	if (ret < 0)
 		goto err;
 
-	if (ca->clone_flags & CLONE_NEWNET) {
-		ret = unshare(CLONE_NEWNET);
-		if (ret) {
-			pr_perror("Can't unshare net-namespace");
-			goto err;
-		}
-	}
-
 	if (!(ca->clone_flags & CLONE_FILES)) {
 		ret = close_old_fds();
 		if (ret)
@@ -2965,6 +2964,12 @@  static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 	if (rst_prep_creds(pid, core, &creds_pos))
 		goto err_nv;
 
+	if (current->parent == NULL) {
+		/* Wait when all tasks restored all files */
+		restore_wait_other_tasks();
+		fini_net_namespaces();
+	}
+
 	/*
 	 * We're about to search for free VM area and inject the restorer blob
 	 * into it. No irrelevant mmaps/mremaps beyond this point, otherwise
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index 18eafb2..522c098 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -36,7 +36,8 @@ 
 #define CLONE_ALLNS	(CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP)
 
 /* Nested namespaces are supported only for these types */
-#define CLONE_SUBNS	(CLONE_NEWNS)
+#define CLONE_SUBNS	(CLONE_NEWNS | CLONE_NEWNET)
+
 #define EXTRA_SIZE	20
 
 struct ns_desc {
@@ -95,6 +96,7 @@  struct ns_id {
 		} mnt;
 
 		struct {
+			int ns_fd;	/* a file handle for the namespace */
 			int nlsk;	/* for sockets collection */
 			int seqsk;	/* to talk to parasite daemons */
 		} net;
diff --git a/criu/include/net.h b/criu/include/net.h
index deac65f..49eca36 100644
--- a/criu/include/net.h
+++ b/criu/include/net.h
@@ -12,9 +12,13 @@ 
 
 struct cr_imgset;
 extern int dump_net_ns(int ns_id);
-extern int prepare_net_ns(int pid);
+extern int prepare_net_namespaces(void);
+extern void fini_net_namespaces(void);
 extern int netns_keep_nsfd(void);
 
+struct pstree_item;
+extern int restore_task_net_ns(struct pstree_item *current);
+
 struct veth_pair {
 	struct list_head node;
 	char *inside;
diff --git a/criu/namespaces.c b/criu/namespaces.c
index f655c16..64175f2 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -1660,9 +1660,6 @@  int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
 	 * tree (i.e. -- mnt_ns restoring)
 	 */
 
-	id = ns_per_id ? item->ids->net_ns_id : pid;
-	if ((clone_flags & CLONE_NEWNET) && prepare_net_ns(id))
-		return -1;
 	id = ns_per_id ? item->ids->uts_ns_id : pid;
 	if ((clone_flags & CLONE_NEWUTS) && prepare_utsns(id))
 		return -1;
@@ -1670,6 +1667,9 @@  int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
 	if ((clone_flags & CLONE_NEWIPC) && prepare_ipc_ns(id))
 		return -1;
 
+	if (prepare_net_namespaces())
+		return -1;
+
 	/*
 	 * This one is special -- there can be several mount
 	 * namespaces and prepare_mnt_ns handles them itself.
diff --git a/criu/net.c b/criu/net.c
index 7dadc8d..a7e0a6e 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -1670,7 +1670,7 @@  int dump_net_ns(int ns_id)
 	return ret;
 }
 
-int prepare_net_ns(int pid)
+static int prepare_net_ns(int pid)
 {
 	int ret = 0;
 	NetnsEntry *netns = NULL;
@@ -1702,6 +1702,108 @@  int prepare_net_ns(int pid)
 	return ret;
 }
 
+static int open_net_ns(struct ns_id *nsid, struct rst_info *rst)
+{
+	int fd, tfd;
+
+	/* Pin one with a file descriptor */
+	fd = open_proc(PROC_SELF, "ns/net");
+	if (fd < 0)
+		return -1;
+	tfd = reopen_as_unused_fd(fd, rst);
+	if (tfd < 0) {
+		close(fd);
+		return -1;
+	}
+	nsid->net.ns_fd = tfd;
+
+	return 0;
+}
+
+int prepare_net_namespaces()
+{
+	struct ns_id *nsid;
+
+	if (!(root_ns_mask & CLONE_NEWNET))
+		return 0;
+
+	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
+		if (nsid->nd != &net_ns_desc)
+			continue;
+
+		if (unshare(CLONE_NEWNET)) {
+			pr_perror("Unable to create a new netns");
+			goto err;
+		}
+
+		if (prepare_net_ns(nsid->id))
+			goto err;
+
+		if (open_net_ns(nsid, rsti(root_item)))
+			goto err;
+	}
+
+	return 0;
+err:
+	return -1;
+}
+
+void fini_net_namespaces()
+{
+	struct ns_id *nsid;
+
+	if (!(root_ns_mask & CLONE_NEWNET))
+		return;
+
+	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
+		if (nsid->nd != &net_ns_desc)
+			continue;
+		close_safe(&nsid->net.ns_fd);
+	}
+}
+
+static int do_restore_task_net_ns(struct ns_id *nsid, struct pstree_item *current)
+{
+	int fd;
+
+	if (!(root_ns_mask & CLONE_NEWNET))
+		return 0;
+
+	fd = open_proc(root_item->pid->ns[0].virt, "fd/%d", nsid->net.ns_fd);
+	if (fd < 0)
+		return -1;
+
+	if (setns(fd, CLONE_NEWNET)) {
+		pr_perror("Can't restore netns");
+		close(fd);
+		return -1;
+	}
+	close(fd);
+
+	return 0;
+}
+
+int restore_task_net_ns(struct pstree_item *current)
+{
+	if (current->ids && current->ids->has_net_ns_id) {
+		unsigned int id = current->ids->net_ns_id;
+		struct ns_id *nsid;
+
+		nsid = lookup_ns_by_id(id, &net_ns_desc);
+		if (nsid == NULL) {
+			pr_err("Can't find mount namespace %d\n", id);
+			return -1;
+		}
+
+		BUG_ON(nsid->type == NS_CRIU);
+
+		if (do_restore_task_net_ns(nsid, current))
+			return -1;
+	}
+
+	return 0;
+}
+
 int netns_keep_nsfd(void)
 {
 	int ns_fd, ret;
diff --git a/criu/pstree.c b/criu/pstree.c
index 833b3d0..bce7b72 100644
--- a/criu/pstree.c
+++ b/criu/pstree.c
@@ -14,6 +14,8 @@ 
 #include "mount.h"
 #include "dump.h"
 #include "util.h"
+#include "net.h"
+
 #include "protobuf.h"
 #include "images/pstree.pb-c.h"
 #include "crtools.h"
@@ -472,6 +474,8 @@  static int read_pstree_ids(struct pstree_item *pi)
 	if (pi->ids->has_mnt_ns_id) {
 		if (rst_add_ns_id(pi->ids->mnt_ns_id, pi, &mnt_ns_desc))
 			return -1;
+		if (rst_add_ns_id(pi->ids->net_ns_id, pi, &net_ns_desc))
+			return -1;
 	}
 
 	return 0;

Comments

Dmitry Safonov Feb. 3, 2017, 12:26 p.m.
While doing UTS & IPC namespaces on top of your patch set, I have found
a minor thing:

2017-02-02 3:04 GMT+03:00 Andrei Vagin <avagin@openvz.org>:
> diff --git a/criu/pstree.c b/criu/pstree.c
> index 833b3d0..bce7b72 100644
> --- a/criu/pstree.c
> +++ b/criu/pstree.c
> @@ -14,6 +14,8 @@
>  #include "mount.h"
>  #include "dump.h"
>  #include "util.h"
> +#include "net.h"
> +
>  #include "protobuf.h"
>  #include "images/pstree.pb-c.h"
>  #include "crtools.h"
> @@ -472,6 +474,8 @@ static int read_pstree_ids(struct pstree_item *pi)
>         if (pi->ids->has_mnt_ns_id) {
>                 if (rst_add_ns_id(pi->ids->mnt_ns_id, pi, &mnt_ns_desc))
>                         return -1;
> +               if (rst_add_ns_id(pi->ids->net_ns_id, pi, &net_ns_desc))
> +                       return -1;

Shouldn't it be under?
if (pi->ids->has_net_ns_id)
Kirill Tkhai Feb. 3, 2017, 2:29 p.m.
On 02.02.2017 03:04, Andrei Vagin wrote:
> From: Andrei Vagin <avagin@virtuozzo.com>
> 
> Restore all network namespaces from the root task and then set
> a proper namespace for each task after restoring sockets, because
> we need to switch network namespaces to restore sockets.
> 
> Each socket has to be created in a proper network namespace.
> 
> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> ---
>  criu/cr-restore.c         |  21 ++++++----
>  criu/include/namespaces.h |   4 +-
>  criu/include/net.h        |   6 ++-
>  criu/namespaces.c         |   6 +--
>  criu/net.c                | 104 +++++++++++++++++++++++++++++++++++++++++++++-
>  criu/pstree.c             |   4 ++
>  6 files changed, 131 insertions(+), 14 deletions(-)
> 
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 681655d..288add3 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -713,6 +713,13 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
>  	if (prepare_vmas(current, ta))
>  		return -1;
>  
> +	/*
> +	 * Sockets have to be restored in their network namespaces,
> +	 * so a task namespace has to be restored after sockets.
> +	 */
> +	if (restore_task_net_ns(current))
> +		return -1;
> +
>  	if (setup_uffd(pid, ta))
>  		return -1;
>  
> @@ -1388,14 +1395,6 @@ static int restore_task_with_children(void *_arg)
>  	if (ret < 0)
>  		goto err;
>  
> -	if (ca->clone_flags & CLONE_NEWNET) {
> -		ret = unshare(CLONE_NEWNET);
> -		if (ret) {
> -			pr_perror("Can't unshare net-namespace");
> -			goto err;
> -		}
> -	}
> -
>  	if (!(ca->clone_flags & CLONE_FILES)) {
>  		ret = close_old_fds();
>  		if (ret)
> @@ -2965,6 +2964,12 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
>  	if (rst_prep_creds(pid, core, &creds_pos))
>  		goto err_nv;
>  
> +	if (current->parent == NULL) {
> +		/* Wait when all tasks restored all files */
> +		restore_wait_other_tasks();
> +		fini_net_namespaces();
> +	}
> +
>  	/*
>  	 * We're about to search for free VM area and inject the restorer blob
>  	 * into it. No irrelevant mmaps/mremaps beyond this point, otherwise
> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> index 18eafb2..522c098 100644
> --- a/criu/include/namespaces.h
> +++ b/criu/include/namespaces.h
> @@ -36,7 +36,8 @@
>  #define CLONE_ALLNS	(CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP)
>  
>  /* Nested namespaces are supported only for these types */
> -#define CLONE_SUBNS	(CLONE_NEWNS)
> +#define CLONE_SUBNS	(CLONE_NEWNS | CLONE_NEWNET)
> +
>  #define EXTRA_SIZE	20
>  
>  struct ns_desc {
> @@ -95,6 +96,7 @@ struct ns_id {
>  		} mnt;
>  
>  		struct {
> +			int ns_fd;	/* a file handle for the namespace */
>  			int nlsk;	/* for sockets collection */
>  			int seqsk;	/* to talk to parasite daemons */
>  		} net;
> diff --git a/criu/include/net.h b/criu/include/net.h
> index deac65f..49eca36 100644
> --- a/criu/include/net.h
> +++ b/criu/include/net.h
> @@ -12,9 +12,13 @@
>  
>  struct cr_imgset;
>  extern int dump_net_ns(int ns_id);
> -extern int prepare_net_ns(int pid);
> +extern int prepare_net_namespaces(void);
> +extern void fini_net_namespaces(void);
>  extern int netns_keep_nsfd(void);
>  
> +struct pstree_item;
> +extern int restore_task_net_ns(struct pstree_item *current);
> +
>  struct veth_pair {
>  	struct list_head node;
>  	char *inside;
> diff --git a/criu/namespaces.c b/criu/namespaces.c
> index f655c16..64175f2 100644
> --- a/criu/namespaces.c
> +++ b/criu/namespaces.c
> @@ -1660,9 +1660,6 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
>  	 * tree (i.e. -- mnt_ns restoring)
>  	 */
>  
> -	id = ns_per_id ? item->ids->net_ns_id : pid;
> -	if ((clone_flags & CLONE_NEWNET) && prepare_net_ns(id))
> -		return -1;
>  	id = ns_per_id ? item->ids->uts_ns_id : pid;
>  	if ((clone_flags & CLONE_NEWUTS) && prepare_utsns(id))
>  		return -1;
> @@ -1670,6 +1667,9 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
>  	if ((clone_flags & CLONE_NEWIPC) && prepare_ipc_ns(id))
>  		return -1;
>  
> +	if (prepare_net_namespaces())
> +		return -1;
> +
>  	/*
>  	 * This one is special -- there can be several mount
>  	 * namespaces and prepare_mnt_ns handles them itself.
> diff --git a/criu/net.c b/criu/net.c
> index 7dadc8d..a7e0a6e 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -1670,7 +1670,7 @@ int dump_net_ns(int ns_id)
>  	return ret;
>  }
>  
> -int prepare_net_ns(int pid)
> +static int prepare_net_ns(int pid)
>  {
>  	int ret = 0;
>  	NetnsEntry *netns = NULL;
> @@ -1702,6 +1702,108 @@ int prepare_net_ns(int pid)
>  	return ret;
>  }
>  
> +static int open_net_ns(struct ns_id *nsid, struct rst_info *rst)
> +{
> +	int fd, tfd;
> +
> +	/* Pin one with a file descriptor */
> +	fd = open_proc(PROC_SELF, "ns/net");
> +	if (fd < 0)
> +		return -1;
> +	tfd = reopen_as_unused_fd(fd, rst);
> +	if (tfd < 0) {
> +		close(fd);
> +		return -1;
> +	}
> +	nsid->net.ns_fd = tfd;
> +
> +	return 0;
> +}
> +
> +int prepare_net_namespaces()
> +{
> +	struct ns_id *nsid;
> +
> +	if (!(root_ns_mask & CLONE_NEWNET))
> +		return 0;
> +
> +	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> +		if (nsid->nd != &net_ns_desc)
> +			continue;
> +
> +		if (unshare(CLONE_NEWNET)) {

You create net namespaces from criu root task in NS_CRIU user_ns,
which is wrong in case of (root_ns_mask & CLONE_NEWUSER) != 0.

To avoid losing the NS_ROOT user_ns in net_ns, you may do the unshare()s
in a child task. Create the child using CLONE_FILES, and you'll
see the same descriptors in the criu root task.

> +			pr_perror("Unable to create a new netns");
> +			goto err;
> +		}
> +
> +		if (prepare_net_ns(nsid->id))
> +			goto err;
> +
> +		if (open_net_ns(nsid, rsti(root_item)))
> +			goto err;
> +	}
> +
> +	return 0;
> +err:
> +	return -1;
> +}
> +
> +void fini_net_namespaces()
> +{
> +	struct ns_id *nsid;
> +
> +	if (!(root_ns_mask & CLONE_NEWNET))
> +		return;
> +
> +	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> +		if (nsid->nd != &net_ns_desc)
> +			continue;
> +		close_safe(&nsid->net.ns_fd);
> +	}
> +}
> +
> +static int do_restore_task_net_ns(struct ns_id *nsid, struct pstree_item *current)
> +{
> +	int fd;
> +
> +	if (!(root_ns_mask & CLONE_NEWNET))
> +		return 0;
> +
> +	fd = open_proc(root_item->pid->ns[0].virt, "fd/%d", nsid->net.ns_fd);
> +	if (fd < 0)
> +		return -1;
> +
> +	if (setns(fd, CLONE_NEWNET)) {
> +		pr_perror("Can't restore netns");
> +		close(fd);
> +		return -1;
> +	}
> +	close(fd);
> +
> +	return 0;
> +}
> +
> +int restore_task_net_ns(struct pstree_item *current)
> +{
> +	if (current->ids && current->ids->has_net_ns_id) {
> +		unsigned int id = current->ids->net_ns_id;
> +		struct ns_id *nsid;
> +
> +		nsid = lookup_ns_by_id(id, &net_ns_desc);
> +		if (nsid == NULL) {
> +			pr_err("Can't find mount namespace %d\n", id);
> +			return -1;
> +		}
> +
> +		BUG_ON(nsid->type == NS_CRIU);
> +
> +		if (do_restore_task_net_ns(nsid, current))
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
>  int netns_keep_nsfd(void)
>  {
>  	int ns_fd, ret;
> diff --git a/criu/pstree.c b/criu/pstree.c
> index 833b3d0..bce7b72 100644
> --- a/criu/pstree.c
> +++ b/criu/pstree.c
> @@ -14,6 +14,8 @@
>  #include "mount.h"
>  #include "dump.h"
>  #include "util.h"
> +#include "net.h"
> +
>  #include "protobuf.h"
>  #include "images/pstree.pb-c.h"
>  #include "crtools.h"
> @@ -472,6 +474,8 @@ static int read_pstree_ids(struct pstree_item *pi)
>  	if (pi->ids->has_mnt_ns_id) {
>  		if (rst_add_ns_id(pi->ids->mnt_ns_id, pi, &mnt_ns_desc))
>  			return -1;
> +		if (rst_add_ns_id(pi->ids->net_ns_id, pi, &net_ns_desc))
> +			return -1;
>  	}
>  
>  	return 0;
>
Andrey Vagin Feb. 3, 2017, 11:05 p.m.
On Fri, Feb 03, 2017 at 05:29:23PM +0300, Kirill Tkhai wrote:
> 
> 
> On 02.02.2017 03:04, Andrei Vagin wrote:
> > From: Andrei Vagin <avagin@virtuozzo.com>
> > 
> > Restore all network namespaces from the root task and then set
> > a proper namespace for each task after restoring sockets, because
> > we need to switch network namespaces to restore sockets.
> > 
> > Each socket has to be created in a proper network namespace.
> > 
> > Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> > ---
> >  criu/cr-restore.c         |  21 ++++++----
> >  criu/include/namespaces.h |   4 +-
> >  criu/include/net.h        |   6 ++-
> >  criu/namespaces.c         |   6 +--
> >  criu/net.c                | 104 +++++++++++++++++++++++++++++++++++++++++++++-
> >  criu/pstree.c             |   4 ++
> >  6 files changed, 131 insertions(+), 14 deletions(-)
> > 
> > diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> > index 681655d..288add3 100644
> > --- a/criu/cr-restore.c
> > +++ b/criu/cr-restore.c
> > @@ -713,6 +713,13 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
> >  	if (prepare_vmas(current, ta))
> >  		return -1;
> >  
> > +	/*
> > +	 * Sockets have to be restored in their network namespaces,
> > +	 * so a task namespace has to be restored after sockets.
> > +	 */
> > +	if (restore_task_net_ns(current))
> > +		return -1;
> > +
> >  	if (setup_uffd(pid, ta))
> >  		return -1;
> >  
> > @@ -1388,14 +1395,6 @@ static int restore_task_with_children(void *_arg)
> >  	if (ret < 0)
> >  		goto err;
> >  
> > -	if (ca->clone_flags & CLONE_NEWNET) {
> > -		ret = unshare(CLONE_NEWNET);
> > -		if (ret) {
> > -			pr_perror("Can't unshare net-namespace");
> > -			goto err;
> > -		}
> > -	}
> > -
> >  	if (!(ca->clone_flags & CLONE_FILES)) {
> >  		ret = close_old_fds();
> >  		if (ret)
> > @@ -2965,6 +2964,12 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
> >  	if (rst_prep_creds(pid, core, &creds_pos))
> >  		goto err_nv;
> >  
> > +	if (current->parent == NULL) {
> > +		/* Wait when all tasks restored all files */
> > +		restore_wait_other_tasks();
> > +		fini_net_namespaces();
> > +	}
> > +
> >  	/*
> >  	 * We're about to search for free VM area and inject the restorer blob
> >  	 * into it. No irrelevant mmaps/mremaps beyond this point, otherwise
> > diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> > index 18eafb2..522c098 100644
> > --- a/criu/include/namespaces.h
> > +++ b/criu/include/namespaces.h
> > @@ -36,7 +36,8 @@
> >  #define CLONE_ALLNS	(CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP)
> >  
> >  /* Nested namespaces are supported only for these types */
> > -#define CLONE_SUBNS	(CLONE_NEWNS)
> > +#define CLONE_SUBNS	(CLONE_NEWNS | CLONE_NEWNET)
> > +
> >  #define EXTRA_SIZE	20
> >  
> >  struct ns_desc {
> > @@ -95,6 +96,7 @@ struct ns_id {
> >  		} mnt;
> >  
> >  		struct {
> > +			int ns_fd;	/* a file handle for the namespace */
> >  			int nlsk;	/* for sockets collection */
> >  			int seqsk;	/* to talk to parasite daemons */
> >  		} net;
> > diff --git a/criu/include/net.h b/criu/include/net.h
> > index deac65f..49eca36 100644
> > --- a/criu/include/net.h
> > +++ b/criu/include/net.h
> > @@ -12,9 +12,13 @@
> >  
> >  struct cr_imgset;
> >  extern int dump_net_ns(int ns_id);
> > -extern int prepare_net_ns(int pid);
> > +extern int prepare_net_namespaces(void);
> > +extern void fini_net_namespaces(void);
> >  extern int netns_keep_nsfd(void);
> >  
> > +struct pstree_item;
> > +extern int restore_task_net_ns(struct pstree_item *current);
> > +
> >  struct veth_pair {
> >  	struct list_head node;
> >  	char *inside;
> > diff --git a/criu/namespaces.c b/criu/namespaces.c
> > index f655c16..64175f2 100644
> > --- a/criu/namespaces.c
> > +++ b/criu/namespaces.c
> > @@ -1660,9 +1660,6 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
> >  	 * tree (i.e. -- mnt_ns restoring)
> >  	 */
> >  
> > -	id = ns_per_id ? item->ids->net_ns_id : pid;
> > -	if ((clone_flags & CLONE_NEWNET) && prepare_net_ns(id))
> > -		return -1;
> >  	id = ns_per_id ? item->ids->uts_ns_id : pid;
> >  	if ((clone_flags & CLONE_NEWUTS) && prepare_utsns(id))
> >  		return -1;
> > @@ -1670,6 +1667,9 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
> >  	if ((clone_flags & CLONE_NEWIPC) && prepare_ipc_ns(id))
> >  		return -1;
> >  
> > +	if (prepare_net_namespaces())
> > +		return -1;
> > +
> >  	/*
> >  	 * This one is special -- there can be several mount
> >  	 * namespaces and prepare_mnt_ns handles them itself.
> > diff --git a/criu/net.c b/criu/net.c
> > index 7dadc8d..a7e0a6e 100644
> > --- a/criu/net.c
> > +++ b/criu/net.c
> > @@ -1670,7 +1670,7 @@ int dump_net_ns(int ns_id)
> >  	return ret;
> >  }
> >  
> > -int prepare_net_ns(int pid)
> > +static int prepare_net_ns(int pid)
> >  {
> >  	int ret = 0;
> >  	NetnsEntry *netns = NULL;
> > @@ -1702,6 +1702,108 @@ int prepare_net_ns(int pid)
> >  	return ret;
> >  }
> >  
> > +static int open_net_ns(struct ns_id *nsid, struct rst_info *rst)
> > +{
> > +	int fd, tfd;
> > +
> > +	/* Pin one with a file descriptor */
> > +	fd = open_proc(PROC_SELF, "ns/net");
> > +	if (fd < 0)
> > +		return -1;
> > +	tfd = reopen_as_unused_fd(fd, rst);
> > +	if (tfd < 0) {
> > +		close(fd);
> > +		return -1;
> > +	}
> > +	nsid->net.ns_fd = tfd;
> > +
> > +	return 0;
> > +}
> > +
> > +int prepare_net_namespaces()
> > +{
> > +	struct ns_id *nsid;
> > +
> > +	if (!(root_ns_mask & CLONE_NEWNET))
> > +		return 0;
> > +
> > +	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> > +		if (nsid->nd != &net_ns_desc)
> > +			continue;
> > +
> > +		if (unshare(CLONE_NEWNET)) {
> 
> You create net namespaces from criu root task in NS_CRIU user_ns,

Currently prepare_net_namespaces() is called from the root task of a
restored tree, so it is called from NS_ROOT.

> which is wrong in case of (root_ns_mask & CLONE_NEWUSER) != 0.

> 
> To do not loose NS_ROOT user_ns in net_ns, you may do unshare()s
> in a child task. Create the child using CLONE_FILES, and you'll
> see the same descriptors in criu root task.
> 
> > +			pr_perror("Unable to create a new netns");
> > +			goto err;
> > +		}
> > +
> > +		if (prepare_net_ns(nsid->id))
> > +			goto err;
> > +
> > +		if (open_net_ns(nsid, rsti(root_item)))
> > +			goto err;
> > +	}
> > +
> > +	return 0;
> > +err:
> > +	return -1;
> > +}
> > +
> > +void fini_net_namespaces()
> > +{
> > +	struct ns_id *nsid;
> > +
> > +	if (!(root_ns_mask & CLONE_NEWNET))
> > +		return;
> > +
> > +	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> > +		if (nsid->nd != &net_ns_desc)
> > +			continue;
> > +		close_safe(&nsid->net.ns_fd);
> > +	}
> > +}
> > +
> > +static int do_restore_task_net_ns(struct ns_id *nsid, struct pstree_item *current)
> > +{
> > +	int fd;
> > +
> > +	if (!(root_ns_mask & CLONE_NEWNET))
> > +		return 0;
> > +
> > +	fd = open_proc(root_item->pid->ns[0].virt, "fd/%d", nsid->net.ns_fd);
> > +	if (fd < 0)
> > +		return -1;
> > +
> > +	if (setns(fd, CLONE_NEWNET)) {
> > +		pr_perror("Can't restore netns");
> > +		close(fd);
> > +		return -1;
> > +	}
> > +	close(fd);
> > +
> > +	return 0;
> > +}
> > +
> > +int restore_task_net_ns(struct pstree_item *current)
> > +{
> > +	if (current->ids && current->ids->has_net_ns_id) {
> > +		unsigned int id = current->ids->net_ns_id;
> > +		struct ns_id *nsid;
> > +
> > +		nsid = lookup_ns_by_id(id, &net_ns_desc);
> > +		if (nsid == NULL) {
> > +			pr_err("Can't find mount namespace %d\n", id);
> > +			return -1;
> > +		}
> > +
> > +		BUG_ON(nsid->type == NS_CRIU);
> > +
> > +		if (do_restore_task_net_ns(nsid, current))
> > +			return -1;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  int netns_keep_nsfd(void)
> >  {
> >  	int ns_fd, ret;
> > diff --git a/criu/pstree.c b/criu/pstree.c
> > index 833b3d0..bce7b72 100644
> > --- a/criu/pstree.c
> > +++ b/criu/pstree.c
> > @@ -14,6 +14,8 @@
> >  #include "mount.h"
> >  #include "dump.h"
> >  #include "util.h"
> > +#include "net.h"
> > +
> >  #include "protobuf.h"
> >  #include "images/pstree.pb-c.h"
> >  #include "crtools.h"
> > @@ -472,6 +474,8 @@ static int read_pstree_ids(struct pstree_item *pi)
> >  	if (pi->ids->has_mnt_ns_id) {
> >  		if (rst_add_ns_id(pi->ids->mnt_ns_id, pi, &mnt_ns_desc))
> >  			return -1;
> > +		if (rst_add_ns_id(pi->ids->net_ns_id, pi, &net_ns_desc))
> > +			return -1;
> >  	}
> >  
> >  	return 0;
> >
Andrey Vagin Feb. 4, 2017, 7:38 a.m.
On Fri, Feb 03, 2017 at 03:26:22PM +0300, Dmitry Safonov wrote:
> Doing UTS & IPC namespaces over your patches set, have found
> a minor thing:
> 
> 2017-02-02 3:04 GMT+03:00 Andrei Vagin <avagin@openvz.org>:
> > diff --git a/criu/pstree.c b/criu/pstree.c
> > index 833b3d0..bce7b72 100644
> > --- a/criu/pstree.c
> > +++ b/criu/pstree.c
> > @@ -14,6 +14,8 @@
> >  #include "mount.h"
> >  #include "dump.h"
> >  #include "util.h"
> > +#include "net.h"
> > +
> >  #include "protobuf.h"
> >  #include "images/pstree.pb-c.h"
> >  #include "crtools.h"
> > @@ -472,6 +474,8 @@ static int read_pstree_ids(struct pstree_item *pi)
> >         if (pi->ids->has_mnt_ns_id) {
> >                 if (rst_add_ns_id(pi->ids->mnt_ns_id, pi, &mnt_ns_desc))
> >                         return -1;
> > +               if (rst_add_ns_id(pi->ids->net_ns_id, pi, &net_ns_desc))
> > +                       return -1;
> 
> Shouldn't it be under?

Yes, it should. Thanks!

> if (pi->ids->has_net_ns_id)
> 
> -- 
>              Dmitry