Adding checkpoint/restore support to podman question

Submitted by Adrian Reber on July 20, 2018, 6:31 p.m.

Details

Message ID 20180720183116.GG24619@lisas.de
State New
Series "Adding checkpoint/restore support to podman question"
Headers show

Commit Message

Adrian Reber July 20, 2018, 6:31 p.m.
On Wed, Jul 18, 2018 at 03:25:09PM +0200, Adrian Reber wrote:
> > On Fri, May 04, 2018 at 02:01:08PM -0700, Andrei Vagin wrote:
> > > On Thu, May 03, 2018 at 07:48:55PM +0200, Adrian Reber wrote:
> > > > If you could implement the CRIU part that would be great. I have not
> > > > started yet but I would then do the necessary changes in runc (and
> > > > higher).
> > > 
> > > Here is a criu part:
> > > https://github.com/avagin/criu/tree/netns_ext
> > > 
> > > Let me know if you will have any questions or comments.

As your patch does not apply anymore (I guess due to the removal of
nested namespaces) I tried to port your patch to the current criu-dev
branch. And it almost works. I am storing the ext_key now in
'netns_entry', but the problem is that that image file is read to late.

So at the point the network namespace is restored the necessary
information is not yet read from the image file and the network
namespace is never restored as described via inherit_fd.

Please have a look at my attached patch and maybe you have an idea how
to correctly implement it. Maybe you could just let me know which image
file would be best suited for the ext_key of the network namespace and
how to make sure it is available at restore time. Thanks.


		Adrian

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 807a582..0e5b064 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -1674,7 +1674,11 @@  static int restore_task_with_children(void *_arg)
 		 * ACT_SETUP_NS scripts, so the root netns has to be created here
 		 */
 		if (root_ns_mask & CLONE_NEWNET) {
-			ret = unshare(CLONE_NEWNET);
+			struct ns_id *ns = net_get_root_ns();
+			if (ns->ext_key)
+				ret = net_set_ext(ns);
+			else
+				ret = unshare(CLONE_NEWNET);
 			if (ret) {
 				pr_perror("Can't unshare net-namespace");
 				goto err;
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index c0ad0f4..5fe8038 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -91,6 +91,7 @@  struct ns_id {
 	struct ns_desc *nd;
 	struct ns_id *next;
 	enum ns_type type;
+	char *ext_key;
 
 	/*
 	 * For mount namespaces on restore -- indicates that
diff --git a/criu/include/net.h b/criu/include/net.h
index 1f6be06..e9419a9 100644
--- a/criu/include/net.h
+++ b/criu/include/net.h
@@ -50,5 +50,6 @@  extern int net_get_nsid(int rtsk, int fd, int *nsid);
 extern struct ns_id *net_get_root_ns();
 extern int kerndat_nsid(void);
 extern void check_has_netns_ioc(int fd, bool *kdat_val, const char *name);
+extern int net_set_ext(struct ns_id *ns);
 
 #endif /* __CR_NET_H__ */
diff --git a/criu/net.c b/criu/net.c
index cf3ef5c..b7a4fcc 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -1827,6 +1827,7 @@  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	char all_stable_secret[MAX_STR_CONF_LEN + 1] = {};
 	NetnsId	*ids;
 	struct netns_id *p;
+	char id[64], *val;
 
 	i = 0;
 	list_for_each_entry(p, &ns->net.ids, node)
@@ -1840,6 +1841,15 @@  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	if (!buf)
 		goto out;
 
+	snprintf(id, sizeof(id), "net[%u]", ns->kid);
+	val = external_lookup_by_key(id);
+	if (!IS_ERR_OR_NULL(val)) {
+		pr_debug("The %s netns is external\n", id);
+		ns->ext_key = val;
+	}
+
+	netns.ext_key = ns->ext_key;
+
 	netns.nsids = xptr_pull_s(&buf, i * sizeof(NetnsId*));
 	ids = xptr_pull_s(&buf, i * sizeof(NetnsId));
 	i = 0;
@@ -2178,6 +2188,22 @@  static int dump_netns_ids(int rtsk, struct ns_id *ns)
 			(void *)&arg);
 }
 
+int net_set_ext(struct ns_id *ns)
+{
+	int fd, ret;
+
+	fd = inherit_fd_lookup_id(ns->ext_key);
+	if (fd < 0) {
+		pr_err("Unable to find an external netns: %s\n", ns->ext_key);
+		return -1;
+	}
+
+	ret = switch_ns_by_fd(fd, &net_ns_desc, NULL);
+	close(fd);
+
+	return ret;
+}
+
 int dump_net_ns(struct ns_id *ns)
 {
 	struct cr_imgset *fds;
@@ -2188,7 +2214,7 @@  int dump_net_ns(struct ns_id *ns)
 		return -1;
 
 	ret = mount_ns_sysfs();
-	if (!(opts.empty_ns & CLONE_NEWNET)) {
+	if (!(opts.empty_ns & CLONE_NEWNET) && !ns->ext_key) {
 		int sk;
 
 		sk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
@@ -2285,7 +2311,7 @@  static int prepare_net_ns_first_stage(struct ns_id *ns)
 {
 	int ret = 0;
 
-	if (opts.empty_ns & CLONE_NEWNET)
+	if (ns->ext_key || opts.empty_ns & CLONE_NEWNET)
 		return 0;
 
 	ret = restore_netns_conf(ns);
@@ -2301,7 +2327,7 @@  static int prepare_net_ns_second_stage(struct ns_id *ns)
 {
 	int ret = 0, nsid = ns->id;
 
-	if (!(opts.empty_ns & CLONE_NEWNET)) {
+	if (!(opts.empty_ns & CLONE_NEWNET) && !ns->ext_key) {
 		if (ns->net.netns)
 			netns_entry__free_unpacked(ns->net.netns, NULL);
 
@@ -2349,7 +2375,13 @@  static int open_net_ns(struct ns_id *nsid)
 
 static int do_create_net_ns(struct ns_id *ns)
 {
-	if (unshare(CLONE_NEWNET)) {
+	int ret;
+
+	if (ns->ext_key)
+		ret = net_set_ext(ns);
+	else
+		ret = unshare(CLONE_NEWNET);
+	if (ret) {
 		pr_perror("Unable to create a new netns");
 		return -1;
 	}
@@ -2378,6 +2410,9 @@  static int __prepare_net_namespaces(void *unused)
 		if (nsid->type == NS_ROOT) {
 			nsid->net.ns_fd = root_ns;
 		} else {
+			pr_debug("nsid->net.netns %p\n", nsid->net.netns);
+			if (nsid->net.netns && nsid->net.netns->ext_key)
+				nsid->ext_key = xstrdup(nsid->net.netns->ext_key);
 			if (do_create_net_ns(nsid))
 				goto err;
 		}
diff --git a/images/netdev.proto b/images/netdev.proto
index b4b64d2..28b162a 100644
--- a/images/netdev.proto
+++ b/images/netdev.proto
@@ -70,4 +70,6 @@  message netns_entry {
 	repeated sysctl_entry all_conf6	= 6;
 
 	repeated netns_id nsids		= 7;
+
+	optional string	ext_key		= 8;
 }

Comments

Andrei Vagin July 21, 2018, 4:30 a.m.
On Fri, Jul 20, 2018 at 08:31:16PM +0200, Adrian Reber wrote:
> On Wed, Jul 18, 2018 at 03:25:09PM +0200, Adrian Reber wrote:
> > > On Fri, May 04, 2018 at 02:01:08PM -0700, Andrei Vagin wrote:
> > > > On Thu, May 03, 2018 at 07:48:55PM +0200, Adrian Reber wrote:
> > > > > If you could implement the CRIU part that would be great. I have not
> > > > > started yet but I would then do the necessary changes in runc (and
> > > > > higher).
> > > > 
> > > > Here is a criu part:
> > > > https://github.com/avagin/criu/tree/netns_ext
> > > > 
> > > > Let me know if you will have any questions or comments.
> 
> As your patch does not apply anymore (I guess due to the removal of
> nested namespaces) I tried to port your patch to the current criu-dev
> branch. And it almost works. I am storing the ext_key now in
> 'netns_entry', but the problem is that that image file is read to late.
> 
> So at the point the network namespace is restored the necessary
> information is not yet read from the image file and the network
> namespace is never restored as described via inherit_fd.
> 
> Please have a look at my attached patch and maybe you have an idea how
> to correctly implement it. Maybe you could just let me know which image
> file would be best suited for the ext_key of the network namespace and
> how to make sure it is available at restore time. Thanks.
>

Hi Adrian,

Could you try out this branch
https://github.com/avagin/criu/tree/netns_ext2

> 
> 		Adrian

> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 807a582..0e5b064 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -1674,7 +1674,11 @@ static int restore_task_with_children(void *_arg)
>  		 * ACT_SETUP_NS scripts, so the root netns has to be created here
>  		 */
>  		if (root_ns_mask & CLONE_NEWNET) {
> -			ret = unshare(CLONE_NEWNET);
> +			struct ns_id *ns = net_get_root_ns();
> +			if (ns->ext_key)
> +				ret = net_set_ext(ns);
> +			else
> +				ret = unshare(CLONE_NEWNET);
>  			if (ret) {
>  				pr_perror("Can't unshare net-namespace");
>  				goto err;
> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> index c0ad0f4..5fe8038 100644
> --- a/criu/include/namespaces.h
> +++ b/criu/include/namespaces.h
> @@ -91,6 +91,7 @@ struct ns_id {
>  	struct ns_desc *nd;
>  	struct ns_id *next;
>  	enum ns_type type;
> +	char *ext_key;
>  
>  	/*
>  	 * For mount namespaces on restore -- indicates that
> diff --git a/criu/include/net.h b/criu/include/net.h
> index 1f6be06..e9419a9 100644
> --- a/criu/include/net.h
> +++ b/criu/include/net.h
> @@ -50,5 +50,6 @@ extern int net_get_nsid(int rtsk, int fd, int *nsid);
>  extern struct ns_id *net_get_root_ns();
>  extern int kerndat_nsid(void);
>  extern void check_has_netns_ioc(int fd, bool *kdat_val, const char *name);
> +extern int net_set_ext(struct ns_id *ns);
>  
>  #endif /* __CR_NET_H__ */
> diff --git a/criu/net.c b/criu/net.c
> index cf3ef5c..b7a4fcc 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -1827,6 +1827,7 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>  	char all_stable_secret[MAX_STR_CONF_LEN + 1] = {};
>  	NetnsId	*ids;
>  	struct netns_id *p;
> +	char id[64], *val;
>  
>  	i = 0;
>  	list_for_each_entry(p, &ns->net.ids, node)
> @@ -1840,6 +1841,15 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>  	if (!buf)
>  		goto out;
>  
> +	snprintf(id, sizeof(id), "net[%u]", ns->kid);
> +	val = external_lookup_by_key(id);
> +	if (!IS_ERR_OR_NULL(val)) {
> +		pr_debug("The %s netns is external\n", id);
> +		ns->ext_key = val;
> +	}
> +
> +	netns.ext_key = ns->ext_key;
> +
>  	netns.nsids = xptr_pull_s(&buf, i * sizeof(NetnsId*));
>  	ids = xptr_pull_s(&buf, i * sizeof(NetnsId));
>  	i = 0;
> @@ -2178,6 +2188,22 @@ static int dump_netns_ids(int rtsk, struct ns_id *ns)
>  			(void *)&arg);
>  }
>  
> +int net_set_ext(struct ns_id *ns)
> +{
> +	int fd, ret;
> +
> +	fd = inherit_fd_lookup_id(ns->ext_key);
> +	if (fd < 0) {
> +		pr_err("Unable to find an external netns: %s\n", ns->ext_key);
> +		return -1;
> +	}
> +
> +	ret = switch_ns_by_fd(fd, &net_ns_desc, NULL);
> +	close(fd);
> +
> +	return ret;
> +}
> +
>  int dump_net_ns(struct ns_id *ns)
>  {
>  	struct cr_imgset *fds;
> @@ -2188,7 +2214,7 @@ int dump_net_ns(struct ns_id *ns)
>  		return -1;
>  
>  	ret = mount_ns_sysfs();
> -	if (!(opts.empty_ns & CLONE_NEWNET)) {
> +	if (!(opts.empty_ns & CLONE_NEWNET) && !ns->ext_key) {
>  		int sk;
>  
>  		sk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> @@ -2285,7 +2311,7 @@ static int prepare_net_ns_first_stage(struct ns_id *ns)
>  {
>  	int ret = 0;
>  
> -	if (opts.empty_ns & CLONE_NEWNET)
> +	if (ns->ext_key || opts.empty_ns & CLONE_NEWNET)
>  		return 0;
>  
>  	ret = restore_netns_conf(ns);
> @@ -2301,7 +2327,7 @@ static int prepare_net_ns_second_stage(struct ns_id *ns)
>  {
>  	int ret = 0, nsid = ns->id;
>  
> -	if (!(opts.empty_ns & CLONE_NEWNET)) {
> +	if (!(opts.empty_ns & CLONE_NEWNET) && !ns->ext_key) {
>  		if (ns->net.netns)
>  			netns_entry__free_unpacked(ns->net.netns, NULL);
>  
> @@ -2349,7 +2375,13 @@ static int open_net_ns(struct ns_id *nsid)
>  
>  static int do_create_net_ns(struct ns_id *ns)
>  {
> -	if (unshare(CLONE_NEWNET)) {
> +	int ret;
> +
> +	if (ns->ext_key)
> +		ret = net_set_ext(ns);
> +	else
> +		ret = unshare(CLONE_NEWNET);
> +	if (ret) {
>  		pr_perror("Unable to create a new netns");
>  		return -1;
>  	}
> @@ -2378,6 +2410,9 @@ static int __prepare_net_namespaces(void *unused)
>  		if (nsid->type == NS_ROOT) {
>  			nsid->net.ns_fd = root_ns;
>  		} else {
> +			pr_debug("nsid->net.netns %p\n", nsid->net.netns);
> +			if (nsid->net.netns && nsid->net.netns->ext_key)
> +				nsid->ext_key = xstrdup(nsid->net.netns->ext_key);
>  			if (do_create_net_ns(nsid))
>  				goto err;
>  		}
> diff --git a/images/netdev.proto b/images/netdev.proto
> index b4b64d2..28b162a 100644
> --- a/images/netdev.proto
> +++ b/images/netdev.proto
> @@ -70,4 +70,6 @@ message netns_entry {
>  	repeated sysctl_entry all_conf6	= 6;
>  
>  	repeated netns_id nsids		= 7;
> +
> +	optional string	ext_key		= 8;
>  }
Adrian Reber July 21, 2018, 9:38 a.m.
On Fri, Jul 20, 2018 at 09:30:17PM -0700, Andrei Vagin wrote:
> On Fri, Jul 20, 2018 at 08:31:16PM +0200, Adrian Reber wrote:
> > On Wed, Jul 18, 2018 at 03:25:09PM +0200, Adrian Reber wrote:
> > > > On Fri, May 04, 2018 at 02:01:08PM -0700, Andrei Vagin wrote:
> > > > > On Thu, May 03, 2018 at 07:48:55PM +0200, Adrian Reber wrote:
> > > > > > If you could implement the CRIU part that would be great. I have not
> > > > > > started yet but I would then do the necessary changes in runc (and
> > > > > > higher).
> > > > > 
> > > > > Here is a criu part:
> > > > > https://github.com/avagin/criu/tree/netns_ext
> > > > > 
> > > > > Let me know if you will have any questions or comments.
> > 
> > As your patch does not apply anymore (I guess due to the removal of
> > nested namespaces) I tried to port your patch to the current criu-dev
> > branch. And it almost works. I am storing the ext_key now in
> > 'netns_entry', but the problem is that that image file is read to late.
> > 
> > So at the point the network namespace is restored the necessary
> > information is not yet read from the image file and the network
> > namespace is never restored as described via inherit_fd.
> > 
> > Please have a look at my attached patch and maybe you have an idea how
> > to correctly implement it. Maybe you could just let me know which image
> > file would be best suited for the ext_key of the network namespace and
> > how to make sure it is available at restore time. Thanks.
> >
> 
> Hi Adrian,
> 
> Could you try out this branch
> https://github.com/avagin/criu/tree/netns_ext2

Perfect, that works. Thanks a lot. I have three additional patches on
top of it for RPC. My runc integration patches and my runc test cases
are also successful. This gets my

Acked-by: Adrian Reber <areber@redhat.com>

Do you want to merge this patch and I will submit my patches on top of
it, or do you want to merge the whole series, your patch plus my runc
RPC patches on top of it?

		Adrian

> > diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> > index 807a582..0e5b064 100644
> > --- a/criu/cr-restore.c
> > +++ b/criu/cr-restore.c
> > @@ -1674,7 +1674,11 @@ static int restore_task_with_children(void *_arg)
> >  		 * ACT_SETUP_NS scripts, so the root netns has to be created here
> >  		 */
> >  		if (root_ns_mask & CLONE_NEWNET) {
> > -			ret = unshare(CLONE_NEWNET);
> > +			struct ns_id *ns = net_get_root_ns();
> > +			if (ns->ext_key)
> > +				ret = net_set_ext(ns);
> > +			else
> > +				ret = unshare(CLONE_NEWNET);
> >  			if (ret) {
> >  				pr_perror("Can't unshare net-namespace");
> >  				goto err;
> > diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> > index c0ad0f4..5fe8038 100644
> > --- a/criu/include/namespaces.h
> > +++ b/criu/include/namespaces.h
> > @@ -91,6 +91,7 @@ struct ns_id {
> >  	struct ns_desc *nd;
> >  	struct ns_id *next;
> >  	enum ns_type type;
> > +	char *ext_key;
> >  
> >  	/*
> >  	 * For mount namespaces on restore -- indicates that
> > diff --git a/criu/include/net.h b/criu/include/net.h
> > index 1f6be06..e9419a9 100644
> > --- a/criu/include/net.h
> > +++ b/criu/include/net.h
> > @@ -50,5 +50,6 @@ extern int net_get_nsid(int rtsk, int fd, int *nsid);
> >  extern struct ns_id *net_get_root_ns();
> >  extern int kerndat_nsid(void);
> >  extern void check_has_netns_ioc(int fd, bool *kdat_val, const char *name);
> > +extern int net_set_ext(struct ns_id *ns);
> >  
> >  #endif /* __CR_NET_H__ */
> > diff --git a/criu/net.c b/criu/net.c
> > index cf3ef5c..b7a4fcc 100644
> > --- a/criu/net.c
> > +++ b/criu/net.c
> > @@ -1827,6 +1827,7 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
> >  	char all_stable_secret[MAX_STR_CONF_LEN + 1] = {};
> >  	NetnsId	*ids;
> >  	struct netns_id *p;
> > +	char id[64], *val;
> >  
> >  	i = 0;
> >  	list_for_each_entry(p, &ns->net.ids, node)
> > @@ -1840,6 +1841,15 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
> >  	if (!buf)
> >  		goto out;
> >  
> > +	snprintf(id, sizeof(id), "net[%u]", ns->kid);
> > +	val = external_lookup_by_key(id);
> > +	if (!IS_ERR_OR_NULL(val)) {
> > +		pr_debug("The %s netns is external\n", id);
> > +		ns->ext_key = val;
> > +	}
> > +
> > +	netns.ext_key = ns->ext_key;
> > +
> >  	netns.nsids = xptr_pull_s(&buf, i * sizeof(NetnsId*));
> >  	ids = xptr_pull_s(&buf, i * sizeof(NetnsId));
> >  	i = 0;
> > @@ -2178,6 +2188,22 @@ static int dump_netns_ids(int rtsk, struct ns_id *ns)
> >  			(void *)&arg);
> >  }
> >  
> > +int net_set_ext(struct ns_id *ns)
> > +{
> > +	int fd, ret;
> > +
> > +	fd = inherit_fd_lookup_id(ns->ext_key);
> > +	if (fd < 0) {
> > +		pr_err("Unable to find an external netns: %s\n", ns->ext_key);
> > +		return -1;
> > +	}
> > +
> > +	ret = switch_ns_by_fd(fd, &net_ns_desc, NULL);
> > +	close(fd);
> > +
> > +	return ret;
> > +}
> > +
> >  int dump_net_ns(struct ns_id *ns)
> >  {
> >  	struct cr_imgset *fds;
> > @@ -2188,7 +2214,7 @@ int dump_net_ns(struct ns_id *ns)
> >  		return -1;
> >  
> >  	ret = mount_ns_sysfs();
> > -	if (!(opts.empty_ns & CLONE_NEWNET)) {
> > +	if (!(opts.empty_ns & CLONE_NEWNET) && !ns->ext_key) {
> >  		int sk;
> >  
> >  		sk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> > @@ -2285,7 +2311,7 @@ static int prepare_net_ns_first_stage(struct ns_id *ns)
> >  {
> >  	int ret = 0;
> >  
> > -	if (opts.empty_ns & CLONE_NEWNET)
> > +	if (ns->ext_key || opts.empty_ns & CLONE_NEWNET)
> >  		return 0;
> >  
> >  	ret = restore_netns_conf(ns);
> > @@ -2301,7 +2327,7 @@ static int prepare_net_ns_second_stage(struct ns_id *ns)
> >  {
> >  	int ret = 0, nsid = ns->id;
> >  
> > -	if (!(opts.empty_ns & CLONE_NEWNET)) {
> > +	if (!(opts.empty_ns & CLONE_NEWNET) && !ns->ext_key) {
> >  		if (ns->net.netns)
> >  			netns_entry__free_unpacked(ns->net.netns, NULL);
> >  
> > @@ -2349,7 +2375,13 @@ static int open_net_ns(struct ns_id *nsid)
> >  
> >  static int do_create_net_ns(struct ns_id *ns)
> >  {
> > -	if (unshare(CLONE_NEWNET)) {
> > +	int ret;
> > +
> > +	if (ns->ext_key)
> > +		ret = net_set_ext(ns);
> > +	else
> > +		ret = unshare(CLONE_NEWNET);
> > +	if (ret) {
> >  		pr_perror("Unable to create a new netns");
> >  		return -1;
> >  	}
> > @@ -2378,6 +2410,9 @@ static int __prepare_net_namespaces(void *unused)
> >  		if (nsid->type == NS_ROOT) {
> >  			nsid->net.ns_fd = root_ns;
> >  		} else {
> > +			pr_debug("nsid->net.netns %p\n", nsid->net.netns);
> > +			if (nsid->net.netns && nsid->net.netns->ext_key)
> > +				nsid->ext_key = xstrdup(nsid->net.netns->ext_key);
> >  			if (do_create_net_ns(nsid))
> >  				goto err;
> >  		}
> > diff --git a/images/netdev.proto b/images/netdev.proto
> > index b4b64d2..28b162a 100644
> > --- a/images/netdev.proto
> > +++ b/images/netdev.proto
> > @@ -70,4 +70,6 @@ message netns_entry {
> >  	repeated sysctl_entry all_conf6	= 6;
> >  
> >  	repeated netns_id nsids		= 7;
> > +
> > +	optional string	ext_key		= 8;
> >  }

		Adrian