[06/12] netns: dump and restore network namespace ID-s

Submitted by Andrei Vagin on Feb. 28, 2017, 11:53 p.m.

Details

Message ID 1488325988-28456-7-git-send-email-avagin@openvz.org
State New
Series "Dump and restore internal veth devices"
Headers show

Commit Message

Andrei Vagin Feb. 28, 2017, 11:53 p.m.
From: Andrei Vagin <avagin@virtuozzo.com>

In each network namespace we can set an id for another network namespace
to be able to address it in netlink messages.

For example, we can say that the peer of a veth device has to be created
in a network namespace with a specified id. If we request information about
a veth device, the kernel will report where its peer device lives.

A user is able to set these IDs, so we have to dump and restore them.

Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
---
 criu/include/namespaces.h |   8 ++
 criu/namespaces.c         |   7 ++
 criu/net.c                | 185 +++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 189 insertions(+), 11 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index 2a3dc08..5df7679 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -82,6 +82,12 @@  enum ns_type {
 	NS_OTHER,
 };
 
+struct netns_id {
+	unsigned		id;
+	unsigned		net_id;
+	struct list_head	node;
+};
+
 struct ns_id {
 	unsigned int kid;
 	unsigned int id;
@@ -112,8 +118,10 @@  struct ns_id {
 
 		struct {
 			int nsfd_id;	/* a namespace descriptor id in fdstore */
+			int ns_fd;	/* a namespace file descriptor */
 			int nlsk;	/* for sockets collection */
 			int seqsk;	/* to talk to parasite daemons */
+			struct list_head ids;
 		} net;
 		struct {
 			UsernsEntry *e;
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 3563c16..8e170aa 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -307,6 +307,9 @@  struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
 		nsid->ns_populated = false;
 		INIT_LIST_HEAD(&nsid->children);
 		INIT_LIST_HEAD(&nsid->siblings);
+
+		if (nd == &net_ns_desc)
+			INIT_LIST_HEAD(&nsid->net.ids);
 	}
 
 	return nsid;
@@ -432,6 +435,10 @@  static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
 	INIT_LIST_HEAD(&nsid->siblings);
 	nsid_add(nsid, nd, ns_next_id++, pid);
 
+	if (nd == &net_ns_desc) {
+		INIT_LIST_HEAD(&nsid->net.ids);
+	}
+
 found:
 	if (ns_ret)
 		*ns_ret = nsid;
diff --git a/criu/net.c b/criu/net.c
index a3f79df..e893c0c 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -640,6 +640,11 @@  static int dump_one_gre(struct ifinfomsg *ifi, char *kind,
 	return dump_unknown_device(ifi, kind, tb, ns, fds);
 }
 
+static int list_one_link(struct nlmsghdr *hdr, struct ns_id *ns, void *arg)
+{
+	return 0;
+}
+
 static int dump_one_link(struct nlmsghdr *hdr, struct ns_id *ns, void *arg)
 {
 	struct cr_imgset *fds = arg;
@@ -856,6 +861,26 @@  out:
 
 }
 
+static int list_links(int rtsk, void *args)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct rtgenmsg g;
+	} req;
+
+	pr_info("Dumping netns links\n");
+
+	memset(&req, 0, sizeof(req));
+	req.nlh.nlmsg_len = sizeof(req);
+	req.nlh.nlmsg_type = RTM_GETLINK;
+	req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
+	req.nlh.nlmsg_pid = 0;
+	req.nlh.nlmsg_seq = CR_NLMSG_SEQ;
+	req.g.rtgen_family = AF_PACKET;
+
+	return do_rtnl_req(rtsk, &req, sizeof(req), list_one_link, NULL, NULL, args);
+}
+
 static int dump_links(int rtsk, struct ns_id *ns, struct cr_imgset *fds)
 {
 	struct {
@@ -1370,6 +1395,24 @@  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	int size6 = ARRAY_SIZE(devconfs6);
 	char def_stable_secret[MAX_STR_CONF_LEN + 1] = {};
 	char all_stable_secret[MAX_STR_CONF_LEN + 1] = {};
+	NetnsId	*ids;
+	struct netns_id *p;
+
+	i = 0;
+	list_for_each_entry(p, &ns->net.ids, node)
+		i++;
+
+	netns.nsids = xmalloc(sizeof(NetnsId *) * i);
+	ids = xmalloc(sizeof(NetnsId) * i);
+	i = 0;
+	list_for_each_entry(p, &ns->net.ids, node) {
+		netns_id__init(&ids[i]);
+		ids[i].id = p->id;
+		ids[i].nsid = p->net_id;
+		netns.nsids[i] = ids + i;
+		i++;
+	}
+	netns.n_nsids = i;
 
 	netns.n_def_conf4 = size4;
 	netns.n_all_conf4 = size4;
@@ -1629,6 +1672,46 @@  static int mount_ns_sysfs(void)
 	return ns_sysfs_fd >= 0 ? 0 : -1;
 }
 
+struct net_id_arg {
+	struct ns_id *ns;
+	int sk;
+};
+
+static int collect_netns_id(struct ns_id *ns, void *oarg)
+{
+	struct net_id_arg *arg = oarg;
+	struct netns_id *netns_id;
+	int nsid = -1;
+
+	if (net_get_nsid(arg->sk, ns->ns_pid, &nsid))
+		return -1;
+
+	if (nsid == -1)
+		return 0;
+
+	netns_id = xmalloc(sizeof(*netns_id));
+	if (!netns_id)
+		return -1;
+
+	pr_debug("Fount the %d id for %d in %d\n", nsid, ns->id, arg->ns->id);
+	netns_id->id = ns->id;
+	netns_id->net_id = nsid;
+
+	list_add(&netns_id->node, &arg->ns->net.ids);
+
+	return 0;
+}
+
+static int dump_netns_ids(int rtsk, struct ns_id *ns)
+{
+	struct net_id_arg arg = {
+		.ns = ns,
+		.sk = rtsk,
+	};
+	return walk_namespaces(&net_ns_desc, collect_netns_id,
+			(void *)&arg);
+}
+
 int dump_net_ns(struct ns_id *ns)
 {
 	struct cr_imgset *fds;
@@ -1649,6 +1732,10 @@  int dump_net_ns(struct ns_id *ns)
 		}
 
 		if (!ret)
+			ret = list_links(sk, NULL);
+		if (!ret)
+			ret = dump_netns_ids(sk, ns);
+		if (!ret)
 			ret = dump_links(sk, ns, fds);
 
 		close(sk);
@@ -1676,6 +1763,45 @@  int dump_net_ns(struct ns_id *ns)
 	return ret;
 }
 
+static int net_set_nsid(int rtsk, int fd, int nsid);
+static int restore_netns_ids(struct ns_id *ns, NetnsEntry *netns)
+{
+	int i, sk, exit_code = -1;
+
+	sk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sk < 0) {
+		pr_perror("Can't open rtnl sock for net dump");
+		return -1;
+	}
+
+	for (i = 0; i < netns->n_nsids; i++) {
+		struct ns_id *tg_ns;
+		struct netns_id *id;
+
+		id = xmalloc(sizeof(*id));
+		if (!id)
+			goto out;
+		id->id = netns->nsids[i]->id;
+		id->net_id = netns->nsids[i]->nsid;
+		list_add(&id->node, &ns->net.ids);
+
+		tg_ns = lookup_ns_by_id(id->id, &net_ns_desc);
+		if (tg_ns == NULL) {
+			pr_err("Unknown namespace: %d\n", id->id);
+			goto out;
+		}
+
+		if (net_set_nsid(sk, tg_ns->net.ns_fd, id->net_id))
+			goto out;
+	}
+
+	exit_code = 0;
+out:
+	close(sk);
+
+	return exit_code;
+}
+
 static int prepare_net_ns(struct ns_id *ns)
 {
 	int ret = 0, nsid = ns->id;
@@ -1684,6 +1810,8 @@  static int prepare_net_ns(struct ns_id *ns)
 	if (!(opts.empty_ns & CLONE_NEWNET)) {
 		ret = restore_netns_conf(nsid, &netns);
 		if (!ret)
+			ret = restore_netns_ids(ns, netns);
+		if (!ret)
 			ret = restore_links(nsid, &netns);
 		if (netns)
 			netns_entry__free_unpacked(netns, NULL);
@@ -1703,25 +1831,27 @@  static int prepare_net_ns(struct ns_id *ns)
 	if (!ret)
 		ret = restore_nf_ct(nsid, CR_FD_NETNF_EXP);
 
+	if (!ret) {
+		ns->net.nsfd_id = fdstore_add(ns->net.ns_fd);
+		if (ns->net.nsfd_id < 0)
+			ret = -1;
+	}
+	close_safe(&ns->net.ns_fd);
+
+	ns->ns_populated = true;
+
 	return ret;
 }
 
 static int open_net_ns(struct ns_id *nsid)
 {
-	int fd, id;
+	int fd;
 
 	/* Pin one with a file descriptor */
 	fd = open_proc(PROC_SELF, "ns/net");
 	if (fd < 0)
 		return -1;
-
-	id = fdstore_add(fd);
-	close(fd);
-	if (id < 0) {
-		return -1;
-	}
-
-	nsid->net.nsfd_id = id;
+	nsid->net.ns_fd = fd;
 
 	return 0;
 }
@@ -1732,8 +1862,6 @@  static int do_create_net_ns(struct ns_id *ns)
 		pr_perror("Unable to create a new netns");
 		return -1;
 	}
-	if (prepare_net_ns(ns))
-		return -1;
 	if (open_net_ns(ns))
 		return -1;
 	return 0;
@@ -1790,6 +1918,17 @@  int prepare_net_namespaces()
 			goto err;
 	}
 
+	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
+		if (nsid->nd != &net_ns_desc)
+			continue;
+
+		if (switch_ns_by_fd(nsid->net.ns_fd, &net_ns_desc, NULL))
+			goto err;
+
+		if (prepare_net_ns(nsid))
+			goto err;
+	}
+
 	close_service_fd(NS_FD_OFF);
 
 	return 0;
@@ -2277,6 +2416,30 @@  static int nsid_cb(struct nlmsghdr *msg, struct ns_id *ns, void *arg)
 	return 0;
 }
 
+static int net_set_nsid(int rtsk, int fd, int nsid)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct rtgenmsg g;
+		char msg[128];
+	} req;
+
+	memset(&req, 0, sizeof(req));
+	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
+	req.nlh.nlmsg_type = RTM_NEWNSID;
+	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nlh.nlmsg_seq = CR_NLMSG_SEQ;
+	if (addattr_l(&req.nlh, sizeof(req), NETNSA_FD, &fd, sizeof(fd)))
+		return -1;
+	if (addattr_l(&req.nlh, sizeof(req), NETNSA_NSID, &nsid, sizeof(nsid)))
+		return -1;
+
+	if (do_rtnl_req(rtsk, &req, req.nlh.nlmsg_len, NULL, NULL, NULL, NULL) < 0)
+		return -1;
+
+	return 0;
+}
+
 int net_get_nsid(int rtsk, int pid, int *nsid)
 {
 	struct {

Comments

Pavel Emelianov March 13, 2017, 11:10 a.m.
On 03/01/2017 02:53 AM, Andrei Vagin wrote:
> From: Andrei Vagin <avagin@virtuozzo.com>
> 
> In each network namespace we can set an id for another network namespace
> to be able to address it in netlink messages.
> 
> For example, we can say that a peer of a veth devices has to be created
> in a network namespace with a specified id. If we request information about
> a veth device, a kernel will report where a peer device lives.
> 
> An user are able to set this ID-s, so we have to dump and restore them.
> 
> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> ---
>  criu/include/namespaces.h |   8 ++
>  criu/namespaces.c         |   7 ++
>  criu/net.c                | 185 +++++++++++++++++++++++++++++++++++++++++++---
>  3 files changed, 189 insertions(+), 11 deletions(-)
> 
> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> index 2a3dc08..5df7679 100644
> --- a/criu/include/namespaces.h
> +++ b/criu/include/namespaces.h
> @@ -82,6 +82,12 @@ enum ns_type {
>  	NS_OTHER,
>  };
>  
> +struct netns_id {
> +	unsigned		id;
> +	unsigned		net_id;
> +	struct list_head	node;
> +};
> +
>  struct ns_id {
>  	unsigned int kid;
>  	unsigned int id;
> @@ -112,8 +118,10 @@ struct ns_id {
>  
>  		struct {
>  			int nsfd_id;	/* a namespace descriptor id in fdstore */
> +			int ns_fd;	/* a namespace file descriptor */

Why do we need both?

>  			int nlsk;	/* for sockets collection */
>  			int seqsk;	/* to talk to parasite daemons */
> +			struct list_head ids;
>  		} net;
>  		struct {
>  			UsernsEntry *e;
> diff --git a/criu/namespaces.c b/criu/namespaces.c
> index 3563c16..8e170aa 100644
> --- a/criu/namespaces.c
> +++ b/criu/namespaces.c
> @@ -307,6 +307,9 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
>  		nsid->ns_populated = false;
>  		INIT_LIST_HEAD(&nsid->children);
>  		INIT_LIST_HEAD(&nsid->siblings);
> +
> +		if (nd == &net_ns_desc)
> +			INIT_LIST_HEAD(&nsid->net.ids);
>  	}
>  
>  	return nsid;
> @@ -432,6 +435,10 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
>  	INIT_LIST_HEAD(&nsid->siblings);
>  	nsid_add(nsid, nd, ns_next_id++, pid);
>  
> +	if (nd == &net_ns_desc) {
> +		INIT_LIST_HEAD(&nsid->net.ids);
> +	}
> +
>  found:
>  	if (ns_ret)
>  		*ns_ret = nsid;
> diff --git a/criu/net.c b/criu/net.c
> index a3f79df..e893c0c 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -640,6 +640,11 @@ static int dump_one_gre(struct ifinfomsg *ifi, char *kind,
>  	return dump_unknown_device(ifi, kind, tb, ns, fds);
>  }
>  
> +static int list_one_link(struct nlmsghdr *hdr, struct ns_id *ns, void *arg)
> +{
> +	return 0;
> +}
> +
>  static int dump_one_link(struct nlmsghdr *hdr, struct ns_id *ns, void *arg)
>  {
>  	struct cr_imgset *fds = arg;
> @@ -856,6 +861,26 @@ out:
>  
>  }
>  
> +static int list_links(int rtsk, void *args)
> +{
> +	struct {
> +		struct nlmsghdr nlh;
> +		struct rtgenmsg g;
> +	} req;
> +
> +	pr_info("Dumping netns links\n");
> +
> +	memset(&req, 0, sizeof(req));
> +	req.nlh.nlmsg_len = sizeof(req);
> +	req.nlh.nlmsg_type = RTM_GETLINK;
> +	req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
> +	req.nlh.nlmsg_pid = 0;
> +	req.nlh.nlmsg_seq = CR_NLMSG_SEQ;
> +	req.g.rtgen_family = AF_PACKET;
> +
> +	return do_rtnl_req(rtsk, &req, sizeof(req), list_one_link, NULL, NULL, args);
> +}

Why is this call with empty list_one_link() required at all?

> +
>  static int dump_links(int rtsk, struct ns_id *ns, struct cr_imgset *fds)
>  {
>  	struct {
> @@ -1370,6 +1395,24 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>  	int size6 = ARRAY_SIZE(devconfs6);
>  	char def_stable_secret[MAX_STR_CONF_LEN + 1] = {};
>  	char all_stable_secret[MAX_STR_CONF_LEN + 1] = {};
> +	NetnsId	*ids;
> +	struct netns_id *p;
> +
> +	i = 0;
> +	list_for_each_entry(p, &ns->net.ids, node)
> +		i++;

This list is filled by the walk_namespaces() call, one entry per callback,
so this is effectively constant.

> +
> +	netns.nsids = xmalloc(sizeof(NetnsId *) * i);
> +	ids = xmalloc(sizeof(NetnsId) * i);
> +	i = 0;
> +	list_for_each_entry(p, &ns->net.ids, node) {
> +		netns_id__init(&ids[i]);
> +		ids[i].id = p->id;
> +		ids[i].nsid = p->net_id;
> +		netns.nsids[i] = ids + i;
> +		i++;
> +	}
> +	netns.n_nsids = i;
>  
>  	netns.n_def_conf4 = size4;
>  	netns.n_all_conf4 = size4;
> @@ -1629,6 +1672,46 @@ static int mount_ns_sysfs(void)
>  	return ns_sysfs_fd >= 0 ? 0 : -1;
>  }
>  
> +struct net_id_arg {
> +	struct ns_id *ns;
> +	int sk;
> +};
> +
> +static int collect_netns_id(struct ns_id *ns, void *oarg)
> +{
> +	struct net_id_arg *arg = oarg;
> +	struct netns_id *netns_id;
> +	int nsid = -1;
> +
> +	if (net_get_nsid(arg->sk, ns->ns_pid, &nsid))
> +		return -1;
> +
> +	if (nsid == -1)
> +		return 0;
> +
> +	netns_id = xmalloc(sizeof(*netns_id));
> +	if (!netns_id)
> +		return -1;
> +
> +	pr_debug("Fount the %d id for %d in %d\n", nsid, ns->id, arg->ns->id);
> +	netns_id->id = ns->id;
> +	netns_id->net_id = nsid;
> +
> +	list_add(&netns_id->node, &arg->ns->net.ids);
> +
> +	return 0;
> +}
> +
> +static int dump_netns_ids(int rtsk, struct ns_id *ns)
> +{
> +	struct net_id_arg arg = {
> +		.ns = ns,
> +		.sk = rtsk,
> +	};
> +	return walk_namespaces(&net_ns_desc, collect_netns_id,
> +			(void *)&arg);
> +}
> +
>  int dump_net_ns(struct ns_id *ns)
>  {
>  	struct cr_imgset *fds;
> @@ -1649,6 +1732,10 @@ int dump_net_ns(struct ns_id *ns)
>  		}
>  
>  		if (!ret)
> +			ret = list_links(sk, NULL);
> +		if (!ret)
> +			ret = dump_netns_ids(sk, ns);
> +		if (!ret)
>  			ret = dump_links(sk, ns, fds);
>  
>  		close(sk);
> @@ -1676,6 +1763,45 @@ int dump_net_ns(struct ns_id *ns)
>  	return ret;
>  }
>  
> +static int net_set_nsid(int rtsk, int fd, int nsid);
> +static int restore_netns_ids(struct ns_id *ns, NetnsEntry *netns)
> +{
> +	int i, sk, exit_code = -1;
> +
> +	sk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> +	if (sk < 0) {
> +		pr_perror("Can't open rtnl sock for net dump");
> +		return -1;
> +	}
> +
> +	for (i = 0; i < netns->n_nsids; i++) {
> +		struct ns_id *tg_ns;
> +		struct netns_id *id;
> +
> +		id = xmalloc(sizeof(*id));
> +		if (!id)
> +			goto out;
> +		id->id = netns->nsids[i]->id;
> +		id->net_id = netns->nsids[i]->nsid;
> +		list_add(&id->node, &ns->net.ids);
> +
> +		tg_ns = lookup_ns_by_id(id->id, &net_ns_desc);
> +		if (tg_ns == NULL) {
> +			pr_err("Unknown namespace: %d\n", id->id);
> +			goto out;
> +		}
> +
> +		if (net_set_nsid(sk, tg_ns->net.ns_fd, id->net_id))
> +			goto out;
> +	}
> +
> +	exit_code = 0;
> +out:
> +	close(sk);
> +
> +	return exit_code;
> +}
> +
>  static int prepare_net_ns(struct ns_id *ns)
>  {
>  	int ret = 0, nsid = ns->id;
> @@ -1684,6 +1810,8 @@ static int prepare_net_ns(struct ns_id *ns)
>  	if (!(opts.empty_ns & CLONE_NEWNET)) {
>  		ret = restore_netns_conf(nsid, &netns);
>  		if (!ret)
> +			ret = restore_netns_ids(ns, netns);
> +		if (!ret)
>  			ret = restore_links(nsid, &netns);
>  		if (netns)
>  			netns_entry__free_unpacked(netns, NULL);
> @@ -1703,25 +1831,27 @@ static int prepare_net_ns(struct ns_id *ns)
>  	if (!ret)
>  		ret = restore_nf_ct(nsid, CR_FD_NETNF_EXP);
>  
> +	if (!ret) {
> +		ns->net.nsfd_id = fdstore_add(ns->net.ns_fd);
> +		if (ns->net.nsfd_id < 0)
> +			ret = -1;
> +	}
> +	close_safe(&ns->net.ns_fd);
> +
> +	ns->ns_populated = true;
> +
>  	return ret;
>  }
>  
>  static int open_net_ns(struct ns_id *nsid)
>  {
> -	int fd, id;
> +	int fd;
>  
>  	/* Pin one with a file descriptor */
>  	fd = open_proc(PROC_SELF, "ns/net");
>  	if (fd < 0)
>  		return -1;
> -
> -	id = fdstore_add(fd);
> -	close(fd);
> -	if (id < 0) {
> -		return -1;
> -	}
> -
> -	nsid->net.nsfd_id = id;
> +	nsid->net.ns_fd = fd;
>  
>  	return 0;
>  }
> @@ -1732,8 +1862,6 @@ static int do_create_net_ns(struct ns_id *ns)
>  		pr_perror("Unable to create a new netns");
>  		return -1;
>  	}
> -	if (prepare_net_ns(ns))
> -		return -1;
>  	if (open_net_ns(ns))
>  		return -1;
>  	return 0;
> @@ -1790,6 +1918,17 @@ int prepare_net_namespaces()
>  			goto err;
>  	}
>  
> +	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> +		if (nsid->nd != &net_ns_desc)
> +			continue;
> +
> +		if (switch_ns_by_fd(nsid->net.ns_fd, &net_ns_desc, NULL))
> +			goto err;
> +
> +		if (prepare_net_ns(nsid))
> +			goto err;
> +	}
> +
>  	close_service_fd(NS_FD_OFF);
>  
>  	return 0;
> @@ -2277,6 +2416,30 @@ static int nsid_cb(struct nlmsghdr *msg, struct ns_id *ns, void *arg)
>  	return 0;
>  }
>  
> +static int net_set_nsid(int rtsk, int fd, int nsid)
> +{
> +	struct {
> +		struct nlmsghdr nlh;
> +		struct rtgenmsg g;
> +		char msg[128];
> +	} req;
> +
> +	memset(&req, 0, sizeof(req));
> +	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
> +	req.nlh.nlmsg_type = RTM_NEWNSID;
> +	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
> +	req.nlh.nlmsg_seq = CR_NLMSG_SEQ;
> +	if (addattr_l(&req.nlh, sizeof(req), NETNSA_FD, &fd, sizeof(fd)))
> +		return -1;
> +	if (addattr_l(&req.nlh, sizeof(req), NETNSA_NSID, &nsid, sizeof(nsid)))
> +		return -1;
> +
> +	if (do_rtnl_req(rtsk, &req, req.nlh.nlmsg_len, NULL, NULL, NULL, NULL) < 0)
> +		return -1;
> +
> +	return 0;
> +}
> +
>  int net_get_nsid(int rtsk, int pid, int *nsid)
>  {
>  	struct {
>
Andrey Vagin March 20, 2017, 9:01 p.m.
On Mon, Mar 13, 2017 at 02:10:35PM +0300, Pavel Emelyanov wrote:
> On 03/01/2017 02:53 AM, Andrei Vagin wrote:
> > From: Andrei Vagin <avagin@virtuozzo.com>
> > 
> > In each network namespace we can set an id for another network namespace
> > to be able to address it in netlink messages.
> > 
> > For example, we can say that a peer of a veth devices has to be created
> > in a network namespace with a specified id. If we request information about
> > a veth device, a kernel will report where a peer device lives.
> > 
> > An user are able to set this ID-s, so we have to dump and restore them.
> > 
> > Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> > ---
> >  criu/include/namespaces.h |   8 ++
> >  criu/namespaces.c         |   7 ++
> >  criu/net.c                | 185 +++++++++++++++++++++++++++++++++++++++++++---
> >  3 files changed, 189 insertions(+), 11 deletions(-)
> > 
> > diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> > index 2a3dc08..5df7679 100644
> > --- a/criu/include/namespaces.h
> > +++ b/criu/include/namespaces.h
> > @@ -82,6 +82,12 @@ enum ns_type {
> >  	NS_OTHER,
> >  };
> >  
> > +struct netns_id {
> > +	unsigned		id;
> > +	unsigned		net_id;
> > +	struct list_head	node;
> > +};
> > +
> >  struct ns_id {
> >  	unsigned int kid;
> >  	unsigned int id;
> > @@ -112,8 +118,10 @@ struct ns_id {
> >  
> >  		struct {
> >  			int nsfd_id;	/* a namespace descriptor id in fdstore */
> > +			int ns_fd;	/* a namespace file descriptor */
> 
> Why do we need both?

When we create network devices, we need to switch between namespaces or
set namespaces for pair devices, so it is more efficient to have the
descriptors open at this stage.

nsfd_id is required to restore sockets, when we can't use random file
descriptors, because they can conflict with process file descriptors.

> 
> >  			int nlsk;	/* for sockets collection */
> >  			int seqsk;	/* to talk to parasite daemons */
> > +			struct list_head ids;
> >  		} net;
> >  		struct {
> >  			UsernsEntry *e;
> > diff --git a/criu/namespaces.c b/criu/namespaces.c
> > index 3563c16..8e170aa 100644
> > --- a/criu/namespaces.c
> > +++ b/criu/namespaces.c
> > @@ -307,6 +307,9 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
> >  		nsid->ns_populated = false;
> >  		INIT_LIST_HEAD(&nsid->children);
> >  		INIT_LIST_HEAD(&nsid->siblings);
> > +
> > +		if (nd == &net_ns_desc)
> > +			INIT_LIST_HEAD(&nsid->net.ids);
> >  	}
> >  
> >  	return nsid;
> > @@ -432,6 +435,10 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
> >  	INIT_LIST_HEAD(&nsid->siblings);
> >  	nsid_add(nsid, nd, ns_next_id++, pid);
> >  
> > +	if (nd == &net_ns_desc) {
> > +		INIT_LIST_HEAD(&nsid->net.ids);
> > +	}
> > +
> >  found:
> >  	if (ns_ret)
> >  		*ns_ret = nsid;
> > diff --git a/criu/net.c b/criu/net.c
> > index a3f79df..e893c0c 100644
> > --- a/criu/net.c
> > +++ b/criu/net.c
> > @@ -640,6 +640,11 @@ static int dump_one_gre(struct ifinfomsg *ifi, char *kind,
> >  	return dump_unknown_device(ifi, kind, tb, ns, fds);
> >  }
> >  
> > +static int list_one_link(struct nlmsghdr *hdr, struct ns_id *ns, void *arg)
> > +{
> > +	return 0;
> > +}
> > +
> >  static int dump_one_link(struct nlmsghdr *hdr, struct ns_id *ns, void *arg)
> >  {
> >  	struct cr_imgset *fds = arg;
> > @@ -856,6 +861,26 @@ out:
> >  
> >  }
> >  
> > +static int list_links(int rtsk, void *args)
> > +{
> > +	struct {
> > +		struct nlmsghdr nlh;
> > +		struct rtgenmsg g;
> > +	} req;
> > +
> > +	pr_info("Dumping netns links\n");
> > +
> > +	memset(&req, 0, sizeof(req));
> > +	req.nlh.nlmsg_len = sizeof(req);
> > +	req.nlh.nlmsg_type = RTM_GETLINK;
> > +	req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
> > +	req.nlh.nlmsg_pid = 0;
> > +	req.nlh.nlmsg_seq = CR_NLMSG_SEQ;
> > +	req.g.rtgen_family = AF_PACKET;
> > +
> > +	return do_rtnl_req(rtsk, &req, sizeof(req), list_one_link, NULL, NULL, args);
> > +}
> 
> Why is this call with empty list_one_link() required at all?

If a device has a pair in another netns, the kernel will generate an nsid
for the second netns in the first netns when we request information
about this device. So we list all devices before dumping net ids in this
namespace.

> 
> > +
> >  static int dump_links(int rtsk, struct ns_id *ns, struct cr_imgset *fds)
> >  {
> >  	struct {
> > @@ -1370,6 +1395,24 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
> >  	int size6 = ARRAY_SIZE(devconfs6);
> >  	char def_stable_secret[MAX_STR_CONF_LEN + 1] = {};
> >  	char all_stable_secret[MAX_STR_CONF_LEN + 1] = {};
> > +	NetnsId	*ids;
> > +	struct netns_id *p;
> > +
> > +	i = 0;
> > +	list_for_each_entry(p, &ns->net.ids, node)
> > +		i++;
> 
> This list is willed with walk_namespaces() call, one entry per callback,
> so this is effectively constant.

I don't understand what you mean. Each namespace has own set of net ids
and it has ids only for a few of other namespaces.

static int collect_netns_id(struct ns_id *ns, void *oarg)
{
        struct net_id_arg *arg = oarg;
        struct netns_id *netns_id;
        int nsid = -1;

        if (net_get_nsid(arg->sk, ns->ns_pid, &nsid))
                return -1;

        if (nsid == -1)
                return 0;

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this means that this namespace doesn't
have an id in the "ns" netns.

        netns_id = xmalloc(sizeof(*netns_id));
        if (!netns_id) 
                return -1;

        pr_debug("Fount the %d id for %d in %d\n", nsid, ns->id, arg->ns->id);
        netns_id->id = ns->id;
        netns_id->net_id = nsid;

        list_add(&netns_id->node, &arg->ns->net.ids);

        return 0;
}               


> 
> > +
> > +	netns.nsids = xmalloc(sizeof(NetnsId *) * i);
> > +	ids = xmalloc(sizeof(NetnsId) * i);
> > +	i = 0;
> > +	list_for_each_entry(p, &ns->net.ids, node) {
> > +		netns_id__init(&ids[i]);
> > +		ids[i].id = p->id;
> > +		ids[i].nsid = p->net_id;
> > +		netns.nsids[i] = ids + i;
> > +		i++;
> > +	}
> > +	netns.n_nsids = i;
> >  
> >  	netns.n_def_conf4 = size4;
> >  	netns.n_all_conf4 = size4;
> > @@ -1629,6 +1672,46 @@ static int mount_ns_sysfs(void)
> >  	return ns_sysfs_fd >= 0 ? 0 : -1;
> >  }
> >  
> > +struct net_id_arg {
> > +	struct ns_id *ns;
> > +	int sk;
> > +};
> > +
> > +static int collect_netns_id(struct ns_id *ns, void *oarg)
> > +{
> > +	struct net_id_arg *arg = oarg;
> > +	struct netns_id *netns_id;
> > +	int nsid = -1;
> > +
> > +	if (net_get_nsid(arg->sk, ns->ns_pid, &nsid))
> > +		return -1;
> > +
> > +	if (nsid == -1)
> > +		return 0;
> > +
> > +	netns_id = xmalloc(sizeof(*netns_id));
> > +	if (!netns_id)
> > +		return -1;
> > +
> > +	pr_debug("Fount the %d id for %d in %d\n", nsid, ns->id, arg->ns->id);
> > +	netns_id->id = ns->id;
> > +	netns_id->net_id = nsid;
> > +
> > +	list_add(&netns_id->node, &arg->ns->net.ids);
> > +
> > +	return 0;
> > +}
> > +
> > +static int dump_netns_ids(int rtsk, struct ns_id *ns)
> > +{
> > +	struct net_id_arg arg = {
> > +		.ns = ns,
> > +		.sk = rtsk,
> > +	};
> > +	return walk_namespaces(&net_ns_desc, collect_netns_id,
> > +			(void *)&arg);
> > +}
> > +
> >  int dump_net_ns(struct ns_id *ns)
> >  {
> >  	struct cr_imgset *fds;
> > @@ -1649,6 +1732,10 @@ int dump_net_ns(struct ns_id *ns)
> >  		}
> >  
> >  		if (!ret)
> > +			ret = list_links(sk, NULL);
> > +		if (!ret)
> > +			ret = dump_netns_ids(sk, ns);
> > +		if (!ret)
> >  			ret = dump_links(sk, ns, fds);
> >  
> >  		close(sk);
> > @@ -1676,6 +1763,45 @@ int dump_net_ns(struct ns_id *ns)
> >  	return ret;
> >  }
> >  
> > +static int net_set_nsid(int rtsk, int fd, int nsid);
> > +static int restore_netns_ids(struct ns_id *ns, NetnsEntry *netns)
> > +{
> > +	int i, sk, exit_code = -1;
> > +
> > +	sk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> > +	if (sk < 0) {
> > +		pr_perror("Can't open rtnl sock for net dump");
> > +		return -1;
> > +	}
> > +
> > +	for (i = 0; i < netns->n_nsids; i++) {
> > +		struct ns_id *tg_ns;
> > +		struct netns_id *id;
> > +
> > +		id = xmalloc(sizeof(*id));
> > +		if (!id)
> > +			goto out;
> > +		id->id = netns->nsids[i]->id;
> > +		id->net_id = netns->nsids[i]->nsid;
> > +		list_add(&id->node, &ns->net.ids);
> > +
> > +		tg_ns = lookup_ns_by_id(id->id, &net_ns_desc);
> > +		if (tg_ns == NULL) {
> > +			pr_err("Unknown namespace: %d\n", id->id);
> > +			goto out;
> > +		}
> > +
> > +		if (net_set_nsid(sk, tg_ns->net.ns_fd, id->net_id))
> > +			goto out;
> > +	}
> > +
> > +	exit_code = 0;
> > +out:
> > +	close(sk);
> > +
> > +	return exit_code;
> > +}
> > +
> >  static int prepare_net_ns(struct ns_id *ns)
> >  {
> >  	int ret = 0, nsid = ns->id;
> > @@ -1684,6 +1810,8 @@ static int prepare_net_ns(struct ns_id *ns)
> >  	if (!(opts.empty_ns & CLONE_NEWNET)) {
> >  		ret = restore_netns_conf(nsid, &netns);
> >  		if (!ret)
> > +			ret = restore_netns_ids(ns, netns);
> > +		if (!ret)
> >  			ret = restore_links(nsid, &netns);
> >  		if (netns)
> >  			netns_entry__free_unpacked(netns, NULL);
> > @@ -1703,25 +1831,27 @@ static int prepare_net_ns(struct ns_id *ns)
> >  	if (!ret)
> >  		ret = restore_nf_ct(nsid, CR_FD_NETNF_EXP);
> >  
> > +	if (!ret) {
> > +		ns->net.nsfd_id = fdstore_add(ns->net.ns_fd);
> > +		if (ns->net.nsfd_id < 0)
> > +			ret = -1;
> > +	}
> > +	close_safe(&ns->net.ns_fd);
> > +
> > +	ns->ns_populated = true;
> > +
> >  	return ret;
> >  }
> >  
> >  static int open_net_ns(struct ns_id *nsid)
> >  {
> > -	int fd, id;
> > +	int fd;
> >  
> >  	/* Pin one with a file descriptor */
> >  	fd = open_proc(PROC_SELF, "ns/net");
> >  	if (fd < 0)
> >  		return -1;
> > -
> > -	id = fdstore_add(fd);
> > -	close(fd);
> > -	if (id < 0) {
> > -		return -1;
> > -	}
> > -
> > -	nsid->net.nsfd_id = id;
> > +	nsid->net.ns_fd = fd;
> >  
> >  	return 0;
> >  }
> > @@ -1732,8 +1862,6 @@ static int do_create_net_ns(struct ns_id *ns)
> >  		pr_perror("Unable to create a new netns");
> >  		return -1;
> >  	}
> > -	if (prepare_net_ns(ns))
> > -		return -1;
> >  	if (open_net_ns(ns))
> >  		return -1;
> >  	return 0;
> > @@ -1790,6 +1918,17 @@ int prepare_net_namespaces()
> >  			goto err;
> >  	}
> >  
> > +	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> > +		if (nsid->nd != &net_ns_desc)
> > +			continue;
> > +
> > +		if (switch_ns_by_fd(nsid->net.ns_fd, &net_ns_desc, NULL))
> > +			goto err;
> > +
> > +		if (prepare_net_ns(nsid))
> > +			goto err;
> > +	}
> > +
> >  	close_service_fd(NS_FD_OFF);
> >  
> >  	return 0;
> > @@ -2277,6 +2416,30 @@ static int nsid_cb(struct nlmsghdr *msg, struct ns_id *ns, void *arg)
> >  	return 0;
> >  }
> >  
> > +static int net_set_nsid(int rtsk, int fd, int nsid)
> > +{
> > +	struct {
> > +		struct nlmsghdr nlh;
> > +		struct rtgenmsg g;
> > +		char msg[128];
> > +	} req;
> > +
> > +	memset(&req, 0, sizeof(req));
> > +	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
> > +	req.nlh.nlmsg_type = RTM_NEWNSID;
> > +	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
> > +	req.nlh.nlmsg_seq = CR_NLMSG_SEQ;
> > +	if (addattr_l(&req.nlh, sizeof(req), NETNSA_FD, &fd, sizeof(fd)))
> > +		return -1;
> > +	if (addattr_l(&req.nlh, sizeof(req), NETNSA_NSID, &nsid, sizeof(nsid)))
> > +		return -1;
> > +
> > +	if (do_rtnl_req(rtsk, &req, req.nlh.nlmsg_len, NULL, NULL, NULL, NULL) < 0)
> > +		return -1;
> > +
> > +	return 0;
> > +}
> > +
> >  int net_get_nsid(int rtsk, int pid, int *nsid)
> >  {
> >  	struct {
> > 
>
Pavel Emelianov March 21, 2017, 10:41 a.m.
On 03/21/2017 12:01 AM, Andrei Vagin wrote:
> On Mon, Mar 13, 2017 at 02:10:35PM +0300, Pavel Emelyanov wrote:
>> On 03/01/2017 02:53 AM, Andrei Vagin wrote:
>>> From: Andrei Vagin <avagin@virtuozzo.com>
>>>
>>> In each network namespace we can set an id for another network namespace
>>> to be able to address it in netlink messages.
>>>
>>> For example, we can say that a peer of a veth devices has to be created
>>> in a network namespace with a specified id. If we request information about
>>> a veth device, a kernel will report where a peer device lives.
>>>
>>> An user are able to set this ID-s, so we have to dump and restore them.
>>>
>>> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
>>> ---
>>>  criu/include/namespaces.h |   8 ++
>>>  criu/namespaces.c         |   7 ++
>>>  criu/net.c                | 185 +++++++++++++++++++++++++++++++++++++++++++---
>>>  3 files changed, 189 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
>>> index 2a3dc08..5df7679 100644
>>> --- a/criu/include/namespaces.h
>>> +++ b/criu/include/namespaces.h
>>> @@ -82,6 +82,12 @@ enum ns_type {
>>>  	NS_OTHER,
>>>  };
>>>  
>>> +struct netns_id {
>>> +	unsigned		id;
>>> +	unsigned		net_id;
>>> +	struct list_head	node;
>>> +};
>>> +
>>>  struct ns_id {
>>>  	unsigned int kid;
>>>  	unsigned int id;
>>> @@ -112,8 +118,10 @@ struct ns_id {
>>>  
>>>  		struct {
>>>  			int nsfd_id;	/* a namespace descriptor id in fdstore */
>>> +			int ns_fd;	/* a namespace file descriptor */
>>
>> Why do we need both?
> 
> When we create network devices, we need to switch between namespaces or
> set namespaces for pair devices, so it is more optimal to have
> descriptors on this stage.
> 
> nsfd_id is required to restore sockets, when we can't use randow file
> descirptors, because they can conflict with process file descriptors.

So these two are used "sequentially", one on early stage only and
the other one on later stage only, aren't they? Can we union them?

>>
>>>  			int nlsk;	/* for sockets collection */
>>>  			int seqsk;	/* to talk to parasite daemons */
>>> +			struct list_head ids;
>>>  		} net;
>>>  		struct {
>>>  			UsernsEntry *e;
>>> diff --git a/criu/namespaces.c b/criu/namespaces.c
>>> index 3563c16..8e170aa 100644
>>> --- a/criu/namespaces.c
>>> +++ b/criu/namespaces.c
>>> @@ -307,6 +307,9 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
>>>  		nsid->ns_populated = false;
>>>  		INIT_LIST_HEAD(&nsid->children);
>>>  		INIT_LIST_HEAD(&nsid->siblings);
>>> +
>>> +		if (nd == &net_ns_desc)
>>> +			INIT_LIST_HEAD(&nsid->net.ids);
>>>  	}
>>>  
>>>  	return nsid;
>>> @@ -432,6 +435,10 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
>>>  	INIT_LIST_HEAD(&nsid->siblings);
>>>  	nsid_add(nsid, nd, ns_next_id++, pid);
>>>  
>>> +	if (nd == &net_ns_desc) {
>>> +		INIT_LIST_HEAD(&nsid->net.ids);
>>> +	}
>>> +
>>>  found:
>>>  	if (ns_ret)
>>>  		*ns_ret = nsid;
>>> diff --git a/criu/net.c b/criu/net.c
>>> index a3f79df..e893c0c 100644
>>> --- a/criu/net.c
>>> +++ b/criu/net.c
>>> @@ -640,6 +640,11 @@ static int dump_one_gre(struct ifinfomsg *ifi, char *kind,
>>>  	return dump_unknown_device(ifi, kind, tb, ns, fds);
>>>  }
>>>  
>>> +static int list_one_link(struct nlmsghdr *hdr, struct ns_id *ns, void *arg)
>>> +{
>>> +	return 0;
>>> +}
>>> +
>>>  static int dump_one_link(struct nlmsghdr *hdr, struct ns_id *ns, void *arg)
>>>  {
>>>  	struct cr_imgset *fds = arg;
>>> @@ -856,6 +861,26 @@ out:
>>>  
>>>  }
>>>  
>>> +static int list_links(int rtsk, void *args)
>>> +{
>>> +	struct {
>>> +		struct nlmsghdr nlh;
>>> +		struct rtgenmsg g;
>>> +	} req;
>>> +
>>> +	pr_info("Dumping netns links\n");
>>> +
>>> +	memset(&req, 0, sizeof(req));
>>> +	req.nlh.nlmsg_len = sizeof(req);
>>> +	req.nlh.nlmsg_type = RTM_GETLINK;
>>> +	req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
>>> +	req.nlh.nlmsg_pid = 0;
>>> +	req.nlh.nlmsg_seq = CR_NLMSG_SEQ;
>>> +	req.g.rtgen_family = AF_PACKET;
>>> +
>>> +	return do_rtnl_req(rtsk, &req, sizeof(req), list_one_link, NULL, NULL, args);
>>> +}
>>
>> Why is this call with empty list_one_link() required at all?
> 
> If a device has a pair in another netns, the kernel will generate nsid
> for the second netns in the first netns when we requiest information
> about this device. So we list all devices before dumping net ids in this
> namespace.

We need a good descriptive comment about this fact in
that routine.

>>
>>> +
>>>  static int dump_links(int rtsk, struct ns_id *ns, struct cr_imgset *fds)
>>>  {
>>>  	struct {
>>> @@ -1370,6 +1395,24 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>>>  	int size6 = ARRAY_SIZE(devconfs6);
>>>  	char def_stable_secret[MAX_STR_CONF_LEN + 1] = {};
>>>  	char all_stable_secret[MAX_STR_CONF_LEN + 1] = {};
>>> +	NetnsId	*ids;
>>> +	struct netns_id *p;
>>> +
>>> +	i = 0;
>>> +	list_for_each_entry(p, &ns->net.ids, node)
>>> +		i++;
>>
>> This list is willed with walk_namespaces() call, one entry per callback,
>> so this is effectively constant.
> 
> I don't understand what you mean. Each namespace has own set of net ids
> and it has ids only for a few of other namespaces.
> 
> static int collect_netns_id(struct ns_id *ns, void *oarg)
> {
>         struct net_id_arg *arg = oarg;
>         struct netns_id *netns_id;
>         int nsid = -1;
> 
>         if (net_get_nsid(arg->sk, ns->ns_pid, &nsid))
>                 return -1;
> 
>         if (nsid == -1)
>                 return 0;
> 
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this means that this namespace doesn't
> have id in the "ns" netns.

Ah! Please, add code comment about it.

>         netns_id = xmalloc(sizeof(*netns_id));
>         if (!netns_id) 
>                 return -1;
> 
>         pr_debug("Fount the %d id for %d in %d\n", nsid, ns->id, arg->ns->id);
>         netns_id->id = ns->id;
>         netns_id->net_id = nsid;
> 
>         list_add(&netns_id->node, &arg->ns->net.ids);
> 
>         return 0;
> }