[08/12] netns: restore internal veth devices

Submitted by Andrei Vagin on Feb. 28, 2017, 11:53 p.m.

Details

Message ID 1488325988-28456-9-git-send-email-avagin@openvz.org
State New
Series "Dump and restore internal veth devices"
Headers show

Commit Message

Andrei Vagin Feb. 28, 2017, 11:53 p.m.
From: Andrei Vagin <avagin@virtuozzo.com>

When we dump a veth device, the kernel reports which net namespace its
peer device lives in, and we use this information to restore the veth pair.

On restore, we specify the target net namespace for the peer, so that it is
created in the required netns.

Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
---
 criu/include/namespaces.h     |  7 +++
 criu/namespaces.c             |  5 ++-
 criu/net.c                    | 99 +++++++++++++++++++++++++++++++++++++++----
 scripts/build/Dockerfile.tmpl |  1 +
 scripts/travis/travis-tests   |  2 +-
 5 files changed, 104 insertions(+), 10 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index 5df7679..2302cff 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -88,6 +88,12 @@  struct netns_id {
 	struct list_head	node;
 };
 
+struct net_link {
+	unsigned int		ifindex;
+	bool			created;
+	struct list_head	node;
+};
+
 struct ns_id {
 	unsigned int kid;
 	unsigned int id;
@@ -122,6 +128,7 @@  struct ns_id {
 			int nlsk;	/* for sockets collection */
 			int seqsk;	/* to talk to parasite daemons */
 			struct list_head ids;
+			struct list_head links;
 		} net;
 		struct {
 			UsernsEntry *e;
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 8e170aa..797e5ee 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -308,8 +308,10 @@  struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
 		INIT_LIST_HEAD(&nsid->children);
 		INIT_LIST_HEAD(&nsid->siblings);
 
-		if (nd == &net_ns_desc)
+		if (nd == &net_ns_desc) {
 			INIT_LIST_HEAD(&nsid->net.ids);
+			INIT_LIST_HEAD(&nsid->net.links);
+		}
 	}
 
 	return nsid;
@@ -437,6 +439,7 @@  static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
 
 	if (nd == &net_ns_desc) {
 		INIT_LIST_HEAD(&nsid->net.ids);
+		INIT_LIST_HEAD(&nsid->net.links);
 	}
 
 found:
diff --git a/criu/net.c b/criu/net.c
index f889403..b48440e 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -366,12 +366,22 @@  int write_netdev_img(NetDeviceEntry *nde, struct cr_imgset *fds, struct nlattr *
 	return pb_write_one(img_from_set(fds, CR_FD_NETDEV), nde, PB_NETDEV);
 }
 
+static int lookup_net_by_netid(struct ns_id *ns, int net_id)
+{
+	struct netns_id *p;
+
+	list_for_each_entry(p, &ns->net.ids, node)
+		if (p->net_id == net_id)
+			return p->id;
+
+	return -1;
+}
+
 static int dump_one_netdev(int type, struct ifinfomsg *ifi,
 		struct nlattr **tb, struct ns_id *ns, struct cr_imgset *fds,
 		int (*dump)(NetDeviceEntry *, struct cr_imgset *, struct nlattr **info))
 {
-	int ret = -1;
-	int i;
+	int ret = -1, i, peer_ifindex;
 	NetDeviceEntry netdev = NET_DEVICE_ENTRY__INIT;
 	SysctlEntry *confs4 = NULL;
 	int size4 = ARRAY_SIZE(devconfs4);
@@ -391,6 +401,37 @@  static int dump_one_netdev(int type, struct ifinfomsg *ifi,
 	netdev.flags = ifi->ifi_flags;
 	netdev.name = RTA_DATA(tb[IFLA_IFNAME]);
 
+	if (kdat.has_nsid) {
+		peer_ifindex = ifi->ifi_index;
+		if (tb[IFLA_LINK])
+			peer_ifindex = nla_get_u32(tb[IFLA_LINK]);
+
+		netdev.has_peer_ifindex = true;
+		netdev.peer_ifindex = peer_ifindex;
+	}
+
+	if (kdat.has_nsid) {
+		s32 nsid = -1;
+
+		if (tb[IFLA_LINK_NETNSID])
+			nsid = nla_get_s32(tb[IFLA_LINK_NETNSID]);
+
+		pr_debug("The peer link is in the %d netns with the %u index\n",
+						nsid, netdev.peer_ifindex);
+
+		if (nsid == -1)
+			nsid = ns->id;
+		else
+			nsid = lookup_net_by_netid(ns, nsid);
+		if (nsid < 0) {
+			pr_warn("The %s veth is in an external netns\n",
+								netdev.name);
+		} else {
+			netdev.has_peer_nsid = true;
+			netdev.peer_nsid = nsid;
+		}
+	}
+
 	if (tb[IFLA_ADDRESS] && (type != ND_TYPE__LOOPBACK)) {
 		netdev.has_address = true;
 		netdev.address.data = nla_data(tb[IFLA_ADDRESS]);
@@ -1017,9 +1058,11 @@  enum {
 #define IFLA_NET_NS_FD	28
 #endif
 
-static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
+static int veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req,
+						struct ns_id *ns, int ns_fd)
 {
 	char key[100], *val;
+	struct ns_id *peer_ns = NULL;
 
 	snprintf(key, sizeof(key), "veth[%s]", nde->name);
 	val = external_lookup_by_key(key);
@@ -1028,7 +1071,47 @@  static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
 
 		aux = strchrnul(val, '@');
 		addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, val, aux - val);
+		addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
+		return 0;
 	}
+
+	if (nde->has_peer_nsid) {
+		if (ns && nde->peer_nsid == ns->id) {
+			struct net_link *link;
+
+			list_for_each_entry(link, &ns->net.links, node)
+				if (link->ifindex == nde->peer_ifindex && link->created) {
+					pr_err("%d\n", nde->peer_ifindex);
+					req->h.nlmsg_type = RTM_SETLINK;
+					return 0;
+				}
+		}
+		peer_ns = lookup_ns_by_id(nde->peer_nsid, &net_ns_desc);
+		if (peer_ns->ns_populated) {
+			req->h.nlmsg_type = RTM_SETLINK;
+			return 0;
+		}
+	}
+
+	if (peer_ns) {
+		if (ns && nde->peer_nsid == ns->id) {
+			struct net_link *link;
+
+			link = xmalloc(sizeof(*link));
+			if (link == NULL)
+				return -1;
+
+			link->ifindex = nde->ifindex;
+			link->created = true;
+			list_add(&link->node, &ns->net.links);
+		}
+
+		addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &peer_ns->net.ns_fd, sizeof(int));
+		return 0;
+	}
+
+	pr_err("Unknown peer net namespace");
+	return -1;
 }
 
 static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_req *req)
@@ -1037,17 +1120,17 @@  static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_
 	struct rtattr *veth_data, *peer_data;
 	struct ifinfomsg ifm;
 
-	BUG_ON(ns_fd < 0);
-
 	addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "veth", 4);
 
 	veth_data = NLMSG_TAIL(&req->h);
 	addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
 	peer_data = NLMSG_TAIL(&req->h);
 	memset(&ifm, 0, sizeof(ifm));
+
+	ifm.ifi_index = nde->peer_ifindex;
 	addattr_l(&req->h, sizeof(*req), VETH_INFO_PEER, &ifm, sizeof(ifm));
-	veth_peer_info(nde, req);
-	addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
+
+	veth_peer_info(nde, req, ns, ns_fd);
 	peer_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)peer_data;
 	veth_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)veth_data;
 
@@ -1253,7 +1336,7 @@  static int restore_links(struct ns_id *ns, NetnsEntry **netns)
 
 		ret = restore_link(ns, nde, nlsk, criu_nlsk);
 		if (ret) {
-			pr_err("Can't restore link\n");
+			pr_err("Can't restore link: %d\n", ret);
 			goto exit;
 		}
 
diff --git a/scripts/build/Dockerfile.tmpl b/scripts/build/Dockerfile.tmpl
index 6e35f87..0f7c214 100644
--- a/scripts/build/Dockerfile.tmpl
+++ b/scripts/build/Dockerfile.tmpl
@@ -12,6 +12,7 @@  RUN apt-get update && apt-get install -y \
                 libcap-dev \
                 iptables \
                 libnl-3-dev \
+		libnl-route-3-dev \
                 libselinux-dev \
                 pkg-config \
                 git-core \
diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests
index 75d15f5..7b487cb 100755
--- a/scripts/travis/travis-tests
+++ b/scripts/travis/travis-tests
@@ -4,7 +4,7 @@  set -x -e
 TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c0-dev libaio-dev
 		libprotobuf-dev protobuf-compiler python-ipaddr libcap-dev
 		libnl-3-dev gcc-multilib libc6-dev-i386 gdb bash python-protobuf
-		libnet-dev util-linux"
+		libnet-dev util-linux libnl-route-3-dev"
 
 travis_prep () {
 	[ -n "$SKIP_TRAVIS_PREP" ] && return

Comments

Pavel Emelianov March 13, 2017, 10:50 a.m.
On 03/01/2017 02:53 AM, Andrei Vagin wrote:
> From: Andrei Vagin <avagin@virtuozzo.com>
> 
> When we dump a veth device, the kernel reports where a peer device lives
> and we use this information to restore this veth pair.
> 
> On restore we set a net ns id for a peer and it is created in the required
> netns.
> 
> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> ---
>  criu/include/namespaces.h     |  7 +++
>  criu/namespaces.c             |  5 ++-
>  criu/net.c                    | 99 +++++++++++++++++++++++++++++++++++++++----
>  scripts/build/Dockerfile.tmpl |  1 +
>  scripts/travis/travis-tests   |  2 +-
>  5 files changed, 104 insertions(+), 10 deletions(-)
> 
> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> index 5df7679..2302cff 100644
> --- a/criu/include/namespaces.h
> +++ b/criu/include/namespaces.h
> @@ -88,6 +88,12 @@ struct netns_id {
>  	struct list_head	node;
>  };
>  
> +struct net_link {
> +	unsigned int		ifindex;
> +	bool			created;
> +	struct list_head	node;
> +};
> +
>  struct ns_id {
>  	unsigned int kid;
>  	unsigned int id;
> @@ -122,6 +128,7 @@ struct ns_id {
>  			int nlsk;	/* for sockets collection */
>  			int seqsk;	/* to talk to parasite daemons */
>  			struct list_head ids;
> +			struct list_head links;
>  		} net;
>  		struct {
>  			UsernsEntry *e;
> diff --git a/criu/namespaces.c b/criu/namespaces.c
> index 8e170aa..797e5ee 100644
> --- a/criu/namespaces.c
> +++ b/criu/namespaces.c
> @@ -308,8 +308,10 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
>  		INIT_LIST_HEAD(&nsid->children);
>  		INIT_LIST_HEAD(&nsid->siblings);
>  
> -		if (nd == &net_ns_desc)
> +		if (nd == &net_ns_desc) {
>  			INIT_LIST_HEAD(&nsid->net.ids);
> +			INIT_LIST_HEAD(&nsid->net.links);
> +		}
>  	}
>  
>  	return nsid;
> @@ -437,6 +439,7 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
>  
>  	if (nd == &net_ns_desc) {
>  		INIT_LIST_HEAD(&nsid->net.ids);
> +		INIT_LIST_HEAD(&nsid->net.links);
>  	}
>  
>  found:
> diff --git a/criu/net.c b/criu/net.c
> index f889403..b48440e 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -366,12 +366,22 @@ int write_netdev_img(NetDeviceEntry *nde, struct cr_imgset *fds, struct nlattr *
>  	return pb_write_one(img_from_set(fds, CR_FD_NETDEV), nde, PB_NETDEV);
>  }
>  
> +static int lookup_net_by_netid(struct ns_id *ns, int net_id)
> +{
> +	struct netns_id *p;
> +
> +	list_for_each_entry(p, &ns->net.ids, node)
> +		if (p->net_id == net_id)
> +			return p->id;
> +
> +	return -1;
> +}
> +
>  static int dump_one_netdev(int type, struct ifinfomsg *ifi,
>  		struct nlattr **tb, struct ns_id *ns, struct cr_imgset *fds,
>  		int (*dump)(NetDeviceEntry *, struct cr_imgset *, struct nlattr **info))
>  {
> -	int ret = -1;
> -	int i;
> +	int ret = -1, i, peer_ifindex;
>  	NetDeviceEntry netdev = NET_DEVICE_ENTRY__INIT;
>  	SysctlEntry *confs4 = NULL;
>  	int size4 = ARRAY_SIZE(devconfs4);
> @@ -391,6 +401,37 @@ static int dump_one_netdev(int type, struct ifinfomsg *ifi,
>  	netdev.flags = ifi->ifi_flags;
>  	netdev.name = RTA_DATA(tb[IFLA_IFNAME]);
>  
> +	if (kdat.has_nsid) {
> +		peer_ifindex = ifi->ifi_index;
> +		if (tb[IFLA_LINK])
> +			peer_ifindex = nla_get_u32(tb[IFLA_LINK]);
> +
> +		netdev.has_peer_ifindex = true;
> +		netdev.peer_ifindex = peer_ifindex;
> +	}
> +
> +	if (kdat.has_nsid) {
> +		s32 nsid = -1;
> +
> +		if (tb[IFLA_LINK_NETNSID])
> +			nsid = nla_get_s32(tb[IFLA_LINK_NETNSID]);
> +
> +		pr_debug("The peer link is in the %d netns with the %u index\n",
> +						nsid, netdev.peer_ifindex);
> +
> +		if (nsid == -1)
> +			nsid = ns->id;

This place is not clear. If the kernel didn't report the netns the veth peer
lives in, then ... what? Do we assume it lives in the current one? Is this correct?

> +		else
> +			nsid = lookup_net_by_netid(ns, nsid);
> +		if (nsid < 0) {
> +			pr_warn("The %s veth is in an external netns\n",
> +								netdev.name);
> +		} else {
> +			netdev.has_peer_nsid = true;
> +			netdev.peer_nsid = nsid;
> +		}
> +	}
> +
>  	if (tb[IFLA_ADDRESS] && (type != ND_TYPE__LOOPBACK)) {
>  		netdev.has_address = true;
>  		netdev.address.data = nla_data(tb[IFLA_ADDRESS]);
> @@ -1017,9 +1058,11 @@ enum {
>  #define IFLA_NET_NS_FD	28
>  #endif
>  
> -static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
> +static int veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req,
> +						struct ns_id *ns, int ns_fd)
>  {
>  	char key[100], *val;
> +	struct ns_id *peer_ns = NULL;
>  
>  	snprintf(key, sizeof(key), "veth[%s]", nde->name);
>  	val = external_lookup_by_key(key);
> @@ -1028,7 +1071,47 @@ static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
>  
>  		aux = strchrnul(val, '@');
>  		addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, val, aux - val);
> +		addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
> +		return 0;
>  	}
> +
> +	if (nde->has_peer_nsid) {
> +		if (ns && nde->peer_nsid == ns->id) {
> +			struct net_link *link;
> +
> +			list_for_each_entry(link, &ns->net.links, node)
> +				if (link->ifindex == nde->peer_ifindex && link->created) {
> +					pr_err("%d\n", nde->peer_ifindex);
> +					req->h.nlmsg_type = RTM_SETLINK;
> +					return 0;
> +				}
> +		}
> +		peer_ns = lookup_ns_by_id(nde->peer_nsid, &net_ns_desc);
> +		if (peer_ns->ns_populated) {
> +			req->h.nlmsg_type = RTM_SETLINK;
> +			return 0;
> +		}
> +	}
> +
> +	if (peer_ns) {
> +		if (ns && nde->peer_nsid == ns->id) {
> +			struct net_link *link;
> +
> +			link = xmalloc(sizeof(*link));
> +			if (link == NULL)
> +				return -1;
> +
> +			link->ifindex = nde->ifindex;
> +			link->created = true;
> +			list_add(&link->node, &ns->net.links);
> +		}
> +
> +		addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &peer_ns->net.ns_fd, sizeof(int));
> +		return 0;
> +	}
> +
> +	pr_err("Unknown peer net namespace");
> +	return -1;
>  }
>  
>  static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_req *req)
> @@ -1037,17 +1120,17 @@ static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_
>  	struct rtattr *veth_data, *peer_data;
>  	struct ifinfomsg ifm;
>  
> -	BUG_ON(ns_fd < 0);
> -
>  	addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "veth", 4);
>  
>  	veth_data = NLMSG_TAIL(&req->h);
>  	addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
>  	peer_data = NLMSG_TAIL(&req->h);
>  	memset(&ifm, 0, sizeof(ifm));
> +
> +	ifm.ifi_index = nde->peer_ifindex;
>  	addattr_l(&req->h, sizeof(*req), VETH_INFO_PEER, &ifm, sizeof(ifm));
> -	veth_peer_info(nde, req);
> -	addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
> +
> +	veth_peer_info(nde, req, ns, ns_fd);
>  	peer_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)peer_data;
>  	veth_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)veth_data;
>  
> @@ -1253,7 +1336,7 @@ static int restore_links(struct ns_id *ns, NetnsEntry **netns)
>  
>  		ret = restore_link(ns, nde, nlsk, criu_nlsk);
>  		if (ret) {
> -			pr_err("Can't restore link\n");
> +			pr_err("Can't restore link: %d\n", ret);
>  			goto exit;
>  		}
>  
> diff --git a/scripts/build/Dockerfile.tmpl b/scripts/build/Dockerfile.tmpl
> index 6e35f87..0f7c214 100644
> --- a/scripts/build/Dockerfile.tmpl
> +++ b/scripts/build/Dockerfile.tmpl
> @@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y \
>                  libcap-dev \
>                  iptables \
>                  libnl-3-dev \
> +		libnl-route-3-dev \
>                  libselinux-dev \
>                  pkg-config \
>                  git-core \
> diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests
> index 75d15f5..7b487cb 100755
> --- a/scripts/travis/travis-tests
> +++ b/scripts/travis/travis-tests
> @@ -4,7 +4,7 @@ set -x -e
>  TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c0-dev libaio-dev
>  		libprotobuf-dev protobuf-compiler python-ipaddr libcap-dev
>  		libnl-3-dev gcc-multilib libc6-dev-i386 gdb bash python-protobuf
> -		libnet-dev util-linux"
> +		libnet-dev util-linux libnl-route-3-dev"
>  
>  travis_prep () {
>  	[ -n "$SKIP_TRAVIS_PREP" ] && return
>
Andrey Vagin March 20, 2017, 9:10 p.m.
On Mon, Mar 13, 2017 at 01:50:15PM +0300, Pavel Emelyanov wrote:
> On 03/01/2017 02:53 AM, Andrei Vagin wrote:
> > From: Andrei Vagin <avagin@virtuozzo.com>
> > 
> > When we dump a veth device, the kernel reports where a peer device lives
> > and we use this information to restore this veth pair.
> > 
> > On restore we set a net ns id for a peer and it is created in the required
> > netns.
> > 
> > Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> > ---
> >  criu/include/namespaces.h     |  7 +++
> >  criu/namespaces.c             |  5 ++-
> >  criu/net.c                    | 99 +++++++++++++++++++++++++++++++++++++++----
> >  scripts/build/Dockerfile.tmpl |  1 +
> >  scripts/travis/travis-tests   |  2 +-
> >  5 files changed, 104 insertions(+), 10 deletions(-)
> > 
> > diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> > index 5df7679..2302cff 100644
> > --- a/criu/include/namespaces.h
> > +++ b/criu/include/namespaces.h
> > @@ -88,6 +88,12 @@ struct netns_id {
> >  	struct list_head	node;
> >  };
> >  
> > +struct net_link {
> > +	unsigned int		ifindex;
> > +	bool			created;
> > +	struct list_head	node;
> > +};
> > +
> >  struct ns_id {
> >  	unsigned int kid;
> >  	unsigned int id;
> > @@ -122,6 +128,7 @@ struct ns_id {
> >  			int nlsk;	/* for sockets collection */
> >  			int seqsk;	/* to talk to parasite daemons */
> >  			struct list_head ids;
> > +			struct list_head links;
> >  		} net;
> >  		struct {
> >  			UsernsEntry *e;
> > diff --git a/criu/namespaces.c b/criu/namespaces.c
> > index 8e170aa..797e5ee 100644
> > --- a/criu/namespaces.c
> > +++ b/criu/namespaces.c
> > @@ -308,8 +308,10 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
> >  		INIT_LIST_HEAD(&nsid->children);
> >  		INIT_LIST_HEAD(&nsid->siblings);
> >  
> > -		if (nd == &net_ns_desc)
> > +		if (nd == &net_ns_desc) {
> >  			INIT_LIST_HEAD(&nsid->net.ids);
> > +			INIT_LIST_HEAD(&nsid->net.links);
> > +		}
> >  	}
> >  
> >  	return nsid;
> > @@ -437,6 +439,7 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
> >  
> >  	if (nd == &net_ns_desc) {
> >  		INIT_LIST_HEAD(&nsid->net.ids);
> > +		INIT_LIST_HEAD(&nsid->net.links);
> >  	}
> >  
> >  found:
> > diff --git a/criu/net.c b/criu/net.c
> > index f889403..b48440e 100644
> > --- a/criu/net.c
> > +++ b/criu/net.c
> > @@ -366,12 +366,22 @@ int write_netdev_img(NetDeviceEntry *nde, struct cr_imgset *fds, struct nlattr *
> >  	return pb_write_one(img_from_set(fds, CR_FD_NETDEV), nde, PB_NETDEV);
> >  }
> >  
> > +static int lookup_net_by_netid(struct ns_id *ns, int net_id)
> > +{
> > +	struct netns_id *p;
> > +
> > +	list_for_each_entry(p, &ns->net.ids, node)
> > +		if (p->net_id == net_id)
> > +			return p->id;
> > +
> > +	return -1;
> > +}
> > +
> >  static int dump_one_netdev(int type, struct ifinfomsg *ifi,
> >  		struct nlattr **tb, struct ns_id *ns, struct cr_imgset *fds,
> >  		int (*dump)(NetDeviceEntry *, struct cr_imgset *, struct nlattr **info))
> >  {
> > -	int ret = -1;
> > -	int i;
> > +	int ret = -1, i, peer_ifindex;
> >  	NetDeviceEntry netdev = NET_DEVICE_ENTRY__INIT;
> >  	SysctlEntry *confs4 = NULL;
> >  	int size4 = ARRAY_SIZE(devconfs4);
> > @@ -391,6 +401,37 @@ static int dump_one_netdev(int type, struct ifinfomsg *ifi,
> >  	netdev.flags = ifi->ifi_flags;
> >  	netdev.name = RTA_DATA(tb[IFLA_IFNAME]);
> >  
> > +	if (kdat.has_nsid) {
> > +		peer_ifindex = ifi->ifi_index;
> > +		if (tb[IFLA_LINK])
> > +			peer_ifindex = nla_get_u32(tb[IFLA_LINK]);
> > +
> > +		netdev.has_peer_ifindex = true;
> > +		netdev.peer_ifindex = peer_ifindex;
> > +	}
> > +
> > +	if (kdat.has_nsid) {

Here we check that the kernel reports nsid for devices.

> > +		s32 nsid = -1;
> > +
> > +		if (tb[IFLA_LINK_NETNSID])
> > +			nsid = nla_get_s32(tb[IFLA_LINK_NETNSID]);
> > +
> > +		pr_debug("The peer link is in the %d netns with the %u index\n",
> > +						nsid, netdev.peer_ifindex);
> > +
> > +		if (nsid == -1)
> > +			nsid = ns->id;
> 
> This place is not clear. If the kernel didn't report the ns veth lives
> in then ... what? We assume it lives in current? Is this correct?

So here, nsid == -1 means that the peer device lives in the current netns.

> 
> > +		else
> > +			nsid = lookup_net_by_netid(ns, nsid);
> > +		if (nsid < 0) {
> > +			pr_warn("The %s veth is in an external netns\n",
> > +								netdev.name);
> > +		} else {
> > +			netdev.has_peer_nsid = true;
> > +			netdev.peer_nsid = nsid;
> > +		}
> > +	}
> > +
> >  	if (tb[IFLA_ADDRESS] && (type != ND_TYPE__LOOPBACK)) {
> >  		netdev.has_address = true;
> >  		netdev.address.data = nla_data(tb[IFLA_ADDRESS]);
> > @@ -1017,9 +1058,11 @@ enum {
> >  #define IFLA_NET_NS_FD	28
> >  #endif
> >  
> > -static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
> > +static int veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req,
> > +						struct ns_id *ns, int ns_fd)
> >  {
> >  	char key[100], *val;
> > +	struct ns_id *peer_ns = NULL;
> >  
> >  	snprintf(key, sizeof(key), "veth[%s]", nde->name);
> >  	val = external_lookup_by_key(key);
> > @@ -1028,7 +1071,47 @@ static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
> >  
> >  		aux = strchrnul(val, '@');
> >  		addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, val, aux - val);
> > +		addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
> > +		return 0;
> >  	}
> > +
> > +	if (nde->has_peer_nsid) {
> > +		if (ns && nde->peer_nsid == ns->id) {
> > +			struct net_link *link;
> > +
> > +			list_for_each_entry(link, &ns->net.links, node)
> > +				if (link->ifindex == nde->peer_ifindex && link->created) {
> > +					pr_err("%d\n", nde->peer_ifindex);
> > +					req->h.nlmsg_type = RTM_SETLINK;
> > +					return 0;
> > +				}
> > +		}
> > +		peer_ns = lookup_ns_by_id(nde->peer_nsid, &net_ns_desc);
> > +		if (peer_ns->ns_populated) {
> > +			req->h.nlmsg_type = RTM_SETLINK;
> > +			return 0;
> > +		}
> > +	}
> > +
> > +	if (peer_ns) {
> > +		if (ns && nde->peer_nsid == ns->id) {
> > +			struct net_link *link;
> > +
> > +			link = xmalloc(sizeof(*link));
> > +			if (link == NULL)
> > +				return -1;
> > +
> > +			link->ifindex = nde->ifindex;
> > +			link->created = true;
> > +			list_add(&link->node, &ns->net.links);
> > +		}
> > +
> > +		addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &peer_ns->net.ns_fd, sizeof(int));
> > +		return 0;
> > +	}
> > +
> > +	pr_err("Unknown peer net namespace");
> > +	return -1;
> >  }
> >  
> >  static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_req *req)
> > @@ -1037,17 +1120,17 @@ static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_
> >  	struct rtattr *veth_data, *peer_data;
> >  	struct ifinfomsg ifm;
> >  
> > -	BUG_ON(ns_fd < 0);
> > -
> >  	addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "veth", 4);
> >  
> >  	veth_data = NLMSG_TAIL(&req->h);
> >  	addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
> >  	peer_data = NLMSG_TAIL(&req->h);
> >  	memset(&ifm, 0, sizeof(ifm));
> > +
> > +	ifm.ifi_index = nde->peer_ifindex;
> >  	addattr_l(&req->h, sizeof(*req), VETH_INFO_PEER, &ifm, sizeof(ifm));
> > -	veth_peer_info(nde, req);
> > -	addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
> > +
> > +	veth_peer_info(nde, req, ns, ns_fd);
> >  	peer_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)peer_data;
> >  	veth_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)veth_data;
> >  
> > @@ -1253,7 +1336,7 @@ static int restore_links(struct ns_id *ns, NetnsEntry **netns)
> >  
> >  		ret = restore_link(ns, nde, nlsk, criu_nlsk);
> >  		if (ret) {
> > -			pr_err("Can't restore link\n");
> > +			pr_err("Can't restore link: %d\n", ret);
> >  			goto exit;
> >  		}
> >  
> > diff --git a/scripts/build/Dockerfile.tmpl b/scripts/build/Dockerfile.tmpl
> > index 6e35f87..0f7c214 100644
> > --- a/scripts/build/Dockerfile.tmpl
> > +++ b/scripts/build/Dockerfile.tmpl
> > @@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y \
> >                  libcap-dev \
> >                  iptables \
> >                  libnl-3-dev \
> > +		libnl-route-3-dev \
> >                  libselinux-dev \
> >                  pkg-config \
> >                  git-core \
> > diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests
> > index 75d15f5..7b487cb 100755
> > --- a/scripts/travis/travis-tests
> > +++ b/scripts/travis/travis-tests
> > @@ -4,7 +4,7 @@ set -x -e
> >  TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c0-dev libaio-dev
> >  		libprotobuf-dev protobuf-compiler python-ipaddr libcap-dev
> >  		libnl-3-dev gcc-multilib libc6-dev-i386 gdb bash python-protobuf
> > -		libnet-dev util-linux"
> > +		libnet-dev util-linux libnl-route-3-dev"
> >  
> >  travis_prep () {
> >  	[ -n "$SKIP_TRAVIS_PREP" ] && return
> > 
>
Pavel Emelianov March 21, 2017, 3:18 p.m.
On 03/21/2017 12:10 AM, Andrei Vagin wrote:
> On Mon, Mar 13, 2017 at 01:50:15PM +0300, Pavel Emelyanov wrote:
>> On 03/01/2017 02:53 AM, Andrei Vagin wrote:
>>> From: Andrei Vagin <avagin@virtuozzo.com>
>>>
>>> When we dump a veth device, the kernel reports where a peer device lives
>>> and we use this information to restore this veth pair.
>>>
>>> On restore we set a net ns id for a peer and it is created in the required
>>> netns.
>>>
>>> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
>>> ---
>>>  criu/include/namespaces.h     |  7 +++
>>>  criu/namespaces.c             |  5 ++-
>>>  criu/net.c                    | 99 +++++++++++++++++++++++++++++++++++++++----
>>>  scripts/build/Dockerfile.tmpl |  1 +
>>>  scripts/travis/travis-tests   |  2 +-
>>>  5 files changed, 104 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
>>> index 5df7679..2302cff 100644
>>> --- a/criu/include/namespaces.h
>>> +++ b/criu/include/namespaces.h
>>> @@ -88,6 +88,12 @@ struct netns_id {
>>>  	struct list_head	node;
>>>  };
>>>  
>>> +struct net_link {
>>> +	unsigned int		ifindex;
>>> +	bool			created;
>>> +	struct list_head	node;
>>> +};
>>> +
>>>  struct ns_id {
>>>  	unsigned int kid;
>>>  	unsigned int id;
>>> @@ -122,6 +128,7 @@ struct ns_id {
>>>  			int nlsk;	/* for sockets collection */
>>>  			int seqsk;	/* to talk to parasite daemons */
>>>  			struct list_head ids;
>>> +			struct list_head links;
>>>  		} net;
>>>  		struct {
>>>  			UsernsEntry *e;
>>> diff --git a/criu/namespaces.c b/criu/namespaces.c
>>> index 8e170aa..797e5ee 100644
>>> --- a/criu/namespaces.c
>>> +++ b/criu/namespaces.c
>>> @@ -308,8 +308,10 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
>>>  		INIT_LIST_HEAD(&nsid->children);
>>>  		INIT_LIST_HEAD(&nsid->siblings);
>>>  
>>> -		if (nd == &net_ns_desc)
>>> +		if (nd == &net_ns_desc) {
>>>  			INIT_LIST_HEAD(&nsid->net.ids);
>>> +			INIT_LIST_HEAD(&nsid->net.links);
>>> +		}
>>>  	}
>>>  
>>>  	return nsid;
>>> @@ -437,6 +439,7 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
>>>  
>>>  	if (nd == &net_ns_desc) {
>>>  		INIT_LIST_HEAD(&nsid->net.ids);
>>> +		INIT_LIST_HEAD(&nsid->net.links);
>>>  	}
>>>  
>>>  found:
>>> diff --git a/criu/net.c b/criu/net.c
>>> index f889403..b48440e 100644
>>> --- a/criu/net.c
>>> +++ b/criu/net.c
>>> @@ -366,12 +366,22 @@ int write_netdev_img(NetDeviceEntry *nde, struct cr_imgset *fds, struct nlattr *
>>>  	return pb_write_one(img_from_set(fds, CR_FD_NETDEV), nde, PB_NETDEV);
>>>  }
>>>  
>>> +static int lookup_net_by_netid(struct ns_id *ns, int net_id)
>>> +{
>>> +	struct netns_id *p;
>>> +
>>> +	list_for_each_entry(p, &ns->net.ids, node)
>>> +		if (p->net_id == net_id)
>>> +			return p->id;
>>> +
>>> +	return -1;
>>> +}
>>> +
>>>  static int dump_one_netdev(int type, struct ifinfomsg *ifi,
>>>  		struct nlattr **tb, struct ns_id *ns, struct cr_imgset *fds,
>>>  		int (*dump)(NetDeviceEntry *, struct cr_imgset *, struct nlattr **info))
>>>  {
>>> -	int ret = -1;
>>> -	int i;
>>> +	int ret = -1, i, peer_ifindex;
>>>  	NetDeviceEntry netdev = NET_DEVICE_ENTRY__INIT;
>>>  	SysctlEntry *confs4 = NULL;
>>>  	int size4 = ARRAY_SIZE(devconfs4);
>>> @@ -391,6 +401,37 @@ static int dump_one_netdev(int type, struct ifinfomsg *ifi,
>>>  	netdev.flags = ifi->ifi_flags;
>>>  	netdev.name = RTA_DATA(tb[IFLA_IFNAME]);
>>>  
>>> +	if (kdat.has_nsid) {
>>> +		peer_ifindex = ifi->ifi_index;
>>> +		if (tb[IFLA_LINK])
>>> +			peer_ifindex = nla_get_u32(tb[IFLA_LINK]);
>>> +
>>> +		netdev.has_peer_ifindex = true;
>>> +		netdev.peer_ifindex = peer_ifindex;
>>> +	}
>>> +
>>> +	if (kdat.has_nsid) {
> 
> here we check that kernel reports nsid for devices.
> 
>>> +		s32 nsid = -1;
>>> +
>>> +		if (tb[IFLA_LINK_NETNSID])
>>> +			nsid = nla_get_s32(tb[IFLA_LINK_NETNSID]);
>>> +
>>> +		pr_debug("The peer link is in the %d netns with the %u index\n",
>>> +						nsid, netdev.peer_ifindex);
>>> +
>>> +		if (nsid == -1)
>>> +			nsid = ns->id;
>>
>> This place is not clear. If the kernel didn't report the ns veth lives
>> in then ... what? We assume it lives in current? Is this correct?
> 
> so here this means that a pair device lives in the current netns

Ah, again I mixed up -1 meaning "error" with -1 meaning "no value". Would you
add a code comment at this place?

>>
>>> +		else
>>> +			nsid = lookup_net_by_netid(ns, nsid);
>>> +		if (nsid < 0) {
>>> +			pr_warn("The %s veth is in an external netns\n",
>>> +								netdev.name);
>>> +		} else {
>>> +			netdev.has_peer_nsid = true;
>>> +			netdev.peer_nsid = nsid;
>>> +		}
>>> +	}
>>> +
>>>  	if (tb[IFLA_ADDRESS] && (type != ND_TYPE__LOOPBACK)) {
>>>  		netdev.has_address = true;
>>>  		netdev.address.data = nla_data(tb[IFLA_ADDRESS]);
>>> @@ -1017,9 +1058,11 @@ enum {
>>>  #define IFLA_NET_NS_FD	28
>>>  #endif
>>>  
>>> -static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
>>> +static int veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req,
>>> +						struct ns_id *ns, int ns_fd)
>>>  {
>>>  	char key[100], *val;
>>> +	struct ns_id *peer_ns = NULL;
>>>  
>>>  	snprintf(key, sizeof(key), "veth[%s]", nde->name);
>>>  	val = external_lookup_by_key(key);
>>> @@ -1028,7 +1071,47 @@ static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
>>>  
>>>  		aux = strchrnul(val, '@');
>>>  		addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, val, aux - val);
>>> +		addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
>>> +		return 0;
>>>  	}
>>> +
>>> +	if (nde->has_peer_nsid) {
>>> +		if (ns && nde->peer_nsid == ns->id) {
>>> +			struct net_link *link;
>>> +
>>> +			list_for_each_entry(link, &ns->net.links, node)
>>> +				if (link->ifindex == nde->peer_ifindex && link->created) {
>>> +					pr_err("%d\n", nde->peer_ifindex);
>>> +					req->h.nlmsg_type = RTM_SETLINK;
>>> +					return 0;
>>> +				}
>>> +		}
>>> +		peer_ns = lookup_ns_by_id(nde->peer_nsid, &net_ns_desc);
>>> +		if (peer_ns->ns_populated) {
>>> +			req->h.nlmsg_type = RTM_SETLINK;
>>> +			return 0;
>>> +		}
>>> +	}
>>> +
>>> +	if (peer_ns) {
>>> +		if (ns && nde->peer_nsid == ns->id) {
>>> +			struct net_link *link;
>>> +
>>> +			link = xmalloc(sizeof(*link));
>>> +			if (link == NULL)
>>> +				return -1;
>>> +
>>> +			link->ifindex = nde->ifindex;
>>> +			link->created = true;
>>> +			list_add(&link->node, &ns->net.links);
>>> +		}
>>> +
>>> +		addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &peer_ns->net.ns_fd, sizeof(int));
>>> +		return 0;
>>> +	}
>>> +
>>> +	pr_err("Unknown peer net namespace");
>>> +	return -1;
>>>  }
>>>  
>>>  static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_req *req)
>>> @@ -1037,17 +1120,17 @@ static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_
>>>  	struct rtattr *veth_data, *peer_data;
>>>  	struct ifinfomsg ifm;
>>>  
>>> -	BUG_ON(ns_fd < 0);
>>> -
>>>  	addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "veth", 4);
>>>  
>>>  	veth_data = NLMSG_TAIL(&req->h);
>>>  	addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
>>>  	peer_data = NLMSG_TAIL(&req->h);
>>>  	memset(&ifm, 0, sizeof(ifm));
>>> +
>>> +	ifm.ifi_index = nde->peer_ifindex;
>>>  	addattr_l(&req->h, sizeof(*req), VETH_INFO_PEER, &ifm, sizeof(ifm));
>>> -	veth_peer_info(nde, req);
>>> -	addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
>>> +
>>> +	veth_peer_info(nde, req, ns, ns_fd);
>>>  	peer_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)peer_data;
>>>  	veth_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)veth_data;
>>>  
>>> @@ -1253,7 +1336,7 @@ static int restore_links(struct ns_id *ns, NetnsEntry **netns)
>>>  
>>>  		ret = restore_link(ns, nde, nlsk, criu_nlsk);
>>>  		if (ret) {
>>> -			pr_err("Can't restore link\n");
>>> +			pr_err("Can't restore link: %d\n", ret);
>>>  			goto exit;
>>>  		}
>>>  
>>> diff --git a/scripts/build/Dockerfile.tmpl b/scripts/build/Dockerfile.tmpl
>>> index 6e35f87..0f7c214 100644
>>> --- a/scripts/build/Dockerfile.tmpl
>>> +++ b/scripts/build/Dockerfile.tmpl
>>> @@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y \
>>>                  libcap-dev \
>>>                  iptables \
>>>                  libnl-3-dev \
>>> +		libnl-route-3-dev \
>>>                  libselinux-dev \
>>>                  pkg-config \
>>>                  git-core \
>>> diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests
>>> index 75d15f5..7b487cb 100755
>>> --- a/scripts/travis/travis-tests
>>> +++ b/scripts/travis/travis-tests
>>> @@ -4,7 +4,7 @@ set -x -e
>>>  TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c0-dev libaio-dev
>>>  		libprotobuf-dev protobuf-compiler python-ipaddr libcap-dev
>>>  		libnl-3-dev gcc-multilib libc6-dev-i386 gdb bash python-protobuf
>>> -		libnet-dev util-linux"
>>> +		libnet-dev util-linux libnl-route-3-dev"
>>>  
>>>  travis_prep () {
>>>  	[ -n "$SKIP_TRAVIS_PREP" ] && return
>>>
>>
> .
>