[v3,19/33] ns: Write/read ns entries in new way

Submitted by Kirill Tkhai on Feb. 16, 2017, 12:09 p.m.

Details

Message ID 148724696381.22444.9524234964804766176.stgit@localhost.localdomain
State New
Series "Nested user namespaces support"
Headers show

Commit Message

Kirill Tkhai Feb. 16, 2017, 12:09 p.m.
The patch introduces a generic way of dumping all the namespaces
(currently, only user ns entries are dumped).

The handler for old user ns images remains in place.

v3: On restore, keep in mind that the parent ns may not have been
    read yet at the moment it is looked up.
    Set the correct user ns id in d_ns.
    Reflect the fact that parent_id has been moved to the pid and user ext.
    Read ns ids before tasks.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 criu/cr-restore.c         |    3 +
 criu/include/namespaces.h |    1 
 criu/namespaces.c         |  214 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 209 insertions(+), 9 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 62aca471b..2ee170323 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2130,6 +2130,9 @@  int cr_restore_tasks(void)
 	if (prepare_task_entries() < 0)
 		goto err;
 
+	if (read_ns_hookups())
+		goto err;
+
 	if (prepare_pstree() < 0)
 		goto err;
 
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index 0c89c0c61..9d578d806 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -146,6 +146,7 @@  extern int collect_namespaces(bool for_dump);
 extern int collect_mnt_namespaces(bool for_dump);
 extern int dump_mnt_namespaces(void);
 extern int dump_namespaces(struct pstree_item *item, unsigned int ns_flags);
+extern int read_ns_hookups(void);
 extern int prepare_namespace_before_tasks(void);
 extern int prepare_namespace(struct pstree_item *item, unsigned long clone_flags);
 
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 8c085aeff..fd55336a9 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -1133,7 +1133,6 @@  static int dump_user_ns(struct ns_id *ns)
 	int ret, exit_code = -1;
 	pid_t pid = ns->ns_pid;
 	UsernsEntry *e = ns->user.e;
-	struct cr_img *img;
 
 	ret = parse_id_map(pid, "uid_map", &e->uid_map);
 	if (ret < 0)
@@ -1148,14 +1147,6 @@  static int dump_user_ns(struct ns_id *ns)
 	if (check_user_ns(ns))
 		return -1;
 
-	img = open_image(CR_FD_USERNS, O_DUMP, ns->id);
-	if (!img)
-		goto err;
-	ret = pb_write_one(img, e, PB_USERNS);
-	close_image(img);
-	if (ret < 0)
-		goto err;
-
 	return 0;
 err:
 	if (e->uid_map) {
@@ -1742,6 +1733,207 @@  static int read_old_user_ns_img(void)
 	return 0;
 }
 
+#define SET_NS_PARENT(ns, e)					\
+	do {							\
+		if (ns->parent) {				\
+			e->has_parent_id = true;		\
+			e->parent_id = ns->parent->id;		\
+		}						\
+	} while (0)
+
+static int dump_ns_hookups(int for_dump)
+{
+	struct cr_img *img;
+	struct ns_id *ns;
+	NsHookupEntry e;
+	PidnsEntry p;
+	NetnsEntry2 n;
+	int ret = 0;
+	u32 u_ns_id;
+
+	if (!for_dump)
+		return 0;
+
+	img = open_image(CR_FD_NS_HOOKUP, O_DUMP);
+	if (!img)
+		return -1;
+
+	for (ns = ns_ids; ns != NULL; ns = ns->next) {
+		if (ns->nd != &user_ns_desc &&
+		    ns->nd != &pid_ns_desc &&
+		    ns->nd != &net_ns_desc)
+			continue;
+		if (ns->type == NS_CRIU ||
+		    !(root_ns_mask & ns->nd->cflag))
+			continue;
+
+		ns_hookup_entry__init(&e);
+		e.id = ns->id;
+		e.ns_cflag = ns->nd->cflag;
+
+		u_ns_id = ns->user_ns ? ns->user_ns->id : 0;
+		if (ns->nd == &user_ns_desc) {
+			e.user_ext = ns->user.e;
+			SET_NS_PARENT(ns, e.user_ext);
+		} else if (ns->nd == &pid_ns_desc) {
+			pidns_entry__init(&p);
+			p.userns_id = u_ns_id;
+			e.pid_ext = &p;
+			SET_NS_PARENT(ns, e.pid_ext);
+		} else {
+			netns_entry2__init(&n);
+			n.userns_id = u_ns_id;
+			e.net_ext = &n;
+		}
+		ret = pb_write_one(img, &e, PB_NS_HOOKUP);
+		if (ret < 0) {
+			pr_err("Can't write ns-hookup.img\n");
+			break;
+		}
+	}
+
+	close_image(img);
+	return ret;
+}
+
+struct delayed_ns {
+	struct ns_id *ns;
+	u32 userns_id;
+	u32 parent_id;
+};
+
+int read_ns_hookups(void)
+{
+	struct ns_id *ns, *p_ns, *u_ns;
+	struct delayed_ns *d_ns = NULL;
+	struct pstree_item fake;
+	NsHookupEntry *e = NULL;
+	int ret = 0, nr_d = 0;
+	struct ns_desc *desc;
+	struct cr_img *img;
+	struct pid pid;
+	u32 u_id, p_id;
+
+	pid.ns[0].virt = -1;
+	fake.pid = &pid;
+
+	img = open_image(CR_FD_NS_HOOKUP, O_RSTR);
+	if (!img)
+		return -1;
+	if (empty_image(img))
+		goto close;
+
+	while (1) {
+		ret = pb_read_one_eof(img, &e, PB_NS_HOOKUP);
+		if (ret <= 0)
+			break;
+		ret = -1;
+		desc = &pid_ns_desc;
+		if (e->ns_cflag == CLONE_NEWUSER)
+			desc = &user_ns_desc;
+		else if (e->ns_cflag == CLONE_NEWNET)
+			desc = &net_ns_desc;
+
+		if (rst_add_ns_id(e->id, &fake, desc)) {
+			pr_err("Can't add user ns\n");
+			break;
+		}
+
+		ns = lookup_ns_by_id(e->id, desc);
+		if (!ns) {
+			pr_err("Can't find ns %d\n", e->id);
+			break;
+		}
+
+		if (e->user_ext && e->ns_cflag == CLONE_NEWUSER) {
+			ns->user.e = dup_userns_entry(e->user_ext);
+			if (!ns->user.e) {
+				pr_err("Can't dup map\n");
+				goto close;
+			}
+		} else if ((e->pid_ext && e->ns_cflag == CLONE_NEWPID) ||
+			   (e->net_ext && e->ns_cflag == CLONE_NEWNET)) {
+			u_id = e->pid_ext ? e->pid_ext->userns_id : e->net_ext->userns_id;
+			if (u_id > 0) {
+				u_ns = lookup_ns_by_id(u_id, &user_ns_desc);
+				if (!u_ns) {
+					/* User_ns hasn't read yet; set aside this ns */
+					d_ns = xrealloc(d_ns, (nr_d + 1) * sizeof(*d_ns));
+					if (!d_ns)
+						goto close;
+					d_ns[nr_d].ns = ns;
+					d_ns[nr_d].parent_id = 0;
+					d_ns[nr_d++].userns_id = u_id;
+				} else
+					ns->user_ns = u_ns;
+			}
+		} else {
+			pr_err("No ns ext\n");
+			break;
+		}
+
+		if ((e->pid_ext && e->ns_cflag == CLONE_NEWPID) ||
+		    (e->user_ext && e->ns_cflag == CLONE_NEWUSER)) {
+			if (e->pid_ext)
+				p_id = e->pid_ext->has_parent_id ? e->pid_ext->parent_id : 0;
+			else
+				p_id = e->user_ext->has_parent_id ? e->user_ext->parent_id : 0;
+			if (p_id) {
+				p_ns = lookup_ns_by_id(p_id, desc);
+				if (!p_ns) {
+					/* Parent ns may hasn't been read yet */
+					if (!nr_d || d_ns[nr_d-1].ns != ns) {
+						d_ns = xrealloc(d_ns, (nr_d + 1) * sizeof(*d_ns));
+						if (!d_ns)
+							goto close;
+						d_ns[nr_d].ns = ns;
+						d_ns[nr_d++].userns_id = 0;
+					}
+					d_ns[nr_d-1].parent_id = p_id;
+				} else {
+					ns->parent = p_ns;
+					list_add(&ns->siblings, &p_ns->children);
+				}
+			} else if (e->ns_cflag == CLONE_NEWUSER) {
+				ns->type = NS_ROOT;
+				root_user_ns = ns;
+				userns_entry = ns->user.e;
+			}
+		}
+
+		ns_hookup_entry__free_unpacked(e, NULL);
+	}
+close:
+	if (!ret) {
+		while (nr_d-- > 0) {
+			if (d_ns[nr_d].userns_id > 0) {
+				u_ns = lookup_ns_by_id(d_ns[nr_d].userns_id, &user_ns_desc);
+				if (!u_ns) {
+					pr_err("Can't find user_ns: %d\n", d_ns[nr_d].userns_id);
+					ret = -1;
+					break;
+				}
+				d_ns[nr_d].ns->user_ns = u_ns;
+			}
+			if (d_ns[nr_d].parent_id > 0) {
+				p_ns = lookup_ns_by_id(d_ns[nr_d].parent_id, d_ns[nr_d].ns->nd);
+				if (!p_ns) {
+					pr_err("Can't find parent\n");
+					ret = -1;
+					break;
+				}
+				d_ns[nr_d].ns->parent = p_ns;
+				list_add(&d_ns[nr_d].ns->siblings, &p_ns->children);
+			}
+		}
+	}
+	if (ret)
+		pr_err("Failed to read ns image\n");
+	xfree(d_ns);
+	close_image(img);
+	return ret;
+}
+
 int prepare_userns(struct pstree_item *item)
 {
 	UsernsEntry *e = userns_entry;
@@ -1775,6 +1967,10 @@  int collect_namespaces(bool for_dump)
 	if (ret < 0)
 		return ret;
 
+	ret = dump_ns_hookups(for_dump);
+	if (ret < 0)
+		return ret;
+
 	return 0;
 }