[RFC,1/8] criu: slightly refactor memory dump to support delaying it

Submitted by Mike Rapoport on May 21, 2016, 10:49 a.m.

Details

Message ID 1463827782-6081-2-git-send-email-rppt@linux.vnet.ibm.com
State Rejected
Series "lazy-pages: add support for remote pages"
Headers show

Commit Message

Mike Rapoport May 21, 2016, 10:49 a.m.
* keep a pointer to parasite_ctl in pstree_item to simplify cr_pre_dump_finish
and to make the pre_dump and dump interfaces the same
* pass a boolean rather than the pp_ret pointer to parasite_dump_pages_seized
to distinguish delayed and immediate dumps more explicitly
* allocate iovs for delayed dumps, because when the memory dump is delayed the
shared memory between dump and parasite cannot be used to keep the iovs

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
---
 criu/cr-dump.c                  | 23 +++++++++++-----------
 criu/include/mem.h              |  2 +-
 criu/include/parasite-syscall.h |  1 -
 criu/include/pstree.h           |  1 +
 criu/mem.c                      | 42 ++++++++++++++++++++++++-----------------
 criu/parasite-syscall.c         |  2 ++
 6 files changed, 40 insertions(+), 31 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 4c0ae01..5d9520f 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1086,7 +1086,7 @@  err:
 	return ret;
 }
 
-static int pre_dump_one_task(struct pstree_item *item, struct list_head *ctls)
+static int pre_dump_one_task(struct pstree_item *item)
 {
 	pid_t pid = item->pid.real;
 	struct vm_area_list vmas;
@@ -1142,13 +1142,12 @@  static int pre_dump_one_task(struct pstree_item *item, struct list_head *ctls)
 
 	parasite_ctl->pid.virt = item->pid.virt = misc.pid;
 
-	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, &parasite_ctl->mem_pp);
+	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, true);
 	if (ret)
 		goto err_cure;
 
 	if (parasite_cure_remote(parasite_ctl))
 		pr_err("Can't cure (pid: %d) from parasite\n", pid);
-	list_add_tail(&parasite_ctl->pre_list, ctls);
 err_free:
 	free_mappings(&vmas);
 err:
@@ -1299,7 +1298,7 @@  static int dump_one_task(struct pstree_item *item)
 		}
 	}
 
-	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, NULL);
+	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, false);
 	if (ret)
 		goto err_cure;
 
@@ -1408,12 +1407,11 @@  static int setup_alarm_handler()
 	return 0;
 }
 
-static int cr_pre_dump_finish(struct list_head *ctls, int ret)
+static int cr_pre_dump_finish(int ret)
 {
-	struct parasite_ctl *ctl, *n;
+	struct pstree_item *item;
 
 	pstree_switch_state(root_item, TASK_ALIVE);
-	free_pstree(root_item);
 
 	timing_stop(TIME_FROZEN);
 
@@ -1421,7 +1419,8 @@  static int cr_pre_dump_finish(struct list_head *ctls, int ret)
 		goto err;
 
 	pr_info("Pre-dumping tasks' memory\n");
-	list_for_each_entry_safe(ctl, n, ctls, pre_list) {
+	for_each_pstree_item(item) {
+		struct parasite_ctl *ctl = item->parasite_ctl;
 		struct page_xfer xfer;
 
 		pr_info("\tPre-dumping %d\n", ctl->pid.virt);
@@ -1440,10 +1439,11 @@  static int cr_pre_dump_finish(struct list_head *ctls, int ret)
 		timing_stop(TIME_MEMWRITE);
 
 		destroy_page_pipe(ctl->mem_pp);
-		list_del(&ctl->pre_list);
 		parasite_cure_local(ctl);
 	}
 
+	free_pstree(root_item);
+
 	if (irmap_predump_run()) {
 		ret = -1;
 		goto err;
@@ -1469,7 +1469,6 @@  int cr_pre_dump_tasks(pid_t pid)
 {
 	struct pstree_item *item;
 	int ret = -1;
-	LIST_HEAD(ctls);
 
 	if (!opts.track_mem) {
 		pr_info("Enforcing memory tracking for pre-dump.\n");
@@ -1515,7 +1514,7 @@  int cr_pre_dump_tasks(pid_t pid)
 		goto err;
 
 	for_each_pstree_item(item)
-		if (pre_dump_one_task(item, &ctls))
+		if (pre_dump_one_task(item))
 			goto err;
 
 	if (irmap_predump_prep())
@@ -1523,7 +1522,7 @@  int cr_pre_dump_tasks(pid_t pid)
 
 	ret = 0;
 err:
-	return cr_pre_dump_finish(&ctls, ret);
+	return cr_pre_dump_finish(ret);
 }
 
 static int cr_dump_finish(int ret)
diff --git a/criu/include/mem.h b/criu/include/mem.h
index 5269cad..afe6596 100644
--- a/criu/include/mem.h
+++ b/criu/include/mem.h
@@ -11,7 +11,7 @@  extern int do_task_reset_dirty_track(int pid);
 extern unsigned int dump_pages_args_size(struct vm_area_list *vmas);
 extern int parasite_dump_pages_seized(struct parasite_ctl *ctl,
 				      struct vm_area_list *vma_area_list,
-				      struct page_pipe **pp);
+				      bool delayed_dump);
 
 #define PME_PRESENT		(1ULL << 63)
 #define PME_SWAP		(1ULL << 62)
diff --git a/criu/include/parasite-syscall.h b/criu/include/parasite-syscall.h
index 15a13a6..6c78e23 100644
--- a/criu/include/parasite-syscall.h
+++ b/criu/include/parasite-syscall.h
@@ -53,7 +53,6 @@  struct parasite_ctl {
 	unsigned long		args_size;
 	int			tsock;					/* transport socket for transfering fds */
 
-	struct list_head	pre_list;
 	struct page_pipe	*mem_pp;
 };
 
diff --git a/criu/include/pstree.h b/criu/include/pstree.h
index 5998295..a2a2b28 100644
--- a/criu/include/pstree.h
+++ b/criu/include/pstree.h
@@ -25,6 +25,7 @@  struct pstree_item {
 	struct pid		*threads;	/* array of threads */
 	CoreEntry		**core;
 	TaskKobjIdsEntry	*ids;
+	struct parasite_ctl	*parasite_ctl;
 };
 
 struct rst_info;
diff --git a/criu/mem.c b/criu/mem.c
index 4d11a8d..becbd6d 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -176,7 +176,7 @@  static int generate_iovs(struct vma_area *vma, struct page_pipe *pp, u64 *map, u
 }
 
 static struct parasite_dump_pages_args *prep_dump_pages_args(struct parasite_ctl *ctl,
-		struct vm_area_list *vma_area_list, struct page_pipe **pp_ret)
+		struct vm_area_list *vma_area_list, bool delayed_dump)
 {
 	struct parasite_dump_pages_args *args;
 	struct parasite_vma_entry *p_vma;
@@ -194,7 +194,7 @@  static struct parasite_dump_pages_args *prep_dump_pages_args(struct parasite_ctl
 		 * Kernel write to aio ring is not soft-dirty tracked,
 		 * so we ignore them at pre-dump.
 		 */
-		if (vma_entry_is(vma->e, VMA_AREA_AIORING) && pp_ret)
+		if (vma_entry_is(vma->e, VMA_AREA_AIORING) && delayed_dump)
 			continue;
 		if (vma->e->prot & PROT_READ)
 			continue;
@@ -255,11 +255,12 @@  static int dump_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
 static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
 		struct parasite_dump_pages_args *args,
 		struct vm_area_list *vma_area_list,
-		struct page_pipe **pp_ret)
+		bool delayed_dump)
 {
 	pmc_t pmc = PMC_INIT;
 	struct page_pipe *pp;
 	struct vma_area *vma_area;
+	struct iovec *iovs;
 	struct page_xfer xfer = { .parent = NULL };
 	int ret = -1;
 
@@ -281,12 +282,19 @@  static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
 		return -1;
 
 	ret = -1;
-	pp = create_page_pipe(vma_area_list->priv_size,
-			      pargs_iovs(args), pp_ret == NULL);
+
+	if (delayed_dump)
+		/* FIXME: take care of alloc error */
+		iovs = xmalloc(sizeof(*iovs) * vma_area_list->priv_size);
+	else
+		iovs = pargs_iovs(args);
+
+	ctl->mem_pp = pp = create_page_pipe(vma_area_list->priv_size,
+					    iovs, !delayed_dump);
 	if (!pp)
 		goto out;
 
-	if (pp_ret == NULL) {
+	if (!delayed_dump) {
 		ret = open_page_xfer(&xfer, CR_FD_PAGEMAP, ctl->pid.virt);
 		if (ret < 0)
 			goto out_pp;
@@ -311,7 +319,7 @@  static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
 		if (!vma_area_is_private(vma_area, kdat.task_size))
 			continue;
 		if (vma_entry_is(vma_area->e, VMA_AREA_AIORING)) {
-			if (pp_ret)
+			if (delayed_dump)
 				continue;
 			has_parent = false;
 		}
@@ -322,7 +330,7 @@  static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
 again:
 		ret = generate_iovs(vma_area, pp, map, &off, has_parent);
 		if (ret == -EAGAIN) {
-			BUG_ON(pp_ret);
+			BUG_ON(delayed_dump);
 
 			ret = dump_pages(pp, ctl, args, &xfer);
 			if (ret)
@@ -334,25 +342,24 @@  again:
 			goto out_xfer;
 	}
 
-	ret = dump_pages(pp, ctl, args, pp_ret ? NULL : &xfer);
+	if (delayed_dump)
+		memcpy(pargs_iovs(args), iovs, sizeof(*iovs) * vma_area_list->priv_size);
+	ret = dump_pages(pp, ctl, args, delayed_dump ? NULL : &xfer);
 	if (ret)
 		goto out_xfer;
 
 	timing_stop(TIME_MEMDUMP);
 
-	if (pp_ret)
-		*pp_ret = pp;
-
 	/*
 	 * Step 4 -- clean up
 	 */
 
 	ret = task_reset_dirty_track(ctl->pid.real);
 out_xfer:
-	if (pp_ret == NULL)
+	if (!delayed_dump)
 		xfer.close(&xfer);
 out_pp:
-	if (ret || !pp_ret)
+	if (ret || !delayed_dump)
 		destroy_page_pipe(pp);
 out:
 	pmc_fini(&pmc);
@@ -361,12 +368,12 @@  out:
 }
 
 int parasite_dump_pages_seized(struct parasite_ctl *ctl,
-		struct vm_area_list *vma_area_list, struct page_pipe **pp)
+		struct vm_area_list *vma_area_list, bool delayed_dump)
 {
 	int ret;
 	struct parasite_dump_pages_args *pargs;
 
-	pargs = prep_dump_pages_args(ctl, vma_area_list, pp);
+	pargs = prep_dump_pages_args(ctl, vma_area_list, delayed_dump);
 
 	/*
 	 * Add PROT_READ protection for all VMAs we're about to
@@ -388,7 +395,8 @@  int parasite_dump_pages_seized(struct parasite_ctl *ctl,
 		return -1;
 	}
 
-	ret = __parasite_dump_pages_seized(ctl, pargs, vma_area_list, pp);
+	ret = __parasite_dump_pages_seized(ctl, pargs, vma_area_list,
+					   delayed_dump);
 
 	if (ret) {
 		pr_err("Can't dump page with parasite\n");
diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c
index f9e0bac..e665da4 100644
--- a/criu/parasite-syscall.c
+++ b/criu/parasite-syscall.c
@@ -1417,6 +1417,8 @@  struct parasite_ctl *parasite_infect_seized(pid_t pid, struct pstree_item *item,
 	if (parasite_start_daemon(ctl, item))
 		goto err_restore;
 
+	item->parasite_ctl = ctl;
+
 	return ctl;
 
 err_restore: