[v5,8/8] criu: lazy-pages: enable remoting of lazy pages

Submitted by Mike Rapoport on June 29, 2016, 5:55 a.m.

Details

Message ID 1467179713-14400-9-git-send-email-rppt@linux.vnet.ibm.com
State Rejected
Series "criu: make pagemap friendlier to random access"
Headers show

Commit Message

Mike Rapoport June 29, 2016, 5:55 a.m.
The remote lazy pages variant can be run as follows:

src# criu dump -t <pid> --lazy-pages --port 9876 -D /tmp/1 &
src# while ! sudo fuser 9876/tcp ; do sleep 1; done
src# scp -r /tmp/1/ dst:/tmp/

dst# criu lazy-pages --page-server --address src --port 9876 -D /tmp/1 &
dst# criu restore --lazy-pages -D /tmp/1

In a nutshell, this implementation of remote lazy pages does the following:

- dump collects the process memory into the pipes, transfers non-lazy pages
  to the images or to the page-server on the restore side. The lazy pages
  are kept in the pipes for later transfer.
- when the dump creates the page_pipe_bufs, it marks the buffers containing
potentially lazy pages with PPB_LAZY
- at the dump_finish stage, the dump side starts a TCP server that will
handle page requests from the restore side
- the checkpoint directory is transferred to the restore side
- on the restore side the lazy-pages daemon is started; it creates a UNIX
socket to receive uffds from the restore and a TCP socket to forward page
requests to the dump side
- restore creates memory mappings and fills the VMAs that cannot be handled
by uffd with the contents of the pages*img.
- restore registers lazy VMAs with uffd and sends the userfault file
descriptors to the lazy-pages daemon
- when a #PF occurs, the lazy-pages daemon sends a PS_IOV_GET command to the
dump side; the command contains the PID, the faulting address and the number
of pages (always 1 at the moment)
- the dump side extracts the requested pages from the pipe and splices them
into the TCP socket.
- the lazy-pages daemon copies the received pages into the restored process
address space

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
---
 criu/cr-dump.c     | 36 +++++++++++++++++++++++++++++++++---
 criu/include/mem.h |  3 ++-
 criu/mem.c         | 27 +++++++++++++++++----------
 criu/uffd.c        |  9 ++++++++-
 4 files changed, 60 insertions(+), 15 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index ddd64cd..22fcce2 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1142,7 +1142,7 @@  static int pre_dump_one_task(struct pstree_item *item)
 
 	parasite_ctl->pid.virt = item->pid.virt = misc.pid;
 
-	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, true);
+	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, true, false);
 	if (ret)
 		goto err_cure;
 
@@ -1298,7 +1298,8 @@  static int dump_one_task(struct pstree_item *item)
 		}
 	}
 
-	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, false);
+	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, opts.lazy_pages,
+					 opts.lazy_pages);
 	if (ret)
 		goto err_cure;
 
@@ -1338,7 +1339,10 @@  static int dump_one_task(struct pstree_item *item)
 		goto err;
 	}
 
-	ret = parasite_cure_seized(parasite_ctl);
+	if (opts.lazy_pages)
+		ret = parasite_cure_remote(parasite_ctl);
+	else
+		ret = parasite_cure_seized(parasite_ctl);
 	if (ret) {
 		pr_err("Can't cure (pid: %d) from parasite\n", pid);
 		goto err;
@@ -1530,6 +1534,28 @@  err:
 	return cr_pre_dump_finish(ret);
 }
 
+static int cr_lazy_mem_dump(void)
+{
+	struct pstree_item *item;
+	int ret = 0;
+
+	pr_info("Starting lazy pages server\n");
+	ret = cr_page_server(false, -1);
+
+	for_each_pstree_item(item) {
+		struct parasite_ctl *ctl = dmpi(item)->parasite_ctl;
+		destroy_page_pipe(ctl->mem_pp);
+		parasite_cure_local(ctl);
+	}
+
+	if (ret)
+		pr_err("Lazy pages transfer FAILED.\n");
+	else
+		pr_info("Lazy pages transfer finished successfully\n");
+
+	return ret;
+}
+
 static int cr_dump_finish(int ret)
 {
 	int post_dump_ret = 0;
@@ -1588,6 +1614,10 @@  static int cr_dump_finish(int ret)
 		network_unlock();
 		delete_link_remaps();
 	}
+
+	if (opts.lazy_pages)
+		ret = cr_lazy_mem_dump();
+
 	pstree_switch_state(root_item,
 			    (ret || post_dump_ret) ?
 			    TASK_ALIVE : opts.final_state);
diff --git a/criu/include/mem.h b/criu/include/mem.h
index a9750db..a4696fc 100644
--- a/criu/include/mem.h
+++ b/criu/include/mem.h
@@ -11,7 +11,8 @@  extern int do_task_reset_dirty_track(int pid);
 extern unsigned int dump_pages_args_size(struct vm_area_list *vmas);
 extern int parasite_dump_pages_seized(struct parasite_ctl *ctl,
 				      struct vm_area_list *vma_area_list,
-				      bool delayed_dump);
+				      bool delayed_dump,
+				      bool lazy);
 
 #define PME_PRESENT		(1ULL << 63)
 #define PME_SWAP		(1ULL << 62)
diff --git a/criu/mem.c b/criu/mem.c
index 897c48b..2593a82 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -234,7 +234,8 @@  static struct parasite_dump_pages_args *prep_dump_pages_args(struct parasite_ctl
 }
 
 static int dump_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
-			struct parasite_dump_pages_args *args, struct page_xfer *xfer)
+		      struct parasite_dump_pages_args *args,
+		      struct page_xfer *xfer, bool lazy)
 {
 	struct page_pipe_buf *ppb;
 	int ret = 0;
@@ -268,7 +269,7 @@  static int dump_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
 	 */
 	if (xfer) {
 		timing_start(TIME_MEMWRITE);
-		ret = page_xfer_dump_pages(xfer, pp, 0, true);
+		ret = page_xfer_dump_pages(xfer, pp, 0, !lazy);
 		timing_stop(TIME_MEMWRITE);
 	}
 
@@ -278,13 +279,14 @@  static int dump_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
 static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
 		struct parasite_dump_pages_args *args,
 		struct vm_area_list *vma_area_list,
-		bool delayed_dump)
+		bool delayed_dump, bool lazy)
 {
 	pmc_t pmc = PMC_INIT;
 	struct page_pipe *pp;
 	struct vma_area *vma_area;
 	struct page_xfer xfer = { .parent = NULL };
 	int ret = -1;
+	bool should_xfer = (!delayed_dump || lazy);
 
 	pr_info("\n");
 	pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, ctl->pid.real);
@@ -306,11 +308,12 @@  static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
 	ret = -1;
 
 	ctl->mem_pp = pp = create_page_pipe(vma_area_list->priv_size,
-					    pargs_iovs(args), !delayed_dump);
+					    lazy ? NULL : pargs_iovs(args),
+					    !delayed_dump);
 	if (!pp)
 		goto out;
 
-	if (!delayed_dump) {
+	if (should_xfer) {
 		ret = open_page_xfer(&xfer, CR_FD_PAGEMAP, ctl->pid.virt);
 		if (ret < 0)
 			goto out_pp;
@@ -348,7 +351,7 @@  again:
 		if (ret == -EAGAIN) {
 			BUG_ON(delayed_dump);
 
-			ret = dump_pages(pp, ctl, args, &xfer);
+			ret = dump_pages(pp, ctl, args, &xfer, false);
 			if (ret)
 				goto out_xfer;
 			page_pipe_reinit(pp);
@@ -358,7 +361,10 @@  again:
 			goto out_xfer;
 	}
 
-	ret = dump_pages(pp, ctl, args, delayed_dump ? NULL : &xfer);
+	if (lazy)
+		memcpy(pargs_iovs(args), pp->pages.iovs,
+		       sizeof(struct iovec) * pp->pages.nr_iovs);
+	ret = dump_pages(pp, ctl, args, should_xfer ? &xfer : NULL, lazy);
 	if (ret)
 		goto out_xfer;
 
@@ -370,7 +376,7 @@  again:
 
 	ret = task_reset_dirty_track(ctl->pid.real);
 out_xfer:
-	if (!delayed_dump)
+	if (should_xfer)
 		xfer.close(&xfer);
 out_pp:
 	if (ret || !delayed_dump)
@@ -382,7 +388,8 @@  out:
 }
 
 int parasite_dump_pages_seized(struct parasite_ctl *ctl,
-		struct vm_area_list *vma_area_list, bool delayed_dump)
+		struct vm_area_list *vma_area_list, bool delayed_dump,
+		bool lazy)
 {
 	int ret;
 	struct parasite_dump_pages_args *pargs;
@@ -410,7 +417,7 @@  int parasite_dump_pages_seized(struct parasite_ctl *ctl,
 	}
 
 	ret = __parasite_dump_pages_seized(ctl, pargs, vma_area_list,
-					   delayed_dump);
+					   delayed_dump, lazy);
 
 	if (ret) {
 		pr_err("Can't dump page with parasite\n");
diff --git a/criu/uffd.c b/criu/uffd.c
index 1e0a57a..9dee9d9 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -34,6 +34,7 @@ 
 #include "xmalloc.h"
 #include "syscall-codes.h"
 #include "restorer.h"
+#include "page-xfer.h"
 
 #undef  LOG_PREFIX
 #define LOG_PREFIX "lazy-pages: "
@@ -364,7 +365,10 @@  static int uffd_copy_page(struct lazy_pages_info *lpi, __u64 address,
 	struct uffdio_copy uffdio_copy;
 	int rc;
 
-	rc = get_page(lpi, address, dest);
+	if (opts.use_page_server)
+		rc = get_remote_pages(lpi->pid, address, 1, dest);
+	else
+		rc = get_page(lpi, address, dest);
 	if (rc <= 0)
 		return rc;
 
@@ -855,6 +859,9 @@  int cr_lazy_pages()
 	if (prepare_uffds(epollfd))
 		return -1;
 
+	if (connect_to_page_server())
+		return -1;
+
 	ret = handle_requests(epollfd, events);
 	lpi_hash_fini();