[2/2] criu: RFC: added support for remote lazy restore

Submitted by Adrian Reber on April 26, 2016, 9:38 a.m.

Details

Message ID 1461663490-5425-3-git-send-email-adrian@lisas.de
State Rejected
Series "Series without cover letter"
Headers show

Commit Message

Adrian Reber April 26, 2016, 9:38 a.m.
From: Adrian Reber <areber@redhat.com>

This is the second attempt to implement lazy restore between two hosts.
The memory pages which should be restored lazily (on demand) using
userfaultfd are not on the system where the process is restored and
therefore the required pages need to be transferred over the network.

In the first attempt the logic (the brain) of the restore process was
running on the source system and the uffd daemon on the destination
system was just forwarding the pages and page requests to/from the
userfaultfd.

This new implementation is now based on the existing page-server and its
protocols (plus additions (see previous patch)). The logic deciding which
pages are transferred now lives in the uffd daemon on the restore side.

The current implementation is only the first step and still requires
additional work, but it offers a working remote lazy-restore
implementation.

The following commands are necessary to use this implementation:

Source system:

 criu page-server -D /tmp/4/ --port 27

Destination system:

 criu lazy-pages -D /tmp/4 --lazy-address /tmp/userfault.socket \
 --page-client --address source-system-ip --port 27

 and

 criu restore -D /tmp/4 -j --lazy-pages \
 --lazy-address /tmp/userfault.socket

Signed-off-by: Adrian Reber <areber@redhat.com>
---
 criu/crtools.c            |  4 ++++
 criu/include/cr_options.h |  1 +
 criu/page-read.c          |  9 ++++++++-
 criu/page-xfer.c          | 14 +++++++++++---
 criu/uffd.c               | 38 +++++++++++++++++++++++++-------------
 5 files changed, 49 insertions(+), 17 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/crtools.c b/criu/crtools.c
index 211d883..ec9ee1f 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -322,6 +322,7 @@  int main(int argc, char *argv[], char *envp[])
 		{ "experimental",		no_argument,		0, 1078	},
 		{ "all",			no_argument,		0, 1079	},
 		{ "page-client",		no_argument,		0, 1080	},
+		{ "lazy-address",               required_argument,      0, 1081 },
 		{ },
 	};
 
@@ -627,6 +628,9 @@  int main(int argc, char *argv[], char *envp[])
 		case 1080:
 			opts.use_page_client = true;
 			break;
+		case 1081:
+			opts.lazy_address = optarg;
+			break;
 		case 'V':
 			pr_msg("Version: %s\n", CRIU_VERSION);
 			if (strcmp(CRIU_GITID, "0"))
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
index 0c8b122..97d5ebb 100644
--- a/criu/include/cr_options.h
+++ b/criu/include/cr_options.h
@@ -111,6 +111,7 @@  struct cr_options {
 	unsigned int		timeout;
 	unsigned int		empty_ns;
 	bool			lazy_pages;
+	char			*lazy_address;
 };
 
 extern struct cr_options opts;
diff --git a/criu/page-read.c b/criu/page-read.c
index 8e23dc5..c20428c 100644
--- a/criu/page-read.c
+++ b/criu/page-read.c
@@ -93,7 +93,14 @@  static void skip_pagemap_pages(struct page_read *pr, unsigned long len)
 		return;
 
 	pr_debug("\tpr%u Skip %lu bytes from page-dump\n", pr->id, len);
-	if (!pr->pe->in_parent)
+	/*
+	 * Skip the actual seek in page client mode. When lazy restoring
+	 * from a local image the seek needs to be performed. When lazy
+	 * restoring remotely the actual seeking happens on the other
+	 * system, but we still need to advance the address in
+	 * pr->cvaddr.
+	 */
+	if (!pr->pe->in_parent && !opts.use_page_client)
 		lseek(img_raw_fd(pr->pi), len, SEEK_CUR);
 	pr->cvaddr += len;
 }
diff --git a/criu/page-xfer.c b/criu/page-xfer.c
index df85976..ec850b5 100644
--- a/criu/page-xfer.c
+++ b/criu/page-xfer.c
@@ -241,7 +241,14 @@  static int page_server_get(int sk, struct page_server_iov *pi)
 		goto out;
 	}
 
-	page_read.close(&page_read);
+	/*
+	 * If running in page_client mode, the network connection should
+	 * not be closed immediately. Page client mode will make multiple
+	 * requests and only once all pages have been transferred the
+	 * connection should be finally closed.
+	 */
+	if (!opts.use_page_client)
+		page_read.close(&page_read);
 	ret = 0;
 out:
 	free(buf);
@@ -433,7 +440,7 @@  int disconnect_from_page_server(void)
 	int32_t status = -1;
 	int ret = -1;
 
-	if (!opts.use_page_server)
+	if (!opts.use_page_server && !opts.use_page_client)
 		return 0;
 
 	if (page_server_sk == -1)
@@ -594,7 +601,8 @@  static int open_page_client_xfer(struct page_xfer *xfer, int fd_type, long id)
 	struct page_server_iov pi;
 	char has_parent;
 
-	connect_to_page_server();
+	if (page_server_sk == -1)
+		connect_to_page_server();
 
 	xfer->sk = page_server_sk;
 	xfer->read_pages = read_pages_from_server;
diff --git a/criu/uffd.c b/criu/uffd.c
index 15cfa56..63e4abf 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -25,6 +25,7 @@ 
 #include "include/mem.h"
 #include "include/uffd.h"
 #include "include/util-pie.h"
+#include "include/page-xfer.h"
 #include "include/pstree.h"
 #include "include/crtools.h"
 #include "include/cr_options.h"
@@ -113,7 +114,7 @@  static int send_uffd(int sendfd, int pid)
 	int ret = -1;
 	struct sockaddr_un sun;
 
-	if (!opts.addr) {
+	if (!opts.lazy_address) {
 		pr_info("Please specify a file name for the unix domain socket\n");
 		pr_info("used to communicate between the lazy-pages server\n");
 		pr_info("and the restore process. Use the --address option like\n");
@@ -124,7 +125,7 @@  static int send_uffd(int sendfd, int pid)
 	if (sendfd < 0)
 		return -1;
 
-	if (strlen(opts.addr) >= sizeof(sun.sun_path)) {
+	if (strlen(opts.lazy_address) >= sizeof(sun.sun_path)) {
 		return -1;
 	}
 
@@ -133,10 +134,10 @@  static int send_uffd(int sendfd, int pid)
 
 	memset(&sun, 0, sizeof(sun));
 	sun.sun_family = AF_UNIX;
-	strcpy(sun.sun_path, opts.addr);
-	len = offsetof(struct sockaddr_un, sun_path) + strlen(opts.addr);
+	strcpy(sun.sun_path, opts.lazy_address);
+	len = offsetof(struct sockaddr_un, sun_path) + strlen(opts.lazy_address);
 	if (connect(fd, (struct sockaddr *) &sun, len) < 0) {
-		pr_perror("connect to %s failed", opts.addr);
+		pr_perror("connect to %s failed", opts.lazy_address);
 		goto out;
 	}
 
@@ -195,19 +196,19 @@  static int server_listen(struct sockaddr_un *saddr)
 	int fd;
 	int len;
 
-	if (strlen(opts.addr) >= sizeof(saddr->sun_path)) {
+	if (strlen(opts.lazy_address) >= sizeof(saddr->sun_path)) {
 		return -1;
 	}
 
 	if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
 		return -1;
 
-	unlink(opts.addr);
+	unlink(opts.lazy_address);
 
 	memset(saddr, 0, sizeof(struct sockaddr_un));
 	saddr->sun_family = AF_UNIX;
-	strcpy(saddr->sun_path, opts.addr);
-	len = offsetof(struct sockaddr_un, sun_path) + strlen(opts.addr);
+	strcpy(saddr->sun_path, opts.lazy_address);
+	len = offsetof(struct sockaddr_un, sun_path) + strlen(opts.lazy_address);
 
 	if (bind(fd, (struct sockaddr *) saddr, len) < 0) {
 		goto out;
@@ -292,9 +293,16 @@  static int get_page(struct lazy_pages_info *lpi, unsigned long addr, void *dest)
 	int ret;
 	unsigned char buf[PAGE_SIZE];
 	struct page_read pr;
+	int pr_flags = PR_TASK | PR_MOD;
+
+	if (opts.use_page_client)
+		pr_flags |= PR_REMOTE;
+
+	ret = open_page_read(lpi->pid, &pr, pr_flags);
 
-	ret = open_page_read(lpi->pid, &pr, PR_TASK | PR_MOD);
 	pr_debug("get_page ret %d\n", ret);
+	if (ret <= 0)
+		return ret;
 
 	ret = pr.get_pagemap(&pr, &iov);
 	pr_debug("get_pagemap ret %d\n", ret);
@@ -313,7 +321,7 @@  static int get_page(struct lazy_pages_info *lpi, unsigned long addr, void *dest)
 
 	memcpy(dest, buf, PAGE_SIZE);
 
-	if (pr.close)
+	if (pr.close && !opts.use_page_client)
 		pr.close(&pr);
 
 	return 1;
@@ -778,7 +786,7 @@  static int prepare_uffds(int epollfd)
 	int listen;
 	struct sockaddr_un saddr;
 
-	pr_debug("Waiting for incoming connections on %s\n", opts.addr);
+	pr_debug("Waiting for incoming connections on %s\n", opts.lazy_address);
 	if ((listen = server_listen(&saddr)) < 0) {
 		pr_perror("server_listen error");
 		return -1;
@@ -808,7 +816,7 @@  int cr_lazy_pages()
 	int epollfd;
 	int ret;
 
-	if (!opts.addr) {
+	if (!opts.lazy_address) {
 		pr_info("Please specify a file name for the unix domain socket\n");
 		pr_info("used to communicate between the lazy-pages server\n");
 		pr_info("and the restore process. Use the --address option like\n");
@@ -831,6 +839,10 @@  int cr_lazy_pages()
 	ret = handle_requests(epollfd, events);
 	lpi_hash_fini();
 
+	/* Clean shutdown of the remote server (if necessary). */
+	if (opts.use_page_client)
+		disconnect_from_page_server();
+
 	return ret;
 }