[1/9] Adding --pre-dump-mode option

Submitted by Abhishek Dubey on Sept. 22, 2019, 4:24 a.m.

Details

Message ID 1569126298-23300-2-git-send-email-dubeyabhishek777@gmail.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Abhishek Dubey Sept. 22, 2019, 4:24 a.m.
Two modes of pre-dump algorithm:
    1) splicing memory by parasite
        --pre-dump-mode=splice (default)
    2) using process_vm_readv syscall
        --pre-dump-mode=read

Signed-off-by: Abhishek Dubey <dubeyabhishek777@gmail.com>
---
 Documentation/criu.txt    |  6 ++++++
 criu/config.c             | 10 ++++++++++
 criu/cr-dump.c            |  6 +++++-
 criu/cr-service.c         | 13 +++++++++++++
 criu/crtools.c            |  2 ++
 criu/include/cr_options.h |  7 +++++++
 criu/mem.c                | 13 ++++++++++++-
 images/rpc.proto          |  6 ++++++
 lib/c/criu.c              | 15 +++++++++++++++
 lib/c/criu.h              |  7 +++++++
 test/zdtm.py              |  9 ++++++++-
 11 files changed, 91 insertions(+), 3 deletions(-)

Patch hide | download patch | download mbox

diff --git a/Documentation/criu.txt b/Documentation/criu.txt
index 28913a7..2729bc9 100644
--- a/Documentation/criu.txt
+++ b/Documentation/criu.txt
@@ -156,6 +156,12 @@  In addition, *page-server* options may be specified.
     Turn on memory changes tracker in the kernel. If the option is
     not passed the memory tracker get turned on implicitly.
 
+*--pre-dump-mode*='mode'::
+    There are two 'mode' values for the pre-dump algorithm. The 'splice' mode
+    is parasite based, whereas 'read' mode is based on process_vm_readv
+    syscall. The 'read' mode incurs reduced frozen time and reduced
+    memory pressure as compared to 'splice' mode. Default is 'splice' mode.
+
 *dump*
 ~~~~~~
 Performs a checkpoint procedure.
diff --git a/criu/config.c b/criu/config.c
index 1a6d014..b252095 100644
--- a/criu/config.c
+++ b/criu/config.c
@@ -276,6 +276,7 @@  void init_opts(void)
 	opts.empty_ns = 0;
 	opts.status_fd = -1;
 	opts.log_level = DEFAULT_LOGLEVEL;
+	opts.pre_dump_mode = PRE_DUMP_SPLICE;
 }
 
 bool deprecated_ok(char *what)
@@ -518,6 +519,7 @@  int parse_options(int argc, char **argv, bool *usage_error,
 		BOOL_OPT("tls", &opts.tls),
 		{"tls-no-cn-verify",		no_argument,		&opts.tls_no_cn_verify, true},
 		{ "cgroup-yard",		required_argument,	0, 1096 },
+		{ "pre-dump-mode",		required_argument,	0, 1097},
 		{ },
 	};
 
@@ -819,6 +821,14 @@  int parse_options(int argc, char **argv, bool *usage_error,
 		case 1096:
 			SET_CHAR_OPTS(cgroup_yard, optarg);
 			break;
+		case 1097:
+			if (!strcmp("read", optarg)) {
+				opts.pre_dump_mode = PRE_DUMP_READ;
+			} else if (strcmp("splice", optarg)) {
+				pr_err("Unable to parse value of --pre-dump-mode\n");
+				return 1;
+			}
+			break;
 		case 'V':
 			pr_msg("Version: %s\n", CRIU_VERSION);
 			if (strcmp(CRIU_GITID, "0"))
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 119c82d..d4adfad 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1511,7 +1511,11 @@  static int cr_pre_dump_finish(int status)
 			goto err;
 
 		mem_pp = dmpi(item)->mem_pp;
-		ret = page_xfer_dump_pages(&xfer, mem_pp);
+
+		if (opts.pre_dump_mode == PRE_DUMP_READ)
+			ret = 0;  /* Replace with call to optimized pre-dump */
+		else
+			ret = page_xfer_dump_pages(&xfer, mem_pp);
 
 		xfer.close(&xfer);
 
diff --git a/criu/cr-service.c b/criu/cr-service.c
index 95ba2e5..392e9ac 100644
--- a/criu/cr-service.c
+++ b/criu/cr-service.c
@@ -473,6 +473,19 @@  static int setup_opts_from_req(int sk, CriuOpts *req)
 		opts.lazy_pages = req->lazy_pages;
 	}
 
+	if (req->has_pre_dump_mode) {
+		switch (req->pre_dump_mode) {
+			case CRIU_PRE_DUMP_MODE__SPLICE:
+				opts.pre_dump_mode = PRE_DUMP_SPLICE;
+				break;
+			case CRIU_PRE_DUMP_MODE__READ:
+				opts.pre_dump_mode = PRE_DUMP_READ;
+				break;
+			default:
+				goto err;
+		}
+	}
+
 	if (req->ps) {
 		opts.port = (short)req->ps->port;
 
diff --git a/criu/crtools.c b/criu/crtools.c
index c0de1c0..d908e7f 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -446,6 +446,8 @@  usage:
 "                        pages images of previous dump\n"
 "                        when used on restore, as soon as page is restored, it\n"
 "                        will be punched from the image\n"
+"  --pre-dump-mode       splice - parasite based pre-dumping (default)\n"
+"                        read   - process_vm_readv syscall based pre-dumping\n"
 "\n"
 "Page/Service server options:\n"
 "  --address ADDR        address of server or service\n"
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
index 5cbc56f..53bd5ed 100644
--- a/criu/include/cr_options.h
+++ b/criu/include/cr_options.h
@@ -39,6 +39,12 @@  struct cg_root_opt {
 };
 
 /*
+ * Pre-dump variants
+ */
+#define PRE_DUMP_SPLICE		1		/* Pre-dump using parasite */
+#define PRE_DUMP_READ			2		/* Pre-dump using process_vm_readv syscall */
+
+/*
  * Cgroup management options.
  */
 #define CG_MODE_IGNORE		(0u << 0)	/* Zero is important here */
@@ -81,6 +87,7 @@  struct cr_options {
 	int			evasive_devices;
 	int			link_remap_ok;
 	int			log_file_per_pid;
+	int			pre_dump_mode;
 	bool			swrk_restore;
 	char			*output;
 	char			*root;
diff --git a/criu/mem.c b/criu/mem.c
index de66a62..911b9d2 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -482,7 +482,18 @@  static int __parasite_dump_pages_seized(struct pstree_item *item,
 	if (mdc->lazy)
 		memcpy(pargs_iovs(args), pp->iovs,
 		       sizeof(struct iovec) * pp->nr_iovs);
-	ret = drain_pages(pp, ctl, args);
+
+	/*
+	 * Faking drain_pages for pre-dump here. Actual drain_pages for pre-dump
+	 * will happen after task unfreezing in cr_pre_dump_finish(). This is
+	 * actual optimization which reduces time for which process was frozen
+	 * during pre-dump.
+	 */
+	if (mdc->pre_dump && opts.pre_dump_mode == PRE_DUMP_READ)
+		ret = 0;
+	else
+		ret = drain_pages(pp, ctl, args);
+
 	if (!ret && !mdc->pre_dump)
 		ret = xfer_pages(pp, &xfer);
 	if (ret)
diff --git a/images/rpc.proto b/images/rpc.proto
index c402259..fc2f1bc 100644
--- a/images/rpc.proto
+++ b/images/rpc.proto
@@ -47,6 +47,11 @@  enum criu_cg_mode {
 	DEFAULT = 6;
 };
 
+enum criu_pre_dump_mode {
+	SPLICE = 	1;
+	READ =		2;
+};
+
 message criu_opts {
 	required int32			images_dir_fd	= 1;
 	optional int32			pid		= 2; /* if not set on dump, will dump requesting process */
@@ -121,6 +126,7 @@  message criu_opts {
 	optional bool			tls			= 58;
 	optional bool			tls_no_cn_verify	= 59;
 	optional string			cgroup_yard		= 60;
+	optional criu_pre_dump_mode	pre_dump_mode		= 61 [default = SPLICE];
 /*	optional bool			check_mounts		= 128;	*/
 }
 
diff --git a/lib/c/criu.c b/lib/c/criu.c
index 14ddff2..fffb9fd 100644
--- a/lib/c/criu.c
+++ b/lib/c/criu.c
@@ -336,6 +336,21 @@  int criu_set_parent_images(const char *path)
 	return criu_local_set_parent_images(global_opts, path);
 }
 
+int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode)
+{
+	opts->rpc->has_pre_dump_mode = true;
+	if (mode == CRIU_PRE_DUMP_SPLICE || mode == CRIU_PRE_DUMP_READ) {
+		opts->rpc->pre_dump_mode = mode;
+		return 0;
+	}
+	return -1;
+}
+
+int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode)
+{
+	return criu_local_set_pre_dump_mode(global_opts, mode);
+}
+
 void criu_local_set_track_mem(criu_opts *opts, bool track_mem)
 {
 	opts->rpc->has_track_mem = true;
diff --git a/lib/c/criu.h b/lib/c/criu.h
index cb37c52..22db0fd 100644
--- a/lib/c/criu.h
+++ b/lib/c/criu.h
@@ -43,6 +43,11 @@  enum criu_cg_mode {
 	CRIU_CG_MODE_DEFAULT,
 };
 
+enum criu_pre_dump_mode {
+	CRIU_PRE_DUMP_SPLICE =	1,
+	CRIU_PRE_DUMP_READ =	2
+};
+
 int criu_set_service_address(const char *path);
 void criu_set_service_fd(int fd);
 int criu_set_service_binary(const char *path);
@@ -95,6 +100,7 @@  int criu_add_irmap_path(const char *path);
 int criu_add_inherit_fd(int fd, const char *key);
 int criu_add_external(const char *key);
 int criu_set_page_server_address_port(const char *address, int port);
+int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode);
 
 /*
  * The criu_notify_arg_t na argument is an opaque
@@ -211,6 +217,7 @@  int criu_local_add_cg_yard(criu_opts *opts, const char *path);
 int criu_local_add_inherit_fd(criu_opts *opts, int fd, const char *key);
 int criu_local_add_external(criu_opts *opts, const char *key);
 int criu_local_set_page_server_address_port(criu_opts *opts, const char *address, int port);
+int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode);
 
 void criu_local_set_notify_cb(criu_opts *opts, int (*cb)(char *action, criu_notify_arg_t na));
 
diff --git a/test/zdtm.py b/test/zdtm.py
index 98d113f..7fdb8a3 100755
--- a/test/zdtm.py
+++ b/test/zdtm.py
@@ -1020,6 +1020,7 @@  class criu:
         self.__tls = self.__tls_options() if opts['tls'] else []
         self.__criu_bin = opts['criu_bin']
         self.__crit_bin = opts['crit_bin']
+        self.__pre_dump_mode = opts['pre_dump_mode']
 
     def fini(self):
         if self.__lazy_migrate:
@@ -1276,6 +1277,8 @@  class criu:
             a_opts += ['--leave-stopped']
         if self.__empty_ns:
             a_opts += ['--empty-ns', 'net']
+        if self.__pre_dump_mode:
+            a_opts += ["--pre-dump-mode", "%s" % self.__pre_dump_mode]
 
         nowait = False
         if self.__lazy_migrate and action == "dump":
@@ -1865,7 +1868,7 @@  class Launcher:
               'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs',
               'freezecg', 'user', 'dry_run', 'noauto_dedup',
               'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'remote',
-              'tls', 'criu_bin', 'crit_bin')
+              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode')
         arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd}))
 
         if self.__use_log:
@@ -2512,6 +2515,10 @@  rp.add_argument("--criu-bin",
 rp.add_argument("--crit-bin",
                 help="Path to crit binary",
                 default='../crit/crit')
+rp.add_argument("--pre-dump-mode",
+                help="Use splice or read mode of pre-dumping",
+                choices=['splice', 'read'],
+                default='splice')
 
 lp = sp.add_parser("list", help="List tests")
 lp.set_defaults(action=list_tests)

Comments

Andrei Vagin Sept. 25, 2019, 4:15 p.m.
On Sun, Sep 22, 2019 at 09:54:50AM +0530, Abhishek Dubey wrote:
> Two modes of pre-dump algorithm:
>     1) splicing memory by parasite
>         --pre-dump-mode=splice (default)
>     2) using process_vm_readv syscall
>         --pre-dump-mode=read
> 
> Signed-off-by: Abhishek Dubey <dubeyabhishek777@gmail.com>
> ---
>  Documentation/criu.txt    |  6 ++++++
>  criu/config.c             | 10 ++++++++++
>  criu/cr-dump.c            |  6 +++++-
>  criu/cr-service.c         | 13 +++++++++++++
>  criu/crtools.c            |  2 ++
>  criu/include/cr_options.h |  7 +++++++
>  criu/mem.c                | 13 ++++++++++++-
>  images/rpc.proto          |  6 ++++++
>  lib/c/criu.c              | 15 +++++++++++++++
>  lib/c/criu.h              |  7 +++++++
>  test/zdtm.py              |  9 ++++++++-
>  11 files changed, 91 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/criu.txt b/Documentation/criu.txt
> index 28913a7..2729bc9 100644
> --- a/Documentation/criu.txt
> +++ b/Documentation/criu.txt
> @@ -156,6 +156,12 @@ In addition, *page-server* options may be specified.
>      Turn on memory changes tracker in the kernel. If the option is
>      not passed the memory tracker get turned on implicitly.
>  
> +*--pre-dump-mode*='mode'::
> +    There are two 'mode' to operate pre-dump algorithm. The 'splice' mode
> +    is parasite based, whereas 'read' mode is based on process_vm_readv
> +    syscall. The 'read' mode incurs reduced frozen time and reduced
> +    memory pressure as compared to 'splice' mode. Default is 'splice' mode.
> +
>  *dump*
>  ~~~~~~
>  Performs a checkpoint procedure.
> diff --git a/criu/config.c b/criu/config.c
> index 1a6d014..b252095 100644
> --- a/criu/config.c
> +++ b/criu/config.c
> @@ -276,6 +276,7 @@ void init_opts(void)
>  	opts.empty_ns = 0;
>  	opts.status_fd = -1;
>  	opts.log_level = DEFAULT_LOGLEVEL;
> +	opts.pre_dump_mode = PRE_DUMP_SPLICE;
>  }
>  
>  bool deprecated_ok(char *what)
> @@ -518,6 +519,7 @@ int parse_options(int argc, char **argv, bool *usage_error,
>  		BOOL_OPT("tls", &opts.tls),
>  		{"tls-no-cn-verify",		no_argument,		&opts.tls_no_cn_verify, true},
>  		{ "cgroup-yard",		required_argument,	0, 1096 },
> +		{ "pre-dump-mode",		required_argument,	0, 1097},
>  		{ },
>  	};
>  
> @@ -819,6 +821,14 @@ int parse_options(int argc, char **argv, bool *usage_error,
>  		case 1096:
>  			SET_CHAR_OPTS(cgroup_yard, optarg);
>  			break;
> +		case 1097:
> +			if (!strcmp("read", optarg)) {
> +				opts.pre_dump_mode = PRE_DUMP_READ;
> +			} else if (strcmp("splice", optarg)) {
> +				pr_err("Unable to parse value of --pre-dump-mode\n");
> +				return 1;
> +			}
> +			break;
>  		case 'V':
>  			pr_msg("Version: %s\n", CRIU_VERSION);
>  			if (strcmp(CRIU_GITID, "0"))
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 119c82d..d4adfad 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1511,7 +1511,11 @@ static int cr_pre_dump_finish(int status)
>  			goto err;
>  
>  		mem_pp = dmpi(item)->mem_pp;
> -		ret = page_xfer_dump_pages(&xfer, mem_pp);
> +
> +		if (opts.pre_dump_mode == PRE_DUMP_READ)
> +			ret = 0;  /* Replace with call to optimized pre-dump */

I don't understand what this comment means...

> +		else
> +			ret = page_xfer_dump_pages(&xfer, mem_pp);
>  
>  		xfer.close(&xfer);
>  
> diff --git a/criu/cr-service.c b/criu/cr-service.c
> index 95ba2e5..392e9ac 100644
> --- a/criu/cr-service.c
> +++ b/criu/cr-service.c
> @@ -473,6 +473,19 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
>  		opts.lazy_pages = req->lazy_pages;
>  	}
>  
> +	if (req->has_pre_dump_mode) {
> +		switch (req->pre_dump_mode) {
> +			case CRIU_PRE_DUMP_MODE__SPLICE:
> +				opts.pre_dump_mode = PRE_DUMP_SPLICE;
> +				break;
> +			case CRIU_PRE_DUMP_MODE__READ:
> +				opts.pre_dump_mode = PRE_DUMP_READ;
> +				break;
> +			default:
> +				goto err;
> +		}
> +	}
> +
>  	if (req->ps) {
>  		opts.port = (short)req->ps->port;
>  
> diff --git a/criu/crtools.c b/criu/crtools.c
> index c0de1c0..d908e7f 100644
> --- a/criu/crtools.c
> +++ b/criu/crtools.c
> @@ -446,6 +446,8 @@ usage:
>  "                        pages images of previous dump\n"
>  "                        when used on restore, as soon as page is restored, it\n"
>  "                        will be punched from the image\n"
> +"  --pre-dump-mode       splice - parasite based pre-dumping (default)\n"
> +"                        read   - process_vm_readv syscall based pre-dumping\n"
>  "\n"
>  "Page/Service server options:\n"
>  "  --address ADDR        address of server or service\n"
> diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
> index 5cbc56f..53bd5ed 100644
> --- a/criu/include/cr_options.h
> +++ b/criu/include/cr_options.h
> @@ -39,6 +39,12 @@ struct cg_root_opt {
>  };
>  
>  /*
> + * Pre-dump variants
> + */
> +#define PRE_DUMP_SPLICE		1		/* Pre-dump using parasite */
> +#define PRE_DUMP_READ			2		/* Pre-dump using process_vm_readv syscall */
> +
> +/*
>   * Cgroup management options.
>   */
>  #define CG_MODE_IGNORE		(0u << 0)	/* Zero is important here */
> @@ -81,6 +87,7 @@ struct cr_options {
>  	int			evasive_devices;
>  	int			link_remap_ok;
>  	int			log_file_per_pid;
> +	int			pre_dump_mode;
>  	bool			swrk_restore;
>  	char			*output;
>  	char			*root;
> diff --git a/criu/mem.c b/criu/mem.c
> index de66a62..911b9d2 100644
> --- a/criu/mem.c
> +++ b/criu/mem.c
> @@ -482,7 +482,18 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>  	if (mdc->lazy)
>  		memcpy(pargs_iovs(args), pp->iovs,
>  		       sizeof(struct iovec) * pp->nr_iovs);
> -	ret = drain_pages(pp, ctl, args);
> +
> +	/*
> +	 * Faking drain_pages for pre-dump here. Actual drain_pages for pre-dump
> +	 * will happen after task unfreezing in cr_pre_dump_finish(). This is
> +	 * actual optimization which reduces time for which process was frozen
> +	 * during pre-dump.
> +	 */
> +	if (mdc->pre_dump && opts.pre_dump_mode == PRE_DUMP_READ)
> +		ret = 0;
> +	else
> +		ret = drain_pages(pp, ctl, args);
> +
>  	if (!ret && !mdc->pre_dump)
>  		ret = xfer_pages(pp, &xfer);
>  	if (ret)
> diff --git a/images/rpc.proto b/images/rpc.proto
> index c402259..fc2f1bc 100644
> --- a/images/rpc.proto
> +++ b/images/rpc.proto
> @@ -47,6 +47,11 @@ enum criu_cg_mode {
>  	DEFAULT = 6;
>  };
>  
> +enum criu_pre_dump_mode {
> +	SPLICE = 	1;
> +	READ =		2;
> +};
> +
>  message criu_opts {
>  	required int32			images_dir_fd	= 1;
>  	optional int32			pid		= 2; /* if not set on dump, will dump requesting process */
> @@ -121,6 +126,7 @@ message criu_opts {
>  	optional bool			tls			= 58;
>  	optional bool			tls_no_cn_verify	= 59;
>  	optional string			cgroup_yard		= 60;
> +	optional criu_pre_dump_mode	pre_dump_mode		= 61 [default = SPLICE];
>  /*	optional bool			check_mounts		= 128;	*/
>  }
>  
> diff --git a/lib/c/criu.c b/lib/c/criu.c
> index 14ddff2..fffb9fd 100644
> --- a/lib/c/criu.c
> +++ b/lib/c/criu.c
> @@ -336,6 +336,21 @@ int criu_set_parent_images(const char *path)
>  	return criu_local_set_parent_images(global_opts, path);
>  }
>  
> +int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode)
> +{
> +	opts->rpc->has_pre_dump_mode = true;
> +	if (mode == CRIU_PRE_DUMP_SPLICE || mode == CRIU_PRE_DUMP_READ) {
> +		opts->rpc->pre_dump_mode = mode;
> +		return 0;
> +	}
> +	return -1;
> +}
> +
> +int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode)
> +{
> +	return criu_local_set_pre_dump_mode(global_opts, mode);
> +}
> +
>  void criu_local_set_track_mem(criu_opts *opts, bool track_mem)
>  {
>  	opts->rpc->has_track_mem = true;
> diff --git a/lib/c/criu.h b/lib/c/criu.h
> index cb37c52..22db0fd 100644
> --- a/lib/c/criu.h
> +++ b/lib/c/criu.h
> @@ -43,6 +43,11 @@ enum criu_cg_mode {
>  	CRIU_CG_MODE_DEFAULT,
>  };
>  
> +enum criu_pre_dump_mode {
> +	CRIU_PRE_DUMP_SPLICE =	1,
> +	CRIU_PRE_DUMP_READ =	2
> +};
> +
>  int criu_set_service_address(const char *path);
>  void criu_set_service_fd(int fd);
>  int criu_set_service_binary(const char *path);
> @@ -95,6 +100,7 @@ int criu_add_irmap_path(const char *path);
>  int criu_add_inherit_fd(int fd, const char *key);
>  int criu_add_external(const char *key);
>  int criu_set_page_server_address_port(const char *address, int port);
> +int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode);
>  
>  /*
>   * The criu_notify_arg_t na argument is an opaque
> @@ -211,6 +217,7 @@ int criu_local_add_cg_yard(criu_opts *opts, const char *path);
>  int criu_local_add_inherit_fd(criu_opts *opts, int fd, const char *key);
>  int criu_local_add_external(criu_opts *opts, const char *key);
>  int criu_local_set_page_server_address_port(criu_opts *opts, const char *address, int port);
> +int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode);
>  
>  void criu_local_set_notify_cb(criu_opts *opts, int (*cb)(char *action, criu_notify_arg_t na));
>  
> diff --git a/test/zdtm.py b/test/zdtm.py
> index 98d113f..7fdb8a3 100755
> --- a/test/zdtm.py
> +++ b/test/zdtm.py
> @@ -1020,6 +1020,7 @@ class criu:
>          self.__tls = self.__tls_options() if opts['tls'] else []
>          self.__criu_bin = opts['criu_bin']
>          self.__crit_bin = opts['crit_bin']
> +        self.__pre_dump_mode = opts['pre_dump_mode']
>  
>      def fini(self):
>          if self.__lazy_migrate:
> @@ -1276,6 +1277,8 @@ class criu:
>              a_opts += ['--leave-stopped']
>          if self.__empty_ns:
>              a_opts += ['--empty-ns', 'net']
> +        if self.__pre_dump_mode:
> +            a_opts += ["--pre-dump-mode", "%s" % self.__pre_dump_mode]
>  
>          nowait = False
>          if self.__lazy_migrate and action == "dump":
> @@ -1865,7 +1868,7 @@ class Launcher:
>                'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs',
>                'freezecg', 'user', 'dry_run', 'noauto_dedup',
>                'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'remote',
> -              'tls', 'criu_bin', 'crit_bin')
> +              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode')
>          arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd}))
>  
>          if self.__use_log:
> @@ -2512,6 +2515,10 @@ rp.add_argument("--criu-bin",
>  rp.add_argument("--crit-bin",
>                  help="Path to crit binary",
>                  default='../crit/crit')
> +rp.add_argument("--pre-dump-mode",
> +                help="Use splice or read mode of pre-dumping",
> +                choices=['splice', 'read'],
> +                default='splice')
>  
>  lp = sp.add_parser("list", help="List tests")
>  lp.set_defaults(action=list_tests)
> -- 
> 2.7.4
>
Abhishek Dubey Sept. 27, 2019, 9:32 a.m.
Actually, I must move this call to patch 6/9, where it must be

ret = page_xfer_predump_pages(item->pid->real, &xfer, mem_pp);

On 25/09/19 9:45 PM, Andrei Vagin wrote:
> On Sun, Sep 22, 2019 at 09:54:50AM +0530, Abhishek Dubey wrote:
>> Two modes of pre-dump algorithm:
>>      1) splicing memory by parasite
>>          --pre-dump-mode=splice (default)
>>      2) using process_vm_readv syscall
>>          --pre-dump-mode=read
>>
>> Signed-off-by: Abhishek Dubey <dubeyabhishek777@gmail.com>
>> ---
>>   Documentation/criu.txt    |  6 ++++++
>>   criu/config.c             | 10 ++++++++++
>>   criu/cr-dump.c            |  6 +++++-
>>   criu/cr-service.c         | 13 +++++++++++++
>>   criu/crtools.c            |  2 ++
>>   criu/include/cr_options.h |  7 +++++++
>>   criu/mem.c                | 13 ++++++++++++-
>>   images/rpc.proto          |  6 ++++++
>>   lib/c/criu.c              | 15 +++++++++++++++
>>   lib/c/criu.h              |  7 +++++++
>>   test/zdtm.py              |  9 ++++++++-
>>   11 files changed, 91 insertions(+), 3 deletions(-)
>>
>> diff --git a/Documentation/criu.txt b/Documentation/criu.txt
>> index 28913a7..2729bc9 100644
>> --- a/Documentation/criu.txt
>> +++ b/Documentation/criu.txt
>> @@ -156,6 +156,12 @@ In addition, *page-server* options may be specified.
>>       Turn on memory changes tracker in the kernel. If the option is
>>       not passed the memory tracker get turned on implicitly.
>>   
>> +*--pre-dump-mode*='mode'::
>> +    There are two 'mode' to operate pre-dump algorithm. The 'splice' mode
>> +    is parasite based, whereas 'read' mode is based on process_vm_readv
>> +    syscall. The 'read' mode incurs reduced frozen time and reduced
>> +    memory pressure as compared to 'splice' mode. Default is 'splice' mode.
>> +
>>   *dump*
>>   ~~~~~~
>>   Performs a checkpoint procedure.
>> diff --git a/criu/config.c b/criu/config.c
>> index 1a6d014..b252095 100644
>> --- a/criu/config.c
>> +++ b/criu/config.c
>> @@ -276,6 +276,7 @@ void init_opts(void)
>>   	opts.empty_ns = 0;
>>   	opts.status_fd = -1;
>>   	opts.log_level = DEFAULT_LOGLEVEL;
>> +	opts.pre_dump_mode = PRE_DUMP_SPLICE;
>>   }
>>   
>>   bool deprecated_ok(char *what)
>> @@ -518,6 +519,7 @@ int parse_options(int argc, char **argv, bool *usage_error,
>>   		BOOL_OPT("tls", &opts.tls),
>>   		{"tls-no-cn-verify",		no_argument,		&opts.tls_no_cn_verify, true},
>>   		{ "cgroup-yard",		required_argument,	0, 1096 },
>> +		{ "pre-dump-mode",		required_argument,	0, 1097},
>>   		{ },
>>   	};
>>   
>> @@ -819,6 +821,14 @@ int parse_options(int argc, char **argv, bool *usage_error,
>>   		case 1096:
>>   			SET_CHAR_OPTS(cgroup_yard, optarg);
>>   			break;
>> +		case 1097:
>> +			if (!strcmp("read", optarg)) {
>> +				opts.pre_dump_mode = PRE_DUMP_READ;
>> +			} else if (strcmp("splice", optarg)) {
>> +				pr_err("Unable to parse value of --pre-dump-mode\n");
>> +				return 1;
>> +			}
>> +			break;
>>   		case 'V':
>>   			pr_msg("Version: %s\n", CRIU_VERSION);
>>   			if (strcmp(CRIU_GITID, "0"))
>> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
>> index 119c82d..d4adfad 100644
>> --- a/criu/cr-dump.c
>> +++ b/criu/cr-dump.c
>> @@ -1511,7 +1511,11 @@ static int cr_pre_dump_finish(int status)
>>   			goto err;
>>   
>>   		mem_pp = dmpi(item)->mem_pp;
>> -		ret = page_xfer_dump_pages(&xfer, mem_pp);
>> +
>> +		if (opts.pre_dump_mode == PRE_DUMP_READ)
>> +			ret = 0;  /* Replace with call to optimized pre-dump */
> I don't understand what this comment means..
>
>> +		else
>> +			ret = page_xfer_dump_pages(&xfer, mem_pp);
>>   
>>   		xfer.close(&xfer);
>>   
>> diff --git a/criu/cr-service.c b/criu/cr-service.c
>> index 95ba2e5..392e9ac 100644
>> --- a/criu/cr-service.c
>> +++ b/criu/cr-service.c
>> @@ -473,6 +473,19 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
>>   		opts.lazy_pages = req->lazy_pages;
>>   	}
>>   
>> +	if (req->has_pre_dump_mode) {
>> +		switch (req->pre_dump_mode) {
>> +			case CRIU_PRE_DUMP_MODE__SPLICE:
>> +				opts.pre_dump_mode = PRE_DUMP_SPLICE;
>> +				break;
>> +			case CRIU_PRE_DUMP_MODE__READ:
>> +				opts.pre_dump_mode = PRE_DUMP_READ;
>> +				break;
>> +			default:
>> +				goto err;
>> +		}
>> +	}
>> +
>>   	if (req->ps) {
>>   		opts.port = (short)req->ps->port;
>>   
>> diff --git a/criu/crtools.c b/criu/crtools.c
>> index c0de1c0..d908e7f 100644
>> --- a/criu/crtools.c
>> +++ b/criu/crtools.c
>> @@ -446,6 +446,8 @@ usage:
>>   "                        pages images of previous dump\n"
>>   "                        when used on restore, as soon as page is restored, it\n"
>>   "                        will be punched from the image\n"
>> +"  --pre-dump-mode       splice - parasite based pre-dumping (default)\n"
>> +"                        read   - process_vm_readv syscall based pre-dumping\n"
>>   "\n"
>>   "Page/Service server options:\n"
>>   "  --address ADDR        address of server or service\n"
>> diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
>> index 5cbc56f..53bd5ed 100644
>> --- a/criu/include/cr_options.h
>> +++ b/criu/include/cr_options.h
>> @@ -39,6 +39,12 @@ struct cg_root_opt {
>>   };
>>   
>>   /*
>> + * Pre-dump variants
>> + */
>> +#define PRE_DUMP_SPLICE		1		/* Pre-dump using parasite */
>> +#define PRE_DUMP_READ			2		/* Pre-dump using process_vm_readv syscall */
>> +
>> +/*
>>    * Cgroup management options.
>>    */
>>   #define CG_MODE_IGNORE		(0u << 0)	/* Zero is important here */
>> @@ -81,6 +87,7 @@ struct cr_options {
>>   	int			evasive_devices;
>>   	int			link_remap_ok;
>>   	int			log_file_per_pid;
>> +	int			pre_dump_mode;
>>   	bool			swrk_restore;
>>   	char			*output;
>>   	char			*root;
>> diff --git a/criu/mem.c b/criu/mem.c
>> index de66a62..911b9d2 100644
>> --- a/criu/mem.c
>> +++ b/criu/mem.c
>> @@ -482,7 +482,18 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>>   	if (mdc->lazy)
>>   		memcpy(pargs_iovs(args), pp->iovs,
>>   		       sizeof(struct iovec) * pp->nr_iovs);
>> -	ret = drain_pages(pp, ctl, args);
>> +
>> +	/*
>> +	 * Faking drain_pages for pre-dump here. Actual drain_pages for pre-dump
>> +	 * will happen after task unfreezing in cr_pre_dump_finish(). This is
>> +	 * actual optimization which reduces time for which process was frozen
>> +	 * during pre-dump.
>> +	 */
>> +	if (mdc->pre_dump && opts.pre_dump_mode == PRE_DUMP_READ)
>> +		ret = 0;
>> +	else
>> +		ret = drain_pages(pp, ctl, args);
>> +
>>   	if (!ret && !mdc->pre_dump)
>>   		ret = xfer_pages(pp, &xfer);
>>   	if (ret)
>> diff --git a/images/rpc.proto b/images/rpc.proto
>> index c402259..fc2f1bc 100644
>> --- a/images/rpc.proto
>> +++ b/images/rpc.proto
>> @@ -47,6 +47,11 @@ enum criu_cg_mode {
>>   	DEFAULT = 6;
>>   };
>>   
>> +enum criu_pre_dump_mode {
>> +	SPLICE = 	1;
>> +	READ =		2;
>> +};
>> +
>>   message criu_opts {
>>   	required int32			images_dir_fd	= 1;
>>   	optional int32			pid		= 2; /* if not set on dump, will dump requesting process */
>> @@ -121,6 +126,7 @@ message criu_opts {
>>   	optional bool			tls			= 58;
>>   	optional bool			tls_no_cn_verify	= 59;
>>   	optional string			cgroup_yard		= 60;
>> +	optional criu_pre_dump_mode	pre_dump_mode		= 61 [default = SPLICE];
>>   /*	optional bool			check_mounts		= 128;	*/
>>   }
>>   
>> diff --git a/lib/c/criu.c b/lib/c/criu.c
>> index 14ddff2..fffb9fd 100644
>> --- a/lib/c/criu.c
>> +++ b/lib/c/criu.c
>> @@ -336,6 +336,21 @@ int criu_set_parent_images(const char *path)
>>   	return criu_local_set_parent_images(global_opts, path);
>>   }
>>   
>> +int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode)
>> +{
>> +	opts->rpc->has_pre_dump_mode = true;
>> +	if (mode == CRIU_PRE_DUMP_SPLICE || mode == CRIU_PRE_DUMP_READ) {
>> +		opts->rpc->pre_dump_mode = mode;
>> +		return 0;
>> +	}
>> +	return -1;
>> +}
>> +
>> +int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode)
>> +{
>> +	return criu_local_set_pre_dump_mode(global_opts, mode);
>> +}
>> +
>>   void criu_local_set_track_mem(criu_opts *opts, bool track_mem)
>>   {
>>   	opts->rpc->has_track_mem = true;
>> diff --git a/lib/c/criu.h b/lib/c/criu.h
>> index cb37c52..22db0fd 100644
>> --- a/lib/c/criu.h
>> +++ b/lib/c/criu.h
>> @@ -43,6 +43,11 @@ enum criu_cg_mode {
>>   	CRIU_CG_MODE_DEFAULT,
>>   };
>>   
>> +enum criu_pre_dump_mode {
>> +	CRIU_PRE_DUMP_SPLICE =	1,
>> +	CRIU_PRE_DUMP_READ =	2
>> +};
>> +
>>   int criu_set_service_address(const char *path);
>>   void criu_set_service_fd(int fd);
>>   int criu_set_service_binary(const char *path);
>> @@ -95,6 +100,7 @@ int criu_add_irmap_path(const char *path);
>>   int criu_add_inherit_fd(int fd, const char *key);
>>   int criu_add_external(const char *key);
>>   int criu_set_page_server_address_port(const char *address, int port);
>> +int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode);
>>   
>>   /*
>>    * The criu_notify_arg_t na argument is an opaque
>> @@ -211,6 +217,7 @@ int criu_local_add_cg_yard(criu_opts *opts, const char *path);
>>   int criu_local_add_inherit_fd(criu_opts *opts, int fd, const char *key);
>>   int criu_local_add_external(criu_opts *opts, const char *key);
>>   int criu_local_set_page_server_address_port(criu_opts *opts, const char *address, int port);
>> +int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode);
>>   
>>   void criu_local_set_notify_cb(criu_opts *opts, int (*cb)(char *action, criu_notify_arg_t na));
>>   
>> diff --git a/test/zdtm.py b/test/zdtm.py
>> index 98d113f..7fdb8a3 100755
>> --- a/test/zdtm.py
>> +++ b/test/zdtm.py
>> @@ -1020,6 +1020,7 @@ class criu:
>>           self.__tls = self.__tls_options() if opts['tls'] else []
>>           self.__criu_bin = opts['criu_bin']
>>           self.__crit_bin = opts['crit_bin']
>> +        self.__pre_dump_mode = opts['pre_dump_mode']
>>   
>>       def fini(self):
>>           if self.__lazy_migrate:
>> @@ -1276,6 +1277,8 @@ class criu:
>>               a_opts += ['--leave-stopped']
>>           if self.__empty_ns:
>>               a_opts += ['--empty-ns', 'net']
>> +        if self.__pre_dump_mode:
>> +            a_opts += ["--pre-dump-mode", "%s" % self.__pre_dump_mode]
>>   
>>           nowait = False
>>           if self.__lazy_migrate and action == "dump":
>> @@ -1865,7 +1868,7 @@ class Launcher:
>>                 'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs',
>>                 'freezecg', 'user', 'dry_run', 'noauto_dedup',
>>                 'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'remote',
>> -              'tls', 'criu_bin', 'crit_bin')
>> +              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode')
>>           arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd}))
>>   
>>           if self.__use_log:
>> @@ -2512,6 +2515,10 @@ rp.add_argument("--criu-bin",
>>   rp.add_argument("--crit-bin",
>>                   help="Path to crit binary",
>>                   default='../crit/crit')
>> +rp.add_argument("--pre-dump-mode",
>> +                help="Use splice or read mode of pre-dumping",
>> +                choices=['splice', 'read'],
>> +                default='splice')
>>   
>>   lp = sp.add_parser("list", help="List tests")
>>   lp.set_defaults(action=list_tests)
>> -- 
>> 2.7.4
>>