[9/7] Added --mix-pre-dump option for zdtm suite

Submitted by Abhishek Dubey on Aug. 31, 2019, 12:12 p.m.

Details

Message ID 1567253573-25318-1-git-send-email-dubeyabhishek777@gmail.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Abhishek Dubey Aug. 31, 2019, 12:12 p.m.
READ and SPLICE mode pre-dumps can happen
in any order. New option --mix-pre-dump is
added for zdtm suite only, to test integrity
of random order of pre-dump modes.

Signed-off-by: Abhishek Dubey <dubeyabhishek777@gmail.com>
---
 criu/cr-dump.c       |  4 ++--
 criu/cr-restore.c    |  4 ++--
 criu/include/stats.h |  4 ++--
 criu/mem.c           | 65 ++++++++++++++++++++++++++++++++++++++++------------
 criu/stats.c         | 42 ++++++++++++++++++++++++++++++++-
 images/stats.proto   |  1 +
 test/zdtm.py         | 13 ++++++++++-
 7 files changed, 110 insertions(+), 23 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index b916e0a..f62362f 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1554,7 +1554,7 @@  err:
 	if (ret)
 		pr_err("Pre-dumping FAILED.\n");
 	else {
-		write_stats(DUMP_STATS);
+		write_stats(DUMP_STATS, 1);
 		pr_info("Pre-dumping finished successfully\n");
 	}
 	return ret;
@@ -1760,7 +1760,7 @@  static int cr_dump_finish(int ret)
 	if (ret) {
 		pr_err("Dumping FAILED.\n");
 	} else {
-		write_stats(DUMP_STATS);
+		write_stats(DUMP_STATS, 0);
 		pr_info("Dumping finished successfully\n");
 	}
 	return post_dump_ret ? : (ret != 0);
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index de0b2cb..8ca46e4 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2226,7 +2226,7 @@  skip_ns_bouncing:
 	if (ret != 0) {
 		pr_err("Aborting restore due to post-restore script ret code %d\n", ret);
 		timing_stop(TIME_RESTORE);
-		write_stats(RESTORE_STATS);
+		write_stats(RESTORE_STATS, 0);
 		goto out_kill;
 	}
 
@@ -2292,7 +2292,7 @@  skip_ns_bouncing:
 	/* Detaches from processes and they continue run through sigreturn. */
 	finalize_restore_detach(ret);
 
-	write_stats(RESTORE_STATS);
+	write_stats(RESTORE_STATS, 0);
 
 	ret = run_scripts(ACT_POST_RESUME);
 	if (ret != 0)
diff --git a/criu/include/stats.h b/criu/include/stats.h
index 5d408b7..e6f86cb 100644
--- a/criu/include/stats.h
+++ b/criu/include/stats.h
@@ -51,6 +51,6 @@  extern void cnt_sub(int c, unsigned long val);
 #define RESTORE_STATS	2
 
 extern int init_stats(int what);
-extern void write_stats(int what);
-
+extern void write_stats(int what, int pre_dump);
+extern int get_parent_pre_dump_type();
 #endif /* __CR_STATS_H__ */
diff --git a/criu/mem.c b/criu/mem.c
index 15094f7..0cfa4c2 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -351,7 +351,8 @@  static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,
 			     struct page_pipe *pp, struct page_xfer *xfer,
 			     struct parasite_dump_pages_args *args,
 			     struct parasite_ctl *ctl, pmc_t *pmc,
-			     bool has_parent, bool pre_dump)
+			     bool has_parent, bool pre_dump,
+			     int parent_predump_mode)
 {
 	u64 off = 0;
 	u64 *map;
@@ -362,17 +363,47 @@  static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,
 		return 0;
 
 	/*
-	 * process_vm_readv syscall can't copy memory regions lacking
-	 * PROT_READ flag. Therefore, avoid generating iovs for such
-	 * regions in "read" mode pre-dump. Regions skipped by pre-dumps
-	 * can't be referred as parent by following dump stage. So, mark
-	 * "has_parent=false" for such regions.
+	 * To facilitate any combination of pre-dump modes to run after
+	 * one another, we need to take extra care as discussed below.
+	 *
+	 * The SPLICE mode pre-dump, processes all type of memory regions,
+	 * whereas READ mode pre-dump skips processing those memory regions
+	 * which lacks PROT_READ flag.
+	 *
+	 * Now on mixing pre-dump modes:
+	 * 	If SPLICE mode follows SPLICE mode	: no issue
+	 *		-> everything dumped both the times
+	 *
+	 * 	If READ mode follows READ mode		: no issue
+	 *		-> non-PROT_READ skipped both the time
+	 *
+	 * 	If READ mode follows SPLICE mode   	: no issue
+	 *		-> everything dumped at first,
+	 *		   the non-PROT_READ skipped later
+	 *
+	 * 	If SPLICE mode follows READ mode   	: Need special care
+	 *
+	 * If READ pre-dump happens first then it has skipped processing
+	 * non-PROT_READ regions. Following SPLICE pre-dump expects pagemap
+	 * entries for all mappings in parent pagemap, but last READ mode
+	 * pre-dump has skipped processing and pagemap generation for
+	 * non-PROT_READ regions. So SPLICE mode throws error of missing
+	 * pagemap entry for non-PROT_READ mapping.
+	 *
+	 * To resolve this, the mode of pre-dump is stored in pre-dump's
+	 * stats file. This mode is read back from stats file during next
+	 * pre-dump.
+	 * If parent-pre-dump and next pre-dump turns out in READ-mode -->
+	 * SPLICE-mode order, then SPLICE mode doesn't expect mappings for
+	 * non-PROT_READ regions in parent-image and marks "has_parent=false".
 	 */
-	if (opts.pre_dump_mode == PRE_DUMP_READ &&
-	                          !(vma->e->prot & PROT_READ)) {
-		if (pre_dump)
+
+	if (!(vma->e->prot & PROT_READ)) {
+		if (opts.pre_dump_mode == PRE_DUMP_READ && pre_dump)
 			return 0;
-		has_parent = false;
+		if ((parent_predump_mode == PRE_DUMP_READ &&
+			opts.pre_dump_mode == PRE_DUMP_SPLICE) || !pre_dump)
+			has_parent = false;
 	}
 
 	if (vma_entry_is(vma->e, VMA_AREA_AIORING)) {
@@ -420,6 +451,7 @@  static int __parasite_dump_pages_seized(struct pstree_item *item,
 	unsigned long pmc_size;
 	int possible_pid_reuse = 0;
 	bool has_parent;
+	int parent_predump_mode;
 
 	pr_info("\n");
 	pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, item->pid->real);
@@ -486,9 +518,12 @@  static int __parasite_dump_pages_seized(struct pstree_item *item,
 	 */
 	args->off = 0;
 	has_parent = !!xfer.parent && !possible_pid_reuse;
+	parent_predump_mode = get_parent_pre_dump_type();
+
 	list_for_each_entry(vma_area, &vma_area_list->h, list) {
 		ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl,
-					&pmc, has_parent, mdc->pre_dump);
+					&pmc, has_parent, mdc->pre_dump,
+					parent_predump_mode);
 		if (ret < 0)
 			goto out_xfer;
 	}
@@ -593,11 +628,11 @@  int parasite_dump_pages_seized(struct pstree_item *item,
 			pr_err("Can't dump unprotect vmas with parasite\n");
 			return ret;
 		}
+	}
 
-		if (fault_injected(FI_DUMP_PAGES)) {
-			pr_err("fault: Dump VMA pages failure!\n");
-			return -1;
-		}
+	if (fault_injected(FI_DUMP_PAGES)) {
+		pr_err("fault: Dump VMA pages failure!\n");
+		return -1;
 	}
 
 	ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, ctl);
diff --git a/criu/stats.c b/criu/stats.c
index cb52801..4e1ff08 100644
--- a/criu/stats.c
+++ b/criu/stats.c
@@ -149,7 +149,7 @@  static void display_stats(int what, StatsEntry *stats)
 		return;
 }
 
-void write_stats(int what)
+void write_stats(int what, int pre_dump)
 {
 	StatsEntry stats = STATS_ENTRY__INIT;
 	DumpStatsEntry ds_entry = DUMP_STATS_ENTRY__INIT;
@@ -183,6 +183,8 @@  void write_stats(int what)
 		ds_entry.has_shpages_skipped_parent = true;
 		ds_entry.shpages_written = dstats->counts[CNT_SHPAGES_WRITTEN];
 		ds_entry.has_shpages_written = true;
+		ds_entry.pre_dump_mode = opts.pre_dump_mode;
+		ds_entry.has_pre_dump_mode = pre_dump;
 
 		name = "dump";
 	} else if (what == RESTORE_STATS) {
@@ -228,3 +230,41 @@  int init_stats(int what)
 	rstats = shmalloc(sizeof(struct restore_stats));
 	return rstats ? 0 : -1;
 }
+
+int get_parent_pre_dump_type(void)
+{
+        struct cr_img *img;
+        StatsEntry *stats;
+        int dir;
+        unsigned int parent_pre_dump_mode = -1;
+
+        dir = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
+        if (dir == -1) {
+                if (errno != ENOENT)
+                        pr_err("Failed to open parent directory for \
+					pre-dump mode reading\n");
+                return parent_pre_dump_mode;
+        }
+
+        img = open_image_at(dir, CR_FD_STATS, O_RSTR, "dump");
+        if (!img) {
+                pr_err("Failed to open parent pre-dump stats image\n");
+                close(dir);
+                return parent_pre_dump_mode;
+        }
+
+        if (pb_read_one(img, &stats, PB_STATS) < 0) {
+                pr_err("Failed to read parent pre-dump stat entry\n");
+                close_image(img);
+                close(dir);
+                return parent_pre_dump_mode;
+        }
+
+        if (stats->dump->has_pre_dump_mode) {
+                parent_pre_dump_mode = (int)stats->dump->pre_dump_mode;
+        }
+
+        close_image(img);
+        close(dir);
+        return parent_pre_dump_mode;
+}
diff --git a/images/stats.proto b/images/stats.proto
index 68d2f1b..aef3b16 100644
--- a/images/stats.proto
+++ b/images/stats.proto
@@ -20,6 +20,7 @@  message dump_stats_entry {
 	optional uint64			shpages_scanned		= 12;
 	optional uint64			shpages_skipped_parent	= 13;
 	optional uint64			shpages_written		= 14;
+	optional uint32			pre_dump_mode		= 15;
 }
 
 message restore_stats_entry {
diff --git a/test/zdtm.py b/test/zdtm.py
index ca8b165..1f401a6 100755
--- a/test/zdtm.py
+++ b/test/zdtm.py
@@ -1021,6 +1021,7 @@  class criu:
         self.__criu_bin = opts['criu_bin']
         self.__crit_bin = opts['crit_bin']
         self.__pre_dump_mode = opts['pre_dump_mode']
+        self.__mix_pre_dump = bool(opts['mix_pre_dump'])
 
     def fini(self):
         if self.__lazy_migrate:
@@ -1279,6 +1280,13 @@  class criu:
             a_opts += ['--empty-ns', 'net']
         if self.__pre_dump_mode:
             a_opts += ["--pre-dump-mode", "%s" % self.__pre_dump_mode]
+        if self.__mix_pre_dump:
+            if(random.randrange(1, 1000, 5) % 2):
+                print("Mix-mode selected: splice")
+                a_opts += ["--pre-dump-mode", "splice"]
+            else:
+                print("Mix-mode selected: read")
+                a_opts += ["--pre-dump-mode", "read"]
 
         nowait = False
         if self.__lazy_migrate and action == "dump":
@@ -1868,7 +1876,7 @@  class Launcher:
               'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs',
               'freezecg', 'user', 'dry_run', 'noauto_dedup',
               'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'remote',
-              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode')
+              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode', 'mix_pre_dump')
         arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd}))
 
         if self.__use_log:
@@ -2519,6 +2527,9 @@  rp.add_argument("--pre-dump-mode",
                 help="Use splice or read mode of pre-dumping",
                 choices=['splice', 'read'],
                 default='splice')
+rp.add_argument("--mix-pre-dump",
+                help="mixing splice/read pre-dump modes",
+                action='store_true')
 
 lp = sp.add_parser("list", help="List tests")
 lp.set_defaults(action=list_tests)

Comments

Andrei Vagin Sept. 15, 2019, 4:44 a.m.
Could you resend the whole series?

On Sat, Aug 31, 2019 at 05:42:53PM +0530, Abhishek Dubey wrote:
> READ and SPLICE mode pre-dumps can happen
> in any order. New option --mix-pre-dump is
> added for zdtm suite only, to test integrity
> of random order of pre-dump modes.
> 
> Signed-off-by: Abhishek Dubey <dubeyabhishek777@gmail.com>
> ---
>  criu/cr-dump.c       |  4 ++--
>  criu/cr-restore.c    |  4 ++--
>  criu/include/stats.h |  4 ++--
>  criu/mem.c           | 65 ++++++++++++++++++++++++++++++++++++++++------------
>  criu/stats.c         | 42 ++++++++++++++++++++++++++++++++-
>  images/stats.proto   |  1 +
>  test/zdtm.py         | 13 ++++++++++-
>  7 files changed, 110 insertions(+), 23 deletions(-)
> 
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index b916e0a..f62362f 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1554,7 +1554,7 @@ err:
>  	if (ret)
>  		pr_err("Pre-dumping FAILED.\n");
>  	else {
> -		write_stats(DUMP_STATS);
> +		write_stats(DUMP_STATS, 1);
>  		pr_info("Pre-dumping finished successfully\n");
>  	}
>  	return ret;
> @@ -1760,7 +1760,7 @@ static int cr_dump_finish(int ret)
>  	if (ret) {
>  		pr_err("Dumping FAILED.\n");
>  	} else {
> -		write_stats(DUMP_STATS);
> +		write_stats(DUMP_STATS, 0);
>  		pr_info("Dumping finished successfully\n");
>  	}
>  	return post_dump_ret ? : (ret != 0);
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index de0b2cb..8ca46e4 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -2226,7 +2226,7 @@ skip_ns_bouncing:
>  	if (ret != 0) {
>  		pr_err("Aborting restore due to post-restore script ret code %d\n", ret);
>  		timing_stop(TIME_RESTORE);
> -		write_stats(RESTORE_STATS);
> +		write_stats(RESTORE_STATS, 0);
>  		goto out_kill;
>  	}
>  
> @@ -2292,7 +2292,7 @@ skip_ns_bouncing:
>  	/* Detaches from processes and they continue run through sigreturn. */
>  	finalize_restore_detach(ret);
>  
> -	write_stats(RESTORE_STATS);
> +	write_stats(RESTORE_STATS, 0);
>  
>  	ret = run_scripts(ACT_POST_RESUME);
>  	if (ret != 0)
> diff --git a/criu/include/stats.h b/criu/include/stats.h
> index 5d408b7..e6f86cb 100644
> --- a/criu/include/stats.h
> +++ b/criu/include/stats.h
> @@ -51,6 +51,6 @@ extern void cnt_sub(int c, unsigned long val);
>  #define RESTORE_STATS	2
>  
>  extern int init_stats(int what);
> -extern void write_stats(int what);
> -
> +extern void write_stats(int what, int pre_dump);
> +extern int get_parent_pre_dump_type();
>  #endif /* __CR_STATS_H__ */
> diff --git a/criu/mem.c b/criu/mem.c
> index 15094f7..0cfa4c2 100644
> --- a/criu/mem.c
> +++ b/criu/mem.c
> @@ -351,7 +351,8 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,
>  			     struct page_pipe *pp, struct page_xfer *xfer,
>  			     struct parasite_dump_pages_args *args,
>  			     struct parasite_ctl *ctl, pmc_t *pmc,
> -			     bool has_parent, bool pre_dump)
> +			     bool has_parent, bool pre_dump,
> +			     int parent_predump_mode)
>  {
>  	u64 off = 0;
>  	u64 *map;
> @@ -362,17 +363,47 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,
>  		return 0;
>  
>  	/*
> -	 * process_vm_readv syscall can't copy memory regions lacking
> -	 * PROT_READ flag. Therefore, avoid generating iovs for such
> -	 * regions in "read" mode pre-dump. Regions skipped by pre-dumps
> -	 * can't be referred as parent by following dump stage. So, mark
> -	 * "has_parent=false" for such regions.
> +	 * To facilitate any combination of pre-dump modes to run after
> +	 * one another, we need to take extra care as discussed below.
> +	 *
> +	 * The SPLICE mode pre-dump, processes all type of memory regions,
> +	 * whereas READ mode pre-dump skips processing those memory regions
> +	 * which lacks PROT_READ flag.
> +	 *
> +	 * Now on mixing pre-dump modes:
> +	 * 	If SPLICE mode follows SPLICE mode	: no issue
> +	 *		-> everything dumped both the times
> +	 *
> +	 * 	If READ mode follows READ mode		: no issue
> +	 *		-> non-PROT_READ skipped both the time
> +	 *
> +	 * 	If READ mode follows SPLICE mode   	: no issue
> +	 *		-> everything dumped at first,
> +	 *		   the non-PROT_READ skipped later
> +	 *
> +	 * 	If SPLICE mode follows READ mode   	: Need special care
> +	 *
> +	 * If READ pre-dump happens first then it has skipped processing
> +	 * non-PROT_READ regions. Following SPLICE pre-dump expects pagemap
> +	 * entries for all mappings in parent pagemap, but last READ mode
> +	 * pre-dump has skipped processing and pagemap generation for
> +	 * non-PROT_READ regions. So SPLICE mode throws error of missing
> +	 * pagemap entry for non-PROT_READ mapping.
> +	 *
> +	 * To resolve this, the mode of pre-dump is stored in pre-dump's
> +	 * stats file. This mode is read back from stats file during next
> +	 * pre-dump.
> +	 * If parent-pre-dump and next pre-dump turns out in READ-mode -->
> +	 * SPLICE-mode order, then SPLICE mode doesn't expect mappings for
> +	 * non-PROT_READ regions in parent-image and marks "has_parent=false".
>  	 */
> -	if (opts.pre_dump_mode == PRE_DUMP_READ &&
> -	                          !(vma->e->prot & PROT_READ)) {
> -		if (pre_dump)
> +
> +	if (!(vma->e->prot & PROT_READ)) {
> +		if (opts.pre_dump_mode == PRE_DUMP_READ && pre_dump)
>  			return 0;
> -		has_parent = false;
> +		if ((parent_predump_mode == PRE_DUMP_READ &&
> +			opts.pre_dump_mode == PRE_DUMP_SPLICE) || !pre_dump)
> +			has_parent = false;
>  	}
>  
>  	if (vma_entry_is(vma->e, VMA_AREA_AIORING)) {
> @@ -420,6 +451,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>  	unsigned long pmc_size;
>  	int possible_pid_reuse = 0;
>  	bool has_parent;
> +	int parent_predump_mode;
>  
>  	pr_info("\n");
>  	pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, item->pid->real);
> @@ -486,9 +518,12 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>  	 */
>  	args->off = 0;
>  	has_parent = !!xfer.parent && !possible_pid_reuse;
> +	parent_predump_mode = get_parent_pre_dump_type();
> +
>  	list_for_each_entry(vma_area, &vma_area_list->h, list) {
>  		ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl,
> -					&pmc, has_parent, mdc->pre_dump);
> +					&pmc, has_parent, mdc->pre_dump,
> +					parent_predump_mode);
>  		if (ret < 0)
>  			goto out_xfer;
>  	}
> @@ -593,11 +628,11 @@ int parasite_dump_pages_seized(struct pstree_item *item,
>  			pr_err("Can't dump unprotect vmas with parasite\n");
>  			return ret;
>  		}
> +	}
>  
> -		if (fault_injected(FI_DUMP_PAGES)) {
> -			pr_err("fault: Dump VMA pages failure!\n");
> -			return -1;
> -		}
> +	if (fault_injected(FI_DUMP_PAGES)) {
> +		pr_err("fault: Dump VMA pages failure!\n");
> +		return -1;
>  	}
>  
>  	ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, ctl);
> diff --git a/criu/stats.c b/criu/stats.c
> index cb52801..4e1ff08 100644
> --- a/criu/stats.c
> +++ b/criu/stats.c
> @@ -149,7 +149,7 @@ static void display_stats(int what, StatsEntry *stats)
>  		return;
>  }
>  
> -void write_stats(int what)
> +void write_stats(int what, int pre_dump)
>  {
>  	StatsEntry stats = STATS_ENTRY__INIT;
>  	DumpStatsEntry ds_entry = DUMP_STATS_ENTRY__INIT;
> @@ -183,6 +183,8 @@ void write_stats(int what)
>  		ds_entry.has_shpages_skipped_parent = true;
>  		ds_entry.shpages_written = dstats->counts[CNT_SHPAGES_WRITTEN];
>  		ds_entry.has_shpages_written = true;
> +		ds_entry.pre_dump_mode = opts.pre_dump_mode;
> +		ds_entry.has_pre_dump_mode = pre_dump;
>  
>  		name = "dump";
>  	} else if (what == RESTORE_STATS) {
> @@ -228,3 +230,41 @@ int init_stats(int what)
>  	rstats = shmalloc(sizeof(struct restore_stats));
>  	return rstats ? 0 : -1;
>  }
> +
> +int get_parent_pre_dump_type(void)
> +{
> +        struct cr_img *img;
> +        StatsEntry *stats;
> +        int dir;
> +        unsigned int parent_pre_dump_mode = -1;
> +
> +        dir = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
> +        if (dir == -1) {
> +                if (errno != ENOENT)
> +                        pr_err("Failed to open parent directory for \
> +					pre-dump mode reading\n");
> +                return parent_pre_dump_mode;
> +        }
> +
> +        img = open_image_at(dir, CR_FD_STATS, O_RSTR, "dump");
> +        if (!img) {
> +                pr_err("Failed to open parent pre-dump stats image\n");
> +                close(dir);
> +                return parent_pre_dump_mode;
> +        }
> +
> +        if (pb_read_one(img, &stats, PB_STATS) < 0) {
> +                pr_err("Failed to read parent pre-dump stat entry\n");
> +                close_image(img);
> +                close(dir);
> +                return parent_pre_dump_mode;
> +        }
> +
> +        if (stats->dump->has_pre_dump_mode) {
> +                parent_pre_dump_mode = (int)stats->dump->pre_dump_mode;
> +        }
> +
> +        close_image(img);
> +        close(dir);
> +        return parent_pre_dump_mode;
> +}
> diff --git a/images/stats.proto b/images/stats.proto
> index 68d2f1b..aef3b16 100644
> --- a/images/stats.proto
> +++ b/images/stats.proto
> @@ -20,6 +20,7 @@ message dump_stats_entry {
>  	optional uint64			shpages_scanned		= 12;
>  	optional uint64			shpages_skipped_parent	= 13;
>  	optional uint64			shpages_written		= 14;
> +	optional uint32			pre_dump_mode		= 15;
>  }
>  
>  message restore_stats_entry {
> diff --git a/test/zdtm.py b/test/zdtm.py
> index ca8b165..1f401a6 100755
> --- a/test/zdtm.py
> +++ b/test/zdtm.py
> @@ -1021,6 +1021,7 @@ class criu:
>          self.__criu_bin = opts['criu_bin']
>          self.__crit_bin = opts['crit_bin']
>          self.__pre_dump_mode = opts['pre_dump_mode']
> +        self.__mix_pre_dump = bool(opts['mix_pre_dump'])
>  
>      def fini(self):
>          if self.__lazy_migrate:
> @@ -1279,6 +1280,13 @@ class criu:
>              a_opts += ['--empty-ns', 'net']
>          if self.__pre_dump_mode:
>              a_opts += ["--pre-dump-mode", "%s" % self.__pre_dump_mode]
> +        if self.__mix_pre_dump:
> +            if(random.randrange(1, 1000, 5) % 2):
> +                print("Mix-mode selected: splice")
> +                a_opts += ["--pre-dump-mode", "splice"]
> +            else:
> +                print("Mix-mode selected: read")
> +                a_opts += ["--pre-dump-mode", "read"]
>  
>          nowait = False
>          if self.__lazy_migrate and action == "dump":
> @@ -1868,7 +1876,7 @@ class Launcher:
>                'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs',
>                'freezecg', 'user', 'dry_run', 'noauto_dedup',
>                'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'remote',
> -              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode')
> +              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode', 'mix_pre_dump')
>          arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd}))
>  
>          if self.__use_log:
> @@ -2519,6 +2527,9 @@ rp.add_argument("--pre-dump-mode",
>                  help="Use splice or read mode of pre-dumping",
>                  choices=['splice', 'read'],
>                  default='splice')
> +rp.add_argument("--mix-pre-dump",
> +                help="mixing splice/read pre-dump modes",
> +                action='store_true')
>  
>  lp = sp.add_parser("list", help="List tests")
>  lp.set_defaults(action=list_tests)
> -- 
> 2.7.4
>
Abhishek Dubey Sept. 15, 2019, 3:45 p.m.
Hi Andrei,

For final changes, the user-buffer is allocated using calloc. The buffer 
allocation uses PAGE_SHIFT for generic page-size.

Are following changes fine for different architectures (in terms of page 
size)?

https://github.com/dubeyabhishek/ultimateCRIU/commit/64717e29c9e63b41fd08ca6e965854f2ed7c59dd

On 15/09/19 10:14 AM, Andrei Vagin wrote:
> Could you resend the whole series?
>
> On Sat, Aug 31, 2019 at 05:42:53PM +0530, Abhishek Dubey wrote:
>> READ and SPLICE mode pre-dumps can happen
>> in any order. New option --mix-pre-dump is
>> added for zdtm suite only, to test integrity
>> of random order of pre-dump modes.
>>
>> Signed-off-by: Abhishek Dubey <dubeyabhishek777@gmail.com>
>> ---
>>   criu/cr-dump.c       |  4 ++--
>>   criu/cr-restore.c    |  4 ++--
>>   criu/include/stats.h |  4 ++--
>>   criu/mem.c           | 65 ++++++++++++++++++++++++++++++++++++++++------------
>>   criu/stats.c         | 42 ++++++++++++++++++++++++++++++++-
>>   images/stats.proto   |  1 +
>>   test/zdtm.py         | 13 ++++++++++-
>>   7 files changed, 110 insertions(+), 23 deletions(-)
>>
>> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
>> index b916e0a..f62362f 100644
>> --- a/criu/cr-dump.c
>> +++ b/criu/cr-dump.c
>> @@ -1554,7 +1554,7 @@ err:
>>   	if (ret)
>>   		pr_err("Pre-dumping FAILED.\n");
>>   	else {
>> -		write_stats(DUMP_STATS);
>> +		write_stats(DUMP_STATS, 1);
>>   		pr_info("Pre-dumping finished successfully\n");
>>   	}
>>   	return ret;
>> @@ -1760,7 +1760,7 @@ static int cr_dump_finish(int ret)
>>   	if (ret) {
>>   		pr_err("Dumping FAILED.\n");
>>   	} else {
>> -		write_stats(DUMP_STATS);
>> +		write_stats(DUMP_STATS, 0);
>>   		pr_info("Dumping finished successfully\n");
>>   	}
>>   	return post_dump_ret ? : (ret != 0);
>> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
>> index de0b2cb..8ca46e4 100644
>> --- a/criu/cr-restore.c
>> +++ b/criu/cr-restore.c
>> @@ -2226,7 +2226,7 @@ skip_ns_bouncing:
>>   	if (ret != 0) {
>>   		pr_err("Aborting restore due to post-restore script ret code %d\n", ret);
>>   		timing_stop(TIME_RESTORE);
>> -		write_stats(RESTORE_STATS);
>> +		write_stats(RESTORE_STATS, 0);
>>   		goto out_kill;
>>   	}
>>   
>> @@ -2292,7 +2292,7 @@ skip_ns_bouncing:
>>   	/* Detaches from processes and they continue run through sigreturn. */
>>   	finalize_restore_detach(ret);
>>   
>> -	write_stats(RESTORE_STATS);
>> +	write_stats(RESTORE_STATS, 0);
>>   
>>   	ret = run_scripts(ACT_POST_RESUME);
>>   	if (ret != 0)
>> diff --git a/criu/include/stats.h b/criu/include/stats.h
>> index 5d408b7..e6f86cb 100644
>> --- a/criu/include/stats.h
>> +++ b/criu/include/stats.h
>> @@ -51,6 +51,6 @@ extern void cnt_sub(int c, unsigned long val);
>>   #define RESTORE_STATS	2
>>   
>>   extern int init_stats(int what);
>> -extern void write_stats(int what);
>> -
>> +extern void write_stats(int what, int pre_dump);
>> +extern int get_parent_pre_dump_type();
>>   #endif /* __CR_STATS_H__ */
>> diff --git a/criu/mem.c b/criu/mem.c
>> index 15094f7..0cfa4c2 100644
>> --- a/criu/mem.c
>> +++ b/criu/mem.c
>> @@ -351,7 +351,8 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,
>>   			     struct page_pipe *pp, struct page_xfer *xfer,
>>   			     struct parasite_dump_pages_args *args,
>>   			     struct parasite_ctl *ctl, pmc_t *pmc,
>> -			     bool has_parent, bool pre_dump)
>> +			     bool has_parent, bool pre_dump,
>> +			     int parent_predump_mode)
>>   {
>>   	u64 off = 0;
>>   	u64 *map;
>> @@ -362,17 +363,47 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,
>>   		return 0;
>>   
>>   	/*
>> -	 * process_vm_readv syscall can't copy memory regions lacking
>> -	 * PROT_READ flag. Therefore, avoid generating iovs for such
>> -	 * regions in "read" mode pre-dump. Regions skipped by pre-dumps
>> -	 * can't be referred as parent by following dump stage. So, mark
>> -	 * "has_parent=false" for such regions.
>> +	 * To facilitate any combination of pre-dump modes to run after
>> +	 * one another, we need to take extra care as discussed below.
>> +	 *
>> +	 * The SPLICE mode pre-dump, processes all type of memory regions,
>> +	 * whereas READ mode pre-dump skips processing those memory regions
>> +	 * which lacks PROT_READ flag.
>> +	 *
>> +	 * Now on mixing pre-dump modes:
>> +	 * 	If SPLICE mode follows SPLICE mode	: no issue
>> +	 *		-> everything dumped both the times
>> +	 *
>> +	 * 	If READ mode follows READ mode		: no issue
>> +	 *		-> non-PROT_READ skipped both the time
>> +	 *
>> +	 * 	If READ mode follows SPLICE mode   	: no issue
>> +	 *		-> everything dumped at first,
>> +	 *		   the non-PROT_READ skipped later
>> +	 *
>> +	 * 	If SPLICE mode follows READ mode   	: Need special care
>> +	 *
>> +	 * If READ pre-dump happens first then it has skipped processing
>> +	 * non-PROT_READ regions. Following SPLICE pre-dump expects pagemap
>> +	 * entries for all mappings in parent pagemap, but last READ mode
>> +	 * pre-dump has skipped processing and pagemap generation for
>> +	 * non-PROT_READ regions. So SPLICE mode throws error of missing
>> +	 * pagemap entry for non-PROT_READ mapping.
>> +	 *
>> +	 * To resolve this, the mode of pre-dump is stored in pre-dump's
>> +	 * stats file. This mode is read back from stats file during next
>> +	 * pre-dump.
>> +	 * If parent-pre-dump and next pre-dump turns out in READ-mode -->
>> +	 * SPLICE-mode order, then SPLICE mode doesn't expect mappings for
>> +	 * non-PROT_READ regions in parent-image and marks "has_parent=false".
>>   	 */
>> -	if (opts.pre_dump_mode == PRE_DUMP_READ &&
>> -	                          !(vma->e->prot & PROT_READ)) {
>> -		if (pre_dump)
>> +
>> +	if (!(vma->e->prot & PROT_READ)) {
>> +		if (opts.pre_dump_mode == PRE_DUMP_READ && pre_dump)
>>   			return 0;
>> -		has_parent = false;
>> +		if ((parent_predump_mode == PRE_DUMP_READ &&
>> +			opts.pre_dump_mode == PRE_DUMP_SPLICE) || !pre_dump)
>> +			has_parent = false;
>>   	}
>>   
>>   	if (vma_entry_is(vma->e, VMA_AREA_AIORING)) {
>> @@ -420,6 +451,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>>   	unsigned long pmc_size;
>>   	int possible_pid_reuse = 0;
>>   	bool has_parent;
>> +	int parent_predump_mode;
>>   
>>   	pr_info("\n");
>>   	pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, item->pid->real);
>> @@ -486,9 +518,12 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>>   	 */
>>   	args->off = 0;
>>   	has_parent = !!xfer.parent && !possible_pid_reuse;
>> +	parent_predump_mode = get_parent_pre_dump_type();
>> +
>>   	list_for_each_entry(vma_area, &vma_area_list->h, list) {
>>   		ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl,
>> -					&pmc, has_parent, mdc->pre_dump);
>> +					&pmc, has_parent, mdc->pre_dump,
>> +					parent_predump_mode);
>>   		if (ret < 0)
>>   			goto out_xfer;
>>   	}
>> @@ -593,11 +628,11 @@ int parasite_dump_pages_seized(struct pstree_item *item,
>>   			pr_err("Can't dump unprotect vmas with parasite\n");
>>   			return ret;
>>   		}
>> +	}
>>   
>> -		if (fault_injected(FI_DUMP_PAGES)) {
>> -			pr_err("fault: Dump VMA pages failure!\n");
>> -			return -1;
>> -		}
>> +	if (fault_injected(FI_DUMP_PAGES)) {
>> +		pr_err("fault: Dump VMA pages failure!\n");
>> +		return -1;
>>   	}
>>   
>>   	ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, ctl);
>> diff --git a/criu/stats.c b/criu/stats.c
>> index cb52801..4e1ff08 100644
>> --- a/criu/stats.c
>> +++ b/criu/stats.c
>> @@ -149,7 +149,7 @@ static void display_stats(int what, StatsEntry *stats)
>>   		return;
>>   }
>>   
>> -void write_stats(int what)
>> +void write_stats(int what, int pre_dump)
>>   {
>>   	StatsEntry stats = STATS_ENTRY__INIT;
>>   	DumpStatsEntry ds_entry = DUMP_STATS_ENTRY__INIT;
>> @@ -183,6 +183,8 @@ void write_stats(int what)
>>   		ds_entry.has_shpages_skipped_parent = true;
>>   		ds_entry.shpages_written = dstats->counts[CNT_SHPAGES_WRITTEN];
>>   		ds_entry.has_shpages_written = true;
>> +		ds_entry.pre_dump_mode = opts.pre_dump_mode;
>> +		ds_entry.has_pre_dump_mode = pre_dump;
>>   
>>   		name = "dump";
>>   	} else if (what == RESTORE_STATS) {
>> @@ -228,3 +230,41 @@ int init_stats(int what)
>>   	rstats = shmalloc(sizeof(struct restore_stats));
>>   	return rstats ? 0 : -1;
>>   }
>> +
>> +int get_parent_pre_dump_type(void)
>> +{
>> +        struct cr_img *img;
>> +        StatsEntry *stats;
>> +        int dir;
>> +        unsigned int parent_pre_dump_mode = -1;
>> +
>> +        dir = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
>> +        if (dir == -1) {
>> +                if (errno != ENOENT)
>> +                        pr_err("Failed to open parent directory for \
>> +					pre-dump mode reading\n");
>> +                return parent_pre_dump_mode;
>> +        }
>> +
>> +        img = open_image_at(dir, CR_FD_STATS, O_RSTR, "dump");
>> +        if (!img) {
>> +                pr_err("Failed to open parent pre-dump stats image\n");
>> +                close(dir);
>> +                return parent_pre_dump_mode;
>> +        }
>> +
>> +        if (pb_read_one(img, &stats, PB_STATS) < 0) {
>> +                pr_err("Failed to read parent pre-dump stat entry\n");
>> +                close_image(img);
>> +                close(dir);
>> +                return parent_pre_dump_mode;
>> +        }
>> +
>> +        if (stats->dump->has_pre_dump_mode) {
>> +                parent_pre_dump_mode = (int)stats->dump->pre_dump_mode;
>> +        }
>> +
>> +        close_image(img);
>> +        close(dir);
>> +        return parent_pre_dump_mode;
>> +}
>> diff --git a/images/stats.proto b/images/stats.proto
>> index 68d2f1b..aef3b16 100644
>> --- a/images/stats.proto
>> +++ b/images/stats.proto
>> @@ -20,6 +20,7 @@ message dump_stats_entry {
>>   	optional uint64			shpages_scanned		= 12;
>>   	optional uint64			shpages_skipped_parent	= 13;
>>   	optional uint64			shpages_written		= 14;
>> +	optional uint32			pre_dump_mode		= 15;
>>   }
>>   
>>   message restore_stats_entry {
>> diff --git a/test/zdtm.py b/test/zdtm.py
>> index ca8b165..1f401a6 100755
>> --- a/test/zdtm.py
>> +++ b/test/zdtm.py
>> @@ -1021,6 +1021,7 @@ class criu:
>>           self.__criu_bin = opts['criu_bin']
>>           self.__crit_bin = opts['crit_bin']
>>           self.__pre_dump_mode = opts['pre_dump_mode']
>> +        self.__mix_pre_dump = bool(opts['mix_pre_dump'])
>>   
>>       def fini(self):
>>           if self.__lazy_migrate:
>> @@ -1279,6 +1280,13 @@ class criu:
>>               a_opts += ['--empty-ns', 'net']
>>           if self.__pre_dump_mode:
>>               a_opts += ["--pre-dump-mode", "%s" % self.__pre_dump_mode]
>> +        if self.__mix_pre_dump:
>> +            if(random.randrange(1, 1000, 5) % 2):
>> +                print("Mix-mode selected: splice")
>> +                a_opts += ["--pre-dump-mode", "splice"]
>> +            else:
>> +                print("Mix-mode selected: read")
>> +                a_opts += ["--pre-dump-mode", "read"]
>>   
>>           nowait = False
>>           if self.__lazy_migrate and action == "dump":
>> @@ -1868,7 +1876,7 @@ class Launcher:
>>                 'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs',
>>                 'freezecg', 'user', 'dry_run', 'noauto_dedup',
>>                 'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'remote',
>> -              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode')
>> +              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode', 'mix_pre_dump')
>>           arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd}))
>>   
>>           if self.__use_log:
>> @@ -2519,6 +2527,9 @@ rp.add_argument("--pre-dump-mode",
>>                   help="Use splice or read mode of pre-dumping",
>>                   choices=['splice', 'read'],
>>                   default='splice')
>> +rp.add_argument("--mix-pre-dump",
>> +                help="mixing splice/read pre-dump modes",
>> +                action='store_true')
>>   
>>   lp = sp.add_parser("list", help="List tests")
>>   lp.set_defaults(action=list_tests)
>> -- 
>> 2.7.4
>>
-Abhishek