[v2,4/8] memory: don't use parent memdump if detected possible pid reuse

Submitted by Pavel Tikhomirov on Feb. 12, 2018, 10:31 a.m.

Details

Message ID 20180212103105.13582-5-ptikhomirov@virtuozzo.com
State New
Series "don't use wrong pagemap (from other task) on pid reuse"
Headers show

Commit Message

Pavel Tikhomirov Feb. 12, 2018, 10:31 a.m.
From: ptikhomirov <ptikhomirov@virtuozzo.com>

We have a problem when a pid is reused between consequent dumps we can't
understand if pagemap and pages from images of parent dump are invalid
to restore these pid already. That can lead even to wrong memory
restored for these pid, see the test in last patch.

So these is a try do separate processes with (likely) invalid previous
memory dump from processes with 100% valid previous dump.

For that we use the value of /proc/<pid>/stat's start_time and also the
timestamp of each (pre)dump. If the start time is strictly less than the
timestamp, that means that the pagemap for these pid from previous dump
is valid - was done for exactly the same process.

Creation time is in centiseconds by default so if predump is really fast
(<1csec) we can have false negative decisions for some processes, but in
case of long running processes we are fine.

https://jira.sw.ru/browse/PSBM-67502

v2: remove __maybe_unused for get_parent_stats; fix get_parent_stats to
have static typing; print warning only if unsure; check has_dump_uptime

Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 criu/mem.c   | 44 +++++++++++++++++++++++++++++++++++++++++++-
 criu/stats.c |  2 +-
 2 files changed, 44 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/mem.c b/criu/mem.c
index 4c6942a11..2bf9bcb56 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -30,9 +30,11 @@ 
 #include "fault-injection.h"
 #include "prctl.h"
 #include <compel/compel.h>
+#include "proc_parse.h"
 
 #include "protobuf.h"
 #include "images/pagemap.pb-c.h"
+#include "images/stats.pb-c.h"
 
 static int task_reset_dirty_track(int pid)
 {
@@ -303,6 +305,7 @@  static int __parasite_dump_pages_seized(struct pstree_item *item,
 	int ret = -1;
 	unsigned cpp_flags = 0;
 	unsigned long pmc_size;
+	bool possible_pid_reuse = false;
 
 	if (opts.check_only)
 		return 0;
@@ -360,6 +363,45 @@  static int __parasite_dump_pages_seized(struct pstree_item *item,
 			xfer.parent = NULL + 1;
 	}
 
+	if (xfer.parent) {
+		struct proc_pid_stat pps_buf;
+		StatsEntry *stats = NULL;
+		unsigned long dump_ticks;
+		unsigned long clock_ticks;
+
+		clock_ticks = sysconf(_SC_CLK_TCK);
+		if (clock_ticks == -1) {
+			pr_perror("Failed to get clock ticks via sysconf");
+			goto out_xfer;
+		}
+
+		ret = parse_pid_stat(item->pid->real, &pps_buf);
+		if (ret < 0)
+			goto out_xfer;
+
+		ret = get_parent_stats(&stats);
+		if (ret < 0)
+			goto out_xfer;
+		if (stats->dump->has_dump_uptime) {
+			dump_ticks = stats->dump->dump_uptime/(USEC_PER_SEC / clock_ticks);
+			stats_entry__free_unpacked(stats, NULL);
+
+			if (pps_buf.start_time >= dump_ticks) {
+				/* Print warning when we are not sure */
+				if (pps_buf.start_time == dump_ticks)
+					pr_warn("Will do full redump for pid=%d due " \
+						"to possible pid reuse\n",
+						item->pid->real);
+				possible_pid_reuse = true;
+			}
+		} else {
+			pr_warn_once("Parent image has no dump timestamp, " \
+				     "pid reuse detection OFF!\n");
+			stats_entry__free_unpacked(stats, NULL);
+		}
+	}
+
+
 	/*
 	 * Step 1 -- generate the pagemap
 	 */
@@ -386,7 +428,7 @@  static int __parasite_dump_pages_seized(struct pstree_item *item,
 		else {
 again:
 			ret = generate_iovs(vma_area, pp, map, &off,
-				has_parent);
+				has_parent && !possible_pid_reuse);
 			if (ret == -EAGAIN) {
 				BUG_ON(!(pp->flags & PP_CHUNK_MODE));
 
diff --git a/criu/stats.c b/criu/stats.c
index ddc852a30..975cacf71 100644
--- a/criu/stats.c
+++ b/criu/stats.c
@@ -213,7 +213,7 @@  void write_stats(int what)
 		display_stats(what, &stats);
 }
 
-__maybe_unused int get_parent_stats(StatsEntry **stats)
+int get_parent_stats(StatsEntry **stats)
 {
 	struct cr_img *img;
 	int dir;

Comments

Andrey Vagin Feb. 13, 2018, 7:36 a.m.
On Mon, Feb 12, 2018 at 01:31:01PM +0300, Pavel Tikhomirov wrote:
> From: ptikhomirov <ptikhomirov@virtuozzo.com>
> 
> We have a problem when a pid is reused between consequent dumps we can't
> understand if pagemap and pages from images of parent dump are invalid
> to restore these pid already. That can lead even to wrong memory
> restored for these pid, see the test in last patch.
> 
> So these is a try do separate processes with (likely) invalid previous
> memory dump from processes with 100% valid previous dump.
> 
> For that we use the value of /proc/<pid>/stat's start_time and also the
> timestamp of each (pre)dump. If the start time is strictly less than the
> timestamp, that means that the pagemap for these pid from previous dump
> is valid - was done for exactly the same process.
> 
> Creation time is in centiseconds by default so if predump is really fast
> (<1csec) we can have false negative decisions for some processes, but in
> case of long running processes we are fine.
> 
> https://jira.sw.ru/browse/PSBM-67502
> 
> v2: remove __maybe_unused for get_parent_stats; fix get_parent_stats to
> have static typing; print warning only if unsure; check has_dump_uptime
> 
> Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
> ---
>  criu/mem.c   | 44 +++++++++++++++++++++++++++++++++++++++++++-
>  criu/stats.c |  2 +-
>  2 files changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/criu/mem.c b/criu/mem.c
> index 4c6942a11..2bf9bcb56 100644
> --- a/criu/mem.c
> +++ b/criu/mem.c
> @@ -30,9 +30,11 @@
>  #include "fault-injection.h"
>  #include "prctl.h"
>  #include <compel/compel.h>
> +#include "proc_parse.h"
>  
>  #include "protobuf.h"
>  #include "images/pagemap.pb-c.h"
> +#include "images/stats.pb-c.h"
>  
>  static int task_reset_dirty_track(int pid)
>  {
> @@ -303,6 +305,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>  	int ret = -1;
>  	unsigned cpp_flags = 0;
>  	unsigned long pmc_size;
> +	bool possible_pid_reuse = false;
>  
>  	if (opts.check_only)
>  		return 0;
> @@ -360,6 +363,45 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>  			xfer.parent = NULL + 1;
>  	}
>  
> +	if (xfer.parent) {
> +		struct proc_pid_stat pps_buf;
> +		StatsEntry *stats = NULL;
> +		unsigned long dump_ticks;
> +		unsigned long clock_ticks;
> +
> +		clock_ticks = sysconf(_SC_CLK_TCK);
> +		if (clock_ticks == -1) {
> +			pr_perror("Failed to get clock ticks via sysconf");
> +			goto out_xfer;
> +		}
> +
> +		ret = parse_pid_stat(item->pid->real, &pps_buf);

I don't like the idea to parse /proc/pid/stat.
Can we use ctime for /proc/PID?

> +		if (ret < 0)
> +			goto out_xfer;
> +
> +		ret = get_parent_stats(&stats);
> +		if (ret < 0)
> +			goto out_xfer;
> +		if (stats->dump->has_dump_uptime) {
> +			dump_ticks = stats->dump->dump_uptime/(USEC_PER_SEC / clock_ticks);
> +			stats_entry__free_unpacked(stats, NULL);
> +
> +			if (pps_buf.start_time >= dump_ticks) {
> +				/* Print warning when we are not sure */
> +				if (pps_buf.start_time == dump_ticks)
> +					pr_warn("Will do full redump for pid=%d due " \
> +						"to possible pid reuse\n",
> +						item->pid->real);
> +				possible_pid_reuse = true;
> +			}
> +		} else {
> +			pr_warn_once("Parent image has no dump timestamp, " \
> +				     "pid reuse detection OFF!\n");
> +			stats_entry__free_unpacked(stats, NULL);
> +		}
> +	}
> +
> +
>  	/*
>  	 * Step 1 -- generate the pagemap
>  	 */
> @@ -386,7 +428,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>  		else {
>  again:
>  			ret = generate_iovs(vma_area, pp, map, &off,
> -				has_parent);
> +				has_parent && !possible_pid_reuse);
>  			if (ret == -EAGAIN) {
>  				BUG_ON(!(pp->flags & PP_CHUNK_MODE));
>  
> diff --git a/criu/stats.c b/criu/stats.c
> index ddc852a30..975cacf71 100644
> --- a/criu/stats.c
> +++ b/criu/stats.c
> @@ -213,7 +213,7 @@ void write_stats(int what)
>  		display_stats(what, &stats);
>  }
>  
> -__maybe_unused int get_parent_stats(StatsEntry **stats)
> +int get_parent_stats(StatsEntry **stats)
>  {
>  	struct cr_img *img;
>  	int dir;
> -- 
> 2.14.3
>
Pavel Tikhomirov Feb. 13, 2018, 3:58 p.m.
On 02/13/2018 10:36 AM, Andrew Vagin wrote:
>> +		ret = parse_pid_stat(item->pid->real, &pps_buf);
> I don't like the idea to parse /proc/pid/stat.
> Can we use ctime for /proc/PID?
> 

It seems we can not.

Correct me if I'm wrong, but it seem that inode creation time is 
initialized when we first access it but _not_ at the process creation time:

#1
sleep 1000&
pid=$!
date +%H:%M:%S.%N
sleep 1
stat /proc/$pid | grep Change
kill $pid

18:53:59.393336874
Change: 2018-02-13 18:54:00.401236071 +0300

#2
sleep 1000&
pid=$!
stat /proc/$pid | grep Change
sleep 1
date +%H:%M:%S.%N
kill $pid

Change: 2018-02-13 18:54:00.410236147 +0300
18:54:01.419285696
Andrey Vagin Feb. 13, 2018, 10:29 p.m.
On Mon, Feb 12, 2018 at 01:31:01PM +0300, Pavel Tikhomirov wrote:
> From: ptikhomirov <ptikhomirov@virtuozzo.com>
> 
> We have a problem when a pid is reused between consequent dumps we can't
> understand if pagemap and pages from images of parent dump are invalid
> to restore these pid already. That can lead even to wrong memory
> restored for these pid, see the test in last patch.
> 
> So these is a try do separate processes with (likely) invalid previous
> memory dump from processes with 100% valid previous dump.
> 
> For that we use the value of /proc/<pid>/stat's start_time and also the
> timestamp of each (pre)dump. If the start time is strictly less than the
> timestamp, that means that the pagemap for these pid from previous dump
> is valid - was done for exactly the same process.
> 
> Creation time is in centiseconds by default so if predump is really fast
> (<1csec) we can have false negative decisions for some processes, but in
> case of long running processes we are fine.
> 
> https://jira.sw.ru/browse/PSBM-67502
> 
> v2: remove __maybe_unused for get_parent_stats; fix get_parent_stats to
> have static typing; print warning only if unsure; check has_dump_uptime
> 
> Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
> ---
>  criu/mem.c   | 44 +++++++++++++++++++++++++++++++++++++++++++-
>  criu/stats.c |  2 +-
>  2 files changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/criu/mem.c b/criu/mem.c
> index 4c6942a11..2bf9bcb56 100644
> --- a/criu/mem.c
> +++ b/criu/mem.c
> @@ -30,9 +30,11 @@
>  #include "fault-injection.h"
>  #include "prctl.h"
>  #include <compel/compel.h>
> +#include "proc_parse.h"
>  
>  #include "protobuf.h"
>  #include "images/pagemap.pb-c.h"
> +#include "images/stats.pb-c.h"
>  
>  static int task_reset_dirty_track(int pid)
>  {
> @@ -303,6 +305,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>  	int ret = -1;
>  	unsigned cpp_flags = 0;
>  	unsigned long pmc_size;
> +	bool possible_pid_reuse = false;
>  
>  	if (opts.check_only)
>  		return 0;
> @@ -360,6 +363,45 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>  			xfer.parent = NULL + 1;
>  	}
>  
> +	if (xfer.parent) {
> +		struct proc_pid_stat pps_buf;
> +		StatsEntry *stats = NULL;
> +		unsigned long dump_ticks;
> +		unsigned long clock_ticks;
> +
> +		clock_ticks = sysconf(_SC_CLK_TCK);
> +		if (clock_ticks == -1) {
> +			pr_perror("Failed to get clock ticks via sysconf");
> +			goto out_xfer;
> +		}
> +
> +		ret = parse_pid_stat(item->pid->real, &pps_buf);

We already parse /proc/pid/stat on dump, so maybe we can avoid doing
this twice.

> +		if (ret < 0)
> +			goto out_xfer;
> +
> +		ret = get_parent_stats(&stats);
> +		if (ret < 0)
> +			goto out_xfer;
> +		if (stats->dump->has_dump_uptime) {
> +			dump_ticks = stats->dump->dump_uptime/(USEC_PER_SEC / clock_ticks);
> +			stats_entry__free_unpacked(stats, NULL);
> +
> +			if (pps_buf.start_time >= dump_ticks) {
> +				/* Print warning when we are not sure */
> +				if (pps_buf.start_time == dump_ticks)
> +					pr_warn("Will do full redump for pid=%d due " \
> +						"to possible pid reuse\n",
> +						item->pid->real);
> +				possible_pid_reuse = true;
> +			}
> +		} else {
> +			pr_warn_once("Parent image has no dump timestamp, " \
> +				     "pid reuse detection OFF!\n");
> +			stats_entry__free_unpacked(stats, NULL);
> +		}
> +	}
> +
> +
>  	/*
>  	 * Step 1 -- generate the pagemap
>  	 */
> @@ -386,7 +428,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>  		else {
>  again:
>  			ret = generate_iovs(vma_area, pp, map, &off,
> -				has_parent);
> +				has_parent && !possible_pid_reuse);
>  			if (ret == -EAGAIN) {
>  				BUG_ON(!(pp->flags & PP_CHUNK_MODE));
>  
> diff --git a/criu/stats.c b/criu/stats.c
> index ddc852a30..975cacf71 100644
> --- a/criu/stats.c
> +++ b/criu/stats.c
> @@ -213,7 +213,7 @@ void write_stats(int what)
>  		display_stats(what, &stats);
>  }
>  
> -__maybe_unused int get_parent_stats(StatsEntry **stats)
> +int get_parent_stats(StatsEntry **stats)
>  {
>  	struct cr_img *img;
>  	int dir;
> -- 
> 2.14.3
>