[8/8] zdtm: check that pid-reuse does not break iterative memory dump

Submitted by Pavel Tikhomirov on Feb. 12, 2018, 10:31 a.m.

Details

Message ID 20180212103105.13582-9-ptikhomirov@virtuozzo.com
State New
Series "don't use wrong pagemap (from other task) on pid reuse"
Headers show

Commit Message

Pavel Tikhomirov Feb. 12, 2018, 10:31 a.m.
From: ptikhomirov <ptikhomirov@virtuozzo.com>

The idea of the test is:

1) mmap separate page and put variable there, so that other usage does
not dirty these region. Initialize the variable with VALUE_A.

2) fork a child with special pid == CHILD_NS_PID. Only if it is a first
child overwrite the variable with VALUE_B.

3) wait for the end of the next predump or end of restore with
test_waitpre and kill our child.

Note: The memory region is "clean" in parent.

4) goto (2) unles end of cr is reported by test_waitpre

So on first iteration child with pid CHILD_NS_PID was dumped with
VALUE_B, on all other iterations and on final dump other child with the
same pid exists but with VALUE_A. But on all iterations after the first
one we have these memory region "clean". So criu before the fix would
have restored the VALUE_B taking it from first child's image, but should
restore VALUE_A.

Note: Child in its turn waits termination and performs a check that variable
value doesn't change after c/r.

We should run the test with at least one predump to trigger the problem:

[root@snorch criu]# ./test/zdtm.py run --pre 1 -k always -t zdtm/transition/pid_reuse
Checking feature ns_pid
Checking feature ns_get_userns
Checking feature ns_get_parent

Patch hide | download patch | download mbox

=== Run 1/1 ================ zdtm/transition/pid_reuse

===================== Run zdtm/transition/pid_reuse in ns ======================
 DEP       pid_reuse.d
 CC        pid_reuse.o
 LINK      pid_reuse
Start test
Test is SUID
./pid_reuse --pidfile=pid_reuse.pid --outfile=pid_reuse.out
Run criu pre-dump
Send the 10 signal to  52
Run criu dump
Run criu restore
Send the 15 signal to  73
Wait for zdtm/transition/pid_reuse(73) to die for 0.100000
Test output: ================================
14:47:57.717: 11235: ERR: pid_reuse.c:76: Wrong value in a variable after restore
14:47:57.717:     4: FAIL: pid_reuse.c:110: Task 11235 exited with wrong code 1 (errno = 11 (Resource temporarily unavailable))

 <<< ================================

https://jira.sw.ru/browse/PSBM-67502

Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 test/zdtm/transition/Makefile       |   1 +
 test/zdtm/transition/pid_reuse.c    | 118 ++++++++++++++++++++++++++++++++++++
 test/zdtm/transition/pid_reuse.desc |   1 +
 3 files changed, 120 insertions(+)
 create mode 100644 test/zdtm/transition/pid_reuse.c
 create mode 100644 test/zdtm/transition/pid_reuse.desc

diff --git a/test/zdtm/transition/Makefile b/test/zdtm/transition/Makefile
index f482a8a09..35301ac85 100644
--- a/test/zdtm/transition/Makefile
+++ b/test/zdtm/transition/Makefile
@@ -21,6 +21,7 @@  TST_NOFILE	=	\
 		socket-tcp6     \
 		shmem		\
 		lazy-thp	\
+		pid_reuse	\
 
 
 TST_FILE	=	\
diff --git a/test/zdtm/transition/pid_reuse.c b/test/zdtm/transition/pid_reuse.c
new file mode 100644
index 000000000..1400bf202
--- /dev/null
+++ b/test/zdtm/transition/pid_reuse.c
@@ -0,0 +1,118 @@ 
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Tests that forking tasks with same pid does not break iterative dump";
+const char *test_author = "Pavel Tikhomirov <ptikhomirov@virtuozzo.com>";
+
+enum {
+	VALUE_A = 1,
+	VALUE_B = 2,
+};
+
+#define CHILD_NS_PID 11235
+
+static int set_ns_next_pid(pid_t pid)
+{
+	char buf[32];
+	int len, fd;
+
+	fd = open("/proc/sys/kernel/ns_last_pid", O_WRONLY);
+	if (fd < 0) {
+		pr_perror("Failed to open ns_last_pid");
+		return -1;
+	}
+
+	len = snprintf(buf, sizeof(buf), "%d", pid - 1);
+	len -= write(fd, buf, len);
+	if (len)
+		pr_perror("Can't set ns_last_pid");
+	close(fd);
+
+	return len ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+	int pid, wpid, status;
+	bool overwrite = true;
+	bool wait = true;
+	int *variable;
+	void *mem;
+
+	test_init(argc, argv);
+
+	mem = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (mem == MAP_FAILED) {
+		pr_perror("Can't mmap memory region");
+		return 1;
+	}
+
+	variable = (int *)mem;
+	*variable = VALUE_A;
+
+	test_daemon();
+
+	while (wait) {
+		if (set_ns_next_pid(CHILD_NS_PID))
+			return 1;
+
+		pid = fork();
+		if (pid == -1) {
+			pr_perror("fork");
+			return 1;
+		} else if (pid == 0) {
+			if (overwrite)
+				*variable = VALUE_B;
+			test_waitsig();
+
+			if (*variable != (overwrite ? VALUE_B : VALUE_A)) {
+				pr_err("Wrong value in a variable after restore\n");
+				exit(1);
+			}
+			exit(0);
+		}
+
+		if (pid != CHILD_NS_PID) {
+			pr_err("Child started with wrong pid %d (expected %d)\n", pid, CHILD_NS_PID);
+			kill(pid, SIGKILL);
+			waitpid(pid, NULL, 0);
+			return 1;
+		}
+
+		/* Wait for next predump/dump finish */
+		if (test_waitpre())
+			wait = false;
+
+		if (kill(pid, SIGTERM)) {
+			pr_perror("kill");
+			return 1;
+		}
+
+		wpid = waitpid(pid, &status, 0);
+		if (wpid <= 0) {
+			pr_perror("waitpid");
+			return 1;
+		}
+
+		if (!WIFEXITED(status)) {
+			fail("Task %d didn't exit", wpid);
+			return 1;
+		}
+
+		if (WEXITSTATUS(status) != 0) {
+			fail("Task %d exited with wrong code %d", wpid, WEXITSTATUS(status));
+			return 1;
+		}
+
+		overwrite = false;
+	}
+	pass();
+	return 0;
+}
diff --git a/test/zdtm/transition/pid_reuse.desc b/test/zdtm/transition/pid_reuse.desc
new file mode 100644
index 000000000..1d1a44053
--- /dev/null
+++ b/test/zdtm/transition/pid_reuse.desc
@@ -0,0 +1 @@ 
+{'flavor': 'ns uns', 'flags': 'suid', 'feature': 'ns_pid ns_get_userns ns_get_parent'}

Comments

Andrey Vagin Feb. 13, 2018, 7:57 p.m.
On Mon, Feb 12, 2018 at 01:31:05PM +0300, Pavel Tikhomirov wrote:
> From: ptikhomirov <ptikhomirov@virtuozzo.com>
> 
> The idea of the test is:
> 
> 1) mmap separate page and put variable there, so that other usage does
> not dirty these region. Initialize the variable with VALUE_A.
> 
> 2) fork a child with special pid == CHILD_NS_PID. Only if it is a first
> child overwrite the variable with VALUE_B.
> 
> 3) wait for the end of the next predump or end of restore with
> test_waitpre and kill our child.
> 
> Note: The memory region is "clean" in parent.
> 
> 4) goto (2) unles end of cr is reported by test_waitpre
> 
> So on first iteration child with pid CHILD_NS_PID was dumped with
> VALUE_B, on all other iterations and on final dump other child with the
> same pid exists but with VALUE_A. But on all iterations after the first
> one we have these memory region "clean". So criu before the fix would
> have restored the VALUE_B taking it from first child's image, but should
> restore VALUE_A.
> 
> Note: Child in its turn waits termination and performs a check that variable
> value doesn't change after c/r.
> 
> We should run the test with at least one predump to trigger the problem:
> 
> [root@snorch criu]# ./test/zdtm.py run --pre 1 -k always -t zdtm/transition/pid_reuse
> Checking feature ns_pid
> Checking feature ns_get_userns
> Checking feature ns_get_parent
> 
> === Run 1/1 ================ zdtm/transition/pid_reuse
> 
> ===================== Run zdtm/transition/pid_reuse in ns ======================
>  DEP       pid_reuse.d
>  CC        pid_reuse.o
>  LINK      pid_reuse
> Start test
> Test is SUID
> ./pid_reuse --pidfile=pid_reuse.pid --outfile=pid_reuse.out
> Run criu pre-dump
> Send the 10 signal to  52
> Run criu dump
> Run criu restore
> Send the 15 signal to  73
> Wait for zdtm/transition/pid_reuse(73) to die for 0.100000
> Test output: ================================
> 14:47:57.717: 11235: ERR: pid_reuse.c:76: Wrong value in a variable after restore
> 14:47:57.717:     4: FAIL: pid_reuse.c:110: Task 11235 exited with wrong code 1 (errno = 11 (Resource temporarily unavailable))
> 
>  <<< ================================
> 
> https://jira.sw.ru/browse/PSBM-67502
> 
> Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
> ---
>  test/zdtm/transition/Makefile       |   1 +
>  test/zdtm/transition/pid_reuse.c    | 118 ++++++++++++++++++++++++++++++++++++
>  test/zdtm/transition/pid_reuse.desc |   1 +
>  3 files changed, 120 insertions(+)
>  create mode 100644 test/zdtm/transition/pid_reuse.c
>  create mode 100644 test/zdtm/transition/pid_reuse.desc
> 
> diff --git a/test/zdtm/transition/Makefile b/test/zdtm/transition/Makefile
> index f482a8a09..35301ac85 100644
> --- a/test/zdtm/transition/Makefile
> +++ b/test/zdtm/transition/Makefile
> @@ -21,6 +21,7 @@ TST_NOFILE	=	\
>  		socket-tcp6     \
>  		shmem		\
>  		lazy-thp	\
> +		pid_reuse	\
>  
>  
>  TST_FILE	=	\
> diff --git a/test/zdtm/transition/pid_reuse.c b/test/zdtm/transition/pid_reuse.c
> new file mode 100644
> index 000000000..1400bf202
> --- /dev/null
> +++ b/test/zdtm/transition/pid_reuse.c
> @@ -0,0 +1,118 @@
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <stdlib.h>
> +#include <signal.h>
> +#include <sys/wait.h>
> +#include <sys/mman.h>
> +
> +#include "zdtmtst.h"
> +
> +const char *test_doc = "Tests that forking tasks with same pid does not break iterative dump";
> +const char *test_author = "Pavel Tikhomirov <ptikhomirov@virtuozzo.com>";
> +
> +enum {
> +	VALUE_A = 1,
> +	VALUE_B = 2,
> +};
> +
> +#define CHILD_NS_PID 11235
> +
> +static int set_ns_next_pid(pid_t pid)
> +{
> +	char buf[32];
> +	int len, fd;
> +
> +	fd = open("/proc/sys/kernel/ns_last_pid", O_WRONLY);
> +	if (fd < 0) {
> +		pr_perror("Failed to open ns_last_pid");
> +		return -1;
> +	}
> +
> +	len = snprintf(buf, sizeof(buf), "%d", pid - 1);
> +	len -= write(fd, buf, len);
> +	if (len)
> +		pr_perror("Can't set ns_last_pid");
> +	close(fd);
> +
> +	return len ? -1 : 0;
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	int pid, wpid, status;
> +	bool overwrite = true;
> +	bool wait = true;
> +	int *variable;
> +	void *mem;
> +
> +	test_init(argc, argv);
> +
> +	mem = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +	if (mem == MAP_FAILED) {
> +		pr_perror("Can't mmap memory region");
> +		return 1;
> +	}
> +
> +	variable = (int *)mem;
> +	*variable = VALUE_A;
> +
> +	test_daemon();
> +
> +	while (wait) {
> +		if (set_ns_next_pid(CHILD_NS_PID))
> +			return 1;
> +
> +		pid = fork();
> +		if (pid == -1) {
> +			pr_perror("fork");
> +			return 1;
> +		} else if (pid == 0) {
> +			if (overwrite)
> +				*variable = VALUE_B;
> +			test_waitsig();
> +
> +			if (*variable != (overwrite ? VALUE_B : VALUE_A)) {
> +				pr_err("Wrong value in a variable after restore\n");
> +				exit(1);
> +			}
> +			exit(0);
> +		}
> +
> +		if (pid != CHILD_NS_PID) {
> +			pr_err("Child started with wrong pid %d (expected %d)\n", pid, CHILD_NS_PID);
> +			kill(pid, SIGKILL);
> +			waitpid(pid, NULL, 0);
> +			return 1;
> +		}
> +
> +		/* Wait for next predump/dump finish */
> +		if (test_waitpre())
> +			wait = false;
> +
> +		if (kill(pid, SIGTERM)) {
> +			pr_perror("kill");
> +			return 1;
> +		}
> +
> +		wpid = waitpid(pid, &status, 0);
> +		if (wpid <= 0) {
> +			pr_perror("waitpid");
> +			return 1;
> +		}
> +
> +		if (!WIFEXITED(status)) {
> +			fail("Task %d didn't exit", wpid);

The process can be killed (WIFSIGNALED()).

Pls, don't decode status, it is enough to check whether status is
zero of not.

	if (status) {
		fail("The task %d exited with %x", wpid, status);
		return 1;
	}

> +			return 1;
> +		}
> +
> +		if (WEXITSTATUS(status) != 0) {
> +			fail("Task %d exited with wrong code %d", wpid, WEXITSTATUS(status));
> +			return 1;
> +		}
> +
> +		overwrite = false;
> +	}
> +	pass();
> +	return 0;
> +}
> diff --git a/test/zdtm/transition/pid_reuse.desc b/test/zdtm/transition/pid_reuse.desc
> new file mode 100644
> index 000000000..1d1a44053
> --- /dev/null
> +++ b/test/zdtm/transition/pid_reuse.desc
> @@ -0,0 +1 @@
> +{'flavor': 'ns uns', 'flags': 'suid', 'feature': 'ns_pid ns_get_userns ns_get_parent'}
> -- 
> 2.14.3
>
Pavel Tikhomirov Feb. 14, 2018, 11:36 a.m.
>> +		wpid = waitpid(pid, &status, 0);
>> +		if (wpid <= 0) {
>> +			pr_perror("waitpid");
>> +			return 1;
>> +		}
>> +
>> +		if (!WIFEXITED(status)) {
>> +			fail("Task %d didn't exit", wpid);
> 
> The process can be killed (WIFSIGNALED()) >
> Pls, don't decode status, it is enough to check whether status is
> zero of not.

These seem an undocumented behaviour to me, but it will work.

> 
> 	if (status) {
> 		fail("The task %d exited with %x", wpid, status);

 From man: "WIFEXITED(wstatus) - returns true if the child terminated 
normally", meaning "exited" == "terminated". Also obviousely 
"terminated" != "killed by signal". So, for the sake of clarity, we 
can't say "The task %d exited with %x" as it does not cover signals, but 
may be something like "Task %d died with exit status %x" will be sutable 
here.

> 		return 1;
> 	}
> 
>> +			return 1;
>> +		}
>> +
>> +		if (WEXITSTATUS(status) != 0) {
>> +			fail("Task %d exited with wrong code %d", wpid, WEXITSTATUS(status));
>> +			return 1;
>> +		}
>> +
>> +		overwrite = false;
>> +	}
>> +	pass();
>> +	return 0;
>> +}

I sent v3 fixed version with fixes.
Andrey Vagin Feb. 14, 2018, 6 p.m.
On Wed, Feb 14, 2018 at 02:36:58PM +0300, Pavel Tikhomirov wrote:
> > > +		wpid = waitpid(pid, &status, 0);
> > > +		if (wpid <= 0) {
> > > +			pr_perror("waitpid");
> > > +			return 1;
> > > +		}
> > > +
> > > +		if (!WIFEXITED(status)) {
> > > +			fail("Task %d didn't exit", wpid);
> > 
> > The process can be killed (WIFSIGNALED()) >
> > Pls, don't decode status, it is enough to check whether status is
> > zero of not.
> 
> These seem an undocumented behaviour to me, but it will work.

A lot of things are not documented, but they are parts of API by
historical reasons.

> 
> > 
> > 	if (status) {
> > 		fail("The task %d exited with %x", wpid, status);
> 
> From man: "WIFEXITED(wstatus) - returns true if the child terminated
> normally", meaning "exited" == "terminated". Also obviousely "terminated" !=
> "killed by signal". So, for the sake of clarity, we can't say "The task %d
> exited with %x" as it does not cover signals, but may be something like
> "Task %d died with exit status %x" will be sutable here.

For me, it doesn't matter how you call this. I want to have status in a
test log. Your original message doesn't contain it. It was only a
reason, why I left this comment ;)

I suggest to not deode status, because I want to minimize a test code. I
have seen this code a lot of time and I think it is useless. I want to
know about all non-zero status, and they all means errors.

> 
> > 		return 1;
> > 	}
> > 
> > > +			return 1;
> > > +		}
> > > +
> > > +		if (WEXITSTATUS(status) != 0) {
> > > +			fail("Task %d exited with wrong code %d", wpid, WEXITSTATUS(status));
> > > +			return 1;
> > > +		}
> > > +
> > > +		overwrite = false;
> > > +	}
> > > +	pass();
> > > +	return 0;
> > > +}
> 
> I sent v3 fixed version with fixes.