pstree: bump kernel pid_max value if needed

Submitted by Laurent Dufour on July 19, 2016, 4:32 p.m.

Details

Message ID 1468945970-4329-1-git-send-email-ldufour@linux.vnet.ibm.com
State Rejected
Series "pstree: bump kernel pid_max value if needed"
Headers show

Commit Message

Laurent Dufour July 19, 2016, 4:32 p.m.
When restoring on a different node, it may happen that pid_max is
below one of the pids we want to recreate.
This leads to a restore error when cloning the restarted process:

(00.011172) Forking task with 44794 pid (flags 0x0)
(00.011205) Error (cr-restore.c:1008): 44794: Write 44793 to sys/kernel/ns_last_pid: Invalid argument

This patch computes the largest pid value and sets the kernel pid_max if
necessary.

If the user doesn't have permission to do so, the restart
fails, reporting that the pid_max limit can't be raised.

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
---
 criu/pstree.c | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/pstree.c b/criu/pstree.c
index c2fa7486683a..30b2d5d7e7fe 100644
--- a/criu/pstree.c
+++ b/criu/pstree.c
@@ -474,7 +474,7 @@  static int read_pstree_ids(struct pstree_item *pi)
 	return 0;
 }
 
-static int read_pstree_image(void)
+static int read_pstree_image(pid_t *pid_max)
 {
 	int ret = 0, i;
 	struct cr_img *img;
@@ -511,8 +511,14 @@  static int read_pstree_image(void)
 			break;
 
 		pi->pid.virt = e->pid;
+		if (e->pid > *pid_max)
+			*pid_max = e->pid;
 		pi->pgid = e->pgid;
+		if (e->pgid > *pid_max)
+			*pid_max = e->pgid;
 		pi->sid = e->sid;
+		if (e->sid > *pid_max)
+			*pid_max = e->sid;
 		pi->pid.state = TASK_ALIVE;
 
 		if (e->ppid == 0) {
@@ -978,8 +984,41 @@  static int prepare_pstree_for_unshare(void)
 int prepare_pstree(void)
 {
 	int ret;
+	pid_t pid_max = 0, kpid_max = 0;
+	int fd;
+	char buf[20];
+
+	fd = open("/proc/sys/kernel/pid_max", O_RDONLY);
+	if (fd != 1) {
+		ret = read(fd, buf, sizeof(buf));
+		if (ret > 0) {
+			buf[ret] = 0;
+			kpid_max = strtoul(buf, NULL, 10);
+			pr_debug("kernel pid_max=%d\n", kpid_max);
+		}
+		close (fd);
+	}
+
+	ret = read_pstree_image(&pid_max);
+	pr_debug("pstree pid_max=%d\n", pid_max);
+
+	if (!ret && kpid_max && pid_max > kpid_max) {
+		/* Try to set kernel pid_max */
+		fd = open("/proc/sys/kernel/pid_max", O_WRONLY);
+		if (fd == -1) {
+			pr_perror("Can't set kernel pid_max, open fails");
+			ret = -1;
+		} else {
+			snprintf(buf, sizeof(buf), "%u", pid_max+1);
+			if (write(fd, buf, strlen(buf)) < 0) {
+				pr_perror("Can't set kernel pid_max=%s", buf);
+				ret = -1;
+			}
+			pr_info("kernel pid_max pushed to %s\n", buf);
+			close(fd);
+		}
+	}
 
-	ret = read_pstree_image();
 	if (!ret)
 		/*
 		 * Shell job may inherit sid/pgid from the current

Comments

Laurent Dufour July 19, 2016, 4:35 p.m.
On 19/07/2016 18:32, Laurent Dufour wrote:
> When restoring on a different node, it may happen that pid_max is
> below one of the pid we wanted to recreate.
> This leads to a restore error when cloning the restarted process:
> 
> (00.011172) Forking task with 44794 pid (flags 0x0)
> (00.011205) Error (cr-restore.c:1008): 44794: Write 44793 to sys/kernel/ns_last_pid: Invalid argument
> 
> This patch computes the largest pid value and sets the kernel pid_max if
> necessary.
> 
> If the user don't have the permission to do so, the restart is
> failing mentioning that we can't push the pid_max limit.
> 
> Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
> ---
>  criu/pstree.c | 43 +++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 41 insertions(+), 2 deletions(-)
> 
> diff --git a/criu/pstree.c b/criu/pstree.c
> index c2fa7486683a..30b2d5d7e7fe 100644
> --- a/criu/pstree.c
> +++ b/criu/pstree.c
> @@ -474,7 +474,7 @@ static int read_pstree_ids(struct pstree_item *pi)
>  	return 0;
>  }
> 
> -static int read_pstree_image(void)
> +static int read_pstree_image(pid_t *pid_max)
>  {
>  	int ret = 0, i;
>  	struct cr_img *img;
> @@ -511,8 +511,14 @@ static int read_pstree_image(void)
>  			break;
> 
>  		pi->pid.virt = e->pid;
> +		if (e->pid > *pid_max)
> +			*pid_max = e->pid;
>  		pi->pgid = e->pgid;
> +		if (e->pgid > *pid_max)
> +			*pid_max = e->pgid;
>  		pi->sid = e->sid;
> +		if (e->sid > *pid_max)
> +			*pid_max = e->sid;
>  		pi->pid.state = TASK_ALIVE;
> 
>  		if (e->ppid == 0) {
> @@ -978,8 +984,41 @@ static int prepare_pstree_for_unshare(void)
>  int prepare_pstree(void)
>  {
>  	int ret;
> +	pid_t pid_max = 0, kpid_max = 0;
> +	int fd;
> +	char buf[20];
> +
> +	fd = open("/proc/sys/kernel/pid_max", O_RDONLY);
> +	if (fd != 1) {
> +		ret = read(fd, buf, sizeof(buf));
> +		if (ret > 0) {
> +			buf[ret] = 0;
> +			kpid_max = strtoul(buf, NULL, 10);
> +			pr_debug("kernel pid_max=%d\n", kpid_max);
> +		}
> +		close (fd);
> +	}
> +
> +	ret = read_pstree_image(&pid_max);
> +	pr_debug("pstree pid_max=%d\n", pid_max);
> +
> +	if (!ret && kpid_max && pid_max > kpid_max) {
> +		/* Try to set kernel pid_max */
> +		fd = open("/proc/sys/kernel/pid_max", O_WRONLY);
> +		if (fd == -1) {
> +			pr_perror("Can't set kernel pid_max, open fails");
> +			ret = -1;
> +		} else {
> +			snprintf(buf, sizeof(buf), "%u", pid_max+1);
> +			if (write(fd, buf, strlen(buf)) < 0) {
> +				pr_perror("Can't set kernel pid_max=%s", buf);
> +				ret = -1;
> +			}
> +			pr_info("kernel pid_max pushed to %s\n", buf);

Sorry, forget about it, I just realized that this message is printed even
if the write fails; an else statement is needed here...

I'll push a v2 asap.

> +			close(fd);
> +		}
> +	}
> 
> -	ret = read_pstree_image();
>  	if (!ret)
>  		/*
>  		 * Shell job may inherit sid/pgid from the current
>