[v4,32/41] pid: Create pid_ns helpers

Submitted by Kirill Tkhai on May 4, 2017, 4:10 p.m.

Details

Message ID 149391423769.11711.8301254682574407726.stgit@localhost.localdomain
State New
Series "Nested pid namespaces support"
Headers show

Commit Message

Kirill Tkhai May 4, 2017, 4:10 p.m.
Task may set last_pid only for its active pid namespace,
so if NSpid of a child contains more than one level, we
need external help to populate the whole pid hierarchy
(pid in parent pid_ns, pid in grand parent etc). Pid ns
helpers are used for that.

These are children of usernsd, which listen on a
socket and set the requested last pid in their active
pid_ns.

v4: Move destroy_pid_ns_helpers() before CR_STATE_RESTORE_SIGCHLD
change, as they must die before zombies.

v3: Block SIGCHLD during stopping of pid_ns helpers.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 criu/cr-restore.c         |    7 +
 criu/include/namespaces.h |    3 +
 criu/namespaces.c         |  253 +++++++++++++++++++++++++++++++++++++++++++++
 criu/ns-common.c          |   51 +++++++++
 criu/pie/restorer.c       |    5 +
 5 files changed, 319 insertions(+)
 create mode 100644 criu/ns-common.c

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 4c4ca37d7..3c35e5f08 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -1605,6 +1605,8 @@  static int restore_task_with_children(void *_arg)
 			pr_err("Can't add fd to fdstore\n");
 			return -1;
 		}
+		if (create_pid_ns_helper(pid_ns) < 0)
+			goto err;
 	}
 
 	if (restore_task_mnt_ns(current))
@@ -2038,6 +2040,10 @@  static int restore_root_task(struct pstree_item *init)
 			task_entries->nr_threads--;
 	}
 
+	ret = destroy_pid_ns_helpers();
+	if (ret < 0)
+		goto out_kill;
+
 	ret = restore_switch_stage(CR_STATE_RESTORE_SIGCHLD);
 	if (ret < 0)
 		goto out_kill;
@@ -2141,6 +2147,7 @@  static int restore_root_task(struct pstree_item *init)
 	return 0;
 
 out_kill:
+	destroy_pid_ns_helpers();
 	/*
 	 * The processes can be killed only when all of them have been created,
 	 * otherwise an external proccesses can be killed.
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index 37b65b0db..b81957668 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -267,5 +267,8 @@  static inline int pid_ns_root_off(void)
 	return 0;
 }
 extern int reserve_pid_ns_helpers(void);
+extern int create_pid_ns_helper(struct ns_id *ns);
+extern int destroy_pid_ns_helpers(void);
+extern int request_set_next_pid(int pid_ns_id, pid_t pid, int sk);
 
 #endif /* __CR_NS_H__ */
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 97ea2b0e6..f65f06003 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -15,6 +15,7 @@ 
 #include <errno.h>
 #include <sys/ioctl.h>
 #include <sys/ptrace.h>
+#include <sys/file.h>
 
 #include "page.h"
 #include "rst-malloc.h"
@@ -38,6 +39,11 @@ 
 #include "fdstore.h"
 #include "proc_parse.h"
 
+#define __sys(foo)	foo
+#define __sys_err(ret)	(-errno)
+
+#include "ns-common.c"
+
 static struct ns_desc *ns_desc_array[] = {
 	&net_ns_desc,
 	&uts_ns_desc,
@@ -49,6 +55,8 @@  static struct ns_desc *ns_desc_array[] = {
 };
 
 static unsigned int join_ns_flags;
+/* Creation of every helper is synchronized by userns_sync_lock */
+static int nr_pid_ns_helper_created = 0;
 
 int check_namespace_opts(void)
 {
@@ -2532,5 +2540,250 @@  int reserve_pid_ns_helpers(void)
 	return walk_namespaces(&pid_ns_desc, do_reserve_pid_ns_helpers, NULL);
 }
 
+static int pid_ns_helper_sock(struct ns_id *ns)
+{
+	struct sockaddr_un addr;
+	socklen_t len;
+	int sk;
+
+	sk = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (sk < 0) {
+		pr_perror("Can't create helper socket");
+		return -1;
+	}
+	pid_ns_helper_socket_name(&addr, &len, ns->id);
+
+	if (bind(sk, (struct sockaddr *)&addr, len) < 0) {
+		pr_perror("Can't bind pid_ns sock");
+		return -1;
+	}
+
+	return sk;
+}
+
+static int pid_ns_helper(struct ns_id *ns, int sk)
+{
+	struct sockaddr_un addr;
+	struct msghdr msg = {0};
+	struct iovec iov;
+	pid_t pid;
+
+	msg.msg_name = &addr;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	while (1) {
+		int answer = 0;
+		msg.msg_namelen = sizeof(addr);
+		iov.iov_base = &pid;
+		iov.iov_len = sizeof(pid);
+
+		if (recvmsg(sk, &msg, 0) < 0) {
+			pr_perror("recv() failed to read pid");
+			break;
+		}
+
+		if (pid != 0) {
+			if (__set_next_pid(pid) < 0) {
+				pr_err("Can't set next pid\n");
+				answer = -1;
+			}
+		}
+
+		iov.iov_base = &answer;
+		iov.iov_len = sizeof(answer);
+		if (sendmsg(sk, &msg, 0) < 0) {
+			pr_perror("Can't send answer");
+			break;
+		}
+
+		if (pid == 0)
+			return 0;
+	}
+
+	return -1;
+}
+
+static int do_create_pid_ns_helper(void *arg, int unused_fd, pid_t unused_pid)
+{
+	int pid_ns_fd, mnt_ns_fd, sk, fd, i, lock_fd, transport_fd;
+	struct ns_id *ns, *tmp;
+	struct pid *pid;
+	pid_t child;
+
+	pid_ns_fd = open_proc(PROC_SELF, "ns/pid");
+	if (pid_ns_fd < 0) {
+		pr_perror("Can't open pid ns");
+		return -1;
+	}
+	ns = *(struct ns_id **)arg;
+
+	fd = fdstore_get(ns->pid.nsfd_id);
+	if (fd < 0) {
+		pr_err("Can't get pid_ns fd\n");
+		return -1;
+	}
+	if (setns(fd, CLONE_NEWPID) < 0) {
+		pr_perror("Can't setns");
+		return -1;
+	}
+	close(fd);
+
+	sk = pid_ns_helper_sock(ns);
+	if (sk < 0)
+		return -1;
+
+	pid = __pstree_pid_by_virt(ns, ns->ns_pid);
+	if (!pid) {
+		pr_err("Can't find helper reserved pid\n");
+		return -1;
+	}
+
+	tmp = ns->parent;
+	if (tmp) {
+		futex_t *f = &tmp->pid.helper_created;
+		futex_wait_while_eq(f, 0);
+	}
+
+	if (switch_ns(root_item->pid->real, &mnt_ns_desc, &mnt_ns_fd) < 0) {
+		pr_err("Can't set mnt_ns\n");
+		return -1;
+	}
+
+	lock_fd = open("/proc/" LAST_PID_PATH, O_RDONLY);
+	if (lock_fd < 0)
+		return -1;
+
+	if (restore_ns(mnt_ns_fd, &mnt_ns_desc) < 0) {
+		pr_err("Can't restore ns\n");
+		return -1;
+	}
+
+	if (flock(lock_fd, LOCK_EX)) {
+		close(lock_fd);
+		pr_perror("Can't lock %s", LAST_PID_PATH);
+		return -1;
+	}
+
+	transport_fd = get_service_fd(TRANSPORT_FD_OFF);
+	/*
+	 * Starting not from pid->level - 1, as it's helper has not created yet
+	 * (we're creating it in the moment), and the true pid for this level
+	 * is set by the task, who does close(CLONE_NEWPID) (this task is sender of fd).
+	 */
+	for (i = pid->level - 2, tmp = ns->parent; i >= 0; i--, tmp = tmp->parent)
+		if (request_set_next_pid(tmp->id, pid->ns[i].virt, transport_fd)) {
+			pr_err("Can't set next pid using helper\n");
+			flock(lock_fd, LOCK_UN);
+			close(lock_fd);
+			return -1;
+		}
+	child = fork();
+	if (child < 0) {
+		flock(lock_fd, LOCK_UN);
+		close(lock_fd);
+		pr_perror("Can't fork");
+		return -1;
+	} else if (!child) {
+		close(lock_fd);
+		exit(pid_ns_helper(ns, sk));
+	}
+	close(sk);
+	futex_set_and_wake(&ns->pid.helper_created, 1);
+	flock(lock_fd, LOCK_UN);
+	close(lock_fd);
+	nr_pid_ns_helper_created++;
+
+	if (setns(pid_ns_fd, CLONE_NEWPID) < 0) {
+		pr_perror("Restore ns");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Task may set last_pid only for its active pid namespace,
+ * so if NSpid of a child contains more than one level, we
+ * need external help to populate the whole pid hierarchy
+ * (pid in parent pid_ns, pid in grand parent etc). Pid ns
+ * helpers are used for that.
+ *
+ * We need a task or tasks to be a parent of pid_ns helpers.
+ * Living in the common hierarchy as a TASK_HELPER is not
+ * possible, because it introduces circular dependencies.
+ * The same goes for being children of the criu main task,
+ * because we already have dependencies between it and root_item
+ * (NO more dependencies!). So, we choose usernsd for that:
+ * it always exists and has a command interface.
+ */
+int create_pid_ns_helper(struct ns_id *ns)
+{
+	BUG_ON(getpid() != INIT_PID);
+
+	if (__set_next_pid(ns->ns_pid) < 0) {
+		pr_err("Can't set next fd\n");
+		return -1;
+	}
+	if (userns_call(do_create_pid_ns_helper, 0, &ns, sizeof(ns), -1) < 0) {
+		pr_err("Can't create pid_ns helper\n");
+		return -1;
+	}
+	return 0;
+}
+
+static int do_destroy_pid_ns_helper(void *arg, int fd, pid_t pid)
+{
+	int i, sk, status, sig_blocked = true, nr_ok = 0, ret = 0;
+	sigset_t sig_mask;
+	struct ns_id *ns;
+
+	if (!nr_pid_ns_helper_created)
+		return 0;
+
+	if (block_sigmask(&sig_mask, SIGCHLD)) {
+		sig_blocked = false;
+		ret = -1;
+	}
+
+	sk = get_service_fd(TRANSPORT_FD_OFF);
+
+	for (ns = ns_ids; ns; ns = ns->next) {
+		if (ns->nd != &pid_ns_desc)
+			continue;
+		if (request_set_next_pid(ns->id, 0, sk) == 0)
+			nr_ok++;
+	}
+
+	if (nr_ok != nr_pid_ns_helper_created) {
+		pr_err("Not all pid_ns helpers killed\n");
+		ret = -1;
+	}
+
+	for (i = 0; i < nr_ok; i++) {
+		if (waitpid(-1, &status, 0) < 0) {
+			pr_perror("Error during waiting pid_ns helper");
+			ret = -1;
+		}
+	}
+	nr_pid_ns_helper_created = 0;
+
+	if (sig_blocked && restore_sigmask(&sig_mask))
+		ret = -1;
+
+	return ret;
+}
+
+int destroy_pid_ns_helpers(void)
+{
+	if (!(root_ns_mask & CLONE_NEWPID))
+		return 0;
+
+	if (userns_call(do_destroy_pid_ns_helper, 0, NULL, 0, -1) < 0) {
+		pr_err("Can't create pid_ns helper\n");
+		return -1;
+	}
+	return 0;
+}
+
 struct ns_desc pid_ns_desc = NS_DESC_ENTRY(CLONE_NEWPID, "pid");
 struct ns_desc user_ns_desc = NS_DESC_ENTRY(CLONE_NEWUSER, "user");
diff --git a/criu/ns-common.c b/criu/ns-common.c
new file mode 100644
index 000000000..a8e28aa00
--- /dev/null
+++ b/criu/ns-common.c
@@ -0,0 +1,51 @@ 
+#include <sys/socket.h>
+#include <sys/un.h>
+
+void pid_ns_helper_socket_name(struct sockaddr_un *addr, socklen_t *len, unsigned int id)
+{
+	const char prefix[] = "0/criu-pid-ns-";
+	const char int_max[] = "2147483647";
+
+	*len = sizeof(*addr) - sizeof(addr->sun_path) +
+	       sizeof(prefix) - 1 + sizeof(int_max) - 1;
+
+	addr->sun_family = AF_UNIX;
+
+	memset(addr->sun_path + sizeof(prefix) - 1, '\0', sizeof(int_max) - 1);
+#ifdef CR_NOGLIBC
+	std_sprintf(addr->sun_path, "%s%d", prefix, id);
+#else
+	sprintf(addr->sun_path, "%s%d", prefix, id);
+#endif
+	addr->sun_path[0] = '\0';
+}
+
+/* Send helper a request to set next pid and receive success */
+int request_set_next_pid(int pid_ns_id, pid_t pid, int sk)
+{
+	struct sockaddr_un addr;
+	int answer, ret;
+	socklen_t len;
+
+	BUG_ON(pid == -1);
+
+	pid_ns_helper_socket_name(&addr, &len, pid_ns_id);
+	ret = __sys(sendto)(sk, &pid, sizeof(pid), 0, (struct sockaddr *)&addr, len);
+	if (ret	< 0) {
+		pr_err("Can't send request: err=%d\n", __sys_err(ret));
+		return -1;
+	}
+
+	ret = __sys(recvfrom)(sk, &answer, sizeof(answer), 0, NULL, NULL);
+	if (ret < 0) {
+		pr_err("Can't recv answer: err=%d\n", __sys_err(ret));
+		return -1;
+	}
+
+	if (answer != 0) {
+		pr_err("Error answer\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 030c7ff42..3b0b35710 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -47,6 +47,11 @@ 
 #include "restorer.h"
 #include "namespaces.h"
 
+#define __sys(foo)	sys_##foo
+#define __sys_err(ret)	ret
+
+#include "../ns-common.c"
+
 #ifndef PR_SET_PDEATHSIG
 #define PR_SET_PDEATHSIG 1
 #endif

Comments

Andrey Vagin May 5, 2017, 5:51 a.m.
On Thu, May 04, 2017 at 07:10:37PM +0300, Kirill Tkhai wrote:
> Task may set last_pid only for its active pid namespace,
> so if NSpid of a child contains more then one level, we
> need external help to populate the whole pid hierarhy
> (pid in parent pid_ns, pid in grand parent etc). Pid ns
> helpers are used for that.
> 
> These are childred of usernsd, which are listening for
> socket, and setting requested last pid in their active
> pid_ns.
> 
> v4: Move destroy_pid_ns_helpers() before CR_STATE_RESTORE_SIGCHLD
> change, as they must die before zombies.
> 
> v3: Block SIGCHLD during stoppinig of pid_ns helpers.
> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  criu/cr-restore.c         |    7 +
>  criu/include/namespaces.h |    3 +
>  criu/namespaces.c         |  253 +++++++++++++++++++++++++++++++++++++++++++++
>  criu/ns-common.c          |   51 +++++++++
>  criu/pie/restorer.c       |    5 +
>  5 files changed, 319 insertions(+)
>  create mode 100644 criu/ns-common.c
> 
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 4c4ca37d7..3c35e5f08 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -1605,6 +1605,8 @@ static int restore_task_with_children(void *_arg)
>  			pr_err("Can't add fd to fdstore\n");
>  			return -1;
>  		}
> +		if (create_pid_ns_helper(pid_ns) < 0)
> +			goto err;
>  	}
>  
>  	if (restore_task_mnt_ns(current))
> @@ -2038,6 +2040,10 @@ static int restore_root_task(struct pstree_item *init)
>  			task_entries->nr_threads--;
>  	}
>  
> +	ret = destroy_pid_ns_helpers();
> +	if (ret < 0)
> +		goto out_kill;
> +
>  	ret = restore_switch_stage(CR_STATE_RESTORE_SIGCHLD);
>  	if (ret < 0)
>  		goto out_kill;
> @@ -2141,6 +2147,7 @@ static int restore_root_task(struct pstree_item *init)
>  	return 0;
>  
>  out_kill:
> +	destroy_pid_ns_helpers();
>  	/*
>  	 * The processes can be killed only when all of them have been created,
>  	 * otherwise an external proccesses can be killed.
> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> index 37b65b0db..b81957668 100644
> --- a/criu/include/namespaces.h
> +++ b/criu/include/namespaces.h
> @@ -267,5 +267,8 @@ static inline int pid_ns_root_off(void)
>  	return 0;
>  }
>  extern int reserve_pid_ns_helpers(void);
> +extern int create_pid_ns_helper(struct ns_id *ns);
> +extern int destroy_pid_ns_helpers(void);
> +extern int request_set_next_pid(int pid_ns_id, pid_t pid, int sk);
>  
>  #endif /* __CR_NS_H__ */
> diff --git a/criu/namespaces.c b/criu/namespaces.c
> index 97ea2b0e6..f65f06003 100644
> --- a/criu/namespaces.c
> +++ b/criu/namespaces.c
> @@ -15,6 +15,7 @@
>  #include <errno.h>
>  #include <sys/ioctl.h>
>  #include <sys/ptrace.h>
> +#include <sys/file.h>
>  
>  #include "page.h"
>  #include "rst-malloc.h"
> @@ -38,6 +39,11 @@
>  #include "fdstore.h"
>  #include "proc_parse.h"
>  
> +#define __sys(foo)	foo
> +#define __sys_err(ret)	(-errno)
> +
> +#include "ns-common.c"
> +
>  static struct ns_desc *ns_desc_array[] = {
>  	&net_ns_desc,
>  	&uts_ns_desc,
> @@ -49,6 +55,8 @@ static struct ns_desc *ns_desc_array[] = {
>  };
>  
>  static unsigned int join_ns_flags;
> +/* Creation of every helper are synchronized by userns_sync_lock */
> +static int nr_pid_ns_helper_created = 0;
>  
>  int check_namespace_opts(void)
>  {
> @@ -2532,5 +2540,250 @@ int reserve_pid_ns_helpers(void)
>  	return walk_namespaces(&pid_ns_desc, do_reserve_pid_ns_helpers, NULL);
>  }
>  
> +static int pid_ns_helper_sock(struct ns_id *ns)
> +{
> +	struct sockaddr_un addr;
> +	socklen_t len;
> +	int sk;
> +
> +	sk = socket(AF_UNIX, SOCK_DGRAM, 0);
> +	if (sk < 0) {
> +		pr_perror("Can't create helper socket");
> +		return -1;
> +	}
> +	pid_ns_helper_socket_name(&addr, &len, ns->id);
> +
> +	if (bind(sk, (struct sockaddr *)&addr, len) < 0) {
> +		pr_perror("Can't bind pid_ns sock");
> +		return -1;
> +	}
> +
> +	return sk;
> +}
> +
> +static int pid_ns_helper(struct ns_id *ns, int sk)
> +{
> +	struct sockaddr_un addr;
> +	struct msghdr msg = {0};
> +	struct iovec iov;
> +	pid_t pid;
> +
> +	msg.msg_name = &addr;
> +	msg.msg_iov = &iov;
> +	msg.msg_iovlen = 1;
> +
> +	while (1) {
> +		int answer = 0;
> +		msg.msg_namelen = sizeof(addr);
> +		iov.iov_base = &pid;
> +		iov.iov_len = sizeof(pid);
> +
> +		if (recvmsg(sk, &msg, 0) < 0) {
> +			pr_perror("recv() failed to read pid");
> +			break;
> +		}
> +
> +		if (pid != 0) {
> +			if (__set_next_pid(pid) < 0) {
> +				pr_err("Can't set next pid\n");
> +				answer = -1;
> +			}
> +		}
> +
> +		iov.iov_base = &answer;
> +		iov.iov_len = sizeof(answer);
> +		if (sendmsg(sk, &msg, 0) < 0) {
> +			pr_perror("Can't send answer");
> +			break;
> +		}
> +
> +		if (pid == 0)
> +			return 0;
> +	}
> +
> +	return -1;
> +}
> +
> +static int do_create_pid_ns_helper(void *arg, int unused_fd, pid_t unused_pid)
> +{
> +	int pid_ns_fd, mnt_ns_fd, sk, fd, i, lock_fd, transport_fd;
> +	struct ns_id *ns, *tmp;
> +	struct pid *pid;
> +	pid_t child;
> +
> +	pid_ns_fd = open_proc(PROC_SELF, "ns/pid");
> +	if (pid_ns_fd < 0) {
> +		pr_perror("Can't open pid ns");
> +		return -1;
> +	}
> +	ns = *(struct ns_id **)arg;
> +
> +	fd = fdstore_get(ns->pid.nsfd_id);
> +	if (fd < 0) {
> +		pr_err("Can't get pid_ns fd\n");
> +		return -1;
> +	}
> +	if (setns(fd, CLONE_NEWPID) < 0) {
> +		pr_perror("Can't setns");
> +		return -1;
> +	}
> +	close(fd);
> +
> +	sk = pid_ns_helper_sock(ns);
> +	if (sk < 0)
> +		return -1;
> +
> +	pid = __pstree_pid_by_virt(ns, ns->ns_pid);
> +	if (!pid) {
> +		pr_err("Can't find helper reserved pid\n");
> +		return -1;
> +	}
> +
> +	tmp = ns->parent;
> +	if (tmp) {
> +		futex_t *f = &tmp->pid.helper_created;
> +		futex_wait_while_eq(f, 0);
> +	}
> +
> +	if (switch_ns(root_item->pid->real, &mnt_ns_desc, &mnt_ns_fd) < 0) {
> +		pr_err("Can't set mnt_ns\n");
> +		return -1;
> +	}
> +
> +	lock_fd = open("/proc/" LAST_PID_PATH, O_RDONLY);
> +	if (lock_fd < 0)
> +		return -1;
> +
> +	if (restore_ns(mnt_ns_fd, &mnt_ns_desc) < 0) {
> +		pr_err("Can't restore ns\n");
> +		return -1;
> +	}
> +
> +	if (flock(lock_fd, LOCK_EX)) {
> +		close(lock_fd);
> +		pr_perror("Can't lock %s", LAST_PID_PATH);
> +		return -1;
> +	}
> +
> +	transport_fd = get_service_fd(TRANSPORT_FD_OFF);
> +	/*
> +	 * Starting not from pid->level - 1, as it's helper has not created yet
> +	 * (we're creating it in the moment), and the true pid for this level
> +	 * is set by the task, who does close(CLONE_NEWPID) (this task is sender of fd).
> +	 */
> +	for (i = pid->level - 2, tmp = ns->parent; i >= 0; i--, tmp = tmp->parent)
> +		if (request_set_next_pid(tmp->id, pid->ns[i].virt, transport_fd)) {
> +			pr_err("Can't set next pid using helper\n");
> +			flock(lock_fd, LOCK_UN);
> +			close(lock_fd);
> +			return -1;
> +		}
> +	child = fork();
> +	if (child < 0) {
> +		flock(lock_fd, LOCK_UN);
> +		close(lock_fd);
> +		pr_perror("Can't fork");
> +		return -1;
> +	} else if (!child) {
> +		close(lock_fd);
> +		exit(pid_ns_helper(ns, sk));
> +	}
> +	close(sk);
> +	futex_set_and_wake(&ns->pid.helper_created, 1);
> +	flock(lock_fd, LOCK_UN);
> +	close(lock_fd);
> +	nr_pid_ns_helper_created++;
> +
> +	if (setns(pid_ns_fd, CLONE_NEWPID) < 0) {
> +		pr_perror("Restore ns");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Task may set last_pid only for its active pid namespace,
> + * so if NSpid of a child contains more then one level, we
> + * need external help to populate the whole pid hierarhy
> + * (pid in parent pid_ns, pid in grand parent etc). Pid ns
> + * helpers are used for that.
> + *
> + * We need a task or tasks to be a parent of pid_ns helpers.
> + * To live in common hierarhy and to be a TASK_HELPER is not
> + * possible, because it introduces circular dependencies.
> + * The same is to be children of criu main task, because
> + * we already have dependencies between it and root_item
> + * (NO more dependencies!). So, we choose usernsd for that:
> + * it always exists and have command interface.
> + */
> +int create_pid_ns_helper(struct ns_id *ns)
> +{
> +	BUG_ON(getpid() != INIT_PID);
> +
> +	if (__set_next_pid(ns->ns_pid) < 0) {
> +		pr_err("Can't set next fd\n");
> +		return -1;
> +	}
> +	if (userns_call(do_create_pid_ns_helper, 0, &ns, sizeof(ns), -1) < 0) {
> +		pr_err("Can't create pid_ns helper\n");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +static int do_destroy_pid_ns_helper(void *arg, int fd, pid_t pid)
> +{
> +	int i, sk, status, sig_blocked = true, nr_ok = 0, ret = 0;
> +	sigset_t sig_mask;
> +	struct ns_id *ns;
> +
> +	if (!nr_pid_ns_helper_created)
> +		return 0;
> +
> +	if (block_sigmask(&sig_mask, SIGCHLD)) {
> +		sig_blocked = false;
> +		ret = -1;
> +	}
> +
> +	sk = get_service_fd(TRANSPORT_FD_OFF);
> +
> +	for (ns = ns_ids; ns; ns = ns->next) {
> +		if (ns->nd != &pid_ns_desc)
> +			continue;
> +		if (request_set_next_pid(ns->id, 0, sk) == 0)
> +			nr_ok++;
> +	}
> +
> +	if (nr_ok != nr_pid_ns_helper_created) {
> +		pr_err("Not all pid_ns helpers killed\n");
> +		ret = -1;
> +	}
> +
> +	for (i = 0; i < nr_ok; i++) {
> +		if (waitpid(-1, &status, 0) < 0) {
> +			pr_perror("Error during waiting pid_ns helper");
> +			ret = -1;
> +		}
> +	}
> +	nr_pid_ns_helper_created = 0;
> +
> +	if (sig_blocked && restore_sigmask(&sig_mask))
> +		ret = -1;
> +
> +	return ret;
> +}
> +
> +int destroy_pid_ns_helpers(void)
> +{
> +	if (!(root_ns_mask & CLONE_NEWPID))
> +		return 0;
> +
> +	if (userns_call(do_destroy_pid_ns_helper, 0, NULL, 0, -1) < 0) {
> +		pr_err("Can't create pid_ns helper\n");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
>  struct ns_desc pid_ns_desc = NS_DESC_ENTRY(CLONE_NEWPID, "pid");
>  struct ns_desc user_ns_desc = NS_DESC_ENTRY(CLONE_NEWUSER, "user");
> diff --git a/criu/ns-common.c b/criu/ns-common.c
> new file mode 100644
> index 000000000..a8e28aa00
> --- /dev/null
> +++ b/criu/ns-common.c
> @@ -0,0 +1,51 @@
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +
> +void pid_ns_helper_socket_name(struct sockaddr_un *addr, socklen_t *len, unsigned int id)
> +{
> +	const char prefix[] = "0/criu-pid-ns-";
> +	const char int_max[] = "2147483647";
> +
> +	*len = sizeof(*addr) - sizeof(addr->sun_path) +
> +	       sizeof(prefix) - 1 + sizeof(int_max) - 1;
> +
> +	addr->sun_family = AF_UNIX;
> +
> +	memset(addr->sun_path + sizeof(prefix) - 1, '\0', sizeof(int_max) - 1);
> +#ifdef CR_NOGLIBC
> +	std_sprintf(addr->sun_path, "%s%d", prefix, id);
> +#else
> +	sprintf(addr->sun_path, "%s%d", prefix, id);
> +#endif
> +	addr->sun_path[0] = '\0';
> +}
> +
> +/* Send helper a request to set next pid and receive success */
> +int request_set_next_pid(int pid_ns_id, pid_t pid, int sk)
> +{
> +	struct sockaddr_un addr;
> +	int answer, ret;
> +	socklen_t len;
> +
> +	BUG_ON(pid == -1);
> +
> +	pid_ns_helper_socket_name(&addr, &len, pid_ns_id);
> +	ret = __sys(sendto)(sk, &pid, sizeof(pid), 0, (struct sockaddr *)&addr, len);
> +	if (ret	< 0) {
> +		pr_err("Can't send request: err=%d\n", __sys_err(ret));
> +		return -1;
> +	}
> +
> +	ret = __sys(recvfrom)(sk, &answer, sizeof(answer), 0, NULL, NULL);
> +	if (ret < 0) {
> +		pr_err("Can't recv answer: err=%d\n", __sys_err(ret));
> +		return -1;
> +	}

criu hangs in this function in an error case.

[root@fc24 criu]# cat test/dump/zdtm/static/pty-console/31/1/restore.log  | grep -B 5 Error
(00.324568) uns: daemon calls 0x48eae0 (51, 8, 1)
(00.324583)      1: tty: Allocating fake descriptor for 0xb (reg_d 0x7f618453bff0)
(00.324605) uns: daemon calls 0x48eae0 (51, 8, 1)
(00.324639)      1: tty: Restore session 1 by 1 tty (index 0)
(00.324641)      1: Restoring resources
(00.324662)      1: Error (criu/tty.c:663): tty: Can't set sid on terminal fd 3: Operation not permitted


18064 pts/0    T      0:00  |           \_ python test/zdtm.py run -a --keep-going
20734 pts/0    T      0:00  |           |   \_ ./zdtm_ct zdtm.py
20737 pts/0    S      0:00  |           |       \_ python2 zdtm.py
20739 pts/0    T      0:00  |           |           \_ python2 zdtm.py
20784 pts/0    t      0:00  |           |               \_ ../criu/criu restore -o restore.log -D dump/zdtm/static/pty-console/31/1 -v4 --pidfile /root/git/criu/test/zdtm/static/pty-console.
20786 pts/0    S      0:00  |           |                   \_ ../criu/criu restore -o restore.log -D dump/zdtm/static/pty-console/31/1 -v4 --pidfile /root/git/criu/test/zdtm/static/pty-cons
20799 pts/0    Z      0:00  |           |                   |   \_ [criu] <defunct>
20801 pts/0    Z      0:00  |           |                   |   \_ [criu] <defunct>
20787 pts/0    S      0:00  |           |                   \_ [criu]
20800 pts/0    D      0:00  |           |                       \_ [criu]

[root@fc24 criu]# gdb -p 20786
(gdb) bt
#0  0x00007f6183ced9f3 in __recvfrom_nocancel () from target:/lib64/libpthread.so.0
#1  0x0000000000461662 in request_set_next_pid (pid_ns_id=<optimized out>, pid=<optimized out>, pid@entry=0, sk=sk@entry=1012) at criu/ns-common.c:39
#2  0x00000000004618ad in do_destroy_pid_ns_helper (pid=<optimized out>, fd=<optimized out>, arg=<optimized out>) at criu/namespaces.c:2753
#3  0x000000000042167e in usernsd (sk=5) at criu/namespaces.c:1600
#4  0x0000000000465a63 in start_usernsd () at criu/namespaces.c:1760
#5  prepare_namespace_before_tasks () at criu/namespaces.c:2431
#6  0x0000000000420746 in restore_root_task (init=0x7f618453b4d0) at criu/cr-restore.c:2118
#7  0x000000000043da13 in cr_restore_tasks () at criu/cr-restore.c:2433
#8  0x0000000000422928 in main (argc=<optimized out>, argv=0x7ffe63fe3d18, envp=<optimized out>) at criu/crtools.c:728

[root@fc24 criu]# gdb -p gdb -p 20784
(gdb) bt
#0  0x00007f6183cee2c7 in recvmsg () from target:/lib64/libpthread.so.0
#1  0x0000000000463c26 in __userns_call (func_name=func_name@entry=0x4dc101 "do_destroy_pid_ns_helper", call=<optimized out>, call@entry=0x461950 <do_destroy_pid_ns_helper>, 
    flags=<optimized out>, flags@entry=0, arg=arg@entry=0x0, arg_size=arg_size@entry=0, fd=fd@entry=-1) at criu/namespaces.c:1692
#2  0x0000000000465d40 in destroy_pid_ns_helpers () at criu/namespaces.c:2781
#3  0x0000000000420dd5 in restore_root_task (init=0x7f618453b4d0) at criu/cr-restore.c:2323
#4  0x000000000043da13 in cr_restore_tasks () at criu/cr-restore.c:2433
#5  0x0000000000422928 in main (argc=<optimized out>, argv=0x7ffe63fe3d18, envp=<optimized out>) at criu/crtools.c:728

> +
> +	if (answer != 0) {
> +		pr_err("Error answer\n");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index 030c7ff42..3b0b35710 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -47,6 +47,11 @@
>  #include "restorer.h"
>  #include "namespaces.h"
>  
> +#define __sys(foo)	sys_##foo
> +#define __sys_err(ret)	ret
> +
> +#include "../ns-common.c"
> +
>  #ifndef PR_SET_PDEATHSIG
>  #define PR_SET_PDEATHSIG 1
>  #endif
>
Kirill Tkhai May 5, 2017, 3:35 p.m.
On 05.05.2017 08:51, Andrei Vagin wrote:
> On Thu, May 04, 2017 at 07:10:37PM +0300, Kirill Tkhai wrote:
>> Task may set last_pid only for its active pid namespace,
>> so if NSpid of a child contains more then one level, we
>> need external help to populate the whole pid hierarhy
>> (pid in parent pid_ns, pid in grand parent etc). Pid ns
>> helpers are used for that.
>>
>> These are childred of usernsd, which are listening for
>> socket, and setting requested last pid in their active
>> pid_ns.
>>
>> v4: Move destroy_pid_ns_helpers() before CR_STATE_RESTORE_SIGCHLD
>> change, as they must die before zombies.
>>
>> v3: Block SIGCHLD during stoppinig of pid_ns helpers.
>>
>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>> ---
>>  criu/cr-restore.c         |    7 +
>>  criu/include/namespaces.h |    3 +
>>  criu/namespaces.c         |  253 +++++++++++++++++++++++++++++++++++++++++++++
>>  criu/ns-common.c          |   51 +++++++++
>>  criu/pie/restorer.c       |    5 +
>>  5 files changed, 319 insertions(+)
>>  create mode 100644 criu/ns-common.c
>>
>> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
>> index 4c4ca37d7..3c35e5f08 100644
>> --- a/criu/cr-restore.c
>> +++ b/criu/cr-restore.c
>> @@ -1605,6 +1605,8 @@ static int restore_task_with_children(void *_arg)
>>  			pr_err("Can't add fd to fdstore\n");
>>  			return -1;
>>  		}
>> +		if (create_pid_ns_helper(pid_ns) < 0)
>> +			goto err;
>>  	}
>>  
>>  	if (restore_task_mnt_ns(current))
>> @@ -2038,6 +2040,10 @@ static int restore_root_task(struct pstree_item *init)
>>  			task_entries->nr_threads--;
>>  	}
>>  
>> +	ret = destroy_pid_ns_helpers();
>> +	if (ret < 0)
>> +		goto out_kill;
>> +
>>  	ret = restore_switch_stage(CR_STATE_RESTORE_SIGCHLD);
>>  	if (ret < 0)
>>  		goto out_kill;
>> @@ -2141,6 +2147,7 @@ static int restore_root_task(struct pstree_item *init)
>>  	return 0;
>>  
>>  out_kill:
>> +	destroy_pid_ns_helpers();
>>  	/*
>>  	 * The processes can be killed only when all of them have been created,
>>  	 * otherwise an external proccesses can be killed.
>> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
>> index 37b65b0db..b81957668 100644
>> --- a/criu/include/namespaces.h
>> +++ b/criu/include/namespaces.h
>> @@ -267,5 +267,8 @@ static inline int pid_ns_root_off(void)
>>  	return 0;
>>  }
>>  extern int reserve_pid_ns_helpers(void);
>> +extern int create_pid_ns_helper(struct ns_id *ns);
>> +extern int destroy_pid_ns_helpers(void);
>> +extern int request_set_next_pid(int pid_ns_id, pid_t pid, int sk);
>>  
>>  #endif /* __CR_NS_H__ */
>> diff --git a/criu/namespaces.c b/criu/namespaces.c
>> index 97ea2b0e6..f65f06003 100644
>> --- a/criu/namespaces.c
>> +++ b/criu/namespaces.c
>> @@ -15,6 +15,7 @@
>>  #include <errno.h>
>>  #include <sys/ioctl.h>
>>  #include <sys/ptrace.h>
>> +#include <sys/file.h>
>>  
>>  #include "page.h"
>>  #include "rst-malloc.h"
>> @@ -38,6 +39,11 @@
>>  #include "fdstore.h"
>>  #include "proc_parse.h"
>>  
>> +#define __sys(foo)	foo
>> +#define __sys_err(ret)	(-errno)
>> +
>> +#include "ns-common.c"
>> +
>>  static struct ns_desc *ns_desc_array[] = {
>>  	&net_ns_desc,
>>  	&uts_ns_desc,
>> @@ -49,6 +55,8 @@ static struct ns_desc *ns_desc_array[] = {
>>  };
>>  
>>  static unsigned int join_ns_flags;
>> +/* Creation of every helper are synchronized by userns_sync_lock */
>> +static int nr_pid_ns_helper_created = 0;
>>  
>>  int check_namespace_opts(void)
>>  {
>> @@ -2532,5 +2540,250 @@ int reserve_pid_ns_helpers(void)
>>  	return walk_namespaces(&pid_ns_desc, do_reserve_pid_ns_helpers, NULL);
>>  }
>>  
>> +static int pid_ns_helper_sock(struct ns_id *ns)
>> +{
>> +	struct sockaddr_un addr;
>> +	socklen_t len;
>> +	int sk;
>> +
>> +	sk = socket(AF_UNIX, SOCK_DGRAM, 0);
>> +	if (sk < 0) {
>> +		pr_perror("Can't create helper socket");
>> +		return -1;
>> +	}
>> +	pid_ns_helper_socket_name(&addr, &len, ns->id);
>> +
>> +	if (bind(sk, (struct sockaddr *)&addr, len) < 0) {
>> +		pr_perror("Can't bind pid_ns sock");
>> +		return -1;
>> +	}
>> +
>> +	return sk;
>> +}
>> +
>> +static int pid_ns_helper(struct ns_id *ns, int sk)
>> +{
>> +	struct sockaddr_un addr;
>> +	struct msghdr msg = {0};
>> +	struct iovec iov;
>> +	pid_t pid;
>> +
>> +	msg.msg_name = &addr;
>> +	msg.msg_iov = &iov;
>> +	msg.msg_iovlen = 1;
>> +
>> +	while (1) {
>> +		int answer = 0;
>> +		msg.msg_namelen = sizeof(addr);
>> +		iov.iov_base = &pid;
>> +		iov.iov_len = sizeof(pid);
>> +
>> +		if (recvmsg(sk, &msg, 0) < 0) {
>> +			pr_perror("recv() failed to read pid");
>> +			break;
>> +		}
>> +
>> +		if (pid != 0) {
>> +			if (__set_next_pid(pid) < 0) {
>> +				pr_err("Can't set next pid\n");
>> +				answer = -1;
>> +			}
>> +		}
>> +
>> +		iov.iov_base = &answer;
>> +		iov.iov_len = sizeof(answer);
>> +		if (sendmsg(sk, &msg, 0) < 0) {
>> +			pr_perror("Can't send answer");
>> +			break;
>> +		}
>> +
>> +		if (pid == 0)
>> +			return 0;
>> +	}
>> +
>> +	return -1;
>> +}
>> +
>> +static int do_create_pid_ns_helper(void *arg, int unused_fd, pid_t unused_pid)
>> +{
>> +	int pid_ns_fd, mnt_ns_fd, sk, fd, i, lock_fd, transport_fd;
>> +	struct ns_id *ns, *tmp;
>> +	struct pid *pid;
>> +	pid_t child;
>> +
>> +	pid_ns_fd = open_proc(PROC_SELF, "ns/pid");
>> +	if (pid_ns_fd < 0) {
>> +		pr_perror("Can't open pid ns");
>> +		return -1;
>> +	}
>> +	ns = *(struct ns_id **)arg;
>> +
>> +	fd = fdstore_get(ns->pid.nsfd_id);
>> +	if (fd < 0) {
>> +		pr_err("Can't get pid_ns fd\n");
>> +		return -1;
>> +	}
>> +	if (setns(fd, CLONE_NEWPID) < 0) {
>> +		pr_perror("Can't setns");
>> +		return -1;
>> +	}
>> +	close(fd);
>> +
>> +	sk = pid_ns_helper_sock(ns);
>> +	if (sk < 0)
>> +		return -1;
>> +
>> +	pid = __pstree_pid_by_virt(ns, ns->ns_pid);
>> +	if (!pid) {
>> +		pr_err("Can't find helper reserved pid\n");
>> +		return -1;
>> +	}
>> +
>> +	tmp = ns->parent;
>> +	if (tmp) {
>> +		futex_t *f = &tmp->pid.helper_created;
>> +		futex_wait_while_eq(f, 0);
>> +	}
>> +
>> +	if (switch_ns(root_item->pid->real, &mnt_ns_desc, &mnt_ns_fd) < 0) {
>> +		pr_err("Can't set mnt_ns\n");
>> +		return -1;
>> +	}
>> +
>> +	lock_fd = open("/proc/" LAST_PID_PATH, O_RDONLY);
>> +	if (lock_fd < 0)
>> +		return -1;
>> +
>> +	if (restore_ns(mnt_ns_fd, &mnt_ns_desc) < 0) {
>> +		pr_err("Can't restore ns\n");
>> +		return -1;
>> +	}
>> +
>> +	if (flock(lock_fd, LOCK_EX)) {
>> +		close(lock_fd);
>> +		pr_perror("Can't lock %s", LAST_PID_PATH);
>> +		return -1;
>> +	}
>> +
>> +	transport_fd = get_service_fd(TRANSPORT_FD_OFF);
>> +	/*
>> +	 * Starting not from pid->level - 1, as its helper has not been created yet
>> +	 * (we're creating it at the moment), and the true pid for this level
>> +	 * is set by the task, who does close(CLONE_NEWPID) (this task is sender of fd).
>> +	 */
>> +	for (i = pid->level - 2, tmp = ns->parent; i >= 0; i--, tmp = tmp->parent)
>> +		if (request_set_next_pid(tmp->id, pid->ns[i].virt, transport_fd)) {
>> +			pr_err("Can't set next pid using helper\n");
>> +			flock(lock_fd, LOCK_UN);
>> +			close(lock_fd);
>> +			return -1;
>> +		}
>> +	child = fork();
>> +	if (child < 0) {
>> +		flock(lock_fd, LOCK_UN);
>> +		close(lock_fd);
>> +		pr_perror("Can't fork");
>> +		return -1;
>> +	} else if (!child) {
>> +		close(lock_fd);
>> +		exit(pid_ns_helper(ns, sk));
>> +	}
>> +	close(sk);
>> +	futex_set_and_wake(&ns->pid.helper_created, 1);
>> +	flock(lock_fd, LOCK_UN);
>> +	close(lock_fd);
>> +	nr_pid_ns_helper_created++;
>> +
>> +	if (setns(pid_ns_fd, CLONE_NEWPID) < 0) {
>> +		pr_perror("Restore ns");
>> +		return -1;
>> +	}
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Task may set last_pid only for its active pid namespace,
>> + * so if NSpid of a child contains more than one level, we
>> + * need external help to populate the whole pid hierarchy
>> + * (pid in parent pid_ns, pid in grand parent etc). Pid ns
>> + * helpers are used for that.
>> + *
>> + * We need a task or tasks to be a parent of pid_ns helpers.
>> + * To live in common hierarchy and to be a TASK_HELPER is not
>> + * possible, because it introduces circular dependencies.
>> + * The same goes for being children of criu main task, because
>> + * we already have dependencies between it and root_item
>> + * (NO more dependencies!). So, we choose usernsd for that:
>> + * it always exists and has a command interface.
>> + */
>> +int create_pid_ns_helper(struct ns_id *ns)
>> +{
>> +	BUG_ON(getpid() != INIT_PID);
>> +
>> +	if (__set_next_pid(ns->ns_pid) < 0) {
>> +		pr_err("Can't set next fd\n");
>> +		return -1;
>> +	}
>> +	if (userns_call(do_create_pid_ns_helper, 0, &ns, sizeof(ns), -1) < 0) {
>> +		pr_err("Can't create pid_ns helper\n");
>> +		return -1;
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int do_destroy_pid_ns_helper(void *arg, int fd, pid_t pid)
>> +{
>> +	int i, sk, status, sig_blocked = true, nr_ok = 0, ret = 0;
>> +	sigset_t sig_mask;
>> +	struct ns_id *ns;
>> +
>> +	if (!nr_pid_ns_helper_created)
>> +		return 0;
>> +
>> +	if (block_sigmask(&sig_mask, SIGCHLD)) {
>> +		sig_blocked = false;
>> +		ret = -1;
>> +	}
>> +
>> +	sk = get_service_fd(TRANSPORT_FD_OFF);
>> +
>> +	for (ns = ns_ids; ns; ns = ns->next) {
>> +		if (ns->nd != &pid_ns_desc)
>> +			continue;
>> +		if (request_set_next_pid(ns->id, 0, sk) == 0)
>> +			nr_ok++;
>> +	}
>> +
>> +	if (nr_ok != nr_pid_ns_helper_created) {
>> +		pr_err("Not all pid_ns helpers killed\n");
>> +		ret = -1;
>> +	}
>> +
>> +	for (i = 0; i < nr_ok; i++) {
>> +		if (waitpid(-1, &status, 0) < 0) {
>> +			pr_perror("Error during waiting pid_ns helper");
>> +			ret = -1;
>> +		}
>> +	}
>> +	nr_pid_ns_helper_created = 0;
>> +
>> +	if (sig_blocked && restore_sigmask(&sig_mask))
>> +		ret = -1;
>> +
>> +	return ret;
>> +}
>> +
>> +int destroy_pid_ns_helpers(void)
>> +{
>> +	if (!(root_ns_mask & CLONE_NEWPID))
>> +		return 0;
>> +
>> +	if (userns_call(do_destroy_pid_ns_helper, 0, NULL, 0, -1) < 0) {
>> +		pr_err("Can't create pid_ns helper\n");
>> +		return -1;
>> +	}
>> +	return 0;
>> +}
>> +
>>  struct ns_desc pid_ns_desc = NS_DESC_ENTRY(CLONE_NEWPID, "pid");
>>  struct ns_desc user_ns_desc = NS_DESC_ENTRY(CLONE_NEWUSER, "user");
>> diff --git a/criu/ns-common.c b/criu/ns-common.c
>> new file mode 100644
>> index 000000000..a8e28aa00
>> --- /dev/null
>> +++ b/criu/ns-common.c
>> @@ -0,0 +1,51 @@
>> +#include <sys/socket.h>
>> +#include <sys/un.h>
>> +
>> +void pid_ns_helper_socket_name(struct sockaddr_un *addr, socklen_t *len, unsigned int id)
>> +{
>> +	const char prefix[] = "0/criu-pid-ns-";
>> +	const char int_max[] = "2147483647";
>> +
>> +	*len = sizeof(*addr) - sizeof(addr->sun_path) +
>> +	       sizeof(prefix) - 1 + sizeof(int_max) - 1;
>> +
>> +	addr->sun_family = AF_UNIX;
>> +
>> +	memset(addr->sun_path + sizeof(prefix) - 1, '\0', sizeof(int_max) - 1);
>> +#ifdef CR_NOGLIBC
>> +	std_sprintf(addr->sun_path, "%s%d", prefix, id);
>> +#else
>> +	sprintf(addr->sun_path, "%s%d", prefix, id);
>> +#endif
>> +	addr->sun_path[0] = '\0';
>> +}
>> +
>> +/* Send helper a request to set next pid and receive success */
>> +int request_set_next_pid(int pid_ns_id, pid_t pid, int sk)
>> +{
>> +	struct sockaddr_un addr;
>> +	int answer, ret;
>> +	socklen_t len;
>> +
>> +	BUG_ON(pid == -1);
>> +
>> +	pid_ns_helper_socket_name(&addr, &len, pid_ns_id);
>> +	ret = __sys(sendto)(sk, &pid, sizeof(pid), 0, (struct sockaddr *)&addr, len);
>> +	if (ret	< 0) {
>> +		pr_err("Can't send request: err=%d\n", __sys_err(ret));
>> +		return -1;
>> +	}
>> +
>> +	ret = __sys(recvfrom)(sk, &answer, sizeof(answer), 0, NULL, NULL);
>> +	if (ret < 0) {
>> +		pr_err("Can't recv answer: err=%d\n", __sys_err(ret));
>> +		return -1;
>> +	}
> 
> criu hangs in this function in an error case.

I'll find a solution for this case, but not in the upcoming v5.
v5 is only meant to resolve the transport sockets problem.
 
> [root@fc24 criu]# cat test/dump/zdtm/static/pty-console/31/1/restore.log  | grep -B 5 Error
> (00.324568) uns: daemon calls 0x48eae0 (51, 8, 1)
> (00.324583)      1: tty: Allocating fake descriptor for 0xb (reg_d 0x7f618453bff0)
> (00.324605) uns: daemon calls 0x48eae0 (51, 8, 1)
> (00.324639)      1: tty: Restore session 1 by 1 tty (index 0)
> (00.324641)      1: Restoring resources
> (00.324662)      1: Error (criu/tty.c:663): tty: Can't set sid on terminal fd 3: Operation not permitted
> 
> 
> 18064 pts/0    T      0:00  |           \_ python test/zdtm.py run -a --keep-going
> 20734 pts/0    T      0:00  |           |   \_ ./zdtm_ct zdtm.py
> 20737 pts/0    S      0:00  |           |       \_ python2 zdtm.py
> 20739 pts/0    T      0:00  |           |           \_ python2 zdtm.py
> 20784 pts/0    t      0:00  |           |               \_ ../criu/criu restore -o restore.log -D dump/zdtm/static/pty-console/31/1 -v4 --pidfile /root/git/criu/test/zdtm/static/pty-console.
> 20786 pts/0    S      0:00  |           |                   \_ ../criu/criu restore -o restore.log -D dump/zdtm/static/pty-console/31/1 -v4 --pidfile /root/git/criu/test/zdtm/static/pty-cons
> 20799 pts/0    Z      0:00  |           |                   |   \_ [criu] <defunct>
> 20801 pts/0    Z      0:00  |           |                   |   \_ [criu] <defunct>
> 20787 pts/0    S      0:00  |           |                   \_ [criu]
> 20800 pts/0    D      0:00  |           |                       \_ [criu]
> 
> [root@fc24 criu]# gdb -p 20786
> (gdb) bt
> #0  0x00007f6183ced9f3 in __recvfrom_nocancel () from target:/lib64/libpthread.so.0
> #1  0x0000000000461662 in request_set_next_pid (pid_ns_id=<optimized out>, pid=<optimized out>, pid@entry=0, sk=sk@entry=1012) at criu/ns-common.c:39
> #2  0x00000000004618ad in do_destroy_pid_ns_helper (pid=<optimized out>, fd=<optimized out>, arg=<optimized out>) at criu/namespaces.c:2753
> #3  0x000000000042167e in usernsd (sk=5) at criu/namespaces.c:1600
> #4  0x0000000000465a63 in start_usernsd () at criu/namespaces.c:1760
> #5  prepare_namespace_before_tasks () at criu/namespaces.c:2431
> #6  0x0000000000420746 in restore_root_task (init=0x7f618453b4d0) at criu/cr-restore.c:2118
> #7  0x000000000043da13 in cr_restore_tasks () at criu/cr-restore.c:2433
> #8  0x0000000000422928 in main (argc=<optimized out>, argv=0x7ffe63fe3d18, envp=<optimized out>) at criu/crtools.c:728
> 
> [root@fc24 criu]# gdb -p gdb -p 20784
> (gdb) bt
> #0  0x00007f6183cee2c7 in recvmsg () from target:/lib64/libpthread.so.0
> #1  0x0000000000463c26 in __userns_call (func_name=func_name@entry=0x4dc101 "do_destroy_pid_ns_helper", call=<optimized out>, call@entry=0x461950 <do_destroy_pid_ns_helper>, 
>     flags=<optimized out>, flags@entry=0, arg=arg@entry=0x0, arg_size=arg_size@entry=0, fd=fd@entry=-1) at criu/namespaces.c:1692
> #2  0x0000000000465d40 in destroy_pid_ns_helpers () at criu/namespaces.c:2781
> #3  0x0000000000420dd5 in restore_root_task (init=0x7f618453b4d0) at criu/cr-restore.c:2323
> #4  0x000000000043da13 in cr_restore_tasks () at criu/cr-restore.c:2433
> #5  0x0000000000422928 in main (argc=<optimized out>, argv=0x7ffe63fe3d18, envp=<optimized out>) at criu/crtools.c:728
> 
>> +
>> +	if (answer != 0) {
>> +		pr_err("Error answer\n");
>> +		return -1;
>> +	}
>> +
>> +	return 0;
>> +}
>> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
>> index 030c7ff42..3b0b35710 100644
>> --- a/criu/pie/restorer.c
>> +++ b/criu/pie/restorer.c
>> @@ -47,6 +47,11 @@
>>  #include "restorer.h"
>>  #include "namespaces.h"
>>  
>> +#define __sys(foo)	sys_##foo
>> +#define __sys_err(ret)	ret
>> +
>> +#include "../ns-common.c"
>> +
>>  #ifndef PR_SET_PDEATHSIG
>>  #define PR_SET_PDEATHSIG 1
>>  #endif
>>