[02/10] fdstore: add a storage for file descriptors (v2)

Submitted by Andrei Vagin on Feb. 13, 2017, 5:49 a.m.

Details

Message ID 1486964960-4872-3-git-send-email-avagin@openvz.org
State New
Series "Dump and restore nested network namespaces"
Headers show

Commit Message

Andrei Vagin Feb. 13, 2017, 5:49 a.m.
From: Andrei Vagin <avagin@virtuozzo.com>

We need a storage for file descriptors which is shared between processes
and doesn't itself consume a lot of file descriptors. We are going to use
it on restore, and if it used many file descriptors, we would have to find
descriptor numbers which are not used by any of the restored processes, so
as not to conflict with their descriptors.

There are two solutions. The first one is a service (process) which
handles the commands push_fd(id, fd) and pop_fd(id, fd).

Another solution is to save descriptors in a unix socket.  It requires
only one extra descriptor which we can register as a service fd. Each
unix socket has a buffer and can fit a number of file descriptors. We
can use SO_PEEK_OFF and MSG_PEEK to get file descriptors from a socket
as many times as we need.

This patch implements the second solution.

v2: call recvmsg with MSG_PEEK
Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
---
 criu/Makefile.crtools    |   1 +
 criu/cr-restore.c        |   6 +++
 criu/fdstore.c           | 118 +++++++++++++++++++++++++++++++++++++++++++++++
 criu/include/fdstore.h   |  17 +++++++
 criu/include/servicefd.h |   1 +
 5 files changed, 143 insertions(+)
 create mode 100644 criu/fdstore.c
 create mode 100644 criu/include/fdstore.h

Patch hide | download patch | download mbox

diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
index e095d01..afb22c5 100644
--- a/criu/Makefile.crtools
+++ b/criu/Makefile.crtools
@@ -81,6 +81,7 @@  obj-y			+= uts_ns.o
 obj-y			+= path.o
 obj-y			+= autofs.o
 obj-y			+= uffd.o
+obj-y			+= fdstore.o
 
 ifeq ($(VDSO),y)
 obj-y			+= pie-util-vdso.o
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 7ccb136..07b7ae0 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -78,6 +78,7 @@ 
 #include "fault-injection.h"
 #include "sk-queue.h"
 #include "sigframe.h"
+#include "fdstore.h"
 
 #include "parasite-syscall.h"
 #include "files-reg.h"
@@ -1396,6 +1397,10 @@  static int restore_task_with_children(void *_arg)
 
 	/* Restore root task */
 	if (current->parent == NULL) {
+
+		if (fdstore_init())
+			goto err;
+
 		if (join_namespaces()) {
 			pr_perror("Join namespaces failed");
 			goto err;
@@ -3194,6 +3199,7 @@  static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 	close_proc();
 	close_service_fd(ROOT_FD_OFF);
 	close_service_fd(USERNSD_SK);
+	close_service_fd(FDSTORE_SK_OFF);
 
 	__gcov_flush();
 
diff --git a/criu/fdstore.c b/criu/fdstore.c
new file mode 100644
index 0000000..d9bed4d
--- /dev/null
+++ b/criu/fdstore.c
@@ -0,0 +1,118 @@ 
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include "common/scm.h"
+#include "common/lock.h"
+#include "servicefd.h"
+#include "fdstore.h"
+#include "xmalloc.h"
+#include "rst-malloc.h"
+#include "log.h"
+
+static struct fdstore_desc {
+	int next_id;
+	mutex_t lock; /* to protect a peek offset */
+} *desc;
+
+int fdstore_init(void)
+{
+	struct sockaddr_un addr;
+	unsigned int addrlen;
+	struct stat st;
+	int sk, ret;
+
+	desc = shmalloc(sizeof(*desc));
+	if (!desc)
+		return -1;
+
+	desc->next_id = 0;
+	mutex_init(&desc->lock);
+
+	sk = socket(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0);
+	if (sk < 0) {
+		pr_perror("Unable to create a socket");
+		return -1;
+	}
+
+	if (fstat(sk, &st)) {
+		pr_perror("Unable to stat a file descriptor");
+		close(sk);
+		return -1;
+	}
+
+	addr.sun_family = AF_UNIX;
+	addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%"PRIx64, st.st_ino);
+	addrlen += sizeof(addr.sun_family);
+
+	addr.sun_path[0] = 0;
+
+	/*
+	 * This socket is connected to itself, so all messages are queued to
+	 * its receive queue. Here we are going to use this socket to store
+	 * file descriptors. For that we need to send a file descriptor in
+	 * a queue and remember its sequence number. Then we can set SO_PEEK_OFF
+	 * to get a file descriptor without dequeuing it.
+	 */
+	if (bind(sk, (struct sockaddr *) &addr, addrlen)) {
+		pr_perror("Unable to bind a socket");
+		close(sk);
+		return -1;
+	}
+	if (connect(sk, (struct sockaddr *) &addr, addrlen)) {
+		pr_perror("Unable to connect a socket");
+		close(sk);
+		return -1;
+	}
+
+	ret = install_service_fd(FDSTORE_SK_OFF, sk);
+	close(sk);
+	if (ret < 0)
+		return -1;
+
+	return 0;
+}
+
+int fdstore_add(int fd)
+{
+	int sk = get_service_fd(FDSTORE_SK_OFF);
+	int id;
+
+	mutex_lock(&desc->lock);
+
+	if (send_fd(sk, NULL, 0, fd)) {
+		mutex_unlock(&desc->lock);
+		return -1;
+	}
+
+	id = desc->next_id++;
+
+	mutex_unlock(&desc->lock);
+
+	return id;
+}
+
+int fdstore_get(int id)
+{
+	int sk = get_service_fd(FDSTORE_SK_OFF);
+	int fd;
+
+	mutex_lock(&desc->lock);
+	if (setsockopt(sk, SOL_SOCKET, SO_PEEK_OFF, &id, sizeof(id))) {
+		mutex_unlock(&desc->lock);
+		pr_perror("Unable to a peek offset");
+		return -1;
+	}
+
+	if (__recv_fds(sk, &fd, 1, NULL, 0, MSG_PEEK) < 0) {
+		mutex_unlock(&desc->lock);
+		pr_perror("Unable to get a file descriptor with the %d id", id);
+		return -1;
+	}
+	mutex_unlock(&desc->lock);
+
+	return fd;
+}
diff --git a/criu/include/fdstore.h b/criu/include/fdstore.h
new file mode 100644
index 0000000..bdfb5fe
--- /dev/null
+++ b/criu/include/fdstore.h
@@ -0,0 +1,17 @@ 
+#ifndef __CRIU_FDSTORE_H__
+#define __CRIU_FDSTORE_H__
+
+/*
+ * fdstore is a storage for file descriptors which is shared
+ * between processes.
+ */
+
+int fdstore_init(void);
+
+/* Add a file descriptor to the storage and return its id */
+int fdstore_add(int fd);
+
+/* Get a file descriptor from a storage by id */
+int fdstore_get(int id);
+
+#endif
diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h
index 5152fb6..c070480 100644
--- a/criu/include/servicefd.h
+++ b/criu/include/servicefd.h
@@ -21,6 +21,7 @@  enum sfd_type {
 	NS_FD_OFF,	/* Node's net namespace fd */
 	TRANSPORT_FD_OFF, /* to transfer file descriptors */
 	LAZY_PAGES_SK_OFF, /* socket for communication with lazy-pages daemon */
+	FDSTORE_SK_OFF, /* socket to store file descriptors */
 
 	SERVICE_FD_MAX
 };

Comments

Pavel Emelianov Feb. 13, 2017, 1:01 p.m.
On 02/13/2017 08:49 AM, Andrei Vagin wrote:
> From: Andrei Vagin <avagin@virtuozzo.com>
> 
> We need a storage for file descriptors which is shared between processes
> and doesn't use a lot of file descriptors. We are going to use it on
> restore and if it will use file descriptors, we will have to find
> descriptors which don't used by all restored processes to not confilict
> with their descriptors.
> 
> There are two solutions. The first one is a service (process) which
> handles to command push_fd(id, fd) and pop_fd(id, fd).
> 
> Another solution is to save descriptros in a unix socket.  It requires
> only one extra descriptor which we can register as a service fd. Each
> unix socket has a buffer and can fit a number of file descriptros. We
> can use SK_PEEK_OFF and MSG_PEEK to get file descriptros from a socket
> as many times as we need.
> 
> This patch implements the second solution.
> 
> v2: call recvmsg with MSG_PEEK

v3: Add synchronization?

Anyway, would you please rebase this on current criu-dev; it already has
the fdstore patch merged in its v2 incarnation :)

> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> ---
>  criu/Makefile.crtools    |   1 +
>  criu/cr-restore.c        |   6 +++
>  criu/fdstore.c           | 118 +++++++++++++++++++++++++++++++++++++++++++++++
>  criu/include/fdstore.h   |  17 +++++++
>  criu/include/servicefd.h |   1 +
>  5 files changed, 143 insertions(+)
>  create mode 100644 criu/fdstore.c
>  create mode 100644 criu/include/fdstore.h
> 
> diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
> index e095d01..afb22c5 100644
> --- a/criu/Makefile.crtools
> +++ b/criu/Makefile.crtools
> @@ -81,6 +81,7 @@ obj-y			+= uts_ns.o
>  obj-y			+= path.o
>  obj-y			+= autofs.o
>  obj-y			+= uffd.o
> +obj-y			+= fdstore.o
>  
>  ifeq ($(VDSO),y)
>  obj-y			+= pie-util-vdso.o
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 7ccb136..07b7ae0 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -78,6 +78,7 @@
>  #include "fault-injection.h"
>  #include "sk-queue.h"
>  #include "sigframe.h"
> +#include "fdstore.h"
>  
>  #include "parasite-syscall.h"
>  #include "files-reg.h"
> @@ -1396,6 +1397,10 @@ static int restore_task_with_children(void *_arg)
>  
>  	/* Restore root task */
>  	if (current->parent == NULL) {
> +
> +		if (fdstore_init())
> +			goto err;
> +
>  		if (join_namespaces()) {
>  			pr_perror("Join namespaces failed");
>  			goto err;
> @@ -3194,6 +3199,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
>  	close_proc();
>  	close_service_fd(ROOT_FD_OFF);
>  	close_service_fd(USERNSD_SK);
> +	close_service_fd(FDSTORE_SK_OFF);
>  
>  	__gcov_flush();
>  
> diff --git a/criu/fdstore.c b/criu/fdstore.c
> new file mode 100644
> index 0000000..d9bed4d
> --- /dev/null
> +++ b/criu/fdstore.c
> @@ -0,0 +1,118 @@
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/stat.h>
> +#include <unistd.h>
> +#include <stdio.h>
> +
> +#include "common/scm.h"
> +#include "common/lock.h"
> +#include "servicefd.h"
> +#include "fdstore.h"
> +#include "xmalloc.h"
> +#include "rst-malloc.h"
> +#include "log.h"
> +
> +static struct fdstore_desc {
> +	int next_id;
> +	mutex_t lock; /* to protect a peek offset */
> +} *desc;
> +
> +int fdstore_init(void)
> +{
> +	struct sockaddr_un addr;
> +	unsigned int addrlen;
> +	struct stat st;
> +	int sk, ret;
> +
> +	desc = shmalloc(sizeof(*desc));
> +	if (!desc)
> +		return -1;
> +
> +	desc->next_id = 0;
> +	mutex_init(&desc->lock);
> +
> +	sk = socket(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0);
> +	if (sk < 0) {
> +		pr_perror("Unable to create a socket");
> +		return -1;
> +	}
> +
> +	if (fstat(sk, &st)) {
> +		pr_perror("Unable to stat a file descriptor");
> +		close(sk);
> +		return -1;
> +	}
> +
> +	addr.sun_family = AF_UNIX;
> +	addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%"PRIx64, st.st_ino);
> +	addrlen += sizeof(addr.sun_family);
> +
> +	addr.sun_path[0] = 0;
> +
> +	/*
> +	 * This socket is connected to itself, so all messages are queued to
> +	 * its receive queue. Here we are going to use this socket to store
> +	 * file descriptors. For that we need to send a file descriptor in
> +	 * a queue and remeber its sequence number. Then we can set SO_PEEK_OFF
> +	 * to get a file descriptor without dequeuing it.
> +	 */
> +	if (bind(sk, (struct sockaddr *) &addr, addrlen)) {
> +		pr_perror("Unable to bind a socket");
> +		close(sk);
> +		return -1;
> +	}
> +	if (connect(sk, (struct sockaddr *) &addr, addrlen)) {
> +		pr_perror("Unable to connect a socket");
> +		close(sk);
> +		return -1;
> +	}
> +
> +	ret = install_service_fd(FDSTORE_SK_OFF, sk);
> +	close(sk);
> +	if (ret < 0)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +int fdstore_add(int fd)
> +{
> +	int sk = get_service_fd(FDSTORE_SK_OFF);
> +	int id;
> +
> +	mutex_lock(&desc->lock);
> +
> +	if (send_fd(sk, NULL, 0, fd)) {
> +		mutex_unlock(&desc->lock);
> +		return -1;
> +	}
> +
> +	id = desc->next_id++;
> +
> +	mutex_unlock(&desc->lock);
> +
> +	return id;
> +}
> +
> +int fdstore_get(int id)
> +{
> +	int sk = get_service_fd(FDSTORE_SK_OFF);
> +	int fd;
> +
> +	mutex_lock(&desc->lock);
> +	if (setsockopt(sk, SOL_SOCKET, SO_PEEK_OFF, &id, sizeof(id))) {
> +		mutex_unlock(&desc->lock);
> +		pr_perror("Unable to a peek offset");
> +		return -1;
> +	}
> +
> +	if (__recv_fds(sk, &fd, 1, NULL, 0, MSG_PEEK) < 0) {
> +		mutex_unlock(&desc->lock);
> +		pr_perror("Unable to get a file descriptor with the %d id", id);
> +		return -1;
> +	}
> +	mutex_unlock(&desc->lock);
> +
> +	return fd;
> +}
> diff --git a/criu/include/fdstore.h b/criu/include/fdstore.h
> new file mode 100644
> index 0000000..bdfb5fe
> --- /dev/null
> +++ b/criu/include/fdstore.h
> @@ -0,0 +1,17 @@
> +#ifndef __CRIU_FDSTORE_H__
> +#define __CRIU_FDSTORE_H__
> +
> +/*
> + * fdstore is a storage for file descriptors which is shared
> + * between processes.
> + */
> +
> +int fdstore_init(void);
> +
> +/* Add a file descriptor to the storage and return its id */
> +int fdstore_add(int fd);
> +
> +/* Get a file descriptor from a storage by id */
> +int fdstore_get(int id);
> +
> +#endif
> diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h
> index 5152fb6..c070480 100644
> --- a/criu/include/servicefd.h
> +++ b/criu/include/servicefd.h
> @@ -21,6 +21,7 @@ enum sfd_type {
>  	NS_FD_OFF,	/* Node's net namespace fd */
>  	TRANSPORT_FD_OFF, /* to transfer file descriptors */
>  	LAZY_PAGES_SK_OFF, /* socket for communication with lazy-pages daemon */
> +	FDSTORE_SK_OFF, /* socket to store file descriptors */
>  
>  	SERVICE_FD_MAX
>  };
>