[16/17] unix: Add support of ghost sockets

Submitted by Cyrill Gorcunov on April 27, 2018, 11:35 a.m.

Details

Message ID 20180427113505.20616-17-gorcunov@gmail.com
State New
Series "unix: Add support for ghost unix sockets"
Headers show

Commit Message

Cyrill Gorcunov April 27, 2018, 11:35 a.m.
Unix sockets may be connected via deleted socket name,
moreover the name may be reused (ie same sun_addr but
different inodes).

To be able to handle them we do a few tricks:

 - when collecting sockets we figure out if "deleted"
   mark is present on the socket and if such we rename
   it to a new unique name (~criu-%u format)

 - to not deal with deleted subdirectories and such we put
   this socket inside a current working directory then
   save the descriptor in fdstore engine

 - on restore we connect via procfs/fd/X as suggested by
   Andrew Vagin

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 criu/cr-restore.c      |   4 +
 criu/include/sockets.h |   1 +
 criu/sk-unix.c         | 238 +++++++++++++++++++++++++++++++++++++------------
 3 files changed, 187 insertions(+), 56 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index db913b2dae2e..ff1e4dcc34df 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -388,6 +388,10 @@  static int root_prepare_shared(void)
 	if (ret)
 		goto err;
 
+	ret = unix_resolve_ghost_addr();
+	if (ret)
+		goto err;
+
 	show_saved_files();
 err:
 	return ret;
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
index db330428850c..23f5b11c1b58 100644
--- a/criu/include/sockets.h
+++ b/criu/include/sockets.h
@@ -60,6 +60,7 @@  extern int netlink_receive_one(struct nlmsghdr *hdr, struct ns_id *ns, void *arg
 
 extern int unix_sk_id_add(unsigned int ino);
 extern int unix_sk_ids_parse(char *optarg);
+extern int unix_resolve_ghost_addr(void);
 
 extern int do_dump_opt(int sk, int level, int name, void *val, int len);
 #define dump_opt(s, l, n, f)	do_dump_opt(s, l, n, f, sizeof(*f))
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index f6d3500df4a1..03aca89eee18 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -31,6 +31,7 @@ 
 #include "fdstore.h"
 #include "fdinfo.h"
 #include "kerndat.h"
+#include "rst-malloc.h"
 
 #include "protobuf.h"
 #include "images/sk-unix.pb-c.h"
@@ -90,10 +91,14 @@  struct unix_sk_desc {
 };
 
 static LIST_HEAD(unix_sockets);
+static LIST_HEAD(unix_ghost_addr);
 
 static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d,
 			     UnixSkEntry *ue, const struct fd_parms *p);
 
+struct unix_sk_info;
+static void unlink_sk(struct unix_sk_info *ui);
+
 struct unix_sk_listen_icon {
 	unsigned int			peer_ino;
 	struct unix_sk_desc		*sk_desc;
@@ -886,12 +891,14 @@  struct unix_sk_info {
 	char			*name;
 	char			*name_dir;
 	unsigned		flags;
+	int			fdstore_id;
 	struct unix_sk_info	*peer;
 	struct pprep_head	peer_resolve; /* XXX : union with the above? */
 	struct file_desc	d;
 	struct list_head	connected; /* List of sockets, connected to me */
 	struct list_head	node; /* To link in peer's connected list  */
 	struct list_head	scm_fles;
+	struct list_head	ghost_node;
 
 	/*
 	 * For DGRAM sockets with queues, we should only restore the queue
@@ -916,6 +923,7 @@  struct scm_fle {
 
 #define USK_PAIR_MASTER		0x1
 #define USK_PAIR_SLAVE		0x2
+#define USK_GHOST_FDSTORE	0x4
 
 static struct unix_sk_info *find_unix_sk_by_ino(int ino)
 {
@@ -1241,6 +1249,7 @@  static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd,
 
 static int post_open_standalone(struct file_desc *d, int fd)
 {
+	int fdstore_fd = -1, procfs_self_dir = -1, len;
 	struct unix_sk_info *ui;
 	struct unix_sk_info *peer;
 	struct sockaddr_un addr;
@@ -1269,22 +1278,43 @@  static int post_open_standalone(struct file_desc *d, int fd)
 
 	memset(&addr, 0, sizeof(addr));
 	addr.sun_family = AF_UNIX;
-	memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
 
 	pr_info("\tConnect %#x to %#x\n", ui->ue->ino, peer->ue->ino);
 
 	if (prep_unix_sk_cwd(peer, &cwd_fd, NULL, &ns_fd))
 		return -1;
 
-	if (connect(fd, (struct sockaddr *)&addr,
-				sizeof(addr.sun_family) +
-				peer->ue->name.len) < 0) {
+	if (peer->flags & USK_GHOST_FDSTORE) {
+		procfs_self_dir = open_proc(PROC_SELF, "fd");
+		fdstore_fd = fdstore_get(peer->fdstore_id);
+
+		if (fdstore_fd < 0 || procfs_self_dir < 0)
+			goto err_revert_and_exit;
+
+
+		/*
+		 * WARNING: After this call we rely on revert_unix_sk_cwd
+		 * to restore the former directories so that connect
+		 * will operate inside proc/self/fd/X.
+		 */
+		if (fchdir(procfs_self_dir)) {
+			pr_perror("Can't change to procfs/self");
+			goto err_revert_and_exit;
+		}
+		len = snprintf(addr.sun_path, UNIX_PATH_MAX, "%d", fdstore_fd);
+	} else {
+		memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
+		len = peer->ue->name.len;
+	}
+
+	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) {
 		pr_perror("Can't connect %#x socket", ui->ue->ino);
-		revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
-		return -1;
+		goto err_revert_and_exit;
 	}
 	ui->is_connected = true;
 
+	close_safe(&procfs_self_dir);
+	close_safe(&fdstore_fd);
 	revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
 
 restore_queue:
@@ -1296,51 +1326,37 @@  static int post_open_standalone(struct file_desc *d, int fd)
 	if (ui->queuer && !ui->queuer->peer_queue_restored)
 		return 1;
 	return restore_sk_common(fd, ui);
+
+err_revert_and_exit:
+	close_safe(&procfs_self_dir);
+	close_safe(&fdstore_fd);
+	revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
+	return -1;
 }
 
-static int bind_deleted_unix_sk(int sk, struct unix_sk_info *ui,
-					struct sockaddr_un *addr)
+static int keep_deleted(struct unix_sk_info *ui, struct sockaddr_un *addr)
 {
-	char temp[PATH_MAX];
-	int ret;
-
-	pr_info("found duplicate unix socket bound at %s\n", addr->sun_path);
-
-	ret = snprintf(temp, sizeof(temp),
-			"%s-%s-%d", addr->sun_path, "criu-temp", getpid());
-	/* this shouldn't happen, since sun_addr is only 108 chars long */
-	if (ret < 0 || ret >= sizeof(temp)) {
-		pr_err("snprintf of %s failed?\n", addr->sun_path);
-		return -1;
-	}
-
-	ret = rename(addr->sun_path, temp);
-	if (ret < 0) {
-		pr_perror("couldn't move socket for binding");
-		return -1;
-	}
-
-	ret = bind(sk, (struct sockaddr *)addr,
-			sizeof(addr->sun_family) + ui->ue->name.len);
-	if (ret < 0) {
-		pr_perror("Can't bind socket after move");
-		return -1;
-	}
-
-	ret = rename(temp, addr->sun_path);
-	if (ret < 0) {
-		pr_perror("couldn't move socket back");
-		return -1;
+	if (ui->ue->deleted && (ui->flags & USK_GHOST_FDSTORE)) {
+		int fd = open(addr->sun_path, O_PATH);
+		if (fd < 0) {
+			pr_perror("ghost: Can't open %s", addr->sun_path);
+			return -1;
+		}
+		ui->fdstore_id = fdstore_add(fd);
+		pr_debug("ghost: %#x fdstore_id %d %s\n",
+			 ui->ue->ino, ui->fdstore_id, ui->ue->name.data);
+		close(fd);
+		return ui->fdstore_id;
 	}
-
-	/* we've handled the deleted-ness of this
-	 * socket and we don't want to delete it later
-	 * since it's not /this/ socket.
-	 */
-	ui->ue->deleted = false;
 	return 0;
 }
 
+static void drop_deleted(struct unix_sk_info *ui)
+{
+	if (ui->ue->deleted)
+		unlink_sk(ui);
+}
+
 static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 {
 	struct sockaddr_un addr;
@@ -1368,19 +1384,14 @@  static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 	if (ui->name[0] && prep_unix_sk_cwd(ui, &cwd_fd, NULL, &ns_fd))
 		return -1;
 
+	pr_debug("bind_unix_sk: id %#x ino %#x addr %s\n",
+		 ui->ue->id, ui->ue->ino, ui->name);
 	ret = bind(sk, (struct sockaddr *)&addr,
 			sizeof(addr.sun_family) + ui->ue->name.len);
-	if (ret < 0) {
-		if (ui->ue->has_deleted && ui->ue->deleted && errno == EADDRINUSE) {
-			if (bind_deleted_unix_sk(sk, ui, &addr))
-				goto done;
-		} else {
-			pr_perror("Can't bind socket");
-			goto done;
-		}
-	}
+	if (ret < 0)
+		goto done;
 
-	if (*ui->name && ui->ue->file_perms) {
+	if (ui->ue->file_perms) {
 		FilePermsEntry *perms = ui->ue->file_perms;
 		char fname[PATH_MAX];
 
@@ -1403,8 +1414,8 @@  static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 		}
 	}
 
-	if (ui->ue->deleted && unlink((char *)ui->ue->name.data) < 0) {
-		pr_perror("failed to unlink %s", ui->ue->name.data);
+	if (keep_deleted(ui, &addr) < 0) {
+		pr_err("Can't save socket in fdstore\n");
 		goto done;
 	}
 
@@ -1416,6 +1427,7 @@  static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 	exit_code = 0;
 done:
 	revert_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd);
+	drop_deleted(ui);
 	return exit_code;
 }
 
@@ -1556,6 +1568,7 @@  static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
 
 	fle = file_master(&ui->d);
 	pr_info_opening("standalone", ui, fle);
+
 	if (fle->stage == FLE_OPEN)
 		return post_open_standalone(&ui->d, fle->fe->fd);
 
@@ -1812,6 +1825,7 @@  static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
 	ui->name_dir = (void *)ue->name_dir;
 
 	ui->flags		= 0;
+	ui->fdstore_id		= -1;
 	ui->peer		= NULL;
 	ui->queuer		= NULL;
 	ui->bound		= 0;
@@ -1826,6 +1840,109 @@  static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
 	INIT_LIST_HEAD(&ui->connected);
 	INIT_LIST_HEAD(&ui->node);
 	INIT_LIST_HEAD(&ui->scm_fles);
+	INIT_LIST_HEAD(&ui->ghost_node);
+
+	return 0;
+}
+
+#define GHOST_NAME_FMT		"~criu-%u"
+#define GHOST_NAME_FMT_PREFIX	6 /* num of chars before counter */
+
+static int ghost_new_name(char *name, size_t namelen,
+			  char **name_new, size_t *namelen_new)
+{
+	char sname[64], *pos, *oldname = name;
+	static unsigned int unix_name_cnt = 0;
+	size_t k;
+
+	pr_debug("\tghost: handling name %s namelen %zu\n", name, namelen);
+
+	for (pos = &name[namelen - 1]; pos > name; pos--) {
+		if (*pos == GHOST_NAME_FMT[0])
+			break;
+	}
+
+	if (strncmp(pos, GHOST_NAME_FMT, GHOST_NAME_FMT_PREFIX) == 0) {
+		unsigned int __cnt;
+
+		if (sscanf(pos, GHOST_NAME_FMT, &__cnt) == 1) {
+			pr_debug("\tghost: unix_name_cnt %d detected\n", __cnt);
+			if (__cnt != unix_name_cnt)
+				unix_name_cnt += __cnt;
+		}
+	}
+
+	memzero(sname, sizeof(sname));
+	k = snprintf(sname, sizeof(sname), GHOST_NAME_FMT, unix_name_cnt++) + 1;
+	if (k > sizeof(sname) || k > UNIX_PATH_MAX) {
+		pr_err("\tghost: New name for socket is too long\n");
+		return -1;
+	}
+	*namelen_new = k;
+
+	*name_new = shmalloc(*namelen_new);
+	if (!*name_new) {
+		pr_err("\tghost: Can't allocate new name for socket\n");
+		return -ENOMEM;
+	}
+
+	memcpy(*name_new, sname, k);
+
+	pr_debug("\tghost: name transition %s -> %s\n", oldname, *name_new);
+	return 0;
+}
+
+int unix_resolve_ghost_addr(void)
+{
+	struct unix_sk_info *ui, *t;
+
+	pr_debug("ghost: Resolving addresses\n");
+
+	/*
+	 * Walk over ghost unix entries and find one
+	 * which gonna be a master and won't unlink
+	 * the name until all peers are connected to
+	 * this designation.
+	 */
+
+	list_for_each_entry(ui, &unix_ghost_addr, ghost_node) {
+		size_t newnamelen;
+		char *newname;
+
+		pr_debug("ghost: ino %#x peer %#x address %s\n",
+			 ui->ue->ino, ui->peer ? ui->peer->ue->ino : 0,
+			 ui->name);
+
+		unlink_sk(ui);
+
+		if (ghost_new_name(ui->name, ui->ue->name.len,
+				   &newname, &newnamelen))
+			return -1;
+
+		ui->name = newname;
+		ui->ue->name.len = newnamelen;
+		ui->ue->name.data = (void *)newname;
+
+		unlink_sk(ui);
+
+		/*
+		 * Figure out who is connected to this peer,
+		 * so the name will be removed from FS only
+		 * when last one is connected.
+		 */
+		list_for_each_entry(t, &unix_sockets, list) {
+			if (t->flags & USK_GHOST_FDSTORE)
+				continue;
+			if (ui == t || t->peer != ui)
+				continue;
+
+			pr_debug("\t\tghost: connected to us %#x -> %#x\n",
+				 t->ue->ino, ui->ue->ino);
+
+			if (!(ui->flags & USK_GHOST_FDSTORE))
+				ui->flags |= USK_GHOST_FDSTORE;
+		}
+	}
 
 	return 0;
 }
@@ -1873,6 +1990,15 @@  static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
 		add_post_prepare_cb(&ui->peer_resolve);
 	}
 
+	if (ui->ue->deleted) {
+		if (!ui->name || !ui->ue->name.len || !ui->name[0]) {
+			pr_err("No name present, ino %#x\n", ui->ue->ino);
+			return -1;
+		}
+
+		list_add_tail(&ui->ghost_node, &unix_ghost_addr);
+	}
+
 	list_add_tail(&ui->list, &unix_sockets);
 	return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops);
 }

Comments

Andrei Vagin May 3, 2018, 6:14 a.m.
On Fri, Apr 27, 2018 at 02:35:04PM +0300, Cyrill Gorcunov wrote:
> Unix sockets may be connected via deleted socket name,
> moreover the name may be reused (ie same sun_addr but
> different inodes).
> 
> To be able to handle them we do a few tricks:
> 
>  - when collecting sockets we figure out if "deleted"
>    mark is present on the socket and if such we rename
>    it to a new unique name (~criu-%u format)

Can we cut it off on a second dump/restore iteration?

> 
>  - to not deal with deleted subdirectories and such we put
>    this socket inside a current working directory then
>    save the descriptor in fdstore engine
> 
>  - on restore we connect via procfs/fd/X as suggested by
>    Andrew Vagin
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
> ---
>  criu/cr-restore.c      |   4 +
>  criu/include/sockets.h |   1 +
>  criu/sk-unix.c         | 238 +++++++++++++++++++++++++++++++++++++------------
>  3 files changed, 187 insertions(+), 56 deletions(-)
> 
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index db913b2dae2e..ff1e4dcc34df 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -388,6 +388,10 @@ static int root_prepare_shared(void)
>  	if (ret)
>  		goto err;
>  
> +	ret = unix_resolve_ghost_addr();
> +	if (ret)
> +		goto err;
> +
>  	show_saved_files();
>  err:
>  	return ret;
> diff --git a/criu/include/sockets.h b/criu/include/sockets.h
> index db330428850c..23f5b11c1b58 100644
> --- a/criu/include/sockets.h
> +++ b/criu/include/sockets.h
> @@ -60,6 +60,7 @@ extern int netlink_receive_one(struct nlmsghdr *hdr, struct ns_id *ns, void *arg
>  
>  extern int unix_sk_id_add(unsigned int ino);
>  extern int unix_sk_ids_parse(char *optarg);
> +extern int unix_resolve_ghost_addr(void);
>  
>  extern int do_dump_opt(int sk, int level, int name, void *val, int len);
>  #define dump_opt(s, l, n, f)	do_dump_opt(s, l, n, f, sizeof(*f))
> diff --git a/criu/sk-unix.c b/criu/sk-unix.c
> index f6d3500df4a1..03aca89eee18 100644
> --- a/criu/sk-unix.c
> +++ b/criu/sk-unix.c
> @@ -31,6 +31,7 @@
>  #include "fdstore.h"
>  #include "fdinfo.h"
>  #include "kerndat.h"
> +#include "rst-malloc.h"
>  
>  #include "protobuf.h"
>  #include "images/sk-unix.pb-c.h"
> @@ -90,10 +91,14 @@ struct unix_sk_desc {
>  };
>  
>  static LIST_HEAD(unix_sockets);
> +static LIST_HEAD(unix_ghost_addr);
>  
>  static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d,
>  			     UnixSkEntry *ue, const struct fd_parms *p);
>  
> +struct unix_sk_info;
> +static void unlink_sk(struct unix_sk_info *ui);
> +
>  struct unix_sk_listen_icon {
>  	unsigned int			peer_ino;
>  	struct unix_sk_desc		*sk_desc;
> @@ -886,12 +891,14 @@ struct unix_sk_info {
>  	char			*name;
>  	char			*name_dir;
>  	unsigned		flags;
> +	int			fdstore_id;
>  	struct unix_sk_info	*peer;
>  	struct pprep_head	peer_resolve; /* XXX : union with the above? */
>  	struct file_desc	d;
>  	struct list_head	connected; /* List of sockets, connected to me */
>  	struct list_head	node; /* To link in peer's connected list  */
>  	struct list_head	scm_fles;
> +	struct list_head	ghost_node;
>  
>  	/*
>  	 * For DGRAM sockets with queues, we should only restore the queue
> @@ -916,6 +923,7 @@ struct scm_fle {
>  
>  #define USK_PAIR_MASTER		0x1
>  #define USK_PAIR_SLAVE		0x2
> +#define USK_GHOST_FDSTORE	0x4
>  
>  static struct unix_sk_info *find_unix_sk_by_ino(int ino)
>  {
> @@ -1241,6 +1249,7 @@ static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd,
>  
>  static int post_open_standalone(struct file_desc *d, int fd)
>  {
> +	int fdstore_fd = -1, procfs_self_dir = -1, len;
>  	struct unix_sk_info *ui;
>  	struct unix_sk_info *peer;
>  	struct sockaddr_un addr;
> @@ -1269,22 +1278,43 @@ static int post_open_standalone(struct file_desc *d, int fd)
>  
>  	memset(&addr, 0, sizeof(addr));
>  	addr.sun_family = AF_UNIX;
> -	memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
>  
>  	pr_info("\tConnect %#x to %#x\n", ui->ue->ino, peer->ue->ino);
>  
>  	if (prep_unix_sk_cwd(peer, &cwd_fd, NULL, &ns_fd))
>  		return -1;
>  
> -	if (connect(fd, (struct sockaddr *)&addr,
> -				sizeof(addr.sun_family) +
> -				peer->ue->name.len) < 0) {
> +	if (peer->flags & USK_GHOST_FDSTORE) {
> +		procfs_self_dir = open_proc(PROC_SELF, "fd");
> +		fdstore_fd = fdstore_get(peer->fdstore_id);
> +
> +		if (fdstore_fd < 0 || procfs_self_dir < 0)
> +			goto err_revert_and_exit;
> +
> +
> +		/*
> +		 * WARNING: After this call we rely on revert_unix_sk_cwd
> +		 * to restore the former directories so that connect
> +		 * will operate inside proc/self/fd/X.
> +		 */
> +		if (fchdir(procfs_self_dir)) {
> +			pr_perror("Can't change to procfs/self");
> +			goto err_revert_and_exit;
> +		}
> +		len = snprintf(addr.sun_path, UNIX_PATH_MAX, "%d", fdstore_fd);
> +	} else {
> +		memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
> +		len = peer->ue->name.len;
> +	}
> +
> +	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) {
>  		pr_perror("Can't connect %#x socket", ui->ue->ino);
> -		revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
> -		return -1;
> +		goto err_revert_and_exit;
>  	}
>  	ui->is_connected = true;
>  
> +	close_safe(&procfs_self_dir);
> +	close_safe(&fdstore_fd);
>  	revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
>  
>  restore_queue:
> @@ -1296,51 +1326,37 @@ static int post_open_standalone(struct file_desc *d, int fd)
>  	if (ui->queuer && !ui->queuer->peer_queue_restored)
>  		return 1;
>  	return restore_sk_common(fd, ui);
> +
> +err_revert_and_exit:
> +	close_safe(&procfs_self_dir);
> +	close_safe(&fdstore_fd);
> +	revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
> +	return -1;
>  }
>  
> -static int bind_deleted_unix_sk(int sk, struct unix_sk_info *ui,
> -					struct sockaddr_un *addr)
> +static int keep_deleted(struct unix_sk_info *ui, struct sockaddr_un *addr)
>  {
> -	char temp[PATH_MAX];
> -	int ret;
> -
> -	pr_info("found duplicate unix socket bound at %s\n", addr->sun_path);
> -
> -	ret = snprintf(temp, sizeof(temp),
> -			"%s-%s-%d", addr->sun_path, "criu-temp", getpid());
> -	/* this shouldn't happen, since sun_addr is only 108 chars long */
> -	if (ret < 0 || ret >= sizeof(temp)) {
> -		pr_err("snprintf of %s failed?\n", addr->sun_path);
> -		return -1;
> -	}
> -
> -	ret = rename(addr->sun_path, temp);
> -	if (ret < 0) {
> -		pr_perror("couldn't move socket for binding");
> -		return -1;
> -	}
> -
> -	ret = bind(sk, (struct sockaddr *)addr,
> -			sizeof(addr->sun_family) + ui->ue->name.len);
> -	if (ret < 0) {
> -		pr_perror("Can't bind socket after move");
> -		return -1;
> -	}
> -
> -	ret = rename(temp, addr->sun_path);
> -	if (ret < 0) {
> -		pr_perror("couldn't move socket back");
> -		return -1;
> +	if (ui->ue->deleted && (ui->flags & USK_GHOST_FDSTORE)) {
> +		int fd = open(addr->sun_path, O_PATH);
> +		if (fd < 0) {
> +			pr_perror("ghost: Can't open %s", addr->sun_path);
> +			return -1;
> +		}
> +		ui->fdstore_id = fdstore_add(fd);
> +		pr_debug("ghost: %#x fdstore_id %d %s\n",
> +			 ui->ue->ino, ui->fdstore_id, ui->ue->name.data);
> +		close(fd);
> +		return ui->fdstore_id;
>  	}
> -
> -	/* we've handled the deleted-ness of this
> -	 * socket and we don't want to delete it later
> -	 * since it's not /this/ socket.
> -	 */
> -	ui->ue->deleted = false;
>  	return 0;
>  }
>  
> +static void drop_deleted(struct unix_sk_info *ui)
> +{
> +	if (ui->ue->deleted)
> +		unlink_sk(ui);
> +}
> +
>  static int bind_unix_sk(int sk, struct unix_sk_info *ui)
>  {
>  	struct sockaddr_un addr;
> @@ -1368,19 +1384,14 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
>  	if (ui->name[0] && prep_unix_sk_cwd(ui, &cwd_fd, NULL, &ns_fd))
>  		return -1;
>  
> +	pr_debug("bind_unix_sk: id %#x ino %#x addr %s\n",
> +		 ui->ue->id, ui->ue->ino, ui->name);
>  	ret = bind(sk, (struct sockaddr *)&addr,
>  			sizeof(addr.sun_family) + ui->ue->name.len);
> -	if (ret < 0) {
> -		if (ui->ue->has_deleted && ui->ue->deleted && errno == EADDRINUSE) {
> -			if (bind_deleted_unix_sk(sk, ui, &addr))
> -				goto done;
> -		} else {
> -			pr_perror("Can't bind socket");
> -			goto done;
> -		}
> -	}
> +	if (ret < 0)
> +		goto done;
>  
> -	if (*ui->name && ui->ue->file_perms) {
> +	if (ui->ue->file_perms) {
>  		FilePermsEntry *perms = ui->ue->file_perms;
>  		char fname[PATH_MAX];
>  
> @@ -1403,8 +1414,8 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
>  		}
>  	}
>  
> -	if (ui->ue->deleted && unlink((char *)ui->ue->name.data) < 0) {
> -		pr_perror("failed to unlink %s", ui->ue->name.data);
> +	if (keep_deleted(ui, &addr) < 0) {
> +		pr_err("Can't save socket in fdstore\n");
>  		goto done;
>  	}
>  
> @@ -1416,6 +1427,7 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
>  	exit_code = 0;
>  done:
>  	revert_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd);
> +	drop_deleted(ui);
>  	return exit_code;
>  }
>  
> @@ -1556,6 +1568,7 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
>  
>  	fle = file_master(&ui->d);
>  	pr_info_opening("standalone", ui, fle);
> +
>  	if (fle->stage == FLE_OPEN)
>  		return post_open_standalone(&ui->d, fle->fe->fd);
>  
> @@ -1812,6 +1825,7 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
>  	ui->name_dir = (void *)ue->name_dir;
>  
>  	ui->flags		= 0;
> +	ui->fdstore_id		= -1;
>  	ui->peer		= NULL;
>  	ui->queuer		= NULL;
>  	ui->bound		= 0;
> @@ -1826,6 +1840,109 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
>  	INIT_LIST_HEAD(&ui->connected);
>  	INIT_LIST_HEAD(&ui->node);
>  	INIT_LIST_HEAD(&ui->scm_fles);
> +	INIT_LIST_HEAD(&ui->ghost_node);
> +
> +	return 0;
> +}
> +
> +#define GHOST_NAME_FMT		"~criu-%u"
> +#define GHOST_NAME_FMT_PREFIX	6 /* num of chars before counter */
> +
> +static int ghost_new_name(char *name, size_t namelen,
> +			  char **name_new, size_t *namelen_new)

Pls write the comment why we need to generate a new name for ghost
sockets

> +{
> +	char sname[64], *pos, *oldname = name;
> +	static unsigned int unix_name_cnt = 0;
> +	size_t k;
> +
> +	pr_debug("\tghost: handling name %s namelen %zu\n", name, namelen);
> +
> +	for (pos = &name[namelen - 1]; pos > name; pos--) {
> +		if (*pos == GHOST_NAME_FMT[0])
> +			break;
> +	}
> +
> +	if (strncmp(pos, GHOST_NAME_FMT, GHOST_NAME_FMT_PREFIX) == 0) {
> +		unsigned int __cnt;
> +
> +		if (sscanf(pos, GHOST_NAME_FMT, &__cnt) == 1) {
> +			pr_debug("\tghost: unix_name_cnt %d detected\n", __cnt);
> +			if (__cnt != unix_name_cnt)
> +				unix_name_cnt += __cnt;

What is going on here?
> +		}
> +	}
> +
> +	memzero(sname, sizeof(sname));
> +	k = snprintf(sname, sizeof(sname), GHOST_NAME_FMT, unix_name_cnt++) + 1;

Where is a ghost sockekt will be created?

Do we really want to kill an origin name without any footprint?

> +	if (k > sizeof(sname) || k > UNIX_PATH_MAX) {
> +		pr_err("\tghost: New name for socket is too long\n");
> +		return -1;
> +	}
> +	*namelen_new = k;
> +
> +	*name_new = shmalloc(*namelen_new);
> +	if (!*name_new) {
> +		pr_err("\tghost: Can't allocate new name for socket\n");
> +		return -ENOMEM;
> +	}
> +
> +	memcpy(*name_new, sname, k);
> +
> +	pr_debug("\tghost: name transition %s -> %s\n", oldname, *name_new);
> +	return 0;
> +}
> +
> +int unix_resolve_ghost_addr(void)
> +{
> +	struct unix_sk_info *ui, *t;
> +
> +	pr_debug("ghost: Resolving addresses\n");
> +
> +	/*
> +	 * Walk over ghost unix entries and find one
> +	 * which gonna be a master and won't unlink
> +	 * the name until all peers are connected to
> +	 * this designation.

Are you sure that this comment is still valid?

> +	 */
> +
> +	list_for_each_entry(ui, &unix_ghost_addr, ghost_node) {
> +		size_t newnamelen;
> +		char *newname;
> +
> +		pr_debug("ghost: ino %#x peer %#x address %s\n",
> +			 ui->ue->ino, ui->peer ? ui->peer->ue->ino : 0,
> +			 ui->name);
> +
> +		unlink_sk(ui);
> +
> +		if (ghost_new_name(ui->name, ui->ue->name.len,
> +				   &newname, &newnamelen))
> +			return -1;
> +
> +		ui->name = newname;
> +		ui->ue->name.len = newnamelen;
> +		ui->ue->name.data = (void *)newname;
> +
> +		unlink_sk(ui);
> +
> +		/*
> +		 * Figure out who is connected to this peer,
> +		 * so the name will be removed from FS only
> +		 * when last one is connected.
> +		 */
> +		list_for_each_entry(t, &unix_sockets, list) {
> +			if (t->flags & USK_GHOST_FDSTORE)
> +				continue;
> +			if (ui == t || t->peer != ui)
> +				continue;
> +
> +			pr_debug("\t\tghost: connected to us %#x -> %#x\n",
> +				 t->ue->ino, ui->ue->ino);
> +
> +			if (!(ui->flags & USK_GHOST_FDSTORE))
> +				ui->flags |= USK_GHOST_FDSTORE;
> +		}
> +	}
>  
>  	return 0;
>  }
> @@ -1873,6 +1990,15 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
>  		add_post_prepare_cb(&ui->peer_resolve);
>  	}
>  
> +	if (ui->ue->deleted) {
> +		if (!ui->name || !ui->ue->name.len || !ui->name[0]) {
> +			pr_err("No name present, ino %#x\n", ui->ue->ino);
> +			return -1;
> +		}
> +
> +		list_add_tail(&ui->ghost_node, &unix_ghost_addr);
> +	}
> +
>  	list_add_tail(&ui->list, &unix_sockets);
>  	return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops);
>  }
> -- 
> 2.14.3
>
Cyrill Gorcunov May 3, 2018, 9:57 a.m.
On Wed, May 02, 2018 at 11:14:01PM -0700, Andrey Vagin wrote:
> On Fri, Apr 27, 2018 at 02:35:04PM +0300, Cyrill Gorcunov wrote:
> > Unix sockets may be connected via deleted socket name,
> > moreover the name may be reused (ie same sun_addr but
> > different inodes).
> > 
> > To be able to handle them we do a few tricks:
> > 
> >  - when collecting sockets we figure out if "deleted"
> >    mark is present on the socket and if such we rename
> >    it to a new unique name (~criu-%u format)
> 
> Can we cut it off on a second dump/restore iteration?

We analyze indices on next iterations. Or you meant something different?