[v7,7/9] unix: Add support of ghost sockets

Submitted by Cyrill Gorcunov on May 23, 2018, 4:06 p.m.

Details

Message ID 20180523160615.31428-8-gorcunov@gmail.com
State New
Series "Add support of deleted unix sockets"
Headers show

Commit Message

Cyrill Gorcunov May 23, 2018, 4:06 p.m.
Unix sockets may be connected via a deleted socket name;
moreover, the name may be reused (i.e. the same sun_addr but
different inodes).

To be able to handle them we do a few tricks:

 - when collecting sockets we figure out whether the "deleted"
   mark is present on the socket and, if so, we order
   this socket's creation and deletion with a mutex, together
   with adding missing directories, and save its descriptor
   in fdstore if there are peers connected to it

 - on restore we connect via procfs/fd/X as suggested by
   Andrew Vagin

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 criu/cr-restore.c      |   4 +
 criu/include/sockets.h |   1 +
 criu/sk-unix.c         | 353 +++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 305 insertions(+), 53 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index e969c24cd1d8..645a0e724970 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -384,6 +384,10 @@  static int root_prepare_shared(void)
 	if (ret)
 		goto err;
 
+	ret = unix_prepare_root_shared();
+	if (ret)
+		goto err;
+
 	ret = add_fake_unix_queuers();
 	if (ret)
 		goto err;
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
index 1d0e1f29304c..f2085ace70b2 100644
--- a/criu/include/sockets.h
+++ b/criu/include/sockets.h
@@ -60,6 +60,7 @@  extern int netlink_receive_one(struct nlmsghdr *hdr, struct ns_id *ns, void *arg
 
 extern int unix_sk_id_add(unsigned int ino);
 extern int unix_sk_ids_parse(char *optarg);
+extern int unix_prepare_root_shared(void);
 
 extern int do_dump_opt(int sk, int level, int name, void *val, int len);
 #define dump_opt(s, l, n, f)	do_dump_opt(s, l, n, f, sizeof(*f))
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 88859da02f35..e39f313aaa0b 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -9,6 +9,7 @@ 
 #include <sys/un.h>
 #include <stdlib.h>
 #include <dlfcn.h>
+#include <libgen.h>
 
 #include "libnetlink.h"
 #include "cr_options.h"
@@ -31,6 +32,7 @@ 
 #include "fdstore.h"
 #include "fdinfo.h"
 #include "kerndat.h"
+#include "rst-malloc.h"
 
 #include "protobuf.h"
 #include "images/sk-unix.pb-c.h"
@@ -89,11 +91,21 @@  struct unix_sk_desc {
 	UnixSkEntry		*ue;
 };
 
+/*
+ * The mutex_ghost is accessed from different tasks,
+ * so make sure it is in shared memory.
+ */
+static mutex_t *mutex_ghost;
+
 static LIST_HEAD(unix_sockets);
+static LIST_HEAD(unix_ghost_addr);
 
 static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d,
 			     UnixSkEntry *ue, const struct fd_parms *p);
 
+struct unix_sk_info;
+static int unlink_sk(struct unix_sk_info *ui);
+
 struct unix_sk_listen_icon {
 	unsigned int			peer_ino;
 	struct unix_sk_desc		*sk_desc;
@@ -886,12 +898,15 @@  struct unix_sk_info {
 	char			*name;
 	char			*name_dir;
 	unsigned		flags;
+	int			fdstore_id;
 	struct unix_sk_info	*peer;
 	struct pprep_head	peer_resolve; /* XXX : union with the above? */
 	struct file_desc	d;
 	struct list_head	connected; /* List of sockets, connected to me */
 	struct list_head	node; /* To link in peer's connected list  */
 	struct list_head	scm_fles;
+	struct list_head	ghost_node;
+	size_t			ghost_dir_pos;
 
 	/*
 	 * For DGRAM sockets with queues, we should only restore the queue
@@ -916,6 +931,8 @@  struct scm_fle {
 
 #define USK_PAIR_MASTER		0x1
 #define USK_PAIR_SLAVE		0x2
+#define USK_GHOST_FDSTORE	0x4	/* bound but removed address */
+#define USK_GHOST_RENAMED	0x8	/* temporary renamed address */
 
 static struct unix_sk_info *find_unix_sk_by_ino(int ino)
 {
@@ -1241,6 +1258,7 @@  static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd,
 
 static int post_open_standalone(struct file_desc *d, int fd)
 {
+	int fdstore_fd = -1, procfs_self_dir = -1, len;
 	struct unix_sk_info *ui;
 	struct unix_sk_info *peer;
 	struct sockaddr_un addr;
@@ -1269,22 +1287,49 @@  static int post_open_standalone(struct file_desc *d, int fd)
 
 	memset(&addr, 0, sizeof(addr));
 	addr.sun_family = AF_UNIX;
-	memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
 
 	pr_info("\tConnect %d to %d\n", ui->ue->ino, peer->ue->ino);
 
-	if (prep_unix_sk_cwd(peer, &cwd_fd, NULL, &ns_fd))
+	if (prep_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd))
 		return -1;
 
-	if (connect(fd, (struct sockaddr *)&addr,
-				sizeof(addr.sun_family) +
-				peer->ue->name.len) < 0) {
+	if (peer->flags & USK_GHOST_FDSTORE) {
+		procfs_self_dir = open_proc(getpid(), "fd");
+		fdstore_fd = fdstore_get(peer->fdstore_id);
+
+		if (fdstore_fd < 0 || procfs_self_dir < 0)
+			goto err_revert_and_exit;
+
+		/*
+		 * WARNING: After this call we rely on revert_unix_sk_cwd
+		 * to restore the former directories so that connect
+		 * will operate inside proc/$pid/fd/X.
+		 */
+		if (fchdir(procfs_self_dir)) {
+			pr_perror("Can't change to procfs");
+			goto err_revert_and_exit;
+		}
+		len = snprintf(addr.sun_path, UNIX_PATH_MAX, "%d", fdstore_fd);
+	} else {
+		memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
+		len = peer->ue->name.len;
+	}
+
+	/*
+	 * Make sure the target is not being renamed at the moment
+	 * while we're connecting in sake of ghost sockets.
+	 */
+	mutex_lock(mutex_ghost);
+	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) {
 		pr_perror("Can't connect %d socket", ui->ue->ino);
-		revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
-		return -1;
+		goto err_revert_and_exit;
 	}
+	mutex_unlock(mutex_ghost);
+
 	ui->is_connected = true;
 
+	close_safe(&procfs_self_dir);
+	close_safe(&fdstore_fd);
 	revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
 
 restore_queue:
@@ -1296,48 +1341,126 @@  static int post_open_standalone(struct file_desc *d, int fd)
 	if (ui->queuer && !ui->queuer->peer_queue_restored)
 		return 1;
 	return restore_sk_common(fd, ui);
+
+err_revert_and_exit:
+	close_safe(&procfs_self_dir);
+	close_safe(&fdstore_fd);
+	revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
+	return -1;
 }
 
-static int bind_deleted_unix_sk(int sk, struct unix_sk_info *ui,
-					struct sockaddr_un *addr)
+static int keep_deleted(struct unix_sk_info *ui)
 {
-	char temp[PATH_MAX];
-	int ret;
+	if (ui->flags & USK_GHOST_FDSTORE) {
+		int fd = open(ui->name, O_PATH);
+		if (fd < 0) {
+			pr_perror("ghost: Can't open id %#x ino %d addr %s",
+				  ui->ue->id, ui->ue->ino, ui->name);
+			return -1;
+		}
+		ui->fdstore_id = fdstore_add(fd);
+		pr_debug("ghost: id %#x %d fdstore_id %d %s\n",
+			 ui->ue->id, ui->ue->ino, ui->fdstore_id, ui->name);
+		close(fd);
+		return ui->fdstore_id;
+	}
+	return 0;
+}
 
-	pr_info("found duplicate unix socket bound at %s\n", addr->sun_path);
+static int drop_deleted(struct unix_sk_info *ui)
+{
+	if (ui->ue->deleted)
+		return unlink_sk(ui);
+	return 0;
+}
 
-	ret = snprintf(temp, sizeof(temp),
-			"%s-%s-%d", addr->sun_path, "criu-temp", getpid());
-	/* this shouldn't happen, since sun_addr is only 108 chars long */
-	if (ret < 0 || ret >= sizeof(temp)) {
-		pr_err("snprintf of %s failed?\n", addr->sun_path);
-		return -1;
+#define UNIX_GHOST_FMT "%s.criu-sk-ghost"
+
+/*
+ * When path where socket lives is deleted, we need to reconstruct
+ * it back up but allow caller to remove it after.
+ */
+static int bind_on_deleted(int sk, struct unix_sk_info *ui)
+{
+	char path[PATH_MAX], *pos;
+	struct sockaddr_un addr;
+	int ret;
+
+	if (ui->ue->name.len >= sizeof(path)) {
+		pr_err("ghost: Too long name for socket\n");
+		return -ENOSPC;
 	}
 
-	ret = rename(addr->sun_path, temp);
-	if (ret < 0) {
-		pr_perror("couldn't move socket for binding");
-		return -1;
+	memcpy(path, ui->name, ui->ue->name.len);
+	path[ui->ue->name.len] = '\0';
+
+	for (pos = strrchr(path, '/'); pos;
+	     pos = strrchr(path, '/')) {
+		*pos = '\0';
+
+		ret = access(path, R_OK | W_OK | X_OK);
+		if (ret == 0) {
+			ui->ghost_dir_pos = pos - path;
+			pr_debug("ghost: detected F_OK %s\n", path);
+			break;
+		}
+
+		if (errno != ENOENT) {
+			ret = -errno;
+			pr_perror("ghost: Can't access %s\n", path);
+			return ret;
+		}
 	}
 
-	ret = bind(sk, (struct sockaddr *)addr,
-			sizeof(addr->sun_family) + ui->ue->name.len);
-	if (ret < 0) {
-		pr_perror("Can't bind socket after move");
-		return -1;
+	memcpy(path, ui->name, ui->ue->name.len);
+	path[ui->ue->name.len] = '\0';
+
+	pos = dirname(path);
+	pr_debug("ghost: creating %s\n", pos);
+	ret = mkdirpat(AT_FDCWD, pos, 0755);
+	if (ret) {
+		errno = -ret;
+		pr_perror("ghost: Can't create %s\n", pos);
+		return ret;
 	}
 
-	ret = rename(temp, addr->sun_path);
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
+
+	ret = bind(sk, (struct sockaddr *)&addr,
+		   sizeof(addr.sun_family) + ui->ue->name.len);
 	if (ret < 0) {
-		pr_perror("couldn't move socket back");
-		return -1;
+		/*
+		 * In case if there some real living socket
+		 * with same name just move it aside for a
+		 * while, we will move it back once ghost
+		 * socket is processed.
+		 */
+		if (errno == EADDRINUSE) {
+			char path[PATH_MAX];
+
+			snprintf(path, sizeof(path), UNIX_GHOST_FMT, ui->name);
+			if (rename(ui->name, path)) {
+				ret = -errno;
+				pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s\n",
+					  ui->ue->id, ui->ue->ino, ui->name, path);
+				return ret;
+			}
+			ui->flags |= USK_GHOST_RENAMED;
+			pr_debug("ghost: id %#x ino %d renamed %s -> %s\n",
+				 ui->ue->id, ui->ue->ino, ui->name, path);
+			ret = bind(sk, (struct sockaddr *)&addr,
+				   sizeof(addr.sun_family) + ui->ue->name.len);
+		}
+		if (ret < 0) {
+			ret = -errno;
+			pr_perror("ghost: Can't bind on socket id %#x ino %d addr %s",
+				  ui->ue->id, ui->ue->ino, ui->name);
+			return ret;
+		}
 	}
 
-	/* we've handled the deleted-ness of this
-	 * socket and we don't want to delete it later
-	 * since it's not /this/ socket.
-	 */
-	ui->ue->deleted = false;
 	return 0;
 }
 
@@ -1365,22 +1488,40 @@  static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 	addr.sun_family = AF_UNIX;
 	memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
 
-	if (ui->name[0] && prep_unix_sk_cwd(ui, &cwd_fd, NULL, &ns_fd))
+	if (ui->name[0] && prep_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd))
 		return -1;
 
-	ret = bind(sk, (struct sockaddr *)&addr,
-			sizeof(addr.sun_family) + ui->ue->name.len);
+	/*
+	 * Order binding for sake of ghost sockets. We might rename
+	 * existing socket to some temp name, bind ghost, delete it,
+	 * and finally move the former back, thus while we're doing
+	 * this stuff we should not be interruped by connection
+	 * from another sockets.
+	 *
+	 * FIXME: Probably wort make it per address rather for
+	 * optimization sake.
+	 */
+	mutex_lock(mutex_ghost);
+
+	if (ui->flags & USK_GHOST_FDSTORE) {
+		pr_debug("ghost: bind id %#x ino %d addr %s\n",
+			 ui->ue->id, ui->ue->ino, ui->name);
+		ret = bind_on_deleted(sk, ui);
+		if (ret)
+			errno = -ret;
+	} else {
+		pr_debug("bind id %#x ino %d addr %s\n",
+			 ui->ue->id, ui->ue->ino, ui->name);
+		ret = bind(sk, (struct sockaddr *)&addr,
+			   sizeof(addr.sun_family) + ui->ue->name.len);
+	}
 	if (ret < 0) {
-		if (ui->ue->has_deleted && ui->ue->deleted && errno == EADDRINUSE) {
-			if (bind_deleted_unix_sk(sk, ui, &addr))
-				goto done;
-		} else {
-			pr_perror("Can't bind socket");
-			goto done;
-		}
+		pr_perror("Can't bind id %#x ino %d addr %s",
+			  ui->ue->id, ui->ue->ino, ui->name);
+		goto done;
 	}
 
-	if (*ui->name && ui->ue->file_perms) {
+	if (ui->ue->file_perms) {
 		FilePermsEntry *perms = ui->ue->file_perms;
 		char fname[PATH_MAX];
 
@@ -1403,8 +1544,8 @@  static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 		}
 	}
 
-	if (ui->ue->deleted && unlink((char *)ui->ue->name.data) < 0) {
-		pr_perror("failed to unlink %s", ui->ue->name.data);
+	if (keep_deleted(ui) < 0) {
+		pr_err("Can't save socket in fdstore\n");
 		goto done;
 	}
 
@@ -1416,6 +1557,9 @@  static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 	exit_code = 0;
 done:
 	revert_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd);
+	if (drop_deleted(ui))
+		exit_code = -1;
+	mutex_unlock(mutex_ghost);
 	return exit_code;
 }
 
@@ -1551,11 +1695,27 @@  static int setup_second_end(int *sks, struct fdinfo_list_entry *second_end)
 static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
 {
 	struct unix_sk_info *queuer = ui->queuer;
-	struct fdinfo_list_entry *fle;
+	struct unix_sk_info *peer = ui->peer;
+	struct fdinfo_list_entry *fle, *fle_peer;
 	int sk;
 
 	fle = file_master(&ui->d);
 	pr_info_opening("standalone", ui, fle);
+
+	/*
+	 * If we're about to connect to the peer which
+	 * has been bound to removed address we should
+	 * wait until it is processed and put into fdstore
+	 * engine, later we will use the engine to connect
+	 * into it in a special way.
+	 */
+	if (peer && (peer->flags & USK_GHOST_FDSTORE)) {
+		fle_peer = file_master(&peer->d);
+		if (fle_peer->stage < FLE_OPEN) {
+			return 1;
+		}
+	}
+
 	if (fle->stage == FLE_OPEN)
 		return post_open_standalone(&ui->d, fle->fe->fd);
 
@@ -1758,15 +1918,15 @@  static struct file_desc_ops unix_desc_ops = {
  * Make FS clean from sockets we're about to
  * restore. See for how we bind them for details
  */
-static void unlink_sk(struct unix_sk_info *ui)
+static int unlink_sk(struct unix_sk_info *ui)
 {
-	int ret, cwd_fd = -1, root_fd = -1, ns_fd = -1;
+	int ret = 0, cwd_fd = -1, root_fd = -1, ns_fd = -1;
 
 	if (!ui->name || ui->name[0] == '\0' || (ui->ue->uflags & USK_EXTERN))
-		return;
+		return 0;
 
 	if (prep_unix_sk_cwd(ui, &cwd_fd, &root_fd, NULL))
-		return;
+		return -1;
 
 	ret = unlinkat(AT_FDCWD, ui->name, 0) ? -1 : 0;
 	if (ret < 0 && errno != ENOENT) {
@@ -1774,13 +1934,55 @@  static void unlink_sk(struct unix_sk_info *ui)
 			ui->ue->ino, ui->ue->peer,
 			ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-",
 			ui->name_dir ? ui->name_dir : "-");
+		ret = -errno;
+		goto out;
 	} else if (ret == 0) {
 		pr_debug("Unlinked socket %d peer %d (name %s dir %s)\n",
 			 ui->ue->ino, ui->ue->peer,
 			 ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-",
 			 ui->name_dir ? ui->name_dir : "-");
 	}
+
+	if (ui->ghost_dir_pos) {
+		char path[PATH_MAX], *pos;
+
+		memcpy(path, ui->name, ui->ue->name.len);
+		path[ui->ue->name.len] = '\0';
+
+		for (pos = strrchr(path, '/');
+		     pos && (pos - path) > ui->ghost_dir_pos;
+		     pos = strrchr(path, '/')) {
+			*pos = '\0';
+			if (rmdir(path)) {
+				ret = - errno;
+				pr_perror("ghost: Can't remove %s\n", path);
+				goto out;
+			}
+			pr_debug("ghost: Removed %s\n", path);
+		}
+	}
+
+	/*
+	 * If it was a ghost socket we should move original
+	 * socket back into place.
+	 */
+	if (ui->flags & USK_GHOST_RENAMED) {
+		char path[PATH_MAX];
+
+		snprintf(path, sizeof(path), UNIX_GHOST_FMT, ui->name);
+		if (rename(path, ui->name)) {
+			pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s\n",
+				  ui->ue->id, ui->ue->ino, path, ui->name);
+			ret = -errno;
+		} else {
+			pr_debug("ghost: id %#x ino %d addr %s -> %s\n",
+				 ui->ue->id, ui->ue->ino, path, ui->name);
+		}
+	}
+
+out:
 	revert_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd);
+	return ret;
 }
 
 static void try_resolve_unix_peer(struct unix_sk_info *ui);
@@ -1812,6 +2014,8 @@  static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
 	ui->name_dir = (void *)ue->name_dir;
 
 	ui->flags		= 0;
+	ui->fdstore_id		= -1;
+	ui->ghost_dir_pos	= 0;
 	ui->peer		= NULL;
 	ui->queuer		= NULL;
 	ui->bound		= 0;
@@ -1826,6 +2030,40 @@  static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
 	INIT_LIST_HEAD(&ui->connected);
 	INIT_LIST_HEAD(&ui->node);
 	INIT_LIST_HEAD(&ui->scm_fles);
+	INIT_LIST_HEAD(&ui->ghost_node);
+
+	return 0;
+}
+
+int unix_prepare_root_shared(void)
+{
+	struct unix_sk_info *ui;
+
+	mutex_ghost = shmalloc(sizeof(*mutex_ghost));
+	if (!mutex_ghost) {
+		pr_err("ghost: Can't allocate mutex\n");
+		return -ENOMEM;
+	}
+	mutex_init(mutex_ghost);
+
+	pr_debug("ghost: Resolving addresses\n");
+
+	list_for_each_entry(ui, &unix_ghost_addr, ghost_node) {
+		pr_debug("ghost: id %#x type %s state %s ino %d peer %d address %s\n",
+			 ui->ue->id, socket_type_name(ui->ue->type),
+			 tcp_state_name(ui->ue->state),
+			 ui->ue->ino, ui->peer ? ui->peer->ue->ino : 0,
+			 ui->name);
+
+		/*
+		 * Drop any existing trash on the FS and mark the
+		 * peer as a ghost one, so we will put it into
+		 * fdstore to be able to connect into it even
+		 * when the address is removed from the FS.
+		 */
+		unlink_sk(ui);
+		ui->flags |= USK_GHOST_FDSTORE;
+	}
 
 	return 0;
 }
@@ -1873,6 +2111,15 @@  static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
 		add_post_prepare_cb(&ui->peer_resolve);
 	}
 
+	if (ui->ue->deleted) {
+		if (!ui->name || !ui->ue->name.len || !ui->name[0]) {
+			pr_err("No name present, ino %d\n", ui->ue->ino);
+			return -1;
+		}
+
+		list_add_tail(&ui->ghost_node, &unix_ghost_addr);
+	}
+
 	list_add_tail(&ui->list, &unix_sockets);
 	return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops);
 }

Comments

Andrei Vagin June 4, 2018, 7:52 p.m.
On Wed, May 23, 2018 at 07:06:13PM +0300, Cyrill Gorcunov wrote:
> Unix sockets may be connected via deleted socket name,
> moreover the name may be reused (ie same sun_addr but
> different inodes).
> 
> To be able to handle them we do a few tricks:
> 
>  - when collecting sockets we figure out if "deleted"
>    mark is present on the socket and if such we order
>    this sockets creation and deletion with mutex, together
>    with adding missing directories, and save this descriptors
>    in fdstore if there are peers connected to
> 
>  - on restore we connect via procfs/fd/X as suggested by
>    Andrew Vagin
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
> ---
>  criu/cr-restore.c      |   4 +
>  criu/include/sockets.h |   1 +
>  criu/sk-unix.c         | 353 +++++++++++++++++++++++++++++++++++++++++--------
>  3 files changed, 305 insertions(+), 53 deletions(-)
> 
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index e969c24cd1d8..645a0e724970 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -384,6 +384,10 @@ static int root_prepare_shared(void)
>  	if (ret)
>  		goto err;
>  
> +	ret = unix_prepare_root_shared();
> +	if (ret)
> +		goto err;
> +
>  	ret = add_fake_unix_queuers();
>  	if (ret)
>  		goto err;
> diff --git a/criu/include/sockets.h b/criu/include/sockets.h
> index 1d0e1f29304c..f2085ace70b2 100644
> --- a/criu/include/sockets.h
> +++ b/criu/include/sockets.h
> @@ -60,6 +60,7 @@ extern int netlink_receive_one(struct nlmsghdr *hdr, struct ns_id *ns, void *arg
>  
>  extern int unix_sk_id_add(unsigned int ino);
>  extern int unix_sk_ids_parse(char *optarg);
> +extern int unix_prepare_root_shared(void);
>  
>  extern int do_dump_opt(int sk, int level, int name, void *val, int len);
>  #define dump_opt(s, l, n, f)	do_dump_opt(s, l, n, f, sizeof(*f))
> diff --git a/criu/sk-unix.c b/criu/sk-unix.c
> index 88859da02f35..e39f313aaa0b 100644
> --- a/criu/sk-unix.c
> +++ b/criu/sk-unix.c
> @@ -9,6 +9,7 @@
>  #include <sys/un.h>
>  #include <stdlib.h>
>  #include <dlfcn.h>
> +#include <libgen.h>
>  
>  #include "libnetlink.h"
>  #include "cr_options.h"
> @@ -31,6 +32,7 @@
>  #include "fdstore.h"
>  #include "fdinfo.h"
>  #include "kerndat.h"
> +#include "rst-malloc.h"
>  
>  #include "protobuf.h"
>  #include "images/sk-unix.pb-c.h"
> @@ -89,11 +91,21 @@ struct unix_sk_desc {
>  	UnixSkEntry		*ue;
>  };
>  
> +/*
> + * The mutex_ghost is accessed from different tasks,
> + * so make sure it is in shared memory.
> + */
> +static mutex_t *mutex_ghost;
> +
>  static LIST_HEAD(unix_sockets);
> +static LIST_HEAD(unix_ghost_addr);
>  
>  static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d,
>  			     UnixSkEntry *ue, const struct fd_parms *p);
>  
> +struct unix_sk_info;
> +static int unlink_sk(struct unix_sk_info *ui);
> +
>  struct unix_sk_listen_icon {
>  	unsigned int			peer_ino;
>  	struct unix_sk_desc		*sk_desc;
> @@ -886,12 +898,15 @@ struct unix_sk_info {
>  	char			*name;
>  	char			*name_dir;
>  	unsigned		flags;
> +	int			fdstore_id;
>  	struct unix_sk_info	*peer;
>  	struct pprep_head	peer_resolve; /* XXX : union with the above? */
>  	struct file_desc	d;
>  	struct list_head	connected; /* List of sockets, connected to me */
>  	struct list_head	node; /* To link in peer's connected list  */
>  	struct list_head	scm_fles;
> +	struct list_head	ghost_node;
> +	size_t			ghost_dir_pos;
>  
>  	/*
>  	 * For DGRAM sockets with queues, we should only restore the queue
> @@ -916,6 +931,8 @@ struct scm_fle {
>  
>  #define USK_PAIR_MASTER		0x1
>  #define USK_PAIR_SLAVE		0x2
> +#define USK_GHOST_FDSTORE	0x4	/* bound but removed address */
> +#define USK_GHOST_RENAMED	0x8	/* temporary renamed address */
>  
>  static struct unix_sk_info *find_unix_sk_by_ino(int ino)
>  {
> @@ -1241,6 +1258,7 @@ static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd,
>  
>  static int post_open_standalone(struct file_desc *d, int fd)
>  {
> +	int fdstore_fd = -1, procfs_self_dir = -1, len;
>  	struct unix_sk_info *ui;
>  	struct unix_sk_info *peer;
>  	struct sockaddr_un addr;
> @@ -1269,22 +1287,49 @@ static int post_open_standalone(struct file_desc *d, int fd)
>  
>  	memset(&addr, 0, sizeof(addr));
>  	addr.sun_family = AF_UNIX;
> -	memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
>  
>  	pr_info("\tConnect %d to %d\n", ui->ue->ino, peer->ue->ino);
>  
> -	if (prep_unix_sk_cwd(peer, &cwd_fd, NULL, &ns_fd))
> +	if (prep_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd))
>  		return -1;
>  
> -	if (connect(fd, (struct sockaddr *)&addr,
> -				sizeof(addr.sun_family) +
> -				peer->ue->name.len) < 0) {
> +	if (peer->flags & USK_GHOST_FDSTORE) {
> +		procfs_self_dir = open_proc(getpid(), "fd");
> +		fdstore_fd = fdstore_get(peer->fdstore_id);
> +
> +		if (fdstore_fd < 0 || procfs_self_dir < 0)
> +			goto err_revert_and_exit;
> +
> +		/*
> +		 * WARNING: After this call we rely on revert_unix_sk_cwd
> +		 * to restore the former directories so that connect
> +		 * will operate inside proc/$pid/fd/X.
> +		 */
> +		if (fchdir(procfs_self_dir)) {
> +			pr_perror("Can't change to procfs");
> +			goto err_revert_and_exit;
> +		}
> +		len = snprintf(addr.sun_path, UNIX_PATH_MAX, "%d", fdstore_fd);
> +	} else {
> +		memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
> +		len = peer->ue->name.len;
> +	}
> +
> +	/*
> +	 * Make sure the target is not being renamed at the moment
> +	 * while we're connecting in sake of ghost sockets.
> +	 */
> +	mutex_lock(mutex_ghost);
> +	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) {
>  		pr_perror("Can't connect %d socket", ui->ue->ino);
> -		revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
> -		return -1;
> +		goto err_revert_and_exit;
>  	}
> +	mutex_unlock(mutex_ghost);
> +
>  	ui->is_connected = true;
>  
> +	close_safe(&procfs_self_dir);
> +	close_safe(&fdstore_fd);
>  	revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
>  
>  restore_queue:
> @@ -1296,48 +1341,126 @@ static int post_open_standalone(struct file_desc *d, int fd)
>  	if (ui->queuer && !ui->queuer->peer_queue_restored)
>  		return 1;
>  	return restore_sk_common(fd, ui);
> +
> +err_revert_and_exit:
> +	close_safe(&procfs_self_dir);
> +	close_safe(&fdstore_fd);
> +	revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
> +	return -1;
>  }
>  
> -static int bind_deleted_unix_sk(int sk, struct unix_sk_info *ui,
> -					struct sockaddr_un *addr)
> +static int keep_deleted(struct unix_sk_info *ui)
>  {
> -	char temp[PATH_MAX];
> -	int ret;
> +	if (ui->flags & USK_GHOST_FDSTORE) {
> +		int fd = open(ui->name, O_PATH);
> +		if (fd < 0) {
> +			pr_perror("ghost: Can't open id %#x ino %d addr %s",
> +				  ui->ue->id, ui->ue->ino, ui->name);
> +			return -1;
> +		}
> +		ui->fdstore_id = fdstore_add(fd);
> +		pr_debug("ghost: id %#x %d fdstore_id %d %s\n",
> +			 ui->ue->id, ui->ue->ino, ui->fdstore_id, ui->name);
> +		close(fd);
> +		return ui->fdstore_id;
> +	}
> +	return 0;
> +}
>  
> -	pr_info("found duplicate unix socket bound at %s\n", addr->sun_path);
> +static int drop_deleted(struct unix_sk_info *ui)
> +{
> +	if (ui->ue->deleted)
> +		return unlink_sk(ui);
> +	return 0;
> +}
>  
> -	ret = snprintf(temp, sizeof(temp),
> -			"%s-%s-%d", addr->sun_path, "criu-temp", getpid());
> -	/* this shouldn't happen, since sun_addr is only 108 chars long */
> -	if (ret < 0 || ret >= sizeof(temp)) {
> -		pr_err("snprintf of %s failed?\n", addr->sun_path);
> -		return -1;
> +#define UNIX_GHOST_FMT "%s.criu-sk-ghost"
> +
> +/*
> + * When path where socket lives is deleted, we need to reconstruct
> + * it back up but allow caller to remove it after.
> + */
> +static int bind_on_deleted(int sk, struct unix_sk_info *ui)
> +{
> +	char path[PATH_MAX], *pos;
> +	struct sockaddr_un addr;
> +	int ret;
> +
> +	if (ui->ue->name.len >= sizeof(path)) {
> +		pr_err("ghost: Too long name for socket\n");
> +		return -ENOSPC;
>  	}
>  
> -	ret = rename(addr->sun_path, temp);
> -	if (ret < 0) {
> -		pr_perror("couldn't move socket for binding");
> -		return -1;
> +	memcpy(path, ui->name, ui->ue->name.len);
> +	path[ui->ue->name.len] = '\0';
> +
> +	for (pos = strrchr(path, '/'); pos;
> +	     pos = strrchr(path, '/')) {
> +		*pos = '\0';
> +
> +		ret = access(path, R_OK | W_OK | X_OK);
> +		if (ret == 0) {
> +			ui->ghost_dir_pos = pos - path;
> +			pr_debug("ghost: detected F_OK %s\n", path);
> +			break;
> +		}
> +
> +		if (errno != ENOENT) {
> +			ret = -errno;
> +			pr_perror("ghost: Can't access %s\n", path);
> +			return ret;
> +		}
>  	}
>  
> -	ret = bind(sk, (struct sockaddr *)addr,
> -			sizeof(addr->sun_family) + ui->ue->name.len);
> -	if (ret < 0) {
> -		pr_perror("Can't bind socket after move");
> -		return -1;
> +	memcpy(path, ui->name, ui->ue->name.len);
> +	path[ui->ue->name.len] = '\0';
> +
> +	pos = dirname(path);
> +	pr_debug("ghost: creating %s\n", pos);
> +	ret = mkdirpat(AT_FDCWD, pos, 0755);
> +	if (ret) {
> +		errno = -ret;
> +		pr_perror("ghost: Can't create %s\n", pos);
> +		return ret;
>  	}
>  
> -	ret = rename(temp, addr->sun_path);
> +	memset(&addr, 0, sizeof(addr));
> +	addr.sun_family = AF_UNIX;
> +	memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
> +
> +	ret = bind(sk, (struct sockaddr *)&addr,
> +		   sizeof(addr.sun_family) + ui->ue->name.len);
>  	if (ret < 0) {
> -		pr_perror("couldn't move socket back");
> -		return -1;
> +		/*
> +		 * In case if there some real living socket
> +		 * with same name just move it aside for a
> +		 * while, we will move it back once ghost
> +		 * socket is processed.
> +		 */
> +		if (errno == EADDRINUSE) {
> +			char path[PATH_MAX];
> +
> +			snprintf(path, sizeof(path), UNIX_GHOST_FMT, ui->name);
> +			if (rename(ui->name, path)) {
> +				ret = -errno;
> +				pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s\n",
> +					  ui->ue->id, ui->ue->ino, ui->name, path);
> +				return ret;
> +			}
> +			ui->flags |= USK_GHOST_RENAMED;

Why do we use a global flag for this? Why can't we rename this file
back after bind()?

> +			pr_debug("ghost: id %#x ino %d renamed %s -> %s\n",
> +				 ui->ue->id, ui->ue->ino, ui->name, path);
> +			ret = bind(sk, (struct sockaddr *)&addr,
> +				   sizeof(addr.sun_family) + ui->ue->name.len);
> +		}
> +		if (ret < 0) {
> +			ret = -errno;
> +			pr_perror("ghost: Can't bind on socket id %#x ino %d addr %s",
> +				  ui->ue->id, ui->ue->ino, ui->name);
> +			return ret;
> +		}
>  	}
>  
> -	/* we've handled the deleted-ness of this
> -	 * socket and we don't want to delete it later
> -	 * since it's not /this/ socket.
> -	 */
> -	ui->ue->deleted = false;
>  	return 0;
>  }
>  
> @@ -1365,22 +1488,40 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
>  	addr.sun_family = AF_UNIX;
>  	memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
>  
> -	if (ui->name[0] && prep_unix_sk_cwd(ui, &cwd_fd, NULL, &ns_fd))
> +	if (ui->name[0] && prep_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd))
>  		return -1;
>  
> -	ret = bind(sk, (struct sockaddr *)&addr,
> -			sizeof(addr.sun_family) + ui->ue->name.len);
> +	/*
> +	 * Order binding for sake of ghost sockets. We might rename
> +	 * existing socket to some temp name, bind ghost, delete it,
> +	 * and finally move the former back, thus while we're doing
> +	 * this stuff we should not be interruped by connection
> +	 * from another sockets.
> +	 *
> +	 * FIXME: Probably wort make it per address rather for
> +	 * optimization sake.
> +	 */
> +	mutex_lock(mutex_ghost);
> +
> +	if (ui->flags & USK_GHOST_FDSTORE) {
> +		pr_debug("ghost: bind id %#x ino %d addr %s\n",
> +			 ui->ue->id, ui->ue->ino, ui->name);
> +		ret = bind_on_deleted(sk, ui);
> +		if (ret)
> +			errno = -ret;
> +	} else {
> +		pr_debug("bind id %#x ino %d addr %s\n",
> +			 ui->ue->id, ui->ue->ino, ui->name);
> +		ret = bind(sk, (struct sockaddr *)&addr,
> +			   sizeof(addr.sun_family) + ui->ue->name.len);
> +	}
>  	if (ret < 0) {
> -		if (ui->ue->has_deleted && ui->ue->deleted && errno == EADDRINUSE) {
> -			if (bind_deleted_unix_sk(sk, ui, &addr))
> -				goto done;
> -		} else {
> -			pr_perror("Can't bind socket");
> -			goto done;
> -		}
> +		pr_perror("Can't bind id %#x ino %d addr %s",
> +			  ui->ue->id, ui->ue->ino, ui->name);
> +		goto done;
>  	}
>  
> -	if (*ui->name && ui->ue->file_perms) {
> +	if (ui->ue->file_perms) {
>  		FilePermsEntry *perms = ui->ue->file_perms;
>  		char fname[PATH_MAX];
>  
> @@ -1403,8 +1544,8 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
>  		}
>  	}
>  
> -	if (ui->ue->deleted && unlink((char *)ui->ue->name.data) < 0) {
> -		pr_perror("failed to unlink %s", ui->ue->name.data);
> +	if (keep_deleted(ui) < 0) {
> +		pr_err("Can't save socket in fdstore\n");
>  		goto done;
>  	}
>  
> @@ -1416,6 +1557,9 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
>  	exit_code = 0;
>  done:
>  	revert_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd);
> +	if (drop_deleted(ui))
> +		exit_code = -1;
> +	mutex_unlock(mutex_ghost);
>  	return exit_code;
>  }
>  
> @@ -1551,11 +1695,27 @@ static int setup_second_end(int *sks, struct fdinfo_list_entry *second_end)
>  static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
>  {
>  	struct unix_sk_info *queuer = ui->queuer;
> -	struct fdinfo_list_entry *fle;
> +	struct unix_sk_info *peer = ui->peer;
> +	struct fdinfo_list_entry *fle, *fle_peer;
>  	int sk;
>  
>  	fle = file_master(&ui->d);
>  	pr_info_opening("standalone", ui, fle);
> +
> +	/*
> +	 * If we're about to connect to the peer which
> +	 * has been bound to removed address we should
> +	 * wait until it is processed and put into fdstore
> +	 * engine, later we will use the engine to connect
> +	 * into it in a special way.
> +	 */
> +	if (peer && (peer->flags & USK_GHOST_FDSTORE)) {
> +		fle_peer = file_master(&peer->d);
> +		if (fle_peer->stage < FLE_OPEN) {
> +			return 1;
> +		}
> +	}
> +
>  	if (fle->stage == FLE_OPEN)
>  		return post_open_standalone(&ui->d, fle->fe->fd);
>  
> @@ -1758,15 +1918,15 @@ static struct file_desc_ops unix_desc_ops = {
>   * Make FS clean from sockets we're about to
>   * restore. See for how we bind them for details
>   */
> -static void unlink_sk(struct unix_sk_info *ui)
> +static int unlink_sk(struct unix_sk_info *ui)
>  {
> -	int ret, cwd_fd = -1, root_fd = -1, ns_fd = -1;
> +	int ret = 0, cwd_fd = -1, root_fd = -1, ns_fd = -1;
>  
>  	if (!ui->name || ui->name[0] == '\0' || (ui->ue->uflags & USK_EXTERN))
> -		return;
> +		return 0;
>  
>  	if (prep_unix_sk_cwd(ui, &cwd_fd, &root_fd, NULL))
> -		return;
> +		return -1;
>  
>  	ret = unlinkat(AT_FDCWD, ui->name, 0) ? -1 : 0;
>  	if (ret < 0 && errno != ENOENT) {
> @@ -1774,13 +1934,55 @@ static void unlink_sk(struct unix_sk_info *ui)
>  			ui->ue->ino, ui->ue->peer,
>  			ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-",
>  			ui->name_dir ? ui->name_dir : "-");
> +		ret = -errno;
> +		goto out;
>  	} else if (ret == 0) {
>  		pr_debug("Unlinked socket %d peer %d (name %s dir %s)\n",
>  			 ui->ue->ino, ui->ue->peer,
>  			 ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-",
>  			 ui->name_dir ? ui->name_dir : "-");
>  	}
> +
> +	if (ui->ghost_dir_pos) {
> +		char path[PATH_MAX], *pos;
> +
> +		memcpy(path, ui->name, ui->ue->name.len);
> +		path[ui->ue->name.len] = '\0';
> +
> +		for (pos = strrchr(path, '/');
> +		     pos && (pos - path) > ui->ghost_dir_pos;
> +		     pos = strrchr(path, '/')) {
> +			*pos = '\0';
> +			if (rmdir(path)) {
> +				ret = - errno;
> +				pr_perror("ghost: Can't remove %s\n", path);
> +				goto out;
> +			}
> +			pr_debug("ghost: Removed %s\n", path);
> +		}
> +	}
> +
> +	/*
> +	 * If it was a ghost socket we should move original
> +	 * socket back into place.
> +	 */
> +	if (ui->flags & USK_GHOST_RENAMED) {
> +		char path[PATH_MAX];
> +
> +		snprintf(path, sizeof(path), UNIX_GHOST_FMT, ui->name);
> +		if (rename(path, ui->name)) {
> +			pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s\n",
> +				  ui->ue->id, ui->ue->ino, path, ui->name);
> +			ret = -errno;
> +		} else {
> +			pr_debug("ghost: id %#x ino %d addr %s -> %s\n",
> +				 ui->ue->id, ui->ue->ino, path, ui->name);
> +		}
> +	}
> +
> +out:
>  	revert_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd);
> +	return ret;
>  }
>  
>  static void try_resolve_unix_peer(struct unix_sk_info *ui);
> @@ -1812,6 +2014,8 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
>  	ui->name_dir = (void *)ue->name_dir;
>  
>  	ui->flags		= 0;
> +	ui->fdstore_id		= -1;
> +	ui->ghost_dir_pos	= 0;
>  	ui->peer		= NULL;
>  	ui->queuer		= NULL;
>  	ui->bound		= 0;
> @@ -1826,6 +2030,40 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
>  	INIT_LIST_HEAD(&ui->connected);
>  	INIT_LIST_HEAD(&ui->node);
>  	INIT_LIST_HEAD(&ui->scm_fles);
> +	INIT_LIST_HEAD(&ui->ghost_node);
> +
> +	return 0;
> +}
> +
> +int unix_prepare_root_shared(void)
> +{
> +	struct unix_sk_info *ui;
> +
> +	mutex_ghost = shmalloc(sizeof(*mutex_ghost));
> +	if (!mutex_ghost) {
> +		pr_err("ghost: Can't allocate mutex\n");
> +		return -ENOMEM;
> +	}
> +	mutex_init(mutex_ghost);
> +
> +	pr_debug("ghost: Resolving addresses\n");
> +
> +	list_for_each_entry(ui, &unix_ghost_addr, ghost_node) {
> +		pr_debug("ghost: id %#x type %s state %s ino %d peer %d address %s\n",
> +			 ui->ue->id, socket_type_name(ui->ue->type),
> +			 tcp_state_name(ui->ue->state),
> +			 ui->ue->ino, ui->peer ? ui->peer->ue->ino : 0,
> +			 ui->name);
> +
> +		/*
> +		 * Drop any existing trash on the FS and mark the
> +		 * peer as a ghost one, so we will put it into
> +		 * fdstore to be able to connect into it even
> +		 * when the address is removed from the FS.
> +		 */
> +		unlink_sk(ui);

Hm. If a socket is a ghost one, we don't know the owner of the socket file
on the file system, so we don't have the right to delete it, do we?

> +		ui->flags |= USK_GHOST_FDSTORE;
> +	}
>  
>  	return 0;
>  }
> @@ -1873,6 +2111,15 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
>  		add_post_prepare_cb(&ui->peer_resolve);
>  	}
>  
> +	if (ui->ue->deleted) {
> +		if (!ui->name || !ui->ue->name.len || !ui->name[0]) {
> +			pr_err("No name present, ino %d\n", ui->ue->ino);
> +			return -1;
> +		}
> +
> +		list_add_tail(&ui->ghost_node, &unix_ghost_addr);
> +	}
> +
>  	list_add_tail(&ui->list, &unix_sockets);
>  	return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops);
>  }
> -- 
> 2.14.3
>
Cyrill Gorcunov June 4, 2018, 8:18 p.m.
On Mon, Jun 04, 2018 at 12:52:20PM -0700, Andrey Vagin wrote:
> > +
> > +			snprintf(path, sizeof(path), UNIX_GHOST_FMT, ui->name);
> > +			if (rename(ui->name, path)) {
> > +				ret = -errno;
> > +				pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s\n",
> > +					  ui->ue->id, ui->ue->ino, ui->name, path);
> > +				return ret;
> > +			}
> > +			ui->flags |= USK_GHOST_RENAMED;
> 
> Why do we use a global flag for this? Why we can't rename this file
> back after bind()?

Because once we have renamed it, we exit this routine and
start restoring file permissions on this socket; in other words, there
are going to be more manipulations on this address later.

> > +	pr_debug("ghost: Resolving addresses\n");
> > +
> > +	list_for_each_entry(ui, &unix_ghost_addr, ghost_node) {
> > +		pr_debug("ghost: id %#x type %s state %s ino %d peer %d address %s\n",
> > +			 ui->ue->id, socket_type_name(ui->ue->type),
> > +			 tcp_state_name(ui->ue->state),
> > +			 ui->ue->ino, ui->peer ? ui->peer->ue->ino : 0,
> > +			 ui->name);
> > +
> > +		/*
> > +		 * Drop any existing trash on the FS and mark the
> > +		 * peer as a ghost one, so we will put it into
> > +		 * fdstore to be able to connect into it even
> > +		 * when the address is removed from the FS.
> > +		 */
> > +		unlink_sk(ui);
> 
> Hm. If a socket is a ghost one, we don't know an owner of a socket file
> on a file system, so we don't have rights to delete it, do we?

We always do that: we have been cleaning up any existing socket that appeared
on the file system after the checkpoint. Actually, there should not
be any file. But to be fair, I don't remember the details of why
we started removing any existing trash in the first place; it was too
long ago.
Andrei Vagin June 4, 2018, 10:31 p.m.
On Mon, Jun 04, 2018 at 11:18:56PM +0300, Cyrill Gorcunov wrote:
> On Mon, Jun 04, 2018 at 12:52:20PM -0700, Andrey Vagin wrote:
> > > +
> > > +			snprintf(path, sizeof(path), UNIX_GHOST_FMT, ui->name);
> > > +			if (rename(ui->name, path)) {
> > > +				ret = -errno;
> > > +				pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s\n",
> > > +					  ui->ue->id, ui->ue->ino, ui->name, path);
> > > +				return ret;
> > > +			}
> > > +			ui->flags |= USK_GHOST_RENAMED;
> > 
> > Why do we use a global flag for this? Why we can't rename this file
> > back after bind()?
> 
> Because once we have it renamed we exit out of this routine and
> start restoring file permissions on this socket, iow, there gonna
> be more manipulations on this address later.
> 
> > > +	pr_debug("ghost: Resolving addresses\n");
> > > +
> > > +	list_for_each_entry(ui, &unix_ghost_addr, ghost_node) {
> > > +		pr_debug("ghost: id %#x type %s state %s ino %d peer %d address %s\n",
> > > +			 ui->ue->id, socket_type_name(ui->ue->type),
> > > +			 tcp_state_name(ui->ue->state),
> > > +			 ui->ue->ino, ui->peer ? ui->peer->ue->ino : 0,
> > > +			 ui->name);
> > > +
> > > +		/*
> > > +		 * Drop any existing trash on the FS and mark the
> > > +		 * peer as a ghost one, so we will put it into
> > > +		 * fdstore to be able to connect into it even
> > > +		 * when the address is removed from the FS.
> > > +		 */
> > > +		unlink_sk(ui);
> > 
> > Hm. If a socket is a ghost one, we don't know an owner of a socket file
> > on a file system, so we don't have rights to delete it, do we?
> 
> We always do that, been cleaning any existing socket appeared on
> the file system after the checkpoint. Actually there should not
> be any file. But to be fair I don't remember the details why
> > we start removing any existing trash in first place, been too
> long ago.

I don't understand why there is any special case for ghost sockets.