[06/19] unix: Collect bindmounted unix sockets

Submitted by Cyrill Gorcunov on Sept. 14, 2018, 2:08 p.m.

Details

Message ID 20180914140842.6462-7-gorcunov@gmail.com
State New
Series "unix: Add support for bindmounted dgram sockets"
Headers show

Commit Message

Cyrill Gorcunov Sept. 14, 2018, 2:08 p.m.
Mount points might be bind-mounted to some resources (say, bound unix
sockets), thus when the time comes to do the real bind mount call we need
to prepare the appropriate resource first.

During the dump procedure we walk over all bind-mounts and check whether
the mountpoint is a unix socket; if it is, we save the mnt_id into
the image. To distinguish such sockets from others
we use UNIX_UFLAGS__BINDMOUNT flag.

Note that at the moment we support only closed DGRAM sockets.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 criu/cr-dump.c         |  3 ++
 criu/include/sockets.h |  1 +
 criu/sk-unix.c         | 91 +++++++++++++++++++++++++++++++++++++++---
 images/sk-unix.proto   |  1 +
 4 files changed, 91 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index cbc72f01614d..79922e377279 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1839,6 +1839,9 @@  int cr_dump_tasks(pid_t pid)
 	if (collect_namespaces(true) < 0)
 		goto err;
 
+	if (collect_unix_bindmounts() < 0)
+		goto err;
+
 	glob_imgset = cr_glob_imgset_open(O_DUMP);
 	if (!glob_imgset)
 		goto err;
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
index 371d1a5095b7..d56e25d07c12 100644
--- a/criu/include/sockets.h
+++ b/criu/include/sockets.h
@@ -41,6 +41,7 @@  extern int add_fake_unix_queuers(void);
 extern int fix_external_unix_sockets(void);
 extern int prepare_scms(void);
 extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids);
+extern int collect_unix_bindmounts(void);
 
 extern struct collect_image_info netlink_sk_cinfo;
 
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 78040a5c10fe..87f652689331 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -74,6 +74,9 @@  struct unix_sk_desc {
 	unsigned char		shutdown;
 	bool			deleted;
 
+	bool			bindmount;
+	unsigned int		mnt_id;
+
 	mode_t			mode;
 	uid_t			uid;
 	gid_t			gid;
@@ -391,6 +394,9 @@  static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p)
 	if (unix_resolve_name(lfd, id, sk, ue, p))
 		goto err;
 
+	if (sk->bindmount)
+		ue->uflags |= UNIX_UFLAGS__BINDMOUNT;
+
 	/*
 	 * Check if this socket is connected to criu service.
 	 * Dump it like closed one and mark it for restore.
@@ -579,11 +585,16 @@  static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d,
 	if (d->namelen == 0 || name[0] == '\0')
 		return 0;
 
-	if (kdat.sk_unix_file && (root_ns_mask & CLONE_NEWNS)) {
-		if (get_mnt_id(lfd, &mnt_id))
-			return -1;
-		ue->mnt_id = mnt_id;
-		ue->has_mnt_id = mnt_id;
+	if (!d->bindmount) {
+		if (kdat.sk_unix_file && (root_ns_mask & CLONE_NEWNS)) {
+			if (get_mnt_id(lfd, &mnt_id))
+				return -1;
+			ue->mnt_id = mnt_id;
+			ue->has_mnt_id = mnt_id;
+		}
+	} else {
+		ue->mnt_id = d->mnt_id;
+		ue->has_mnt_id = true;
 	}
 
 	if (ue->mnt_id >= 0)
@@ -715,6 +726,7 @@  static int unix_collect_one(const struct unix_diag_msg *m,
 	INIT_LIST_HEAD(&d->peer_list);
 	INIT_LIST_HEAD(&d->peer_node);
 	d->fd = -1;
+	d->mnt_id = -1;
 
 	if (tb[UNIX_DIAG_SHUTDOWN])
 		d->shutdown = nla_get_u8(tb[UNIX_DIAG_SHUTDOWN]);
@@ -910,6 +922,75 @@  int fix_external_unix_sockets(void)
 	return -1;
 }
 
+int collect_unix_bindmounts(void)
+{
+	struct mount_info *mi;
+	struct stat st = {};
+	int ns_old = -1;
+	int ret = 0;
+
+	pr_debug("Collecting unix bindmounts\n");
+
+	for (mi = mntinfo; mi; mi = mi->next) {
+		if (list_empty(&mi->mnt_bind))
+			continue;
+
+		if (switch_ns(mi->nsid->ns_pid, &mnt_ns_desc, &ns_old) < 0) {
+			pr_err("Can't switch ns to mnt_id %d", mi->mnt_id);
+			if (restore_ns(ns_old, &mnt_ns_desc)) {
+				pr_err("Can't switch mount ns back from mnt_id %d\n", mi->mnt_id);
+				return -1;
+			}
+			return -1;
+		}
+
+		if (stat(mi->mountpoint, &st)) {
+			pr_warn("Can't stat on %s: %m\n", mi->mountpoint);
+			if (restore_ns(ns_old, &mnt_ns_desc)) {
+				pr_err("Can't switch mount ns back from mnt_id %d\n", mi->mnt_id);
+				return -1;
+			}
+			continue;
+		}
+
+		if (S_ISSOCK(st.st_mode)) {
+			struct unix_sk_desc *sk;
+
+			list_for_each_entry(sk, &unix_sockets, list) {
+				if (sk->vfs_ino == (int)st.st_ino &&
+				    sk->vfs_dev == (int)st.st_dev) {
+					pr_debug("Found sock s_dev %#x ino %d bindmounted mnt_id %d %s\n",
+						 (int)st.st_dev, (int)st.st_ino, mi->mnt_id, mi->mountpoint);
+					if (sk->bindmount) {
+						pr_err("Many bindings for sockets are not yet supported %d at %s\n",
+						       (int)st.st_ino, mi->mountpoint);
+						ret = -1;
+					} else {
+						sk->mnt_id = mi->mnt_id;
+						sk->bindmount = true;
+					}
+					if (sk->type != SOCK_DGRAM && sk->state != TCP_CLOSE) {
+						pr_err("Unsupported bindmounted socket ino %d at %s\n",
+						       (int)st.st_ino, mi->mountpoint);
+						ret = -1;
+					}
+					break;
+				}
+			}
+		}
+
+		if (restore_ns(ns_old, &mnt_ns_desc)) {
+			pr_err("Can't switch mount ns back from %d\n", mi->nsid->ns_pid);
+			return -1;
+		}
+
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
 struct unix_sk_info {
 	UnixSkEntry		*ue;
 	struct list_head	list;
diff --git a/images/sk-unix.proto b/images/sk-unix.proto
index b7173eac9119..feb51328390f 100644
--- a/images/sk-unix.proto
+++ b/images/sk-unix.proto
@@ -18,6 +18,7 @@  enum unix_uflags {
 	SERVICE		= 2;
 	CALLBACK	= 4;
 	INHERIT		= 8;
+	BINDMOUNT	= 16;
 }
 
 message unix_sk_entry {

Comments

Andrey Vagin Sept. 18, 2018, 11:59 p.m.
On Fri, Sep 14, 2018 at 05:08:29PM +0300, Cyrill Gorcunov wrote:
> Mount points might be beindmount to some resources (say unix binded
> sockets) thus when times come to do real bind mount call we need
> to prepare appropriate resource first.
> 
> On dump procedure we walk over all bind-mounts and check if
> the mountpoint is a unix socket saving the mnt_id into
> the image then. To distinguish such sockets from others
> we use UNIX_UFLAGS__BINDMOUNT flag.
> 
> Note at moment we support only DGRAM closed sockets.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
> ---
>  criu/cr-dump.c         |  3 ++
>  criu/include/sockets.h |  1 +
>  criu/sk-unix.c         | 91 +++++++++++++++++++++++++++++++++++++++---
>  images/sk-unix.proto   |  1 +
>  4 files changed, 91 insertions(+), 5 deletions(-)
> 
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index cbc72f01614d..79922e377279 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1839,6 +1839,9 @@ int cr_dump_tasks(pid_t pid)
>  	if (collect_namespaces(true) < 0)
>  		goto err;
>  
> +	if (collect_unix_bindmounts() < 0)
> +		goto err;
> +
>  	glob_imgset = cr_glob_imgset_open(O_DUMP);
>  	if (!glob_imgset)
>  		goto err;
> diff --git a/criu/include/sockets.h b/criu/include/sockets.h
> index 371d1a5095b7..d56e25d07c12 100644
> --- a/criu/include/sockets.h
> +++ b/criu/include/sockets.h
> @@ -41,6 +41,7 @@ extern int add_fake_unix_queuers(void);
>  extern int fix_external_unix_sockets(void);
>  extern int prepare_scms(void);
>  extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids);
> +extern int collect_unix_bindmounts(void);
>  
>  extern struct collect_image_info netlink_sk_cinfo;
>  
> diff --git a/criu/sk-unix.c b/criu/sk-unix.c
> index 78040a5c10fe..87f652689331 100644
> --- a/criu/sk-unix.c
> +++ b/criu/sk-unix.c
> @@ -74,6 +74,9 @@ struct unix_sk_desc {
>  	unsigned char		shutdown;
>  	bool			deleted;
>  
> +	bool			bindmount;
> +	unsigned int		mnt_id;
> +
>  	mode_t			mode;
>  	uid_t			uid;
>  	gid_t			gid;
> @@ -391,6 +394,9 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p)
>  	if (unix_resolve_name(lfd, id, sk, ue, p))
>  		goto err;
>  
> +	if (sk->bindmount)
> +		ue->uflags |= UNIX_UFLAGS__BINDMOUNT;
> +
>  	/*
>  	 * Check if this socket is connected to criu service.
>  	 * Dump it like closed one and mark it for restore.
> @@ -579,11 +585,16 @@ static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d,
>  	if (d->namelen == 0 || name[0] == '\0')
>  		return 0;
>  
> -	if (kdat.sk_unix_file && (root_ns_mask & CLONE_NEWNS)) {
> -		if (get_mnt_id(lfd, &mnt_id))
> -			return -1;
> -		ue->mnt_id = mnt_id;
> -		ue->has_mnt_id = mnt_id;
> +	if (!d->bindmount) {
> +		if (kdat.sk_unix_file && (root_ns_mask & CLONE_NEWNS)) {
> +			if (get_mnt_id(lfd, &mnt_id))
> +				return -1;
> +			ue->mnt_id = mnt_id;
> +			ue->has_mnt_id = mnt_id;
> +		}
> +	} else {
> +		ue->mnt_id = d->mnt_id;
> +		ue->has_mnt_id = true;
>  	}
>  
>  	if (ue->mnt_id >= 0)
> @@ -715,6 +726,7 @@ static int unix_collect_one(const struct unix_diag_msg *m,
>  	INIT_LIST_HEAD(&d->peer_list);
>  	INIT_LIST_HEAD(&d->peer_node);
>  	d->fd = -1;
> +	d->mnt_id = -1;
>  
>  	if (tb[UNIX_DIAG_SHUTDOWN])
>  		d->shutdown = nla_get_u8(tb[UNIX_DIAG_SHUTDOWN]);
> @@ -910,6 +922,75 @@ int fix_external_unix_sockets(void)
>  	return -1;
>  }
>  
> +int collect_unix_bindmounts(void)
> +{
> +	struct mount_info *mi;
> +	struct stat st = {};
> +	int ns_old = -1;
> +	int ret = 0;
> +
> +	pr_debug("Collecting unix bindmounts\n");
> +
> +	for (mi = mntinfo; mi; mi = mi->next) {
> +		if (list_empty(&mi->mnt_bind))
> +			continue;
> +
> +		if (switch_ns(mi->nsid->ns_pid, &mnt_ns_desc, &ns_old) < 0) {
> +			pr_err("Can't switch ns to mnt_id %d", mi->mnt_id);
> +			if (restore_ns(ns_old, &mnt_ns_desc)) {
> +				pr_err("Can't switch mount ns back from mnt_id %d\n", mi->mnt_id);
> +				return -1;
> +			}

I think we can move this restore_ns to the end under the err label.
> +			return -1;
> +		}
> +
> +		if (stat(mi->mountpoint, &st)) {

can we use fstatat(mi->nsid->mnt.root, mi->mountpoint) here?


> +			pr_warn("Can't stat on %s: %m\n", mi->mountpoint);
> +			if (restore_ns(ns_old, &mnt_ns_desc)) {
> +				pr_err("Can't switch mount ns back from mnt_id %d\n", mi->mnt_id);
> +				return -1;
> +			}
> +			continue;
> +		}
> +
> +		if (S_ISSOCK(st.st_mode)) {
> +			struct unix_sk_desc *sk;
> +
> +			list_for_each_entry(sk, &unix_sockets, list) {

				if (sk->vfs_ino != (int)st.st_ino)
					continue;
...

> +				if (sk->vfs_ino == (int)st.st_ino &&
> +				    sk->vfs_dev == (int)st.st_dev) {
> +					pr_debug("Found sock s_dev %#x ino %d bindmounted mnt_id %d %s\n",
> +						 (int)st.st_dev, (int)st.st_ino, mi->mnt_id, mi->mountpoint);

too many indents

> +					if (sk->bindmount) {
> +						pr_err("Many bindings for sockets are not yet supported %d at %s\n",
> +						       (int)st.st_ino, mi->mountpoint);
> +						ret = -1;
> +					} else {
> +						sk->mnt_id = mi->mnt_id;
> +						sk->bindmount = true;
> +					}
> +					if (sk->type != SOCK_DGRAM && sk->state != TCP_CLOSE) {
> +						pr_err("Unsupported bindmounted socket ino %d at %s\n",
> +						       (int)st.st_ino, mi->mountpoint);
> +						ret = -1;
> +					}
> +					break;
> +				}
> +			}
> +		}
> +
> +		if (restore_ns(ns_old, &mnt_ns_desc)) {

restore_ns() should be called only once before exiting from this func

> +			pr_err("Can't switch mount ns back from %d\n", mi->nsid->ns_pid);
> +			return -1;
> +		}
> +
> +		if (ret)
> +			break;
> +	}
> +
> +	return ret;
> +}
> +
>  struct unix_sk_info {
>  	UnixSkEntry		*ue;
>  	struct list_head	list;
> diff --git a/images/sk-unix.proto b/images/sk-unix.proto
> index b7173eac9119..feb51328390f 100644
> --- a/images/sk-unix.proto
> +++ b/images/sk-unix.proto
> @@ -18,6 +18,7 @@ enum unix_uflags {
>  	SERVICE		= 2;
>  	CALLBACK	= 4;
>  	INHERIT		= 8;
> +	BINDMOUNT	= 16;
>  }
>  
>  message unix_sk_entry {
> -- 
> 2.17.1
>
Cyrill Gorcunov Sept. 19, 2018, 9:10 a.m.
On Tue, Sep 18, 2018 at 04:59:02PM -0700, Andrey Vagin wrote:
> > +
> > +		if (switch_ns(mi->nsid->ns_pid, &mnt_ns_desc, &ns_old) < 0) {
> > +			pr_err("Can't switch ns to mnt_id %d", mi->mnt_id);
> > +			if (restore_ns(ns_old, &mnt_ns_desc)) {
> > +				pr_err("Can't switch mount ns back from mnt_id %d\n", mi->mnt_id);
> > +				return -1;
> > +			}
> 
> I think we can move this restore_ns to the end under the err label.

Will update

> > +			return -1;
> > +		}
> > +
> > +		if (stat(mi->mountpoint, &st)) {
> 
> can we use fstatat(mi->nsid->mnt.root, mi->mountpoint) here?

Nope.

(00.031787) unix: Collecting unix bindmounts
(00.031819) Warn  (criu/sk-unix.c:949): unix: Can't stat on ./zdtm/static/bind-mount-unix.test/criu-bind-log: Not a directory
(00.031845) Warn  (criu/sk-unix.c:949): unix: Can't stat on ./zdtm/static/bind-mount-unix.test: Not a directory

> ...
> 
> > +				if (sk->vfs_ino == (int)st.st_ino &&
> > +				    sk->vfs_dev == (int)st.st_dev) {
> > +					pr_debug("Found sock s_dev %#x ino %d bindmounted mnt_id %d %s\n",
> > +						 (int)st.st_dev, (int)st.st_ino, mi->mnt_id, mi->mountpoint);
> 
> too many indents

ok, will rework

> > +		if (restore_ns(ns_old, &mnt_ns_desc)) {
> 
> restore_ns() should be called only once before exiting from this func

ok

	Cyrill