[1/4] sk-unix: Save mnt_id for bindmounted entries

Submitted by Cyrill Gorcunov on Aug. 8, 2017, 11:25 a.m.

Details

Message ID 1502191510-15586-2-git-send-email-gorcunov@openvz.org
State New
Series "sk-unix: Handle bindmounted dgram sockets"
Headers show

Commit Message

Cyrill Gorcunov Aug. 8, 2017, 11:25 a.m.
Mount points might be beindmount to some resources (say unix binded
sockets) thus when times come to do real bind mount call we need
to prepare appropriate resource first.

On dump procedure we walk over all bind-mounts and check if
the mountpoint is a unix socket saving the mnt_id into the image
then.

Note at moment we support only DGRAM closed sockets.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
 criu/cr-dump.c         |  3 +++
 criu/include/sockets.h |  1 +
 criu/sk-unix.c         | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++
 images/sk-unix.proto   |  5 ++++
 4 files changed, 78 insertions(+)

Patch hide | download patch | download mbox

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 06f3966c05cf..52857dc6c1c7 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1898,6 +1898,9 @@  int cr_dump_tasks(pid_t pid)
 	if (collect_namespaces(true) < 0)
 		goto err;
 
+	if (collect_unix_bindmounts() < 0)
+		goto err;
+
 	glob_imgset = cr_glob_imgset_open(O_DUMP);
 	if (!glob_imgset)
 		goto err;
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
index 9881d5bf3bc1..64673c5e5dc8 100644
--- a/criu/include/sockets.h
+++ b/criu/include/sockets.h
@@ -37,6 +37,7 @@  extern int collect_sockets(struct ns_id *);
 extern struct collect_image_info inet_sk_cinfo;
 extern struct collect_image_info unix_sk_cinfo;
 extern int fix_external_unix_sockets(void);
+extern int collect_unix_bindmounts(void);
 
 extern struct collect_image_info netlink_sk_cinfo;
 
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 765d9ae9bddc..bf013c5ca4de 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -86,6 +86,10 @@  struct unix_sk_desc {
 	struct list_head	peer_node;
 
 	UnixSkEntry		*ue;
+
+	/* To handle bindmounts over socks */
+	struct unix_diag_vfs	uv;		/* FIXME merge with rel_name_desc_t */
+	int			mnt_id;
 };
 
 static LIST_HEAD(unix_sockets);
@@ -314,6 +318,11 @@  static int dump_one_unix_fd(int lfd, u32 id, const struct fd_parms *p)
 	ue->opts	= skopts;
 	ue->uflags	= 0;
 
+	if (sk->mnt_id) {
+		ue->has_mnt_id	= true;
+		ue->mnt_id	= sk->mnt_id;
+	}
+
 	if (sk->rel_name) {
 		if (resolve_rel_name(sk, p))
 			goto err;
@@ -533,6 +542,8 @@  static int unix_process_name(struct unix_sk_desc *d, const struct unix_diag_msg
 		}
 
 		uv = RTA_DATA(tb[UNIX_DIAG_VFS]);
+		memcpy(&d->uv, uv, sizeof(d->uv));
+
 		if (name[0] != '/') {
 			/*
 			 * Relative names are be resolved later at first
@@ -787,6 +798,64 @@  int fix_external_unix_sockets(void)
 	return -1;
 }
 
+int collect_unix_bindmounts(void)
+{
+	int mntns_fd, old_ns = -1;
+	struct mount_info *mi;
+	struct stat st = {};
+	int ret = 0;
+
+	for (mi = mntinfo; mi; mi = mi->next) {
+		if (list_empty(&mi->mnt_bind))
+			continue;
+
+		mntns_fd = open_proc(mi->nsid->ns_pid, "ns/mnt");
+		if (mntns_fd < 0) {
+			pr_err("Can't open %d/ns/mnt\n", mi->nsid->ns_pid);
+			return -1;
+		}
+
+		if (switch_ns_by_fd(mntns_fd, &mnt_ns_desc, &old_ns)) {
+			pr_err("Can't switch mount ns for %d\n", mi->nsid->ns_pid);
+			return -1;
+		}
+
+		if (stat(mi->mountpoint, &st)) {
+			pr_perror("Can't stat on %s", mi->mountpoint);
+			return switch_ns_by_fd(old_ns, &mnt_ns_desc, NULL);
+		}
+
+		if (S_ISSOCK(st.st_mode)) {
+			struct unix_sk_desc *sk;
+
+			list_for_each_entry(sk, &unix_sockets, list) {
+				if (sk->uv.udiag_vfs_ino == (int)st.st_ino &&
+				    sk->uv.udiag_vfs_dev == (int)st.st_dev) {
+					pr_debug("Found sock s_dev %#x ino %#x bindmounted mnt_id %d %s\n",
+						 (int)st.st_dev, (int)st.st_ino, mi->mnt_id, mi->mountpoint);
+					sk->mnt_id = mi->mnt_id;
+					if (sk->type != SOCK_DGRAM && sk->state != TCP_CLOSE) {
+						pr_err("Unsupported bindmounted socket ino %#x at %s\n",
+						       (int)st.st_ino, mi->mountpoint);
+						ret = -1;
+						break;
+					}
+				}
+			}
+		}
+
+		if (switch_ns_by_fd(old_ns, &mnt_ns_desc, NULL)) {
+			pr_err("Can't switch mount ns back from %d\n", mi->nsid->ns_pid);
+			return -1;
+		}
+
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
 struct unix_sk_info {
 	UnixSkEntry *ue;
 	struct list_head list;
diff --git a/images/sk-unix.proto b/images/sk-unix.proto
index 2a33fe220d1e..9186e23f4062 100644
--- a/images/sk-unix.proto
+++ b/images/sk-unix.proto
@@ -50,4 +50,9 @@  message unix_sk_entry {
 	optional bool			deleted		= 15;
 
 	optional uint32			ns_id		= 16;
+
+	/*
+	 * Bind-mounted sockets.
+	 */
+	optional uint32			mnt_id		= 17;
 }

Comments

root Aug. 9, 2017, 11:37 p.m.
On Tue, Aug 08, 2017 at 02:25:07PM +0300, Cyrill Gorcunov wrote:
> Mount points might be beindmount to some resources (say unix binded
> sockets) thus when times come to do real bind mount call we need
> to prepare appropriate resource first.
> 
> On dump procedure we walk over all bind-mounts and check if
> the mountpoint is a unix socket saving the mnt_id into the image
> then.
> 
> Note at moment we support only DGRAM closed sockets.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> ---
>  criu/cr-dump.c         |  3 +++
>  criu/include/sockets.h |  1 +
>  criu/sk-unix.c         | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  images/sk-unix.proto   |  5 ++++
>  4 files changed, 78 insertions(+)
> 
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 06f3966c05cf..52857dc6c1c7 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1898,6 +1898,9 @@ int cr_dump_tasks(pid_t pid)
>  	if (collect_namespaces(true) < 0)
>  		goto err;
>  
> +	if (collect_unix_bindmounts() < 0)
> +		goto err;
> +
>  	glob_imgset = cr_glob_imgset_open(O_DUMP);
>  	if (!glob_imgset)
>  		goto err;
> diff --git a/criu/include/sockets.h b/criu/include/sockets.h
> index 9881d5bf3bc1..64673c5e5dc8 100644
> --- a/criu/include/sockets.h
> +++ b/criu/include/sockets.h
> @@ -37,6 +37,7 @@ extern int collect_sockets(struct ns_id *);
>  extern struct collect_image_info inet_sk_cinfo;
>  extern struct collect_image_info unix_sk_cinfo;
>  extern int fix_external_unix_sockets(void);
> +extern int collect_unix_bindmounts(void);
>  
>  extern struct collect_image_info netlink_sk_cinfo;
>  
> diff --git a/criu/sk-unix.c b/criu/sk-unix.c
> index 765d9ae9bddc..bf013c5ca4de 100644
> --- a/criu/sk-unix.c
> +++ b/criu/sk-unix.c
> @@ -86,6 +86,10 @@ struct unix_sk_desc {
>  	struct list_head	peer_node;
>  
>  	UnixSkEntry		*ue;
> +
> +	/* To handle bindmounts over socks */
> +	struct unix_diag_vfs	uv;		/* FIXME merge with rel_name_desc_t */
> +	int			mnt_id;
>  };
>  
>  static LIST_HEAD(unix_sockets);
> @@ -314,6 +318,11 @@ static int dump_one_unix_fd(int lfd, u32 id, const struct fd_parms *p)
>  	ue->opts	= skopts;
>  	ue->uflags	= 0;
>  
> +	if (sk->mnt_id) {
> +		ue->has_mnt_id	= true;
> +		ue->mnt_id	= sk->mnt_id;
> +	}
> +
>  	if (sk->rel_name) {
>  		if (resolve_rel_name(sk, p))
>  			goto err;
> @@ -533,6 +542,8 @@ static int unix_process_name(struct unix_sk_desc *d, const struct unix_diag_msg
>  		}
>  
>  		uv = RTA_DATA(tb[UNIX_DIAG_VFS]);
> +		memcpy(&d->uv, uv, sizeof(d->uv));
> +
>  		if (name[0] != '/') {
>  			/*
>  			 * Relative names are be resolved later at first
> @@ -787,6 +798,64 @@ int fix_external_unix_sockets(void)
>  	return -1;
>  }
>  
> +int collect_unix_bindmounts(void)
> +{
> +	int mntns_fd, old_ns = -1;
> +	struct mount_info *mi;
> +	struct stat st = {};
> +	int ret = 0;
> +
> +	for (mi = mntinfo; mi; mi = mi->next) {
> +		if (list_empty(&mi->mnt_bind))
> +			continue;
> +
> +		mntns_fd = open_proc(mi->nsid->ns_pid, "ns/mnt");
> +		if (mntns_fd < 0) {
> +			pr_err("Can't open %d/ns/mnt\n", mi->nsid->ns_pid);
> +			return -1;
> +		}
> +
> +		if (switch_ns_by_fd(mntns_fd, &mnt_ns_desc, &old_ns)) {

can we use open_mountpoint() and statat? A path to a socket may be
over-mounted.

> +			pr_err("Can't switch mount ns for %d\n", mi->nsid->ns_pid);
> +			return -1;
> +		}
> +
> +		if (stat(mi->mountpoint, &st)) {
> +			pr_perror("Can't stat on %s", mi->mountpoint);
> +			return switch_ns_by_fd(old_ns, &mnt_ns_desc, NULL);
> +		}
> +
> +		if (S_ISSOCK(st.st_mode)) {
> +			struct unix_sk_desc *sk;
> +
> +			list_for_each_entry(sk, &unix_sockets, list) {
> +				if (sk->uv.udiag_vfs_ino == (int)st.st_ino &&
> +				    sk->uv.udiag_vfs_dev == (int)st.st_dev) {
> +					pr_debug("Found sock s_dev %#x ino %#x bindmounted mnt_id %d %s\n",
> +						 (int)st.st_dev, (int)st.st_ino, mi->mnt_id, mi->mountpoint);
> +					sk->mnt_id = mi->mnt_id;

what if one socket will be mounted into two or more places?

> +					if (sk->type != SOCK_DGRAM && sk->state != TCP_CLOSE) {
> +						pr_err("Unsupported bindmounted socket ino %#x at %s\n",
> +						       (int)st.st_ino, mi->mountpoint);
> +						ret = -1;
> +						break;
> +					}

					break;

> +				}
> +			}
> +		}
> +
> +		if (switch_ns_by_fd(old_ns, &mnt_ns_desc, NULL)) {

restore_ns

> +			pr_err("Can't switch mount ns back from %d\n", mi->nsid->ns_pid);
> +			return -1;
> +		}
> +
> +		if (ret)
> +			break;
> +	}
> +
> +	return ret;
> +}
> +
>  struct unix_sk_info {
>  	UnixSkEntry *ue;
>  	struct list_head list;
> diff --git a/images/sk-unix.proto b/images/sk-unix.proto
> index 2a33fe220d1e..9186e23f4062 100644
> --- a/images/sk-unix.proto
> +++ b/images/sk-unix.proto
> @@ -50,4 +50,9 @@ message unix_sk_entry {
>  	optional bool			deleted		= 15;
>  
>  	optional uint32			ns_id		= 16;
> +
> +	/*
> +	 * Bind-mounted sockets.
> +	 */
> +	optional uint32			mnt_id		= 17;
>  }
> -- 
> 2.7.5
> 
> _______________________________________________
> CRIU mailing list
> CRIU@openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
Cyrill Gorcunov Aug. 10, 2017, 7 a.m.
On Wed, Aug 09, 2017 at 04:37:51PM -0700, Andrei Vagin wrote:
...
> > +
> > +		if (switch_ns_by_fd(mntns_fd, &mnt_ns_desc, &old_ns)) {
> 
> can we use open_mountpoint() and statat? A path to a socket may be
> over-mounted.

I'm not sure if we're allowed to call it on "checkpoint" state, but
will figure out, thanks!

> > +
> > +			list_for_each_entry(sk, &unix_sockets, list) {
> > +				if (sk->uv.udiag_vfs_ino == (int)st.st_ino &&
> > +				    sk->uv.udiag_vfs_dev == (int)st.st_dev) {
> > +					pr_debug("Found sock s_dev %#x ino %#x bindmounted mnt_id %d %s\n",
> > +						 (int)st.st_dev, (int)st.st_ino, mi->mnt_id, mi->mountpoint);
> > +					sk->mnt_id = mi->mnt_id;
> 
> what if one socket will be mounted into two or more places?

Then its mnt_id will be the last matched, true it may cause problems.
Will rework thanks!

> 
> > +					if (sk->type != SOCK_DGRAM && sk->state != TCP_CLOSE) {
> > +						pr_err("Unsupported bindmounted socket ino %#x at %s\n",
> > +						       (int)st.st_ino, mi->mountpoint);
> > +						ret = -1;
> > +						break;
> > +					}
> 
> 					break;
> 
> > +				}
> > +			}
> > +		}
> > +
> > +		if (switch_ns_by_fd(old_ns, &mnt_ns_desc, NULL)) {
> 
> restore_ns

Thanks!
Cyrill Gorcunov Aug. 12, 2017, 9:40 a.m.
On Wed, Aug 09, 2017 at 04:37:51PM -0700, Andrei Vagin wrote:
> > +
> > +		if (switch_ns_by_fd(mntns_fd, &mnt_ns_desc, &old_ns)) {
> 
> can we use open_mountpoint() and statat? A path to a socket may be
> over-mounted.

Doesn't work

(00.021241) Error (criu/mount.c:1037): mnt: Can't open ./zdtm/static/bind-mount-unix.test/criu-bind-log: No such device or address
(00.021246) Error (criu/sk-unix.c:816): sk unix: Can't open ./zdtm/static/bind-mount-unix.test/criu-bind-log
Andrei Vagin Aug. 29, 2017, 11:24 p.m.
On Tue, Aug 08, 2017 at 02:25:07PM +0300, Cyrill Gorcunov wrote:
> Mount points might be beindmount to some resources (say unix binded
> sockets) thus when times come to do real bind mount call we need
> to prepare appropriate resource first.
> 
> On dump procedure we walk over all bind-mounts and check if
> the mountpoint is a unix socket saving the mnt_id into the image
> then.
> 
> Note at moment we support only DGRAM closed sockets.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> ---
>  criu/cr-dump.c         |  3 +++
>  criu/include/sockets.h |  1 +
>  criu/sk-unix.c         | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  images/sk-unix.proto   |  5 ++++
>  4 files changed, 78 insertions(+)
> 
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 06f3966c05cf..52857dc6c1c7 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1898,6 +1898,9 @@ int cr_dump_tasks(pid_t pid)
>  	if (collect_namespaces(true) < 0)
>  		goto err;
>  
> +	if (collect_unix_bindmounts() < 0)
> +		goto err;
> +
>  	glob_imgset = cr_glob_imgset_open(O_DUMP);
>  	if (!glob_imgset)
>  		goto err;
> diff --git a/criu/include/sockets.h b/criu/include/sockets.h
> index 9881d5bf3bc1..64673c5e5dc8 100644
> --- a/criu/include/sockets.h
> +++ b/criu/include/sockets.h
> @@ -37,6 +37,7 @@ extern int collect_sockets(struct ns_id *);
>  extern struct collect_image_info inet_sk_cinfo;
>  extern struct collect_image_info unix_sk_cinfo;
>  extern int fix_external_unix_sockets(void);
> +extern int collect_unix_bindmounts(void);
>  
>  extern struct collect_image_info netlink_sk_cinfo;
>  
> diff --git a/criu/sk-unix.c b/criu/sk-unix.c
> index 765d9ae9bddc..bf013c5ca4de 100644
> --- a/criu/sk-unix.c
> +++ b/criu/sk-unix.c
> @@ -86,6 +86,10 @@ struct unix_sk_desc {
>  	struct list_head	peer_node;
>  
>  	UnixSkEntry		*ue;
> +
> +	/* To handle bindmounts over socks */
> +	struct unix_diag_vfs	uv;		/* FIXME merge with rel_name_desc_t */
> +	int			mnt_id;
>  };
>  
>  static LIST_HEAD(unix_sockets);
> @@ -314,6 +318,11 @@ static int dump_one_unix_fd(int lfd, u32 id, const struct fd_parms *p)
>  	ue->opts	= skopts;
>  	ue->uflags	= 0;
>  
> +	if (sk->mnt_id) {
> +		ue->has_mnt_id	= true;
> +		ue->mnt_id	= sk->mnt_id;
> +	}
> +
>  	if (sk->rel_name) {
>  		if (resolve_rel_name(sk, p))
>  			goto err;
> @@ -533,6 +542,8 @@ static int unix_process_name(struct unix_sk_desc *d, const struct unix_diag_msg
>  		}
>  
>  		uv = RTA_DATA(tb[UNIX_DIAG_VFS]);
> +		memcpy(&d->uv, uv, sizeof(d->uv));
> +
>  		if (name[0] != '/') {
>  			/*
>  			 * Relative names are be resolved later at first
> @@ -787,6 +798,64 @@ int fix_external_unix_sockets(void)
>  	return -1;
>  }
>  
> +int collect_unix_bindmounts(void)
> +{
> +	int mntns_fd, old_ns = -1;
> +	struct mount_info *mi;
> +	struct stat st = {};
> +	int ret = 0;
> +
> +	for (mi = mntinfo; mi; mi = mi->next) {
> +		if (list_empty(&mi->mnt_bind))
> +			continue;
> +
> +		mntns_fd = open_proc(mi->nsid->ns_pid, "ns/mnt");
> +		if (mntns_fd < 0) {
> +			pr_err("Can't open %d/ns/mnt\n", mi->nsid->ns_pid);
> +			return -1;
> +		}
> +
> +		if (switch_ns_by_fd(mntns_fd, &mnt_ns_desc, &old_ns)) {
> +			pr_err("Can't switch mount ns for %d\n", mi->nsid->ns_pid);
> +			return -1;
> +		}
> +
> +		if (stat(mi->mountpoint, &st)) {
> +			pr_perror("Can't stat on %s", mi->mountpoint);
> +			return switch_ns_by_fd(old_ns, &mnt_ns_desc, NULL);
> +		}
> +
> +		if (S_ISSOCK(st.st_mode)) {
> +			struct unix_sk_desc *sk;
> +
> +			list_for_each_entry(sk, &unix_sockets, list) {
> +				if (sk->uv.udiag_vfs_ino == (int)st.st_ino &&
> +				    sk->uv.udiag_vfs_dev == (int)st.st_dev) {
> +					pr_debug("Found sock s_dev %#x ino %#x bindmounted mnt_id %d %s\n",
> +						 (int)st.st_dev, (int)st.st_ino, mi->mnt_id, mi->mountpoint);
> +					sk->mnt_id = mi->mnt_id;
> +					if (sk->type != SOCK_DGRAM && sk->state != TCP_CLOSE) {
> +						pr_err("Unsupported bindmounted socket ino %#x at %s\n",
> +						       (int)st.st_ino, mi->mountpoint);
> +						ret = -1;
> +						break;
> +					}
> +				}
> +			}

You have to report an error, if you can't find a socker.

> +		}
> +
> +		if (switch_ns_by_fd(old_ns, &mnt_ns_desc, NULL)) {
> +			pr_err("Can't switch mount ns back from %d\n", mi->nsid->ns_pid);
> +			return -1;
> +		}
> +
> +		if (ret)
> +			break;
> +	}
> +
> +	return ret;
> +}
> +
>  struct unix_sk_info {
>  	UnixSkEntry *ue;
>  	struct list_head list;
> diff --git a/images/sk-unix.proto b/images/sk-unix.proto
> index 2a33fe220d1e..9186e23f4062 100644
> --- a/images/sk-unix.proto
> +++ b/images/sk-unix.proto
> @@ -50,4 +50,9 @@ message unix_sk_entry {
>  	optional bool			deleted		= 15;
>  
>  	optional uint32			ns_id		= 16;
> +
> +	/*
> +	 * Bind-mounted sockets.
> +	 */
> +	optional uint32			mnt_id		= 17;

A socket can be bind-mounted in a few places.

>  }
> -- 
> 2.7.5
>
Cyrill Gorcunov Aug. 30, 2017, 6:43 a.m.
On Tue, Aug 29, 2017 at 04:24:04PM -0700, Andrey Vagin wrote:
...
> > +					sk->mnt_id = mi->mnt_id;
> > +					if (sk->type != SOCK_DGRAM && sk->state != TCP_CLOSE) {
> > +						pr_err("Unsupported bindmounted socket ino %#x at %s\n",
> > +						       (int)st.st_ino, mi->mountpoint);
> > +						ret = -1;
> > +						break;
> > +					}
> > +				}
> > +			}
> 
> You have to report an error, if you can't find a socker.

True, I'll update it on top.

> > +
> > +	/*
> > +	 * Bind-mounted sockets.
> > +	 */
> > +	optional uint32			mnt_id		= 17;
> 
> A socket can be bind-mounted in a few places.

Already addressed in v2 I've sent:

+                                       if (sk->mnt_id) {
+                                               pr_err("Many bindings for sockets are not yet supported %#x at %s\n",
+                                                      (int)st.st_ino, mi->mountpoint);
+                                               ret = -1;
+                                       } else
+                                               sk->mnt_id = mi->mnt_id;
+                                       if (sk->type != SOCK_DGRAM && sk->state != TCP_CLOSE) {
+                                               pr_err("Unsupported bindmounted socket ino %#x at %s\n",
+                                                      (int)st.st_ino, mi->mountpoint);
+                                               ret = -1;
+                                       }
+                                       break;