[v3,3/4] mount: Forced mount unmounted binfmt_misc to do not lost its content

Submitted by Kirill Tkhai on June 29, 2016, 11:49 a.m.

Details

Message ID 146720096501.2821.4485538828063811488.stgit@pro
State Rejected
Series "Save content of unmounted binfmt_misc"
Headers show

Commit Message

Kirill Tkhai June 29, 2016, 11:49 a.m.
Umount does not remove binfmt_misc content. If it's mounted once again,
the same entries remain registered.

Criu does not dump content of umounted binfmt_misc. So, after C/R we
lose it at all.

This patch forces mounting of unmounted binfmt_misc before we collect
mountpoints. If it's unmounted, we mount it back and add this mount
to the list of forced mounted mountpoints. Next patch need this
list to mark the mount in special way in dump image.

v2: Print error in case of umount() fail.
    Move add_forced_mount() to another patch.
v3: Close binfmt_misc dir before its umount().

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 criu/cr-dump.c       |   13 ++++++++++++
 criu/include/mount.h |    1 +
 criu/mount.c         |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+)

Patch hide | download patch | download mbox

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 7609544..90da57b 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -747,6 +747,16 @@  static int dump_task_core_all(struct parasite_ctl *ctl,
 	return ret;
 }
 
+static int prepare_dump__tasks_freezed(void)
+{
+	int ret;
+
+	/* Tasks freezed, so we do not race with systemd's autofs unmounter */
+	ret = try_mount_binfmt_misc(root_item->pid.real);
+
+	return ret;
+}
+
 static int collect_pstree_ids_predump(void)
 {
 	struct pstree_item *item;
@@ -1682,6 +1692,9 @@  int cr_dump_tasks(pid_t pid)
 	if (collect_pstree())
 		goto err;
 
+	if (prepare_dump__tasks_freezed())
+		goto err;
+
 	if (collect_pstree_ids())
 		goto err;
 
diff --git a/criu/include/mount.h b/criu/include/mount.h
index 423b6d0..bd15468 100644
--- a/criu/include/mount.h
+++ b/criu/include/mount.h
@@ -129,4 +129,5 @@  extern int read_mnt_ns_img(void);
 extern void cleanup_mnt_ns(void);
 extern void cleanup_forced_mounts(void);
 
+extern int try_mount_binfmt_misc(pid_t pid);
 #endif /* __CR_MOUNT_H__ */
diff --git a/criu/mount.c b/criu/mount.c
index 4a288e4..df8a87d 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -3761,4 +3761,60 @@  void cleanup_forced_mounts(void)
 	}
 }
 
+#define BINFMT_MISC_HOME "/proc/sys/fs/binfmt_misc"
+
+int try_mount_binfmt_misc(pid_t pid)
+{
+	int num, mnt_fd, ret, exit_code = -1;
+	struct dirent *de;
+	DIR *dir;
+
+	ret = switch_ns(pid, &mnt_ns_desc, &mnt_fd);
+	if (ret < 0) {
+		pr_err("Can't switch mnt_ns\n");
+		goto out;
+	}
+
+	ret = mount("binfmt_misc", BINFMT_MISC_HOME, "binfmt_misc", 0, NULL);
+	if (ret < 0) {
+		if (errno == EPERM) {
+			pr_info("Can't mount binfmt_misc: EPERM. Running in user_ns?\n");
+			exit_code = 0;
+			goto restore_ns;
+		}
+		if (errno != EBUSY && errno != ENODEV && errno != ENOENT) {
+			pr_perror("Can't mount binfmt_misc");
+			goto restore_ns;
+		}
+		pr_info("Prepare binfmt_misc: skipping(%d)\n", errno);
+	} else {
+		dir = opendir(BINFMT_MISC_HOME);
+		if (!dir) {
+			pr_perror("Can't read binfmt_misc dir");
+			goto restore_ns;
+		}
+
+		num = 0;
+		/* ".", "..", "register", "status" */
+		while (num <= 4 && (de = readdir(dir)) != NULL)
+			num++;
+		closedir(dir);
+		if (num <= 4) {
+			/* No entries */
+			if (umount(BINFMT_MISC_HOME) < 0)
+				pr_perror("Can't umount "BINFMT_MISC_HOME"\n");
+		} else {
+			ret = add_forced_mount(pid, BINFMT_MISC_HOME);
+			if (ret)
+				goto restore_ns;
+		}
+	}
+
+	exit_code = 0;
+restore_ns:
+	ret = restore_ns(mnt_fd, &mnt_ns_desc);
+out:
+	return ret ? -1 : exit_code;
+}
+
 struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");

Comments

Andrey Vagin July 15, 2016, 10:07 p.m.
On Wed, Jun 29, 2016 at 02:49:25PM +0300, Kirill Tkhai wrote:
> Umount does not remove binfmt_misc content. If it's mounted once again,
> the same entries remain registered.
> 
> Criu does not dump content of umounted binfmt_misc. So, after C/R we
> lose it at all.
> 
> This patch forces mounting of unmounted binfmt_misc before we collect
> mountpoints. If it's unmounted, we mount it back and add this mount
> to the list of forced mounted mountpoints. Next patch need this
> list to mark the mount in special way in dump image.
> 
> v2: Print error in case of umount() fail.
>     Move add_forced_mount() to another patch.
> v3: Close binfmt_misc dir before its umount().
> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  criu/cr-dump.c       |   13 ++++++++++++
>  criu/include/mount.h |    1 +
>  criu/mount.c         |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 70 insertions(+)
> 
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 7609544..90da57b 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -747,6 +747,16 @@ static int dump_task_core_all(struct parasite_ctl *ctl,
>  	return ret;
>  }
>  
> +static int prepare_dump__tasks_freezed(void)
> +{
> +	int ret;
> +
> +	/* Tasks freezed, so we do not race with systemd's autofs unmounter */
> +	ret = try_mount_binfmt_misc(root_item->pid.real);
> +
> +	return ret;
> +}
> +
>  static int collect_pstree_ids_predump(void)
>  {
>  	struct pstree_item *item;
> @@ -1682,6 +1692,9 @@ int cr_dump_tasks(pid_t pid)
>  	if (collect_pstree())
>  		goto err;
>  
> +	if (prepare_dump__tasks_freezed())
> +		goto err;
> +
>  	if (collect_pstree_ids())
>  		goto err;
>  
> diff --git a/criu/include/mount.h b/criu/include/mount.h
> index 423b6d0..bd15468 100644
> --- a/criu/include/mount.h
> +++ b/criu/include/mount.h
> @@ -129,4 +129,5 @@ extern int read_mnt_ns_img(void);
>  extern void cleanup_mnt_ns(void);
>  extern void cleanup_forced_mounts(void);
>  
> +extern int try_mount_binfmt_misc(pid_t pid);
>  #endif /* __CR_MOUNT_H__ */
> diff --git a/criu/mount.c b/criu/mount.c
> index 4a288e4..df8a87d 100644
> --- a/criu/mount.c
> +++ b/criu/mount.c
> @@ -3761,4 +3761,60 @@ void cleanup_forced_mounts(void)
>  	}
>  }
>  
> +#define BINFMT_MISC_HOME "/proc/sys/fs/binfmt_misc"
> +
> +int try_mount_binfmt_misc(pid_t pid)
> +{
> +	int num, mnt_fd, ret, exit_code = -1;
> +	struct dirent *de;
> +	DIR *dir;
> +
> +	ret = switch_ns(pid, &mnt_ns_desc, &mnt_fd);
> +	if (ret < 0) {
> +		pr_err("Can't switch mnt_ns\n");
> +		goto out;
> +	}
> +
> +	ret = mount("binfmt_misc", BINFMT_MISC_HOME, "binfmt_misc", 0, NULL);
> +	if (ret < 0) {
> +		if (errno == EPERM) {
> +			pr_info("Can't mount binfmt_misc: EPERM. Running in user_ns?\n");
> +			exit_code = 0;
> +			goto restore_ns;
> +		}
> +		if (errno != EBUSY && errno != ENODEV && errno != ENOENT) {
> +			pr_perror("Can't mount binfmt_misc");
> +			goto restore_ns;
> +		}
> +		pr_info("Prepare binfmt_misc: skipping(%d)\n", errno);
> +	} else {
> +		dir = opendir(BINFMT_MISC_HOME);
> +		if (!dir) {
> +			pr_perror("Can't read binfmt_misc dir");
> +			goto restore_ns;
> +		}
> +
> +		num = 0;
> +		/* ".", "..", "register", "status" */
> +		while (num <= 4 && (de = readdir(dir)) != NULL)
> +			num++;
> +		closedir(dir);
> +		if (num <= 4) {
> +			/* No entries */
> +			if (umount(BINFMT_MISC_HOME) < 0)
> +				pr_perror("Can't umount "BINFMT_MISC_HOME"\n");
> +		} else {
> +			ret = add_forced_mount(pid, BINFMT_MISC_HOME);

Why do we need to add a fake mount? Maybe it's better to dump it right
here?

> +			if (ret)
> +				goto restore_ns;
> +		}
> +	}
> +
> +	exit_code = 0;
> +restore_ns:
> +	ret = restore_ns(mnt_fd, &mnt_ns_desc);
> +out:
> +	return ret ? -1 : exit_code;
> +}
> +
>  struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
> 
> _______________________________________________
> CRIU mailing list
> CRIU@openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
Kirill Tkhai July 18, 2016, 9:02 a.m.
Hi, Andrew!

On 16.07.2016 01:07, Andrew Vagin wrote:
> On Wed, Jun 29, 2016 at 02:49:25PM +0300, Kirill Tkhai wrote:
>> Umount does not remove binfmt_misc content. If it's mounted once again,
>> the same entries remain registered.
>>
>> Criu does not dump content of umounted binfmt_misc. So, after C/R we
>> lose it at all.
>>
>> This patch forces mounting of unmounted binfmt_misc before we collect
>> mountpoints. If it's unmounted, we mount it back and add this mount
>> to the list of forced mounted mountpoints. Next patch need this
>> list to mark the mount in special way in dump image.
>>
>> v2: Print error in case of umount() fail.
>>     Move add_forced_mount() to another patch.
>> v3: Close binfmt_misc dir before its umount().
>>
>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>> ---
>>  criu/cr-dump.c       |   13 ++++++++++++
>>  criu/include/mount.h |    1 +
>>  criu/mount.c         |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 70 insertions(+)
>>
>> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
>> index 7609544..90da57b 100644
>> --- a/criu/cr-dump.c
>> +++ b/criu/cr-dump.c
>> @@ -747,6 +747,16 @@ static int dump_task_core_all(struct parasite_ctl *ctl,
>>  	return ret;
>>  }
>>  
>> +static int prepare_dump__tasks_freezed(void)
>> +{
>> +	int ret;
>> +
>> +	/* Tasks freezed, so we do not race with systemd's autofs unmounter */
>> +	ret = try_mount_binfmt_misc(root_item->pid.real);
>> +
>> +	return ret;
>> +}
>> +
>>  static int collect_pstree_ids_predump(void)
>>  {
>>  	struct pstree_item *item;
>> @@ -1682,6 +1692,9 @@ int cr_dump_tasks(pid_t pid)
>>  	if (collect_pstree())
>>  		goto err;
>>  
>> +	if (prepare_dump__tasks_freezed())
>> +		goto err;
>> +
>>  	if (collect_pstree_ids())
>>  		goto err;
>>  
>> diff --git a/criu/include/mount.h b/criu/include/mount.h
>> index 423b6d0..bd15468 100644
>> --- a/criu/include/mount.h
>> +++ b/criu/include/mount.h
>> @@ -129,4 +129,5 @@ extern int read_mnt_ns_img(void);
>>  extern void cleanup_mnt_ns(void);
>>  extern void cleanup_forced_mounts(void);
>>  
>> +extern int try_mount_binfmt_misc(pid_t pid);
>>  #endif /* __CR_MOUNT_H__ */
>> diff --git a/criu/mount.c b/criu/mount.c
>> index 4a288e4..df8a87d 100644
>> --- a/criu/mount.c
>> +++ b/criu/mount.c
>> @@ -3761,4 +3761,60 @@ void cleanup_forced_mounts(void)
>>  	}
>>  }
>>  
>> +#define BINFMT_MISC_HOME "/proc/sys/fs/binfmt_misc"
>> +
>> +int try_mount_binfmt_misc(pid_t pid)
>> +{
>> +	int num, mnt_fd, ret, exit_code = -1;
>> +	struct dirent *de;
>> +	DIR *dir;
>> +
>> +	ret = switch_ns(pid, &mnt_ns_desc, &mnt_fd);
>> +	if (ret < 0) {
>> +		pr_err("Can't switch mnt_ns\n");
>> +		goto out;
>> +	}
>> +
>> +	ret = mount("binfmt_misc", BINFMT_MISC_HOME, "binfmt_misc", 0, NULL);
>> +	if (ret < 0) {
>> +		if (errno == EPERM) {
>> +			pr_info("Can't mount binfmt_misc: EPERM. Running in user_ns?\n");
>> +			exit_code = 0;
>> +			goto restore_ns;
>> +		}
>> +		if (errno != EBUSY && errno != ENODEV && errno != ENOENT) {
>> +			pr_perror("Can't mount binfmt_misc");
>> +			goto restore_ns;
>> +		}
>> +		pr_info("Prepare binfmt_misc: skipping(%d)\n", errno);
>> +	} else {
>> +		dir = opendir(BINFMT_MISC_HOME);
>> +		if (!dir) {
>> +			pr_perror("Can't read binfmt_misc dir");
>> +			goto restore_ns;
>> +		}
>> +
>> +		num = 0;
>> +		/* ".", "..", "register", "status" */
>> +		while (num <= 4 && (de = readdir(dir)) != NULL)
>> +			num++;
>> +		closedir(dir);
>> +		if (num <= 4) {
>> +			/* No entries */
>> +			if (umount(BINFMT_MISC_HOME) < 0)
>> +				pr_perror("Can't umount "BINFMT_MISC_HOME"\n");
>> +		} else {
>> +			ret = add_forced_mount(pid, BINFMT_MISC_HOME);
> 
> Why do we need to add a fake mount? Maybe it's better to dump it right
> here?

It was here for using generic dumping code. But you may ignore this, according to Pavel
review, I'll remake patchset in other way.
 
>> +			if (ret)
>> +				goto restore_ns;
>> +		}
>> +	}
>> +
>> +	exit_code = 0;
>> +restore_ns:
>> +	ret = restore_ns(mnt_fd, &mnt_ns_desc);
>> +out:
>> +	return ret ? -1 : exit_code;
>> +}
>> +
>>  struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
>>
>> _______________________________________________
>> CRIU mailing list
>> CRIU@openvz.org
>> https://lists.openvz.org/mailman/listinfo/criu