[v3,2/4] mount: Add forced mounts sceleton

Submitted by Kirill Tkhai on June 29, 2016, 11:49 a.m.

Details

Message ID 146720095638.2821.12981714940525647835.stgit@pro
State Rejected
Series "Save content of unmounted binfmt_misc"
Headers show

Commit Message

Kirill Tkhai June 29, 2016, 11:49 a.m.
Mechanism for mounts, which are forced by criu during dump stage.

v2: New patch, splitted from v1's "[2/3] mount: Forced mount unmounted binfmt_misc to do not lost its content"
    Cleanup for dump failed case is added

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 criu/cr-dump.c       |    1 +
 criu/include/mount.h |    1 +
 criu/mount.c         |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+)

Patch hide | download patch | download mbox

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 00d28e9..7609544 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1588,6 +1588,7 @@  static int cr_dump_finish(int ret)
 	if (ret || post_dump_ret || opts.final_state == TASK_ALIVE) {
 		network_unlock();
 		delete_link_remaps();
+		cleanup_forced_mounts();
 	}
 	pstree_switch_state(root_item,
 			    (ret || post_dump_ret) ?
diff --git a/criu/include/mount.h b/criu/include/mount.h
index c7992ac..423b6d0 100644
--- a/criu/include/mount.h
+++ b/criu/include/mount.h
@@ -127,5 +127,6 @@  extern int ext_mount_add(char *key, char *val);
 extern int mntns_maybe_create_roots(void);
 extern int read_mnt_ns_img(void);
 extern void cleanup_mnt_ns(void);
+extern void cleanup_forced_mounts(void);
 
 #endif /* __CR_MOUNT_H__ */
diff --git a/criu/mount.c b/criu/mount.c
index 55f248d..4a288e4 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -41,6 +41,15 @@ 
 #define LOG_PREFIX "mnt: "
 
 static struct fstype fstypes[];
+static LIST_HEAD(forced_mounts_list);
+
+struct forced_mount {
+	struct list_head list;
+	unsigned int ns_id;
+	unsigned int mnt_id;
+	char *path;
+	pid_t pid;
+};
 
 int ext_mount_add(char *key, char *val)
 {
@@ -3685,4 +3694,71 @@  int dump_mnt_namespaces(void)
 	return 0;
 }
 
+static int add_forced_mount(pid_t pid, const char *path)
+{
+	struct forced_mount *fm;
+	int fd, mnt_id, ret = 0;
+	unsigned int ns_id;
+
+	if (read_ns_id(pid, &mnt_ns_desc, &ns_id) < 0 || !ns_id) {
+		pr_err("Can't read mnt_ns id\n");
+		return -1;
+	}
+
+	fm = xmalloc(sizeof(*fm));
+	if (!fm)
+		return -1;
+
+	fd = open(path, O_PATH|O_DIRECTORY);
+	if (fd < 0) {
+		pr_perror("Can't open %s\n", path);
+		goto err_free;
+	}
+
+	ret = get_fd_mntid(fd, &mnt_id);
+	close(fd);
+
+	if (ret < 0) {
+		pr_err("Can't get mnt_id of %s\n", path);
+		goto err_free;
+	}
+
+	fm->path = xstrdup(path);
+	if (!fm->path)
+		goto err_free;
+
+	fm->ns_id = ns_id;
+	fm->mnt_id = mnt_id;
+	fm->pid = pid;
+	list_add(&fm->list, &forced_mounts_list);
+
+	return 0;
+err_free:
+	pr_err("Can't add forced mount\n");
+	free(fm);
+	return -1;
+}
+
+void cleanup_forced_mounts(void)
+{
+	struct forced_mount *fm;
+	int mnt_fd, ret;
+
+	list_for_each_entry(fm, &forced_mounts_list, list) {
+		ret = switch_ns(fm->pid, &mnt_ns_desc, &mnt_fd);
+		if (ret) {
+			pr_err("Can't switch to pid's %u mnt_ns\n", fm->pid);
+			continue;
+		}
+
+		if (umount(fm->path) < 0)
+			pr_perror("Can't umount forced mount %s\n", fm->path);
+
+		if (restore_ns(mnt_fd, &mnt_ns_desc)) {
+			pr_err("cleanup_forced_mounts exiting with wrong mnt_ns\n");
+			return;
+		}
+	}
+}
+
 struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");

Comments

Pavel Emelianov July 13, 2016, 12:46 p.m.
On 06/29/2016 02:49 PM, Kirill Tkhai wrote:
> Mechanism for mounts, which are forced by criu during dump stage.
> 
> v2: New patch, splitted from v1's "[2/3] mount: Forced mount unmounted binfmt_misc to do not lost its content"
>     Cleanup for dump failed case is added
> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  criu/cr-dump.c       |    1 +
>  criu/include/mount.h |    1 +
>  criu/mount.c         |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 78 insertions(+)
> 
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 00d28e9..7609544 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1588,6 +1588,7 @@ static int cr_dump_finish(int ret)
>  	if (ret || post_dump_ret || opts.final_state == TASK_ALIVE) {
>  		network_unlock();
>  		delete_link_remaps();
> +		cleanup_forced_mounts();
>  	}
>  	pstree_switch_state(root_item,
>  			    (ret || post_dump_ret) ?
> diff --git a/criu/include/mount.h b/criu/include/mount.h
> index c7992ac..423b6d0 100644
> --- a/criu/include/mount.h
> +++ b/criu/include/mount.h
> @@ -127,5 +127,6 @@ extern int ext_mount_add(char *key, char *val);
>  extern int mntns_maybe_create_roots(void);
>  extern int read_mnt_ns_img(void);
>  extern void cleanup_mnt_ns(void);
> +extern void cleanup_forced_mounts(void);
>  
>  #endif /* __CR_MOUNT_H__ */
> diff --git a/criu/mount.c b/criu/mount.c
> index 55f248d..4a288e4 100644
> --- a/criu/mount.c
> +++ b/criu/mount.c
> @@ -41,6 +41,15 @@
>  #define LOG_PREFIX "mnt: "
>  
>  static struct fstype fstypes[];
> +static LIST_HEAD(forced_mounts_list);
> +
> +struct forced_mount {
> +	struct list_head list;
> +	unsigned int ns_id;
> +	unsigned int mnt_id;
> +	char *path;
> +	pid_t pid;
> +};

Why separate structure? We have mount_info already, let's add
necessary flags on it.

>  
>  int ext_mount_add(char *key, char *val)
>  {
> @@ -3685,4 +3694,71 @@ int dump_mnt_namespaces(void)
>  	return 0;
>  }
>  
> +static int add_forced_mount(pid_t pid, const char *path)
> +{
> +	struct forced_mount *fm;
> +	int fd, mnt_id, ret = 0;
> +	unsigned int ns_id;
> +
> +	if (read_ns_id(pid, &mnt_ns_desc, &ns_id) < 0 || !ns_id) {

I don't get why we need to read this stuff from kernel, while we have all
the ns_id engine at hands. If you need to add binfmt_misc mountpoint, you
want it per-namespace, not per-task, don't you?

> +		pr_err("Can't read mnt_ns id\n");
> +		return -1;
> +	}
> +
> +	fm = xmalloc(sizeof(*fm));
> +	if (!fm)
> +		return -1;
> +
> +	fd = open(path, O_PATH|O_DIRECTORY);
> +	if (fd < 0) {
> +		pr_perror("Can't open %s\n", path);
> +		goto err_free;
> +	}
> +
> +	ret = get_fd_mntid(fd, &mnt_id);
> +	close(fd);
> +
> +	if (ret < 0) {
> +		pr_err("Can't get mnt_id of %s\n", path);
> +		goto err_free;
> +	}
> +
> +	fm->path = xstrdup(path);
> +	if (!fm->path)
> +		goto err_free;
> +
> +	fm->ns_id = ns_id;
> +	fm->mnt_id = mnt_id;
> +	fm->pid = pid;
> +	list_add(&fm->list, &forced_mounts_list);
> +
> +	return 0;
> +err_free:
> +	pr_err("Can't add forced mount\n");
> +	free(fm);
> +	return -1;
> +}
> +
> +void cleanup_forced_mounts(void)
> +{
> +	struct forced_mount *fm;
> +	int mnt_fd, ret;
> +
> +	list_for_each_entry(fm, &forced_mounts_list, list) {
> +		ret = switch_ns(fm->pid, &mnt_ns_desc, &mnt_fd);
> +		if (ret) {
> +			pr_err("Can't switch to pid's %u mnt_ns\n", fm->pid);
> +			continue;
> +		}
> +
> +		if (umount(fm->path) < 0)
> +			pr_perror("Can't umount forced mount %s\n", fm->path);
> +
> +		if (restore_ns(mnt_fd, &mnt_ns_desc)) {
> +			pr_err("cleanup_forced_mounts exiting with wrong mnt_ns\n");
> +			return;
> +		}
> +	}
> +}
> +
>  struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
> 
> _______________________________________________
> CRIU mailing list
> CRIU@openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
> .
>
Kirill Tkhai July 13, 2016, 1:43 p.m.
On 13.07.2016 15:46, Pavel Emelyanov wrote:
> On 06/29/2016 02:49 PM, Kirill Tkhai wrote:
>> Mechanism for mounts, which are forced by criu during dump stage.
>>
>> v2: New patch, splitted from v1's "[2/3] mount: Forced mount unmounted binfmt_misc to do not lost its content"
>>     Cleanup for dump failed case is added
>>
>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>> ---
>>  criu/cr-dump.c       |    1 +
>>  criu/include/mount.h |    1 +
>>  criu/mount.c         |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 78 insertions(+)
>>
>> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
>> index 00d28e9..7609544 100644
>> --- a/criu/cr-dump.c
>> +++ b/criu/cr-dump.c
>> @@ -1588,6 +1588,7 @@ static int cr_dump_finish(int ret)
>>  	if (ret || post_dump_ret || opts.final_state == TASK_ALIVE) {
>>  		network_unlock();
>>  		delete_link_remaps();
>> +		cleanup_forced_mounts();
>>  	}
>>  	pstree_switch_state(root_item,
>>  			    (ret || post_dump_ret) ?
>> diff --git a/criu/include/mount.h b/criu/include/mount.h
>> index c7992ac..423b6d0 100644
>> --- a/criu/include/mount.h
>> +++ b/criu/include/mount.h
>> @@ -127,5 +127,6 @@ extern int ext_mount_add(char *key, char *val);
>>  extern int mntns_maybe_create_roots(void);
>>  extern int read_mnt_ns_img(void);
>>  extern void cleanup_mnt_ns(void);
>> +extern void cleanup_forced_mounts(void);
>>  
>>  #endif /* __CR_MOUNT_H__ */
>> diff --git a/criu/mount.c b/criu/mount.c
>> index 55f248d..4a288e4 100644
>> --- a/criu/mount.c
>> +++ b/criu/mount.c
>> @@ -41,6 +41,15 @@
>>  #define LOG_PREFIX "mnt: "
>>  
>>  static struct fstype fstypes[];
>> +static LIST_HEAD(forced_mounts_list);
>> +
>> +struct forced_mount {
>> +	struct list_head list;
>> +	unsigned int ns_id;
>> +	unsigned int mnt_id;
>> +	char *path;
>> +	pid_t pid;
>> +};
> 
> Why separate structure? We have mount_info already, let's add
> necessary flags on it.

Firstly, we add such mounts, when namespaces are not collected,
and there are no any mount_infos populated. If we create a new
mount_info, it will be terrible and crutch'ly to use it in
mnt_ns collecting engine.

Secondly, the patch introduces some new fields, which are not
need for common case. We need list entry to keep forced mounts
order, and to cleanup them in proper way in case of dump failed.

We use separate structure for ghost files, and this simplifies
logic, so why should we overload mountpoints another way?!
I think we shouldn't.
 
>>  
>>  int ext_mount_add(char *key, char *val)
>>  {
>> @@ -3685,4 +3694,71 @@ int dump_mnt_namespaces(void)
>>  	return 0;
>>  }
>>  
>> +static int add_forced_mount(pid_t pid, const char *path)
>> +{
>> +	struct forced_mount *fm;
>> +	int fd, mnt_id, ret = 0;
>> +	unsigned int ns_id;
>> +
>> +	if (read_ns_id(pid, &mnt_ns_desc, &ns_id) < 0 || !ns_id) {
> 
> I don't get why we need to read this stuff from kernel, while we have all
> the ns_id engine at hands. If you need to add binfmt_misc mountpoint, you
> want it per-namespace, not per-task, don't you?

Which engine do you mean? Namespaces are not parced yet; no information at all.
See next patch, and places where we add forced mounts.

Mounts is per-namespace, but you need a task to attach a ns. You can't save
"mnt:[4026531840]" and attach to it when you want. If so, you should iterate
over tasks list and look for one, who owns it. Linux doesn't provide a list
of used namespaces.
 
>> +		pr_err("Can't read mnt_ns id\n");
>> +		return -1;
>> +	}
>> +
>> +	fm = xmalloc(sizeof(*fm));
>> +	if (!fm)
>> +		return -1;
>> +
>> +	fd = open(path, O_PATH|O_DIRECTORY);
>> +	if (fd < 0) {
>> +		pr_perror("Can't open %s\n", path);
>> +		goto err_free;
>> +	}
>> +
>> +	ret = get_fd_mntid(fd, &mnt_id);
>> +	close(fd);
>> +
>> +	if (ret < 0) {
>> +		pr_err("Can't get mnt_id of %s\n", path);
>> +		goto err_free;
>> +	}
>> +
>> +	fm->path = xstrdup(path);
>> +	if (!fm->path)
>> +		goto err_free;
>> +
>> +	fm->ns_id = ns_id;
>> +	fm->mnt_id = mnt_id;
>> +	fm->pid = pid;
>> +	list_add(&fm->list, &forced_mounts_list);
>> +
>> +	return 0;
>> +err_free:
>> +	pr_err("Can't add forced mount\n");
>> +	free(fm);
>> +	return -1;
>> +}
>> +
>> +void cleanup_forced_mounts(void)
>> +{
>> +	struct forced_mount *fm;
>> +	int mnt_fd, ret;
>> +
>> +	list_for_each_entry(fm, &forced_mounts_list, list) {
>> +		ret = switch_ns(fm->pid, &mnt_ns_desc, &mnt_fd);
>> +		if (ret) {
>> +			pr_err("Can't switch to pid's %u mnt_ns\n", fm->pid);
>> +			continue;
>> +		}
>> +
>> +		if (umount(fm->path) < 0)
>> +			pr_perror("Can't umount forced mount %s\n", fm->path);
>> +
>> +		if (restore_ns(mnt_fd, &mnt_ns_desc)) {
>> +			pr_err("cleanup_forced_mounts exiting with wrong mnt_ns\n");
>> +			return;
>> +		}
>> +	}
>> +}
>> +
>>  struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
>>
>> _______________________________________________
>> CRIU mailing list
>> CRIU@openvz.org
>> https://lists.openvz.org/mailman/listinfo/criu
>> .
>>
>
Pavel Emelianov July 13, 2016, 2:34 p.m.
On 07/13/2016 04:43 PM, Kirill Tkhai wrote:
> On 13.07.2016 15:46, Pavel Emelyanov wrote:
>> On 06/29/2016 02:49 PM, Kirill Tkhai wrote:
>>> Mechanism for mounts, which are forced by criu during dump stage.
>>>
>>> v2: New patch, splitted from v1's "[2/3] mount: Forced mount unmounted binfmt_misc to do not lost its content"
>>>     Cleanup for dump failed case is added
>>>
>>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>>> ---
>>>  criu/cr-dump.c       |    1 +
>>>  criu/include/mount.h |    1 +
>>>  criu/mount.c         |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  3 files changed, 78 insertions(+)
>>>
>>> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
>>> index 00d28e9..7609544 100644
>>> --- a/criu/cr-dump.c
>>> +++ b/criu/cr-dump.c
>>> @@ -1588,6 +1588,7 @@ static int cr_dump_finish(int ret)
>>>  	if (ret || post_dump_ret || opts.final_state == TASK_ALIVE) {
>>>  		network_unlock();
>>>  		delete_link_remaps();
>>> +		cleanup_forced_mounts();
>>>  	}
>>>  	pstree_switch_state(root_item,
>>>  			    (ret || post_dump_ret) ?
>>> diff --git a/criu/include/mount.h b/criu/include/mount.h
>>> index c7992ac..423b6d0 100644
>>> --- a/criu/include/mount.h
>>> +++ b/criu/include/mount.h
>>> @@ -127,5 +127,6 @@ extern int ext_mount_add(char *key, char *val);
>>>  extern int mntns_maybe_create_roots(void);
>>>  extern int read_mnt_ns_img(void);
>>>  extern void cleanup_mnt_ns(void);
>>> +extern void cleanup_forced_mounts(void);
>>>  
>>>  #endif /* __CR_MOUNT_H__ */
>>> diff --git a/criu/mount.c b/criu/mount.c
>>> index 55f248d..4a288e4 100644
>>> --- a/criu/mount.c
>>> +++ b/criu/mount.c
>>> @@ -41,6 +41,15 @@
>>>  #define LOG_PREFIX "mnt: "
>>>  
>>>  static struct fstype fstypes[];
>>> +static LIST_HEAD(forced_mounts_list);
>>> +
>>> +struct forced_mount {
>>> +	struct list_head list;
>>> +	unsigned int ns_id;
>>> +	unsigned int mnt_id;
>>> +	char *path;
>>> +	pid_t pid;
>>> +};
>>
>> Why separate structure? We have mount_info already, let's add
>> necessary flags on it.
> 
> Firstly, we add such mounts, when namespaces are not collected,
> and there are no any mount_infos populated. If we create a new
> mount_info, it will be terrible and crutch'ly to use it in
> mnt_ns collecting engine.
> 
> Secondly, the patch introduces some new fields, which are not
> need for common case. We need list entry to keep forced mounts
> order, and to cleanup them in proper way in case of dump failed.
> 
> We use separate structure for ghost files, and this simplifies
> logic, so why should we overload mountpoints another way?!
> I think we shouldn't.
>  
>>>  
>>>  int ext_mount_add(char *key, char *val)
>>>  {
>>> @@ -3685,4 +3694,71 @@ int dump_mnt_namespaces(void)
>>>  	return 0;
>>>  }
>>>  
>>> +static int add_forced_mount(pid_t pid, const char *path)
>>> +{
>>> +	struct forced_mount *fm;
>>> +	int fd, mnt_id, ret = 0;
>>> +	unsigned int ns_id;
>>> +
>>> +	if (read_ns_id(pid, &mnt_ns_desc, &ns_id) < 0 || !ns_id) {
>>
>> I don't get why we need to read this stuff from kernel, while we have all
>> the ns_id engine at hands. If you need to add binfmt_misc mountpoint, you
>> want it per-namespace, not per-task, don't you?
> 
> Which engine do you mean? Namespaces are not parced yet; no information at all.

Thus don't add forced mounts that early. Add them once namespace is parsed
and collected, you don't need this thing before.

> See next patch, and places where we add forced mounts.
> 
> Mounts is per-namespace, but you need a task to attach a ns. You can't save
> "mnt:[4026531840]" and attach to it when you want. If so, you should iterate
> over tasks list and look for one, who owns it. Linux doesn't provide a list
> of used namespaces.
>  
>>> +		pr_err("Can't read mnt_ns id\n");
>>> +		return -1;
>>> +	}
>>> +
>>> +	fm = xmalloc(sizeof(*fm));
>>> +	if (!fm)
>>> +		return -1;
>>> +
>>> +	fd = open(path, O_PATH|O_DIRECTORY);
>>> +	if (fd < 0) {
>>> +		pr_perror("Can't open %s\n", path);
>>> +		goto err_free;
>>> +	}
>>> +
>>> +	ret = get_fd_mntid(fd, &mnt_id);
>>> +	close(fd);
>>> +
>>> +	if (ret < 0) {
>>> +		pr_err("Can't get mnt_id of %s\n", path);
>>> +		goto err_free;
>>> +	}
>>> +
>>> +	fm->path = xstrdup(path);
>>> +	if (!fm->path)
>>> +		goto err_free;
>>> +
>>> +	fm->ns_id = ns_id;
>>> +	fm->mnt_id = mnt_id;
>>> +	fm->pid = pid;
>>> +	list_add(&fm->list, &forced_mounts_list);
>>> +
>>> +	return 0;
>>> +err_free:
>>> +	pr_err("Can't add forced mount\n");
>>> +	free(fm);
>>> +	return -1;
>>> +}
>>> +
>>> +void cleanup_forced_mounts(void)
>>> +{
>>> +	struct forced_mount *fm;
>>> +	int mnt_fd, ret;
>>> +
>>> +	list_for_each_entry(fm, &forced_mounts_list, list) {
>>> +		ret = switch_ns(fm->pid, &mnt_ns_desc, &mnt_fd);
>>> +		if (ret) {
>>> +			pr_err("Can't switch to pid's %u mnt_ns\n", fm->pid);
>>> +			continue;
>>> +		}
>>> +
>>> +		if (umount(fm->path) < 0)
>>> +			pr_perror("Can't umount forced mount %s\n", fm->path);
>>> +
>>> +		if (restore_ns(mnt_fd, &mnt_ns_desc)) {
>>> +			pr_err("cleanup_forced_mounts exiting with wrong mnt_ns\n");
>>> +			return;
>>> +		}
>>> +	}
>>> +}
>>> +
>>>  struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
>>>
>>> _______________________________________________
>>> CRIU mailing list
>>> CRIU@openvz.org
>>> https://lists.openvz.org/mailman/listinfo/criu
>>> .
>>>
>>
> .
>
Kirill Tkhai July 13, 2016, 2:52 p.m.
On 13.07.2016 17:34, Pavel Emelyanov wrote:
> On 07/13/2016 04:43 PM, Kirill Tkhai wrote:
>> On 13.07.2016 15:46, Pavel Emelyanov wrote:
>>> On 06/29/2016 02:49 PM, Kirill Tkhai wrote:
>>>> Mechanism for mounts, which are forced by criu during dump stage.
>>>>
>>>> v2: New patch, splitted from v1's "[2/3] mount: Forced mount unmounted binfmt_misc to do not lost its content"
>>>>     Cleanup for dump failed case is added
>>>>
>>>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>>>> ---
>>>>  criu/cr-dump.c       |    1 +
>>>>  criu/include/mount.h |    1 +
>>>>  criu/mount.c         |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  3 files changed, 78 insertions(+)
>>>>
>>>> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
>>>> index 00d28e9..7609544 100644
>>>> --- a/criu/cr-dump.c
>>>> +++ b/criu/cr-dump.c
>>>> @@ -1588,6 +1588,7 @@ static int cr_dump_finish(int ret)
>>>>  	if (ret || post_dump_ret || opts.final_state == TASK_ALIVE) {
>>>>  		network_unlock();
>>>>  		delete_link_remaps();
>>>> +		cleanup_forced_mounts();
>>>>  	}
>>>>  	pstree_switch_state(root_item,
>>>>  			    (ret || post_dump_ret) ?
>>>> diff --git a/criu/include/mount.h b/criu/include/mount.h
>>>> index c7992ac..423b6d0 100644
>>>> --- a/criu/include/mount.h
>>>> +++ b/criu/include/mount.h
>>>> @@ -127,5 +127,6 @@ extern int ext_mount_add(char *key, char *val);
>>>>  extern int mntns_maybe_create_roots(void);
>>>>  extern int read_mnt_ns_img(void);
>>>>  extern void cleanup_mnt_ns(void);
>>>> +extern void cleanup_forced_mounts(void);
>>>>  
>>>>  #endif /* __CR_MOUNT_H__ */
>>>> diff --git a/criu/mount.c b/criu/mount.c
>>>> index 55f248d..4a288e4 100644
>>>> --- a/criu/mount.c
>>>> +++ b/criu/mount.c
>>>> @@ -41,6 +41,15 @@
>>>>  #define LOG_PREFIX "mnt: "
>>>>  
>>>>  static struct fstype fstypes[];
>>>> +static LIST_HEAD(forced_mounts_list);
>>>> +
>>>> +struct forced_mount {
>>>> +	struct list_head list;
>>>> +	unsigned int ns_id;
>>>> +	unsigned int mnt_id;
>>>> +	char *path;
>>>> +	pid_t pid;
>>>> +};
>>>
>>> Why separate structure? We have mount_info already, let's add
>>> necessary flags on it.
>>
>> Firstly, we add such mounts, when namespaces are not collected,
>> and there are no any mount_infos populated. If we create a new
>> mount_info, it will be terrible and crutch'ly to use it in
>> mnt_ns collecting engine.
>>
>> Secondly, the patch introduces some new fields, which are not
>> need for common case. We need list entry to keep forced mounts
>> order, and to cleanup them in proper way in case of dump failed.
>>
>> We use separate structure for ghost files, and this simplifies
>> logic, so why should we overload mountpoints another way?!
>> I think we shouldn't.
>>  
>>>>  
>>>>  int ext_mount_add(char *key, char *val)
>>>>  {
>>>> @@ -3685,4 +3694,71 @@ int dump_mnt_namespaces(void)
>>>>  	return 0;
>>>>  }
>>>>  
>>>> +static int add_forced_mount(pid_t pid, const char *path)
>>>> +{
>>>> +	struct forced_mount *fm;
>>>> +	int fd, mnt_id, ret = 0;
>>>> +	unsigned int ns_id;
>>>> +
>>>> +	if (read_ns_id(pid, &mnt_ns_desc, &ns_id) < 0 || !ns_id) {
>>>
>>> I don't get why we need to read this stuff from kernel, while we have all
>>> the ns_id engine at hands. If you need to add binfmt_misc mountpoint, you
>>> want it per-namespace, not per-task, don't you?
>>
>> Which engine do you mean? Namespaces are not parced yet; no information at all.
> 
> Thus don't add forced mounts that early. Add them once namespace is parsed
> and collected, you don't need this thing before.

What is the problem of parsing namespace on mounting? Anyway you need umount
them in case of dumping fail, and there is need task's pid to switch its
namespace.

You'd escape read_ns_id(), but you'd get another problems (such as determining
of that namespace is root, etc). You'd merge pre-dump code and ns-parcing code.
It's really terrible, I've tried.

I don't think using of read_ns_id() is a huge problem.
 
>> See next patch, and places where we add forced mounts.
>>
>> Mounts is per-namespace, but you need a task to attach a ns. You can't save
>> "mnt:[4026531840]" and attach to it when you want. If so, you should iterate
>> over tasks list and look for one, who owns it. Linux doesn't provide a list
>> of used namespaces.
>>  
>>>> +		pr_err("Can't read mnt_ns id\n");
>>>> +		return -1;
>>>> +	}
>>>> +
>>>> +	fm = xmalloc(sizeof(*fm));
>>>> +	if (!fm)
>>>> +		return -1;
>>>> +
>>>> +	fd = open(path, O_PATH|O_DIRECTORY);
>>>> +	if (fd < 0) {
>>>> +		pr_perror("Can't open %s\n", path);
>>>> +		goto err_free;
>>>> +	}
>>>> +
>>>> +	ret = get_fd_mntid(fd, &mnt_id);
>>>> +	close(fd);
>>>> +
>>>> +	if (ret < 0) {
>>>> +		pr_err("Can't get mnt_id of %s\n", path);
>>>> +		goto err_free;
>>>> +	}
>>>> +
>>>> +	fm->path = xstrdup(path);
>>>> +	if (!fm->path)
>>>> +		goto err_free;
>>>> +
>>>> +	fm->ns_id = ns_id;
>>>> +	fm->mnt_id = mnt_id;
>>>> +	fm->pid = pid;
>>>> +	list_add(&fm->list, &forced_mounts_list);
>>>> +
>>>> +	return 0;
>>>> +err_free:
>>>> +	pr_err("Can't add forced mount\n");
>>>> +	free(fm);
>>>> +	return -1;
>>>> +}
>>>> +
>>>> +void cleanup_forced_mounts(void)
>>>> +{
>>>> +	struct forced_mount *fm;
>>>> +	int mnt_fd, ret;
>>>> +
>>>> +	list_for_each_entry(fm, &forced_mounts_list, list) {
>>>> +		ret = switch_ns(fm->pid, &mnt_ns_desc, &mnt_fd);
>>>> +		if (ret) {
>>>> +			pr_err("Can't switch to pid's %u mnt_ns\n", fm->pid);
>>>> +			continue;
>>>> +		}
>>>> +
>>>> +		if (umount(fm->path) < 0)
>>>> +			pr_perror("Can't umount forced mount %s\n", fm->path);
>>>> +
>>>> +		if (restore_ns(mnt_fd, &mnt_ns_desc)) {
>>>> +			pr_err("cleanup_forced_mounts exiting with wrong mnt_ns\n");
>>>> +			return;
>>>> +		}
>>>> +	}
>>>> +}
>>>> +
>>>>  struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
>>>>
>>>> _______________________________________________
>>>> CRIU mailing list
>>>> CRIU@openvz.org
>>>> https://lists.openvz.org/mailman/listinfo/criu
>>>> .
>>>>
>>>
>> .
>>
>
Pavel Emelianov July 14, 2016, 10:58 a.m.
On 07/13/2016 05:52 PM, Kirill Tkhai wrote:
> 
> 
> On 13.07.2016 17:34, Pavel Emelyanov wrote:
>> On 07/13/2016 04:43 PM, Kirill Tkhai wrote:
>>> On 13.07.2016 15:46, Pavel Emelyanov wrote:
>>>> On 06/29/2016 02:49 PM, Kirill Tkhai wrote:
>>>>> Mechanism for mounts, which are forced by criu during dump stage.
>>>>>
>>>>> v2: New patch, splitted from v1's "[2/3] mount: Forced mount unmounted binfmt_misc to do not lost its content"
>>>>>     Cleanup for dump failed case is added
>>>>>
>>>>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>>>>> ---
>>>>>  criu/cr-dump.c       |    1 +
>>>>>  criu/include/mount.h |    1 +
>>>>>  criu/mount.c         |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>  3 files changed, 78 insertions(+)
>>>>>
>>>>> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
>>>>> index 00d28e9..7609544 100644
>>>>> --- a/criu/cr-dump.c
>>>>> +++ b/criu/cr-dump.c
>>>>> @@ -1588,6 +1588,7 @@ static int cr_dump_finish(int ret)
>>>>>  	if (ret || post_dump_ret || opts.final_state == TASK_ALIVE) {
>>>>>  		network_unlock();
>>>>>  		delete_link_remaps();
>>>>> +		cleanup_forced_mounts();
>>>>>  	}
>>>>>  	pstree_switch_state(root_item,
>>>>>  			    (ret || post_dump_ret) ?
>>>>> diff --git a/criu/include/mount.h b/criu/include/mount.h
>>>>> index c7992ac..423b6d0 100644
>>>>> --- a/criu/include/mount.h
>>>>> +++ b/criu/include/mount.h
>>>>> @@ -127,5 +127,6 @@ extern int ext_mount_add(char *key, char *val);
>>>>>  extern int mntns_maybe_create_roots(void);
>>>>>  extern int read_mnt_ns_img(void);
>>>>>  extern void cleanup_mnt_ns(void);
>>>>> +extern void cleanup_forced_mounts(void);
>>>>>  
>>>>>  #endif /* __CR_MOUNT_H__ */
>>>>> diff --git a/criu/mount.c b/criu/mount.c
>>>>> index 55f248d..4a288e4 100644
>>>>> --- a/criu/mount.c
>>>>> +++ b/criu/mount.c
>>>>> @@ -41,6 +41,15 @@
>>>>>  #define LOG_PREFIX "mnt: "
>>>>>  
>>>>>  static struct fstype fstypes[];
>>>>> +static LIST_HEAD(forced_mounts_list);
>>>>> +
>>>>> +struct forced_mount {
>>>>> +	struct list_head list;
>>>>> +	unsigned int ns_id;
>>>>> +	unsigned int mnt_id;
>>>>> +	char *path;
>>>>> +	pid_t pid;
>>>>> +};
>>>>
>>>> Why separate structure? We have mount_info already, let's add
>>>> necessary flags on it.
>>>
>>> Firstly, we add such mounts, when namespaces are not collected,
>>> and there are no any mount_infos populated. If we create a new
>>> mount_info, it will be terrible and crutch'ly to use it in
>>> mnt_ns collecting engine.
>>>
>>> Secondly, the patch introduces some new fields, which are not
>>> need for common case. We need list entry to keep forced mounts
>>> order, and to cleanup them in proper way in case of dump failed.
>>>
>>> We use separate structure for ghost files, and this simplifies
>>> logic, so why should we overload mountpoints another way?!
>>> I think we shouldn't.
>>>  
>>>>>  
>>>>>  int ext_mount_add(char *key, char *val)
>>>>>  {
>>>>> @@ -3685,4 +3694,71 @@ int dump_mnt_namespaces(void)
>>>>>  	return 0;
>>>>>  }
>>>>>  
>>>>> +static int add_forced_mount(pid_t pid, const char *path)
>>>>> +{
>>>>> +	struct forced_mount *fm;
>>>>> +	int fd, mnt_id, ret = 0;
>>>>> +	unsigned int ns_id;
>>>>> +
>>>>> +	if (read_ns_id(pid, &mnt_ns_desc, &ns_id) < 0 || !ns_id) {
>>>>
>>>> I don't get why we need to read this stuff from kernel, while we have all
>>>> the ns_id engine at hands. If you need to add binfmt_misc mountpoint, you
>>>> want it per-namespace, not per-task, don't you?
>>>
>>> Which engine do you mean? Namespaces are not parced yet; no information at all.
>>
>> Thus don't add forced mounts that early. Add them once namespace is parsed
>> and collected, you don't need this thing before.
> 
> What is the problem of parsing namespace on mounting? 

The less time we go to parse ANYTHING in /proc is better.

> Anyway you need umount
> them in case of dumping fail, and there is need task's pid to switch its
> namespace.
> 
> You'd escape read_ns_id(), but you'd get another problems (such as determining
> of that namespace is root, etc). 

There's no problem in determining whether the namespace is root or not, we
have 'type' field in it :)

> You'd merge pre-dump code and ns-parcing code. It's really terrible, I've tried.
> 
> I don't think using of read_ns_id() is a huge problem.
>  
>>> See next patch, and places where we add forced mounts.
>>>
>>> Mounts is per-namespace, but you need a task to attach a ns. You can't save
>>> "mnt:[4026531840]" and attach to it when you want. If so, you should iterate
>>> over tasks list and look for one, who owns it. Linux doesn't provide a list
>>> of used namespaces.
>>>  
>>>>> +		pr_err("Can't read mnt_ns id\n");
>>>>> +		return -1;
>>>>> +	}
>>>>> +
>>>>> +	fm = xmalloc(sizeof(*fm));
>>>>> +	if (!fm)
>>>>> +		return -1;
>>>>> +
>>>>> +	fd = open(path, O_PATH|O_DIRECTORY);
>>>>> +	if (fd < 0) {
>>>>> +		pr_perror("Can't open %s\n", path);
>>>>> +		goto err_free;
>>>>> +	}
>>>>> +
>>>>> +	ret = get_fd_mntid(fd, &mnt_id);
>>>>> +	close(fd);
>>>>> +
>>>>> +	if (ret < 0) {
>>>>> +		pr_err("Can't get mnt_id of %s\n", path);
>>>>> +		goto err_free;
>>>>> +	}
>>>>> +
>>>>> +	fm->path = xstrdup(path);
>>>>> +	if (!fm->path)
>>>>> +		goto err_free;
>>>>> +
>>>>> +	fm->ns_id = ns_id;
>>>>> +	fm->mnt_id = mnt_id;
>>>>> +	fm->pid = pid;
>>>>> +	list_add(&fm->list, &forced_mounts_list);
>>>>> +
>>>>> +	return 0;
>>>>> +err_free:
>>>>> +	pr_err("Can't add forced mount\n");
>>>>> +	free(fm);
>>>>> +	return -1;
>>>>> +}
>>>>> +
>>>>> +void cleanup_forced_mounts(void)
>>>>> +{
>>>>> +	struct forced_mount *fm;
>>>>> +	int mnt_fd, ret;
>>>>> +
>>>>> +	list_for_each_entry(fm, &forced_mounts_list, list) {
>>>>> +		ret = switch_ns(fm->pid, &mnt_ns_desc, &mnt_fd);
>>>>> +		if (ret) {
>>>>> +			pr_err("Can't switch to pid's %u mnt_ns\n", fm->pid);
>>>>> +			continue;
>>>>> +		}
>>>>> +
>>>>> +		if (umount(fm->path) < 0)
>>>>> +			pr_perror("Can't umount forced mount %s\n", fm->path);
>>>>> +
>>>>> +		if (restore_ns(mnt_fd, &mnt_ns_desc)) {
>>>>> +			pr_err("cleanup_forced_mounts exiting with wrong mnt_ns\n");
>>>>> +			return;
>>>>> +		}
>>>>> +	}
>>>>> +}
>>>>> +
>>>>>  struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
>>>>>
>>>>> _______________________________________________
>>>>> CRIU mailing list
>>>>> CRIU@openvz.org
>>>>> https://lists.openvz.org/mailman/listinfo/criu
>>>>> .
>>>>>
>>>>
>>> .
>>>
>>
> .
>