[3/5] files: Rework clean_one_remap to order ghost dirs removal

Submitted by Cyrill Gorcunov on Feb. 22, 2019, 4:02 p.m.

Details

Message ID 20190222160257.27697-4-gorcunov@gmail.com
State New
Series "Fix order problem in ghost files cleanup"
Headers show

Commit Message

Cyrill Gorcunov Feb. 22, 2019, 4:02 p.m.
Ghost files may lay inside ghost directories so we should
order their cleanup to eliminate situation where we remove
toplevel directory which still has not yet cleaned up entries

 | (00.002446) Unlink remap static/unlink_dir.test/0.cr.1.ghost
 | (00.002461) Unlink remap static/unlink_dir.test/1.cr.2.ghost
 | (00.002467) Unlink remap static/unlink_dir.test/gd1/gd2
 | (00.002477) Error (criu/files-reg.c:794): Couldn't unlink remap / static/unlink_dir.test/gd1/gd2: Directory not empty
 | (00.002480) Unlink remap static/unlink_dir.test/gd1/gd2/2.cr.4.ghost
 | (00.002490) Unlink remap static/unlink_dir.test/gd1/gd2/3.cr.5.ghost

Thus upon dentries collection lets order them so they would be
cleaned up in reverse order. Same time if two directories are
pointing to same entry we should not fail if ENOENT returned.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 criu/files-reg.c | 115 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 108 insertions(+), 7 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/files-reg.c b/criu/files-reg.c
index 9d5e1d743117..0b3f6493f718 100644
--- a/criu/files-reg.c
+++ b/criu/files-reg.c
@@ -43,6 +43,7 @@ 
 
 #include "files-reg.h"
 #include "plugin.h"
+#include "criu-log.h"
 
 int setfsuid(uid_t fsuid);
 int setfsgid(gid_t fsuid);
@@ -681,6 +682,89 @@  static int prepare_one_remap(struct remap_info *ri)
 	return ret;
 }
 
+static int remap_info_cmp(const void *_a, const void *_b)
+{
+	struct remap_info *a = ((struct remap_info **)_a)[0];
+	struct remap_info *b = ((struct remap_info **)_b)[0];
+	int32_t mnt_id_a = a->rfi->rfe->mnt_id;
+	int32_t mnt_id_b = b->rfi->rfe->mnt_id;
+
+	/*
+	 * If entries are laying on same mount, which is
+	 * a common case, we can safely use paths comparision.
+	 */
+	if (mnt_id_a == mnt_id_b)
+		return -strcmp(a->rfi->path, b->rfi->path);
+
+	/*
+	 * Otherwise simply order by mnt_id order, we just
+	 * can't do anything beter (it is early stage where
+	 * mounts are not yet processed).
+	 */
+	return mnt_id_a > mnt_id_b ? 1 : -1;
+}
+
+/*
+ * Ghost directories may carry ghost files but file descriptors
+ * are unordered in compare with this ghost paths, thus on cleanup
+ * we might try to remove the directory itself without waiting
+ * all files (and subdirectories) are cleaned up first.
+ *
+ * What we do here is we're move all ghost dirs into own list,
+ * sort them (to address subdirectories order) and move back
+ * to the end of the remap list.
+ */
+static int order_remap_dirs(void)
+{
+	struct remap_info *ri, *tmp;
+	struct remap_info **p, **t;
+	size_t nr_remaps = 0, i;
+	LIST_HEAD(ghost_dirs);
+
+	list_for_each_entry_safe(ri, tmp, &remaps, list) {
+		if (ri->rpe->remap_type != REMAP_TYPE__GHOST)
+			continue;
+		if (!ri->rfi->remap->is_dir)
+			continue;
+		list_move_tail(&ri->list, &ghost_dirs);
+		nr_remaps++;
+	}
+
+	if (list_empty(&ghost_dirs))
+		return 0;
+
+	p = t = xmalloc(sizeof(*p) * nr_remaps);
+	if (!p) {
+		list_splice_tail_init(&ghost_dirs, &remaps);
+		return -ENOMEM;
+	}
+
+	list_for_each_entry_safe(ri, tmp, &ghost_dirs, list) {
+		list_del_init(&ri->list);
+		p[0] = ri, p++;
+	}
+
+	qsort(t, nr_remaps, sizeof(t[0]), remap_info_cmp);
+
+	for (i = 0; i < nr_remaps; i++) {
+		list_add_tail(&t[i]->list, &remaps);
+		pr_debug("remap: ghost mov dir %s\n", t[i]->rfi->path);
+	}
+
+	if (!pr_quelled(LOG_DEBUG)) {
+		list_for_each_entry_safe(ri, tmp, &remaps, list) {
+			if (ri->rpe->remap_type != REMAP_TYPE__GHOST)
+				continue;
+			pr_debug("remap: ghost ord %3s %s\n",
+				 ri->rfi->remap->is_dir ? "dir" : "fil",
+				 ri->rfi->path);
+		}
+	}
+
+	xfree(t);
+	return 0;
+}
+
 int prepare_remaps(void)
 {
 	struct remap_info *ri;
@@ -696,7 +780,21 @@  int prepare_remaps(void)
 			break;
 	}
 
-	return ret;
+	return ret ? : order_remap_dirs();
+}
+
+static int clean_ghost_dir(int rmntns_root, struct remap_info *ri)
+{
+	struct file_remap *remap = ri->rfi->remap;
+	int ret = unlinkat(rmntns_root, remap->rpath, AT_REMOVEDIR);
+	/*
+	 * When deleting ghost directories here is two issues:
+	 * - names might duplicate, so we may receive ENOENT
+	 *   and should not treat it as an error
+	 */
+	if (ret)
+		return errno == ENOENT ? 0 : -errno;
+	return 0;
 }
 
 static int clean_one_remap(struct remap_info *ri)
@@ -733,15 +831,17 @@  static int clean_one_remap(struct remap_info *ri)
 
 	pr_info("Unlink remap %s\n", remap->rpath);
 
-	ret = unlinkat(rmntns_root, remap->rpath, remap->is_dir ? AT_REMOVEDIR : 0);
-	if (ret < 0) {
-		close(rmntns_root);
-		pr_perror("Couldn't unlink remap %s %s", path, remap->rpath);
+	if (remap->is_dir)
+		ret = clean_ghost_dir(rmntns_root, ri);
+	else
+		ret = unlinkat(rmntns_root, remap->rpath, 0);
+
+	if (ret) {
+		pr_perror("Couldn't unlink remap %d %s", rmntns_root, remap->rpath);
 		return -1;
 	}
-	close(rmntns_root);
-	remap->rpath[0] = 0;
 
+	remap->rpath[0] = 0;
 	return 0;
 }
 
@@ -767,6 +867,7 @@  static struct collect_image_info remap_cinfo = {
 	.pb_type = PB_REMAP_FPATH,
 	.priv_size = sizeof(struct remap_info),
 	.collect = collect_one_remap,
+	.flags = COLLECT_SHARED,
 };
 
 /* Tiny files don't need to generate chunks in ghost image. */

Comments

Pavel Tikhomirov Feb. 25, 2019, 2:20 p.m.
On 2/22/19 7:02 PM, Cyrill Gorcunov wrote:
> Ghost files may lay inside ghost directories so we should
> order their cleanup to eliminate situation where we remove
> toplevel directory which still has not yet cleaned up entries
> 
>   | (00.002446) Unlink remap static/unlink_dir.test/0.cr.1.ghost
>   | (00.002461) Unlink remap static/unlink_dir.test/1.cr.2.ghost
>   | (00.002467) Unlink remap static/unlink_dir.test/gd1/gd2
>   | (00.002477) Error (criu/files-reg.c:794): Couldn't unlink remap / static/unlink_dir.test/gd1/gd2: Directory not empty
>   | (00.002480) Unlink remap static/unlink_dir.test/gd1/gd2/2.cr.4.ghost
>   | (00.002490) Unlink remap static/unlink_dir.test/gd1/gd2/3.cr.5.ghost
> 
> Thus upon dentries collection lets order them so they would be
> cleaned up in reverse order. Same time if two directories are
> pointing to same entry we should not fail if ENOENT returned.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
> ---
>   criu/files-reg.c | 115 ++++++++++++++++++++++++++++++++++++++++++++---
>   1 file changed, 108 insertions(+), 7 deletions(-)
> 
> diff --git a/criu/files-reg.c b/criu/files-reg.c
> index 9d5e1d743117..0b3f6493f718 100644
> --- a/criu/files-reg.c
> +++ b/criu/files-reg.c
> @@ -43,6 +43,7 @@
>   
>   #include "files-reg.h"
>   #include "plugin.h"
> +#include "criu-log.h"
>   
>   int setfsuid(uid_t fsuid);
>   int setfsgid(gid_t fsuid);
> @@ -681,6 +682,89 @@ static int prepare_one_remap(struct remap_info *ri)
>   	return ret;
>   }
>   
> +static int remap_info_cmp(const void *_a, const void *_b)
> +{
> +	struct remap_info *a = ((struct remap_info **)_a)[0];
> +	struct remap_info *b = ((struct remap_info **)_b)[0];
> +	int32_t mnt_id_a = a->rfi->rfe->mnt_id;
> +	int32_t mnt_id_b = b->rfi->rfe->mnt_id;
> +
> +	/*
> +	 * If entries are laying on same mount, which is
> +	 * a common case, we can safely use paths comparision.
> +	 */
> +	if (mnt_id_a == mnt_id_b)
> +		return -strcmp(a->rfi->path, b->rfi->path);
> +
> +	/*
> +	 * Otherwise simply order by mnt_id order, we just
> +	 * can't do anything beter (it is early stage where
> +	 * mounts are not yet processed).

What exact problem is with different mounts, can you show an example 
paths? (AFAICS Mount trees are already built at these point)

> +	 */
> +	return mnt_id_a > mnt_id_b ? 1 : -1;
> +}
> +
> +/*
> + * Ghost directories may carry ghost files but file descriptors
> + * are unordered in compare with this ghost paths, thus on cleanup
> + * we might try to remove the directory itself without waiting
> + * all files (and subdirectories) are cleaned up first.
> + *
> + * What we do here is we're move all ghost dirs into own list,
> + * sort them (to address subdirectories order) and move back
> + * to the end of the remap list.
> + */
> +static int order_remap_dirs(void)
> +{
> +	struct remap_info *ri, *tmp;
> +	struct remap_info **p, **t;
> +	size_t nr_remaps = 0, i;
> +	LIST_HEAD(ghost_dirs);
> +
> +	list_for_each_entry_safe(ri, tmp, &remaps, list) {
> +		if (ri->rpe->remap_type != REMAP_TYPE__GHOST)
> +			continue;
> +		if (!ri->rfi->remap->is_dir)
> +			continue;
> +		list_move_tail(&ri->list, &ghost_dirs);
> +		nr_remaps++;
> +	}
> +
> +	if (list_empty(&ghost_dirs))
> +		return 0;
> +
> +	p = t = xmalloc(sizeof(*p) * nr_remaps);
> +	if (!p) {
> +		list_splice_tail_init(&ghost_dirs, &remaps);
> +		return -ENOMEM;
> +	}
> +
> +	list_for_each_entry_safe(ri, tmp, &ghost_dirs, list) {
> +		list_del_init(&ri->list);
> +		p[0] = ri, p++;
> +	}
> +
> +	qsort(t, nr_remaps, sizeof(t[0]), remap_info_cmp);
> +
> +	for (i = 0; i < nr_remaps; i++) {
> +		list_add_tail(&t[i]->list, &remaps);
> +		pr_debug("remap: ghost mov dir %s\n", t[i]->rfi->path);
> +	}
> +
> +	if (!pr_quelled(LOG_DEBUG)) {
> +		list_for_each_entry_safe(ri, tmp, &remaps, list) {
> +			if (ri->rpe->remap_type != REMAP_TYPE__GHOST)
> +				continue;
> +			pr_debug("remap: ghost ord %3s %s\n",
> +				 ri->rfi->remap->is_dir ? "dir" : "fil",
> +				 ri->rfi->path);
> +		}
> +	}
> +
> +	xfree(t);
> +	return 0;
> +}
> +
>   int prepare_remaps(void)
>   {
>   	struct remap_info *ri;
> @@ -696,7 +780,21 @@ int prepare_remaps(void)
>   			break;
>   	}
>   
> -	return ret;
> +	return ret ? : order_remap_dirs();
> +}
> +
> +static int clean_ghost_dir(int rmntns_root, struct remap_info *ri)
> +{
> +	struct file_remap *remap = ri->rfi->remap;
> +	int ret = unlinkat(rmntns_root, remap->rpath, AT_REMOVEDIR);
> +	/*
> +	 * When deleting ghost directories here is two issues:
> +	 * - names might duplicate, so we may receive ENOENT
> +	 *   and should not treat it as an error
> +	 */
> +	if (ret)
> +		return errno == ENOENT ? 0 : -errno;
> +	return 0;
>   }
>   
>   static int clean_one_remap(struct remap_info *ri)
> @@ -733,15 +831,17 @@ static int clean_one_remap(struct remap_info *ri)
>   
>   	pr_info("Unlink remap %s\n", remap->rpath);
>   
> -	ret = unlinkat(rmntns_root, remap->rpath, remap->is_dir ? AT_REMOVEDIR : 0);
> -	if (ret < 0) {
> -		close(rmntns_root);
> -		pr_perror("Couldn't unlink remap %s %s", path, remap->rpath);
> +	if (remap->is_dir)
> +		ret = clean_ghost_dir(rmntns_root, ri);
> +	else
> +		ret = unlinkat(rmntns_root, remap->rpath, 0);
> +
> +	if (ret) {
> +		pr_perror("Couldn't unlink remap %d %s", rmntns_root, remap->rpath);
>   		return -1;
>   	}
> -	close(rmntns_root);

Don't you leak rmntns_root fd here?

> -	remap->rpath[0] = 0;
>   
> +	remap->rpath[0] = 0;
>   	return 0;
>   }
>   
> @@ -767,6 +867,7 @@ static struct collect_image_info remap_cinfo = {
>   	.pb_type = PB_REMAP_FPATH,
>   	.priv_size = sizeof(struct remap_info),
>   	.collect = collect_one_remap,
> +	.flags = COLLECT_SHARED,
>   };
>   
>   /* Tiny files don't need to generate chunks in ghost image. */
>
Cyrill Gorcunov Feb. 25, 2019, 2:31 p.m.
On Mon, Feb 25, 2019 at 02:20:11PM +0000, Pavel Tikhomirov wrote:
...
> > +
> > +	/*
> > +	 * Otherwise simply order by mnt_id order, we just
> > +	 * can't do anything beter (it is early stage where
> > +	 * mounts are not yet processed).
> 
> What exact problem is with different mounts, can you show an example 
> paths? (AFAICS Mount trees are already built at these point)

Can we have same "paths" under different mounts? I think we can.
Thus we can order only paths which are under same mount.
And no, at this moment they are not yet constructed.

> >   
> > -	ret = unlinkat(rmntns_root, remap->rpath, remap->is_dir ? AT_REMOVEDIR : 0);
> > -	if (ret < 0) {
> > -		close(rmntns_root);
> > -		pr_perror("Couldn't unlink remap %s %s", path, remap->rpath);
> > +	if (remap->is_dir)
> > +		ret = clean_ghost_dir(rmntns_root, ri);
> > +	else
> > +		ret = unlinkat(rmntns_root, remap->rpath, 0);
> > +
> > +	if (ret) {
> > +		pr_perror("Couldn't unlink remap %d %s", rmntns_root, remap->rpath);
> >   		return -1;
> >   	}
> > -	close(rmntns_root);
> 
> Don't you leak rmntns_root fd here?

Sharp eyes! Thanks, indeed! Will update.

	Cyrill
Pavel Tikhomirov Feb. 25, 2019, 3:46 p.m.
On 2/25/19 5:31 PM, Cyrill Gorcunov wrote:
> On Mon, Feb 25, 2019 at 02:20:11PM +0000, Pavel Tikhomirov wrote:
> ...
>>> +
>>> +	/*
>>> +	 * Otherwise simply order by mnt_id order, we just
>>> +	 * can't do anything beter (it is early stage where
>>> +	 * mounts are not yet processed).
>>
>> What exact problem is with different mounts, can you show an example
>> paths? (AFAICS Mount trees are already built at these point)
> 
> Can we have same "paths" under different mounts? I think we can.

AFAICS For now we can't handle open files/directories on overmounted 
mounts. Thus "same paths" will fail on dump in the check in 
dump_one_reg_file which I've recently introduced to protect from wrongly 
restoring such files as ghost-files =)

> Thus we can order only paths which are under same mount

But you are right, when we will support restoring files on overmounted 
mounts we will need something like these.

> And no, at this moment they are not yet constructed.

Mount trees are created in:

restore_task_with_children (for root task)
  prepare_namespace
   prepare_mnt_ns

and we are in:

restore_task_with_children
  root_prepare_chared
   prepare_remaps
    order_remap_dirs

which is called later.

> 
>>>    
>>> -	ret = unlinkat(rmntns_root, remap->rpath, remap->is_dir ? AT_REMOVEDIR : 0);
>>> -	if (ret < 0) {
>>> -		close(rmntns_root);
>>> -		pr_perror("Couldn't unlink remap %s %s", path, remap->rpath);
>>> +	if (remap->is_dir)
>>> +		ret = clean_ghost_dir(rmntns_root, ri);
>>> +	else
>>> +		ret = unlinkat(rmntns_root, remap->rpath, 0);
>>> +
>>> +	if (ret) {
>>> +		pr_perror("Couldn't unlink remap %d %s", rmntns_root, remap->rpath);
>>>    		return -1;
>>>    	}
>>> -	close(rmntns_root);
>>
>> Don't you leak rmntns_root fd here?
> 
> Sharp eyes! Thanks, indeed! Will update.
> 
> 	Cyrill
>
Cyrill Gorcunov Feb. 26, 2019, 8:23 a.m.
On Mon, Feb 25, 2019 at 03:46:41PM +0000, Pavel Tikhomirov wrote:
> 
> > And no, at this moment they are not yet constructed.
> 
> Mount trees are created in:
> 
> restore_task_with_children (for root task)
>   prepare_namespace
>    prepare_mnt_ns
> 
> and we are in:
> 
> restore_task_with_children
>   root_prepare_chared
>    prepare_remaps
>     order_remap_dirs
> 
> which is called later.

Indeed. I'll drop the comment.