[4/5] kdat: Cache kdat object into /run/criu.kdat (v2)

Submitted by Pavel Emelianov on April 26, 2017, 1:37 p.m.

Details

Message ID 7bd36516-b92e-bc7c-e8bd-3e9ca30ecd91@virtuozzo.com
State New
Series "Speed up kdat checks"
Headers show

Commit Message

Pavel Emelianov April 26, 2017, 1:37 p.m.
Doing kerndat checks on every criu start is way too slow. We
need some way to speed this checks up on a particular box.

As suggested by Andre, Dima and Mike let's try to keep the
collected kdat bits into some tmpfs file. Keeping it on
tmpfs would invaludate this cache on every machine reboot.

There have been many suggestions how to generate this file,
my proposal is to create it once the kdat object is filled
by criu, w/o any explicit command. Optionally we can add
'criu kdat --save|--drop' actions to manage this file.

v2:
* don't ignore return code of write() (some glibcs complain)
* unlink tmp file in case rename failed

Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
---
 criu/include/kerndat.h |  6 ++++
 criu/include/magic.h   |  7 +++++
 criu/kerndat.c         | 85 +++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 97 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
index 6b621c6..b1722eb 100644
--- a/criu/include/kerndat.h
+++ b/criu/include/kerndat.h
@@ -29,6 +29,12 @@  enum loginuid_func {
 };
 
 struct kerndat_s {
+	/*
+	 * XXX: This structure is written AS IS into a tmpfs file,
+	 * so any changes in it should result in changed KDAT_MAGIC
+	 * value to avoid loading of bad kdat bits.
+	 */
+	u32 magic;
 	dev_t shmem_dev;
 	int last_cap;
 	u64 zero_page_pfn;
diff --git a/criu/include/magic.h b/criu/include/magic.h
index 059d005..2297930 100644
--- a/criu/include/magic.h
+++ b/criu/include/magic.h
@@ -116,4 +116,11 @@ 
 #define STATS_MAGIC		0x57093306 /* Ostashkov */
 #define IRMAP_CACHE_MAGIC	0x57004059 /* Ivanovo */
 
+/*
+ * Magic for kerndat_s structure. When changing, keep the
+ * old magics to avoid magic re-use.
+ */
+
+#define KDAT_MAGIC_1		0x57023458 /* Torzhok */
+
 #endif /* __CR_MAGIC_H__ */
diff --git a/criu/kerndat.c b/criu/kerndat.c
index 87a7b62..3aca1e4 100644
--- a/criu/kerndat.c
+++ b/criu/kerndat.c
@@ -650,10 +650,93 @@  close:
 	bclose(&f);
 	return ret;
 }
+
+#define KERNDAT_CACHE_FILE	"/run/criu.kdat"
+#define KERNDAT_CACHE_FILE_TMP	"/run/.criu.kdat"
+
+static int kerndat_try_load_cache(void)
+{
+	int fd, ret;
+
+	fd = open(KERNDAT_CACHE_FILE, O_RDONLY);
+	if (fd < 0) {
+		if (errno == ENOENT)
+			return 1;
+
+		pr_perror("Can't load %s", KERNDAT_CACHE_FILE);
+		return -1;
+	}
+
+	ret = read(fd, &kdat, sizeof(kdat));
+	if (ret < 0) {
+		pr_perror("Can'r read kdat cache");
+		return -1;
+	}
+
+	close(fd);
+
+	if (ret != sizeof(kdat) || kdat.magic != KDAT_MAGIC_1) {
+		pr_warn("Stale %s file\n", KERNDAT_CACHE_FILE);
+		unlink(KERNDAT_CACHE_FILE);
+		return 1;
+	}
+
+	pr_info("Loaded criu.kdat cache\n");
+	return 0;
+}
+
+static int kerndat_save_cache(void)
+{
+	int fd, ret;
+	struct statfs s;
+
+	fd = open(KERNDAT_CACHE_FILE_TMP, O_CREAT | O_EXCL | O_WRONLY, 0600);
+	if (fd < 0) {
+		/*
+		 * It can happen that we race with some other criu
+		 * instance. That's OK, just ignore this error and
+		 * proceed.
+		 */
+		if (errno == EEXIST)
+			return 0;
+
+		pr_perror("Can't save %s", KERNDAT_CACHE_FILE_TMP);
+		return -1;
+	}
+
+	if (fstatfs(fd, &s) < 0) {
+		pr_perror("Can't statfs %s", KERNDAT_CACHE_FILE_TMP);
+		return -1;
+	}
+
+	if (s.f_type != TMPFS_MAGIC) {
+		pr_warn("Can't keep criu.kdat on tempfs\n");
+		close(fd);
+		return 0;
+	}
+
+	kdat.magic = KDAT_MAGIC_1;
+	ret = write(fd, &kdat, sizeof(kdat));
+	if (ret != sizeof(kdat))
+		pr_warn("Kerndat cache writing failed.\n");
+	close(fd);
+
+	ret = rename(KERNDAT_CACHE_FILE_TMP, KERNDAT_CACHE_FILE);
+	if (ret < 0) {
+		pr_perror("Can't save %s", KERNDAT_CACHE_FILE);
+		unlink(KERNDAT_CACHE_FILE_TMP);
+	}
+	return ret;
+}
+
 int kerndat_init(void)
 {
 	int ret;
 
+	ret = kerndat_try_load_cache();
+	if (ret <= 0)
+		return ret;
+
 	ret = check_pagemap();
 	if (!ret)
 		ret = kerndat_get_shmemdev();
@@ -691,5 +774,5 @@  int kerndat_init(void)
 	kerndat_lsm();
 	kerndat_mmap_min_addr();
 
-	return ret;
+	return kerndat_save_cache();
 }

Comments

Mike Rapoport April 27, 2017, 5:37 a.m.
On Wed, Apr 26, 2017 at 04:37:34PM +0300, Pavel Emelyanov wrote:
> Doing kerndat checks on every criu start is way too slow. We
> need some way to speed this checks up on a particular box.
> 
> As suggested by Andre, Dima and Mike let's try to keep the
> collected kdat bits into some tmpfs file. Keeping it on
> tmpfs would invaludate this cache on every machine reboot.
> 
> There have been many suggestions how to generate this file,
> my proposal is to create it once the kdat object is filled
> by criu, w/o any explicit command. Optionally we can add
> 'criu kdat --save|--drop' actions to manage this file.
> 
> v2:
> * don't ignore return code of write() (some glibcs complain)
> * unlink tmp file in case rename failed
> 
> Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
> ---
>  criu/include/kerndat.h |  6 ++++
>  criu/include/magic.h   |  7 +++++
>  criu/kerndat.c         | 85 +++++++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 97 insertions(+), 1 deletion(-)
> 
> diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
> index 6b621c6..b1722eb 100644
> --- a/criu/include/kerndat.h
> +++ b/criu/include/kerndat.h
> @@ -29,6 +29,12 @@ enum loginuid_func {
>  };
> 
>  struct kerndat_s {
> +	/*
> +	 * XXX: This structure is written AS IS into a tmpfs file,
> +	 * so any changes in it should result in changed KDAT_MAGIC
> +	 * value to avoid loading of bad kdat bits.
> +	 */
> +	u32 magic;
>  	dev_t shmem_dev;
>  	int last_cap;
>  	u64 zero_page_pfn;
> diff --git a/criu/include/magic.h b/criu/include/magic.h
> index 059d005..2297930 100644
> --- a/criu/include/magic.h
> +++ b/criu/include/magic.h
> @@ -116,4 +116,11 @@
>  #define STATS_MAGIC		0x57093306 /* Ostashkov */
>  #define IRMAP_CACHE_MAGIC	0x57004059 /* Ivanovo */
> 
> +/*
> + * Magic for kerndat_s structure. When changing, keep the
> + * old magics to avoid magic re-use.
> + */
> +
> +#define KDAT_MAGIC_1		0x57023458 /* Torzhok */
> +
>  #endif /* __CR_MAGIC_H__ */
> diff --git a/criu/kerndat.c b/criu/kerndat.c
> index 87a7b62..3aca1e4 100644
> --- a/criu/kerndat.c
> +++ b/criu/kerndat.c
> @@ -650,10 +650,93 @@ close:
>  	bclose(&f);
>  	return ret;
>  }
> +
> +#define KERNDAT_CACHE_FILE	"/run/criu.kdat"
> +#define KERNDAT_CACHE_FILE_TMP	"/run/.criu.kdat"

What about running criu as non-root user?

> +static int kerndat_try_load_cache(void)
> +{
> +	int fd, ret;
> +
> +	fd = open(KERNDAT_CACHE_FILE, O_RDONLY);
> +	if (fd < 0) {
> +		if (errno == ENOENT)
> +			return 1;
> +
> +		pr_perror("Can't load %s", KERNDAT_CACHE_FILE);
> +		return -1;
> +	}
> +
> +	ret = read(fd, &kdat, sizeof(kdat));
> +	if (ret < 0) {
> +		pr_perror("Can'r read kdat cache");
> +		return -1;
> +	}
> +
> +	close(fd);
> +
> +	if (ret != sizeof(kdat) || kdat.magic != KDAT_MAGIC_1) {
> +		pr_warn("Stale %s file\n", KERNDAT_CACHE_FILE);
> +		unlink(KERNDAT_CACHE_FILE);
> +		return 1;
> +	}
> +
> +	pr_info("Loaded criu.kdat cache\n");
> +	return 0;
> +}
> +
> +static int kerndat_save_cache(void)
> +{
> +	int fd, ret;
> +	struct statfs s;
> +
> +	fd = open(KERNDAT_CACHE_FILE_TMP, O_CREAT | O_EXCL | O_WRONLY, 0600);
> +	if (fd < 0) {
> +		/*
> +		 * It can happen that we race with some other criu
> +		 * instance. That's OK, just ignore this error and
> +		 * proceed.
> +		 */
> +		if (errno == EEXIST)
> +			return 0;
> +
> +		pr_perror("Can't save %s", KERNDAT_CACHE_FILE_TMP);
> +		return -1;
> +	}
> +
> +	if (fstatfs(fd, &s) < 0) {
> +		pr_perror("Can't statfs %s", KERNDAT_CACHE_FILE_TMP);
> +		return -1;
> +	}
> +
> +	if (s.f_type != TMPFS_MAGIC) {
> +		pr_warn("Can't keep criu.kdat on tempfs\n");
> +		close(fd);
> +		return 0;
> +	}
> +
> +	kdat.magic = KDAT_MAGIC_1;
> +	ret = write(fd, &kdat, sizeof(kdat));
> +	if (ret != sizeof(kdat))
> +		pr_warn("Kerndat cache writing failed.\n");
> +	close(fd);
> +
> +	ret = rename(KERNDAT_CACHE_FILE_TMP, KERNDAT_CACHE_FILE);
> +	if (ret < 0) {
> +		pr_perror("Can't save %s", KERNDAT_CACHE_FILE);
> +		unlink(KERNDAT_CACHE_FILE_TMP);
> +	}
> +	return ret;
> +}
> +
>  int kerndat_init(void)
>  {
>  	int ret;
> 
> +	ret = kerndat_try_load_cache();
> +	if (ret <= 0)
> +		return ret;
> +
>  	ret = check_pagemap();
>  	if (!ret)
>  		ret = kerndat_get_shmemdev();
> @@ -691,5 +774,5 @@ int kerndat_init(void)
>  	kerndat_lsm();
>  	kerndat_mmap_min_addr();
> 
> -	return ret;
> +	return kerndat_save_cache();
>  }
> -- 
> 2.5.5
> 
> _______________________________________________
> CRIU mailing list
> CRIU@openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
>
Adrian Reber April 27, 2017, 6:11 a.m.
On Thu, Apr 27, 2017 at 08:37:09AM +0300, Mike Rapoport wrote:
> On Wed, Apr 26, 2017 at 04:37:34PM +0300, Pavel Emelyanov wrote:
> > Doing kerndat checks on every criu start is way too slow. We
> > need some way to speed this checks up on a particular box.
> > 
> > As suggested by Andre, Dima and Mike let's try to keep the
> > collected kdat bits into some tmpfs file. Keeping it on
> > tmpfs would invaludate this cache on every machine reboot.
> > 
> > There have been many suggestions how to generate this file,
> > my proposal is to create it once the kdat object is filled
> > by criu, w/o any explicit command. Optionally we can add
> > 'criu kdat --save|--drop' actions to manage this file.
> > 
> > v2:
> > * don't ignore return code of write() (some glibcs complain)
> > * unlink tmp file in case rename failed
> > 
> > Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
> > ---
> >  criu/include/kerndat.h |  6 ++++
> >  criu/include/magic.h   |  7 +++++
> >  criu/kerndat.c         | 85 +++++++++++++++++++++++++++++++++++++++++++++++++-
> >  3 files changed, 97 insertions(+), 1 deletion(-)
> > 
> > diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
> > index 6b621c6..b1722eb 100644
> > --- a/criu/include/kerndat.h
> > +++ b/criu/include/kerndat.h
> > @@ -29,6 +29,12 @@ enum loginuid_func {
> >  };
> > 
> >  struct kerndat_s {
> > +	/*
> > +	 * XXX: This structure is written AS IS into a tmpfs file,
> > +	 * so any changes in it should result in changed KDAT_MAGIC
> > +	 * value to avoid loading of bad kdat bits.
> > +	 */
> > +	u32 magic;
> >  	dev_t shmem_dev;
> >  	int last_cap;
> >  	u64 zero_page_pfn;
> > diff --git a/criu/include/magic.h b/criu/include/magic.h
> > index 059d005..2297930 100644
> > --- a/criu/include/magic.h
> > +++ b/criu/include/magic.h
> > @@ -116,4 +116,11 @@
> >  #define STATS_MAGIC		0x57093306 /* Ostashkov */
> >  #define IRMAP_CACHE_MAGIC	0x57004059 /* Ivanovo */
> > 
> > +/*
> > + * Magic for kerndat_s structure. When changing, keep the
> > + * old magics to avoid magic re-use.
> > + */
> > +
> > +#define KDAT_MAGIC_1		0x57023458 /* Torzhok */
> > +
> >  #endif /* __CR_MAGIC_H__ */
> > diff --git a/criu/kerndat.c b/criu/kerndat.c
> > index 87a7b62..3aca1e4 100644
> > --- a/criu/kerndat.c
> > +++ b/criu/kerndat.c
> > @@ -650,10 +650,93 @@ close:
> >  	bclose(&f);
> >  	return ret;
> >  }
> > +
> > +#define KERNDAT_CACHE_FILE	"/run/criu.kdat"
> > +#define KERNDAT_CACHE_FILE_TMP	"/run/.criu.kdat"
> 
> What about running criu as non-root user?

Could the location be Makefile dependent? For Fedora I would rather put
the file in /run/criu/criu.kdat and create the directory via tmpfiles.d?

		Adrian
Pavel Emelianov April 27, 2017, 1:53 p.m.
On 04/27/2017 09:11 AM, Adrian Reber wrote:
> On Thu, Apr 27, 2017 at 08:37:09AM +0300, Mike Rapoport wrote:
>> On Wed, Apr 26, 2017 at 04:37:34PM +0300, Pavel Emelyanov wrote:
>>> Doing kerndat checks on every criu start is way too slow. We
>>> need some way to speed this checks up on a particular box.
>>>
>>> As suggested by Andre, Dima and Mike let's try to keep the
>>> collected kdat bits into some tmpfs file. Keeping it on
>>> tmpfs would invaludate this cache on every machine reboot.
>>>
>>> There have been many suggestions how to generate this file,
>>> my proposal is to create it once the kdat object is filled
>>> by criu, w/o any explicit command. Optionally we can add
>>> 'criu kdat --save|--drop' actions to manage this file.
>>>
>>> v2:
>>> * don't ignore return code of write() (some glibcs complain)
>>> * unlink tmp file in case rename failed
>>>
>>> Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
>>> ---
>>>  criu/include/kerndat.h |  6 ++++
>>>  criu/include/magic.h   |  7 +++++
>>>  criu/kerndat.c         | 85 +++++++++++++++++++++++++++++++++++++++++++++++++-
>>>  3 files changed, 97 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
>>> index 6b621c6..b1722eb 100644
>>> --- a/criu/include/kerndat.h
>>> +++ b/criu/include/kerndat.h
>>> @@ -29,6 +29,12 @@ enum loginuid_func {
>>>  };
>>>
>>>  struct kerndat_s {
>>> +	/*
>>> +	 * XXX: This structure is written AS IS into a tmpfs file,
>>> +	 * so any changes in it should result in changed KDAT_MAGIC
>>> +	 * value to avoid loading of bad kdat bits.
>>> +	 */
>>> +	u32 magic;
>>>  	dev_t shmem_dev;
>>>  	int last_cap;
>>>  	u64 zero_page_pfn;
>>> diff --git a/criu/include/magic.h b/criu/include/magic.h
>>> index 059d005..2297930 100644
>>> --- a/criu/include/magic.h
>>> +++ b/criu/include/magic.h
>>> @@ -116,4 +116,11 @@
>>>  #define STATS_MAGIC		0x57093306 /* Ostashkov */
>>>  #define IRMAP_CACHE_MAGIC	0x57004059 /* Ivanovo */
>>>
>>> +/*
>>> + * Magic for kerndat_s structure. When changing, keep the
>>> + * old magics to avoid magic re-use.
>>> + */
>>> +
>>> +#define KDAT_MAGIC_1		0x57023458 /* Torzhok */
>>> +
>>>  #endif /* __CR_MAGIC_H__ */
>>> diff --git a/criu/kerndat.c b/criu/kerndat.c
>>> index 87a7b62..3aca1e4 100644
>>> --- a/criu/kerndat.c
>>> +++ b/criu/kerndat.c
>>> @@ -650,10 +650,93 @@ close:
>>>  	bclose(&f);
>>>  	return ret;
>>>  }
>>> +
>>> +#define KERNDAT_CACHE_FILE	"/run/criu.kdat"
>>> +#define KERNDAT_CACHE_FILE_TMP	"/run/.criu.kdat"
>>
>> What about running criu as non-root user?

It, well, doesn't quite work by itself :) But yes, good catch, need to
address that too somehow.

> Could the location be Makefile dependent? For Fedora I would rather put
> the file in /run/criu/criu.kdat

Will do, thanks.

> and create the directory via tmpfiles.d?

I don't know what the tmpfiles.d is :) Would you explain, please?

-- Pavel
Adrian Reber April 27, 2017, 3 p.m.
On Thu, Apr 27, 2017 at 04:53:50PM +0300, Pavel Emelyanov wrote:
> On 04/27/2017 09:11 AM, Adrian Reber wrote:
> > On Thu, Apr 27, 2017 at 08:37:09AM +0300, Mike Rapoport wrote:
> >> On Wed, Apr 26, 2017 at 04:37:34PM +0300, Pavel Emelyanov wrote:
> >>> Doing kerndat checks on every criu start is way too slow. We
> >>> need some way to speed this checks up on a particular box.
> >>>
> >>> As suggested by Andre, Dima and Mike let's try to keep the
> >>> collected kdat bits into some tmpfs file. Keeping it on
> >>> tmpfs would invaludate this cache on every machine reboot.
> >>>
> >>> There have been many suggestions how to generate this file,
> >>> my proposal is to create it once the kdat object is filled
> >>> by criu, w/o any explicit command. Optionally we can add
> >>> 'criu kdat --save|--drop' actions to manage this file.
> >>>
> >>> v2:
> >>> * don't ignore return code of write() (some glibcs complain)
> >>> * unlink tmp file in case rename failed
> >>>
> >>> Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
> >>> ---
> >>>  criu/include/kerndat.h |  6 ++++
> >>>  criu/include/magic.h   |  7 +++++
> >>>  criu/kerndat.c         | 85 +++++++++++++++++++++++++++++++++++++++++++++++++-
> >>>  3 files changed, 97 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
> >>> index 6b621c6..b1722eb 100644
> >>> --- a/criu/include/kerndat.h
> >>> +++ b/criu/include/kerndat.h
> >>> @@ -29,6 +29,12 @@ enum loginuid_func {
> >>>  };
> >>>
> >>>  struct kerndat_s {
> >>> +	/*
> >>> +	 * XXX: This structure is written AS IS into a tmpfs file,
> >>> +	 * so any changes in it should result in changed KDAT_MAGIC
> >>> +	 * value to avoid loading of bad kdat bits.
> >>> +	 */
> >>> +	u32 magic;
> >>>  	dev_t shmem_dev;
> >>>  	int last_cap;
> >>>  	u64 zero_page_pfn;
> >>> diff --git a/criu/include/magic.h b/criu/include/magic.h
> >>> index 059d005..2297930 100644
> >>> --- a/criu/include/magic.h
> >>> +++ b/criu/include/magic.h
> >>> @@ -116,4 +116,11 @@
> >>>  #define STATS_MAGIC		0x57093306 /* Ostashkov */
> >>>  #define IRMAP_CACHE_MAGIC	0x57004059 /* Ivanovo */
> >>>
> >>> +/*
> >>> + * Magic for kerndat_s structure. When changing, keep the
> >>> + * old magics to avoid magic re-use.
> >>> + */
> >>> +
> >>> +#define KDAT_MAGIC_1		0x57023458 /* Torzhok */
> >>> +
> >>>  #endif /* __CR_MAGIC_H__ */
> >>> diff --git a/criu/kerndat.c b/criu/kerndat.c
> >>> index 87a7b62..3aca1e4 100644
> >>> --- a/criu/kerndat.c
> >>> +++ b/criu/kerndat.c
> >>> @@ -650,10 +650,93 @@ close:
> >>>  	bclose(&f);
> >>>  	return ret;
> >>>  }
> >>> +
> >>> +#define KERNDAT_CACHE_FILE	"/run/criu.kdat"
> >>> +#define KERNDAT_CACHE_FILE_TMP	"/run/.criu.kdat"
> >>
> >> What about running criu as non-root user?
> 
> It, well, doesn't quite work by itself :) But yes, good catch, need to
> address that too somehow.
> 
> > Could the location be Makefile dependent? For Fedora I would rather put
> > the file in /run/criu/criu.kdat
> 
> Will do, thanks.
> 
> > and create the directory via tmpfiles.d?
> 
> I don't know what the tmpfiles.d is :) Would you explain, please?

https://www.freedesktop.org/software/systemd/man/tmpfiles.d.html

The distribution package could install a tmpfiles.d file (for systemd
based distribution) which creates a directory in /run with the necessary
access rights. I have lots of files in /usr/lib/tmpfiles.d/ and the
appropriate line for criu could look like

D /run/criu 0700 root root

Working with the git checkout criu could use the file you mentioned
/run/criu.kdat as that would not require to create an additional
directory in /run/. For the packaged version of criu it would be good to
point criu.kdat to /run/criu and install the mentioned tmpfiles.d from
above.

		Adrian