stats/pid-reuse: put stats to image directory instead of cwd

Submitted by Pavel Tikhomirov on March 26, 2018, 8:12 a.m.

Details

Message ID 20180326081226.13473-1-ptikhomirov@virtuozzo.com
State Rejected
Series "stats/pid-reuse: put stats to image directory instead of cwd"
Headers show

Commit Message

Pavel Tikhomirov March 26, 2018, 8:12 a.m.
Statistics for a special dump/pre-dump/restore action should be
collected in the images directory of these action. We need these for
pid-reuse detection as we need to read the stats of parent predump.

Now then work-dir option is set statistics image is put to the work-dir
and on each iteration these image will replace previos image.

For backward compatibility with older versions of p.haul create a
symlink in cwd so that p.haul still has access to the latest stats
image. Yet I plan to make p.haul work with stats through images dir.

https://jira.sw.ru/browse/PSBM-82864
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 criu/stats.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/criu/stats.c b/criu/stats.c
index d344ad336..2c0f8bda4 100644
--- a/criu/stats.c
+++ b/criu/stats.c
@@ -1,6 +1,7 @@ 
 #include <unistd.h>
 #include <fcntl.h>
 #include <sys/time.h>
+#include <sys/stat.h>
 #include "int.h"
 #include "atomic.h"
 #include "cr_options.h"
@@ -157,6 +158,53 @@  static void display_stats(int what, StatsEntry *stats)
 		return;
 }
 
+static int fd_to_path(int fd, char *buf, int size) {
+	char proc_self_fd_path[PATH_MAX];
+	int ret;
+
+	if (snprintf(proc_self_fd_path, PATH_MAX, "/proc/self/fd/%d", fd) < 0) {
+		pr_perror("Failed to format /proc/self/fd/* path");
+		return -1;
+	}
+
+	ret = readlink(proc_self_fd_path, buf, size-1);
+	if (ret < 0) {
+		pr_perror("Failed to readlink %s path", proc_self_fd_path);
+		return -1;
+	}
+	buf[ret] = '\0';
+
+	return 0;
+}
+
+static void symlink_stats(char *target, char *name)
+{
+	char link_path[PATH_MAX];
+	struct stat st;
+	int ret;
+
+	if (snprintf(link_path, PATH_MAX, imgset_template[CR_FD_STATS].fmt, name) < 0)
+		goto exit;
+
+	ret = lstat(link_path, &st);
+	if (ret == -1 && errno != ENOENT)
+		goto exit;
+	else if (ret == 0) {
+		/* Real image occupies these location, no symlink needed */
+		if (!S_ISLNK(st.st_mode))
+			return;
+
+		/* Unlink symlink of previous iteration */
+		if (unlink(link_path) == -1)
+			goto exit;
+	}
+
+	if(!symlink(target, link_path))
+		return;
+exit:
+	pr_warn("Error when trying to create stats symlink");
+}
+
 void write_stats(int what)
 {
 	StatsEntry stats = STATS_ENTRY__INIT;
@@ -203,9 +251,20 @@  void write_stats(int what)
 	} else
 		return;
 
-	img = open_image_at(AT_FDCWD, CR_FD_STATS, O_DUMP, name);
+	img = open_image(CR_FD_STATS, O_DUMP, name);
 	if (img) {
+		char image_path[PATH_MAX];
+
 		pb_write_one(img, &stats, PB_STATS);
+
+		/*
+		 * Backward compatibility: old p.haul whants to read stats
+		 * from working directory, but not from images directory, so
+		 * create a symlink for it.
+		 */
+		if (!fd_to_path(img->fd, image_path, PATH_MAX))
+			symlink_stats(image_path, name);
+
 		close_image(img);
 	}
 

Comments

Pavel Emelianov March 26, 2018, 8:54 a.m.
On 03/26/2018 11:12 AM, Pavel Tikhomirov wrote:
> Statistics for a special dump/pre-dump/restore action should be
> collected in the images directory of these action. We need these for
> pid-reuse detection as we need to read the stats of parent predump.

What's wrong with checking pid-reuse form stats sitting in workdir?

> Now then work-dir option is set statistics image is put to the work-dir
> and on each iteration these image will replace previos image.
> 
> For backward compatibility with older versions of p.haul create a
> symlink in cwd so that p.haul still has access to the latest stats
> image. Yet I plan to make p.haul work with stats through images dir.
> 
> https://jira.sw.ru/browse/PSBM-82864
> Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
> ---
>  criu/stats.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 60 insertions(+), 1 deletion(-)
> 
> diff --git a/criu/stats.c b/criu/stats.c
> index d344ad336..2c0f8bda4 100644
> --- a/criu/stats.c
> +++ b/criu/stats.c
> @@ -1,6 +1,7 @@
>  #include <unistd.h>
>  #include <fcntl.h>
>  #include <sys/time.h>
> +#include <sys/stat.h>
>  #include "int.h"
>  #include "atomic.h"
>  #include "cr_options.h"
> @@ -157,6 +158,53 @@ static void display_stats(int what, StatsEntry *stats)
>  		return;
>  }
>  
> +static int fd_to_path(int fd, char *buf, int size) {
> +	char proc_self_fd_path[PATH_MAX];
> +	int ret;
> +
> +	if (snprintf(proc_self_fd_path, PATH_MAX, "/proc/self/fd/%d", fd) < 0) {
> +		pr_perror("Failed to format /proc/self/fd/* path");
> +		return -1;
> +	}
> +
> +	ret = readlink(proc_self_fd_path, buf, size-1);
> +	if (ret < 0) {
> +		pr_perror("Failed to readlink %s path", proc_self_fd_path);
> +		return -1;
> +	}
> +	buf[ret] = '\0';
> +
> +	return 0;
> +}
> +
> +static void symlink_stats(char *target, char *name)
> +{
> +	char link_path[PATH_MAX];
> +	struct stat st;
> +	int ret;
> +
> +	if (snprintf(link_path, PATH_MAX, imgset_template[CR_FD_STATS].fmt, name) < 0)
> +		goto exit;
> +
> +	ret = lstat(link_path, &st);
> +	if (ret == -1 && errno != ENOENT)
> +		goto exit;
> +	else if (ret == 0) {
> +		/* Real image occupies these location, no symlink needed */
> +		if (!S_ISLNK(st.st_mode))
> +			return;
> +
> +		/* Unlink symlink of previous iteration */
> +		if (unlink(link_path) == -1)
> +			goto exit;
> +	}
> +
> +	if(!symlink(target, link_path))
> +		return;
> +exit:
> +	pr_warn("Error when trying to create stats symlink");
> +}
> +
>  void write_stats(int what)
>  {
>  	StatsEntry stats = STATS_ENTRY__INIT;
> @@ -203,9 +251,20 @@ void write_stats(int what)
>  	} else
>  		return;
>  
> -	img = open_image_at(AT_FDCWD, CR_FD_STATS, O_DUMP, name);
> +	img = open_image(CR_FD_STATS, O_DUMP, name);
>  	if (img) {
> +		char image_path[PATH_MAX];
> +
>  		pb_write_one(img, &stats, PB_STATS);
> +
> +		/*
> +		 * Backward compatibility: old p.haul whants to read stats
> +		 * from working directory, but not from images directory, so
> +		 * create a symlink for it.
> +		 */
> +		if (!fd_to_path(img->fd, image_path, PATH_MAX))
> +			symlink_stats(image_path, name);
> +
>  		close_image(img);
>  	}
>  
>
Pavel Tikhomirov March 26, 2018, 9:16 a.m.
On 03/26/2018 11:54 AM, Pavel Emelyanov wrote:
> On 03/26/2018 11:12 AM, Pavel Tikhomirov wrote:
>> Statistics for a special dump/pre-dump/restore action should be
>> collected in the images directory of these action. We need these for
>> pid-reuse detection as we need to read the stats of parent predump.
> 
> What's wrong with checking pid-reuse form stats sitting in workdir?

We can read previous dump statistics from workdir before 
cr_*dump_finish, I agree with that, it is easy.

But Kirill says that "writing image outside of image directory is 
architectural problem", actually I think these way too for at least two 
reasons:

1) When stats image is in images dir we can latter analize stats for 
each iteration if we want it. Now doing migration in VZ7 we have only 
stats of last dump in the end.

2) If for instance stats image was not created, we will see for which 
iteration it had happened (there will be no file in directory). Now we 
wrongly and silently reuse stats of irrelevant older iteration, and will 
do wrong assumptions based on wrong data.

> 
>> Now then work-dir option is set statistics image is put to the work-dir
>> and on each iteration these image will replace previos image.
>>
>> For backward compatibility with older versions of p.haul create a
>> symlink in cwd so that p.haul still has access to the latest stats
>> image. Yet I plan to make p.haul work with stats through images dir.
>>
>> https://jira.sw.ru/browse/PSBM-82864
>> Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
>> ---
>>   criu/stats.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 60 insertions(+), 1 deletion(-)
>>
>> diff --git a/criu/stats.c b/criu/stats.c
>> index d344ad336..2c0f8bda4 100644
>> --- a/criu/stats.c
>> +++ b/criu/stats.c
>> @@ -1,6 +1,7 @@
>>   #include <unistd.h>
>>   #include <fcntl.h>
>>   #include <sys/time.h>
>> +#include <sys/stat.h>
>>   #include "int.h"
>>   #include "atomic.h"
>>   #include "cr_options.h"
>> @@ -157,6 +158,53 @@ static void display_stats(int what, StatsEntry *stats)
>>   		return;
>>   }
>>   
>> +static int fd_to_path(int fd, char *buf, int size) {
>> +	char proc_self_fd_path[PATH_MAX];
>> +	int ret;
>> +
>> +	if (snprintf(proc_self_fd_path, PATH_MAX, "/proc/self/fd/%d", fd) < 0) {
>> +		pr_perror("Failed to format /proc/self/fd/* path");
>> +		return -1;
>> +	}
>> +
>> +	ret = readlink(proc_self_fd_path, buf, size-1);
>> +	if (ret < 0) {
>> +		pr_perror("Failed to readlink %s path", proc_self_fd_path);
>> +		return -1;
>> +	}
>> +	buf[ret] = '\0';
>> +
>> +	return 0;
>> +}
>> +
>> +static void symlink_stats(char *target, char *name)
>> +{
>> +	char link_path[PATH_MAX];
>> +	struct stat st;
>> +	int ret;
>> +
>> +	if (snprintf(link_path, PATH_MAX, imgset_template[CR_FD_STATS].fmt, name) < 0)
>> +		goto exit;
>> +
>> +	ret = lstat(link_path, &st);
>> +	if (ret == -1 && errno != ENOENT)
>> +		goto exit;
>> +	else if (ret == 0) {
>> +		/* Real image occupies these location, no symlink needed */
>> +		if (!S_ISLNK(st.st_mode))
>> +			return;
>> +
>> +		/* Unlink symlink of previous iteration */
>> +		if (unlink(link_path) == -1)
>> +			goto exit;
>> +	}
>> +
>> +	if(!symlink(target, link_path))
>> +		return;
>> +exit:
>> +	pr_warn("Error when trying to create stats symlink");
>> +}
>> +
>>   void write_stats(int what)
>>   {
>>   	StatsEntry stats = STATS_ENTRY__INIT;
>> @@ -203,9 +251,20 @@ void write_stats(int what)
>>   	} else
>>   		return;
>>   
>> -	img = open_image_at(AT_FDCWD, CR_FD_STATS, O_DUMP, name);
>> +	img = open_image(CR_FD_STATS, O_DUMP, name);
>>   	if (img) {
>> +		char image_path[PATH_MAX];
>> +
>>   		pb_write_one(img, &stats, PB_STATS);
>> +
>> +		/*
>> +		 * Backward compatibility: old p.haul whants to read stats
>> +		 * from working directory, but not from images directory, so
>> +		 * create a symlink for it.
>> +		 */
>> +		if (!fd_to_path(img->fd, image_path, PATH_MAX))
>> +			symlink_stats(image_path, name);
>> +
>>   		close_image(img);
>>   	}
>>   
>>
>
Pavel Emelianov March 26, 2018, 10:43 a.m.
On 03/26/2018 12:16 PM, Pavel Tikhomirov wrote:
> On 03/26/2018 11:54 AM, Pavel Emelyanov wrote:
>> On 03/26/2018 11:12 AM, Pavel Tikhomirov wrote:
>>> Statistics for a special dump/pre-dump/restore action should be
>>> collected in the images directory of these action. We need these for
>>> pid-reuse detection as we need to read the stats of parent predump.
>>
>> What's wrong with checking pid-reuse form stats sitting in workdir?
> 
> We can read previous dump statistics from workdir before 
> cr_*dump_finish, I agree with that, it is easy.
> 
> But Kirill says that "writing image outside of image directory is 

Stats file is not an image. We just use PB format for it and reuse existing
image-writing machinery.

> architectural problem", actually I think these way too for at least two 
> reasons:
> 
> 1) When stats image is in images dir we can latter analize stats for 
> each iteration if we want it. Now doing migration in VZ7 we have only 
> stats of last dump in the end.

The same problem's for log files (I hope nobody has moved them into images dir
so far), isn't it? Use separate workdir for each iteration.

> 2) If for instance stats image was not created,

Why can this happen?

> we will see for which > iteration it had happened (there will be no file in directory). Now we 
> wrongly and silently reuse stats of irrelevant older iteration, and will 
> do wrong assumptions based on wrong data.
> 
>>
>>> Now then work-dir option is set statistics image is put to the work-dir
>>> and on each iteration these image will replace previos image.
>>>
>>> For backward compatibility with older versions of p.haul create a
>>> symlink in cwd so that p.haul still has access to the latest stats
>>> image. Yet I plan to make p.haul work with stats through images dir.
>>>
>>> https://jira.sw.ru/browse/PSBM-82864
>>> Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
>>> ---
>>>   criu/stats.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>>   1 file changed, 60 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/criu/stats.c b/criu/stats.c
>>> index d344ad336..2c0f8bda4 100644
>>> --- a/criu/stats.c
>>> +++ b/criu/stats.c
>>> @@ -1,6 +1,7 @@
>>>   #include <unistd.h>
>>>   #include <fcntl.h>
>>>   #include <sys/time.h>
>>> +#include <sys/stat.h>
>>>   #include "int.h"
>>>   #include "atomic.h"
>>>   #include "cr_options.h"
>>> @@ -157,6 +158,53 @@ static void display_stats(int what, StatsEntry *stats)
>>>   		return;
>>>   }
>>>   
>>> +static int fd_to_path(int fd, char *buf, int size) {
>>> +	char proc_self_fd_path[PATH_MAX];
>>> +	int ret;
>>> +
>>> +	if (snprintf(proc_self_fd_path, PATH_MAX, "/proc/self/fd/%d", fd) < 0) {
>>> +		pr_perror("Failed to format /proc/self/fd/* path");
>>> +		return -1;
>>> +	}
>>> +
>>> +	ret = readlink(proc_self_fd_path, buf, size-1);
>>> +	if (ret < 0) {
>>> +		pr_perror("Failed to readlink %s path", proc_self_fd_path);
>>> +		return -1;
>>> +	}
>>> +	buf[ret] = '\0';
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void symlink_stats(char *target, char *name)
>>> +{
>>> +	char link_path[PATH_MAX];
>>> +	struct stat st;
>>> +	int ret;
>>> +
>>> +	if (snprintf(link_path, PATH_MAX, imgset_template[CR_FD_STATS].fmt, name) < 0)
>>> +		goto exit;
>>> +
>>> +	ret = lstat(link_path, &st);
>>> +	if (ret == -1 && errno != ENOENT)
>>> +		goto exit;
>>> +	else if (ret == 0) {
>>> +		/* Real image occupies these location, no symlink needed */
>>> +		if (!S_ISLNK(st.st_mode))
>>> +			return;
>>> +
>>> +		/* Unlink symlink of previous iteration */
>>> +		if (unlink(link_path) == -1)
>>> +			goto exit;
>>> +	}
>>> +
>>> +	if(!symlink(target, link_path))
>>> +		return;
>>> +exit:
>>> +	pr_warn("Error when trying to create stats symlink");
>>> +}
>>> +
>>>   void write_stats(int what)
>>>   {
>>>   	StatsEntry stats = STATS_ENTRY__INIT;
>>> @@ -203,9 +251,20 @@ void write_stats(int what)
>>>   	} else
>>>   		return;
>>>   
>>> -	img = open_image_at(AT_FDCWD, CR_FD_STATS, O_DUMP, name);
>>> +	img = open_image(CR_FD_STATS, O_DUMP, name);
>>>   	if (img) {
>>> +		char image_path[PATH_MAX];
>>> +
>>>   		pb_write_one(img, &stats, PB_STATS);
>>> +
>>> +		/*
>>> +		 * Backward compatibility: old p.haul whants to read stats
>>> +		 * from working directory, but not from images directory, so
>>> +		 * create a symlink for it.
>>> +		 */
>>> +		if (!fd_to_path(img->fd, image_path, PATH_MAX))
>>> +			symlink_stats(image_path, name);
>>> +
>>>   		close_image(img);
>>>   	}
>>>   
>>>
>>
>
Cyrill Gorcunov March 26, 2018, 11:02 a.m.
On Mon, Mar 26, 2018 at 01:43:11PM +0300, Pavel Emelyanov wrote:
> On 03/26/2018 12:16 PM, Pavel Tikhomirov wrote:
> > On 03/26/2018 11:54 AM, Pavel Emelyanov wrote:
> >> On 03/26/2018 11:12 AM, Pavel Tikhomirov wrote:
> >>> Statistics for a special dump/pre-dump/restore action should be
> >>> collected in the images directory of these action. We need these for
> >>> pid-reuse detection as we need to read the stats of parent predump.
> >>
> >> What's wrong with checking pid-reuse form stats sitting in workdir?
> > 
> > We can read previous dump statistics from workdir before 
> > cr_*dump_finish, I agree with that, it is easy.
> > 
> > But Kirill says that "writing image outside of image directory is 
> 
> Stats file is not an image. We just use PB format for it and reuse existing
> image-writing machinery.

Stats file is context dependant and logically bound to images. At least
keeping it inside image drectory gives you confidence the data written
there belongs exactly to the images produced. Because pid reuse uses
data from stats to operate on memory engine it become not longer
a random stat just to view, but mandatory data required for proper
restore.

If you still prefer stats file to be on its own then I think we
need use some other image to carry this information.

> > architectural problem", actually I think these way too for at least two 
> > reasons:
> > 
> > 1) When stats image is in images dir we can latter analize stats for 
> > each iteration if we want it. Now doing migration in VZ7 we have only 
> > stats of last dump in the end.
> 
> The same problem's for log files (I hope nobody has moved them into images dir
> so far), isn't it? Use separate workdir for each iteration.

Exactly, same problem. And for vz7 the logs are kept in the image
directory via vzctl option passed. Thus when a user comes to us with
a problem we ask him to provide us logs from the image directory and
if logs on self are not enough we can ask to pack images directory
where the *complete* state of everything is present which we need
to analyze a problem.
Pavel Tikhomirov March 26, 2018, 11:18 a.m.
On 03/26/2018 01:43 PM, Pavel Emelyanov wrote:
>> 2) If for instance stats image was not created,
> Why can this happen?
> 

What if open_image_at will fail on allocation?
Pavel Emelianov March 26, 2018, 11:19 a.m.
On 03/26/2018 02:02 PM, Cyrill Gorcunov wrote:
> On Mon, Mar 26, 2018 at 01:43:11PM +0300, Pavel Emelyanov wrote:
>> On 03/26/2018 12:16 PM, Pavel Tikhomirov wrote:
>>> On 03/26/2018 11:54 AM, Pavel Emelyanov wrote:
>>>> On 03/26/2018 11:12 AM, Pavel Tikhomirov wrote:
>>>>> Statistics for a special dump/pre-dump/restore action should be
>>>>> collected in the images directory of these action. We need these for
>>>>> pid-reuse detection as we need to read the stats of parent predump.
>>>>
>>>> What's wrong with checking pid-reuse form stats sitting in workdir?
>>>
>>> We can read previous dump statistics from workdir before 
>>> cr_*dump_finish, I agree with that, it is easy.
>>>
>>> But Kirill says that "writing image outside of image directory is 
>>
>> Stats file is not an image. We just use PB format for it and reuse existing
>> image-writing machinery.
> 
> Stats file is context dependant and logically bound to images. At least
> keeping it inside image drectory gives you confidence the data written
> there belongs exactly to the images produced.

So are logs, but we keep them in workdir anyway.

> Because pid reuse uses
> data from stats to operate on memory engine it become not longer
> a random stat just to view, but mandatory data required for proper
> restore.

I've missed that fast somehow. What's there in stats image the pid-reuse needs?

> If you still prefer stats file to be on its own then I think we
> need use some other image to carry this information.
> 
>>> architectural problem", actually I think these way too for at least two 
>>> reasons:
>>>
>>> 1) When stats image is in images dir we can latter analize stats for 
>>> each iteration if we want it. Now doing migration in VZ7 we have only 
>>> stats of last dump in the end.
>>
>> The same problem's for log files (I hope nobody has moved them into images dir
>> so far), isn't it? Use separate workdir for each iteration.
> 
> Exactly, same problem. And for vz7 the logs are kept in the image
> directory via vzctl option passed. 

Gents, by default criu keeps work dir and images together, some efforts should be
done to keep them separately. What option are you talking about?

> Thus when a user comes to us with
> a problem we ask him to provide us logs from the image directory and
> if logs on self are not enough we can ask to pack images directory
> where the *complete* state of everything is present which we need
> to analyze a problem.
> .
>
Pavel Emelianov March 26, 2018, 11:20 a.m.
On 03/26/2018 02:18 PM, Pavel Tikhomirov wrote:
> On 03/26/2018 01:43 PM, Pavel Emelyanov wrote:
>>> 2) If for instance stats image was not created,
>> Why can this happen?
>>
> 
> What if open_image_at will fail on allocation?

Then the whole dump will fail and images in it should be considered as useless.
Pavel Tikhomirov March 26, 2018, 11:22 a.m.
On 03/26/2018 02:20 PM, Pavel Emelyanov wrote:
> On 03/26/2018 02:18 PM, Pavel Tikhomirov wrote:
>> On 03/26/2018 01:43 PM, Pavel Emelyanov wrote:
>>>> 2) If for instance stats image was not created,
>>> Why can this happen?
>>>
>>
>> What if open_image_at will fail on allocation?
> 
> Then the whole dump will fail and images in it should be considered as useless.

For now we don't fail dump if stats was not written in write_stats.

>
Pavel Tikhomirov March 26, 2018, 11:33 a.m.
On 03/26/2018 02:19 PM, Pavel Emelyanov wrote:
> On 03/26/2018 02:02 PM, Cyrill Gorcunov wrote:
>> On Mon, Mar 26, 2018 at 01:43:11PM +0300, Pavel Emelyanov wrote:
>>> On 03/26/2018 12:16 PM, Pavel Tikhomirov wrote:
>>>> On 03/26/2018 11:54 AM, Pavel Emelyanov wrote:
>>>>> On 03/26/2018 11:12 AM, Pavel Tikhomirov wrote:
>>>>>> Statistics for a special dump/pre-dump/restore action should be
>>>>>> collected in the images directory of these action. We need these for
>>>>>> pid-reuse detection as we need to read the stats of parent predump.
>>>>>
>>>>> What's wrong with checking pid-reuse form stats sitting in workdir?
>>>>
>>>> We can read previous dump statistics from workdir before
>>>> cr_*dump_finish, I agree with that, it is easy.
>>>>
>>>> But Kirill says that "writing image outside of image directory is
>>>
>>> Stats file is not an image. We just use PB format for it and reuse existing
>>> image-writing machinery.
>>
>> Stats file is context dependant and logically bound to images. At least
>> keeping it inside image drectory gives you confidence the data written
>> there belongs exactly to the images produced.
> 
> So are logs, but we keep them in workdir anyway.
> 
>> Because pid reuse uses
>> data from stats to operate on memory engine it become not longer
>> a random stat just to view, but mandatory data required for proper
>> restore.
> 
> I've missed that fast somehow. What's there in stats image the pid-reuse needs?

StatsEntry->dump->dump_uptime

> 
>> If you still prefer stats file to be on its own then I think we
>> need use some other image to carry this information.
>>
>>>> architectural problem", actually I think these way too for at least two
>>>> reasons:
>>>>
>>>> 1) When stats image is in images dir we can latter analize stats for
>>>> each iteration if we want it. Now doing migration in VZ7 we have only
>>>> stats of last dump in the end.
>>>
>>> The same problem's for log files (I hope nobody has moved them into images dir
>>> so far), isn't it? Use separate workdir for each iteration.
>>
>> Exactly, same problem. And for vz7 the logs are kept in the image
>> directory via vzctl option passed.
> 
> Gents, by default criu keeps work dir and images together, some efforts should be
> done to keep them separately. What option are you talking about?

Likely --log-file is used for logs and for images and workdir: 
--images-dir and --work-dir.

> 
>> Thus when a user comes to us with
>> a problem we ask him to provide us logs from the image directory and
>> if logs on self are not enough we can ask to pack images directory
>> where the *complete* state of everything is present which we need
>> to analyze a problem.
>> .
>>
>
Cyrill Gorcunov March 26, 2018, 11:48 a.m.
On Mon, Mar 26, 2018 at 02:19:44PM +0300, Pavel Emelyanov wrote:
> > 
> > Exactly, same problem. And for vz7 the logs are kept in the image
> > directory via vzctl option passed. 
> 
> Gents, by default criu keeps work dir and images together, some efforts should be
> done to keep them separately. What option are you talking about?

I think it is due to vzctl/prlctl + phaul + criu binding, someone set
working dir out of images dir :) Still I think it doesn't matter much
since stats start being used for restore procedure it either should be
kept in images directory, or we need to use another image file for
this kind of info, hm?
Pavel Emelianov March 26, 2018, 11:48 a.m.
On 03/26/2018 02:33 PM, Pavel Tikhomirov wrote:
> 
> 
> On 03/26/2018 02:19 PM, Pavel Emelyanov wrote:
>> On 03/26/2018 02:02 PM, Cyrill Gorcunov wrote:
>>> On Mon, Mar 26, 2018 at 01:43:11PM +0300, Pavel Emelyanov wrote:
>>>> On 03/26/2018 12:16 PM, Pavel Tikhomirov wrote:
>>>>> On 03/26/2018 11:54 AM, Pavel Emelyanov wrote:
>>>>>> On 03/26/2018 11:12 AM, Pavel Tikhomirov wrote:
>>>>>>> Statistics for a special dump/pre-dump/restore action should be
>>>>>>> collected in the images directory of these action. We need these for
>>>>>>> pid-reuse detection as we need to read the stats of parent predump.
>>>>>>
>>>>>> What's wrong with checking pid-reuse form stats sitting in workdir?
>>>>>
>>>>> We can read previous dump statistics from workdir before
>>>>> cr_*dump_finish, I agree with that, it is easy.
>>>>>
>>>>> But Kirill says that "writing image outside of image directory is
>>>>
>>>> Stats file is not an image. We just use PB format for it and reuse existing
>>>> image-writing machinery.
>>>
>>> Stats file is context dependant and logically bound to images. At least
>>> keeping it inside image drectory gives you confidence the data written
>>> there belongs exactly to the images produced.
>>
>> So are logs, but we keep them in workdir anyway.
>>
>>> Because pid reuse uses
>>> data from stats to operate on memory engine it become not longer
>>> a random stat just to view, but mandatory data required for proper
>>> restore.
>>
>> I've missed that fast somehow. What's there in stats image the pid-reuse needs?
> 
> StatsEntry->dump->dump_uptime

Ah. I see. So, first of all, TIMING_whatever is not about time-stamps, it's about
"time taken to perform some action". Uptime doesn't fit into this and should not
have been added there. Next, as a dumping ground for various dump-related stuff
we traditionally use the inventory.img. And last, to detect pid-reuse you do not
want to have the uptime of the whole system in the iamges :) You want the time the
process has been alive for. This time is more valuable, as we currently have a bug 
with oracle restore -- "time alive" gets re-set to zero upon restore and oracle kills
all its kids. Had we this time and the ability to ... restore it somehow (time
namespace might be helpful) we'd solve it.

>>
>>> If you still prefer stats file to be on its own then I think we
>>> need use some other image to carry this information.
>>>
>>>>> architectural problem", actually I think these way too for at least two
>>>>> reasons:
>>>>>
>>>>> 1) When stats image is in images dir we can latter analize stats for
>>>>> each iteration if we want it. Now doing migration in VZ7 we have only
>>>>> stats of last dump in the end.
>>>>
>>>> The same problem's for log files (I hope nobody has moved them into images dir
>>>> so far), isn't it? Use separate workdir for each iteration.
>>>
>>> Exactly, same problem. And for vz7 the logs are kept in the image
>>> directory via vzctl option passed.
>>
>> Gents, by default criu keeps work dir and images together, some efforts should be
>> done to keep them separately. What option are you talking about?
> 
> Likely --log-file is used for logs and for images and workdir: 
> --images-dir and --work-dir.

Don't use --work-dir and work dir will be taken from --image-dir :)

>>
>>> Thus when a user comes to us with
>>> a problem we ask him to provide us logs from the image directory and
>>> if logs on self are not enough we can ask to pack images directory
>>> where the *complete* state of everything is present which we need
>>> to analyze a problem.
>>> .
>>>
>>
>
Pavel Emelianov March 26, 2018, 11:50 a.m.
On 03/26/2018 02:48 PM, Cyrill Gorcunov wrote:
> On Mon, Mar 26, 2018 at 02:19:44PM +0300, Pavel Emelyanov wrote:
>>>
>>> Exactly, same problem. And for vz7 the logs are kept in the image
>>> directory via vzctl option passed. 
>>
>> Gents, by default criu keeps work dir and images together, some efforts should be
>> done to keep them separately. What option are you talking about?
> 
> I think it is due to vzctl/prlctl + phaul + criu binding, someone set
> working dir out of images dir :)
Yup! P.haul configures the work-dir to some local dir, while images dir to be an NFS 
share, as images need to get transfered over the wire while logs, stats, sockets and 
other crap in work dir shouldn't.

> Still I think it doesn't matter much> since stats start being used for restore procedure it either should be
> kept in images directory, or we need to use another image file for
> this kind of info, hm?
> .
>
Andrey Vagin March 28, 2018, 5:07 p.m.
On Mon, Mar 26, 2018 at 01:43:11PM +0300, Pavel Emelyanov wrote:
> On 03/26/2018 12:16 PM, Pavel Tikhomirov wrote:
> > On 03/26/2018 11:54 AM, Pavel Emelyanov wrote:
> >> On 03/26/2018 11:12 AM, Pavel Tikhomirov wrote:
> >>> Statistics for a special dump/pre-dump/restore action should be
> >>> collected in the images directory of these action. We need these for
> >>> pid-reuse detection as we need to read the stats of parent predump.
> >>
> >> What's wrong with checking pid-reuse form stats sitting in workdir?
> > 
> > We can read previous dump statistics from workdir before 
> > cr_*dump_finish, I agree with that, it is easy.
> > 
> > But Kirill says that "writing image outside of image directory is 
> 
> Stats file is not an image. We just use PB format for it and reuse existing
> image-writing machinery.
> 
> > architectural problem", actually I think these way too for at least two 
> > reasons:
> > 
> > 1) When stats image is in images dir we can latter analize stats for 
> > each iteration if we want it. Now doing migration in VZ7 we have only 
> > stats of last dump in the end.
> 
> The same problem's for log files (I hope nobody has moved them into images dir
> so far), isn't it? Use separate workdir for each iteration.

It isn't the same, because a name of a log file is specified in options,
but the name of stats file is hardcoded.

> 
> > 2) If for instance stats image was not created,
> 
> Why can this happen?
> 
> > we will see for which > iteration it had happened (there will be no file in directory). Now we 
> > wrongly and silently reuse stats of irrelevant older iteration, and will 
> > do wrong assumptions based on wrong data.
> > 
> >>
> >>> Now then work-dir option is set statistics image is put to the work-dir
> >>> and on each iteration these image will replace previos image.
> >>>
> >>> For backward compatibility with older versions of p.haul create a
> >>> symlink in cwd so that p.haul still has access to the latest stats
> >>> image. Yet I plan to make p.haul work with stats through images dir.
> >>>
> >>> https://jira.sw.ru/browse/PSBM-82864
> >>> Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
> >>> ---
> >>>   criu/stats.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> >>>   1 file changed, 60 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/criu/stats.c b/criu/stats.c
> >>> index d344ad336..2c0f8bda4 100644
> >>> --- a/criu/stats.c
> >>> +++ b/criu/stats.c
> >>> @@ -1,6 +1,7 @@
> >>>   #include <unistd.h>
> >>>   #include <fcntl.h>
> >>>   #include <sys/time.h>
> >>> +#include <sys/stat.h>
> >>>   #include "int.h"
> >>>   #include "atomic.h"
> >>>   #include "cr_options.h"
> >>> @@ -157,6 +158,53 @@ static void display_stats(int what, StatsEntry *stats)
> >>>   		return;
> >>>   }
> >>>   
> >>> +static int fd_to_path(int fd, char *buf, int size) {
> >>> +	char proc_self_fd_path[PATH_MAX];
> >>> +	int ret;
> >>> +
> >>> +	if (snprintf(proc_self_fd_path, PATH_MAX, "/proc/self/fd/%d", fd) < 0) {
> >>> +		pr_perror("Failed to format /proc/self/fd/* path");
> >>> +		return -1;
> >>> +	}
> >>> +
> >>> +	ret = readlink(proc_self_fd_path, buf, size-1);
> >>> +	if (ret < 0) {
> >>> +		pr_perror("Failed to readlink %s path", proc_self_fd_path);
> >>> +		return -1;
> >>> +	}
> >>> +	buf[ret] = '\0';
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static void symlink_stats(char *target, char *name)
> >>> +{
> >>> +	char link_path[PATH_MAX];
> >>> +	struct stat st;
> >>> +	int ret;
> >>> +
> >>> +	if (snprintf(link_path, PATH_MAX, imgset_template[CR_FD_STATS].fmt, name) < 0)
> >>> +		goto exit;
> >>> +
> >>> +	ret = lstat(link_path, &st);
> >>> +	if (ret == -1 && errno != ENOENT)
> >>> +		goto exit;
> >>> +	else if (ret == 0) {
> >>> +		/* Real image occupies these location, no symlink needed */
> >>> +		if (!S_ISLNK(st.st_mode))
> >>> +			return;
> >>> +
> >>> +		/* Unlink symlink of previous iteration */
> >>> +		if (unlink(link_path) == -1)
> >>> +			goto exit;
> >>> +	}
> >>> +
> >>> +	if(!symlink(target, link_path))
> >>> +		return;
> >>> +exit:
> >>> +	pr_warn("Error when trying to create stats symlink");
> >>> +}
> >>> +
> >>>   void write_stats(int what)
> >>>   {
> >>>   	StatsEntry stats = STATS_ENTRY__INIT;
> >>> @@ -203,9 +251,20 @@ void write_stats(int what)
> >>>   	} else
> >>>   		return;
> >>>   
> >>> -	img = open_image_at(AT_FDCWD, CR_FD_STATS, O_DUMP, name);
> >>> +	img = open_image(CR_FD_STATS, O_DUMP, name);
> >>>   	if (img) {
> >>> +		char image_path[PATH_MAX];
> >>> +
> >>>   		pb_write_one(img, &stats, PB_STATS);
> >>> +
> >>> +		/*
> >>> +		 * Backward compatibility: old p.haul whants to read stats
> >>> +		 * from working directory, but not from images directory, so
> >>> +		 * create a symlink for it.
> >>> +		 */
> >>> +		if (!fd_to_path(img->fd, image_path, PATH_MAX))
> >>> +			symlink_stats(image_path, name);
> >>> +
> >>>   		close_image(img);
> >>>   	}
> >>>   
> >>>
> >>
> > 
>
Andrey Vagin March 28, 2018, 5:36 p.m.
On Mon, Mar 26, 2018 at 02:48:51PM +0300, Pavel Emelyanov wrote:
> On 03/26/2018 02:33 PM, Pavel Tikhomirov wrote:
> > 
> > 
> > On 03/26/2018 02:19 PM, Pavel Emelyanov wrote:
> >> On 03/26/2018 02:02 PM, Cyrill Gorcunov wrote:
> >>> On Mon, Mar 26, 2018 at 01:43:11PM +0300, Pavel Emelyanov wrote:
> >>>> On 03/26/2018 12:16 PM, Pavel Tikhomirov wrote:
> >>>>> On 03/26/2018 11:54 AM, Pavel Emelyanov wrote:
> >>>>>> On 03/26/2018 11:12 AM, Pavel Tikhomirov wrote:
> >>>>>>> Statistics for a special dump/pre-dump/restore action should be
> >>>>>>> collected in the images directory of these action. We need these for
> >>>>>>> pid-reuse detection as we need to read the stats of parent predump.
> >>>>>>
> >>>>>> What's wrong with checking pid-reuse form stats sitting in workdir?
> >>>>>
> >>>>> We can read previous dump statistics from workdir before
> >>>>> cr_*dump_finish, I agree with that, it is easy.
> >>>>>
> >>>>> But Kirill says that "writing image outside of image directory is
> >>>>
> >>>> Stats file is not an image. We just use PB format for it and reuse existing
> >>>> image-writing machinery.
> >>>
> >>> Stats file is context dependant and logically bound to images. At least
> >>> keeping it inside image drectory gives you confidence the data written
> >>> there belongs exactly to the images produced.
> >>
> >> So are logs, but we keep them in workdir anyway.
> >>
> >>> Because pid reuse uses
> >>> data from stats to operate on memory engine it become not longer
> >>> a random stat just to view, but mandatory data required for proper
> >>> restore.
> >>
> >> I've missed that fast somehow. What's there in stats image the pid-reuse needs?
> > 
> > StatsEntry->dump->dump_uptime
> 
> Ah. I see. So, first of all, TIMING_whatever is not about time-stamps, it's about
> "time taken to perform some action". Uptime doesn't fit into this and should not
> have been added there. Next, as a dumping ground for various dump-related stuff
> we traditionally use the inventory.img. And last, to detect pid-reuse you do not
> want to have the uptime of the whole system in the iamges :) You want the time the
> process has been alive for.

It is a good idea.

> This time is more valuable, as we currently have a bug 
> with oracle restore -- "time alive" gets re-set to zero upon restore and oracle kills
> all its kids. Had we this time and the ability to ... restore it somehow (time
> namespace might be helpful) we'd solve it.

In this context, if a process died and then it was restored, this should be
detected as pid-reuse.

> 
> >>
> >>> If you still prefer stats file to be on its own then I think we
> >>> need use some other image to carry this information.
> >>>
> >>>>> architectural problem", actually I think these way too for at least two
> >>>>> reasons:
> >>>>>
> >>>>> 1) When stats image is in images dir we can latter analize stats for
> >>>>> each iteration if we want it. Now doing migration in VZ7 we have only
> >>>>> stats of last dump in the end.
> >>>>
> >>>> The same problem's for log files (I hope nobody has moved them into images dir
> >>>> so far), isn't it? Use separate workdir for each iteration.
> >>>
> >>> Exactly, same problem. And for vz7 the logs are kept in the image
> >>> directory via vzctl option passed.
> >>
> >> Gents, by default criu keeps work dir and images together, some efforts should be
> >> done to keep them separately. What option are you talking about?
> > 
> > Likely --log-file is used for logs and for images and workdir: 
> > --images-dir and --work-dir.
> 
> Don't use --work-dir and work dir will be taken from --image-dir :)
> 
> >>
> >>> Thus when a user comes to us with
> >>> a problem we ask him to provide us logs from the image directory and
> >>> if logs on self are not enough we can ask to pack images directory
> >>> where the *complete* state of everything is present which we need
> >>> to analyze a problem.
> >>> .
> >>>
> >>
> > 
>
Pavel Emelianov March 28, 2018, 7 p.m.
>>> 1) When stats image is in images dir we can latter analize stats for 
>>> each iteration if we want it. Now doing migration in VZ7 we have only 
>>> stats of last dump in the end.
>>
>> The same problem's for log files (I hope nobody has moved them into images dir
>> so far), isn't it? Use separate workdir for each iteration.
> 
> It isn't the same, because a name of a log file is specified in options,
> but the name of stats file is hardcoded.

But the location for stats file (work-dir) is specified in CLI options. You can
have separate work-dirs for separate actions to (quoting) "later alaize stats
for each iteration".

-- Pavel
Andrey Vagin March 28, 2018, 7:45 p.m.
On Wed, Mar 28, 2018 at 10:00:04PM +0300, Pavel Emelyanov wrote:
> 
> >>> 1) When stats image is in images dir we can latter analize stats for 
> >>> each iteration if we want it. Now doing migration in VZ7 we have only 
> >>> stats of last dump in the end.
> >>
> >> The same problem's for log files (I hope nobody has moved them into images dir
> >> so far), isn't it? Use separate workdir for each iteration.
> > 
> > It isn't the same, because a name of a log file is specified in options,
> > but the name of stats file is hardcoded.
> 
> But the location for stats file (work-dir) is specified in CLI options. You can
> have separate work-dirs for separate actions to (quoting) "later alaize stats
> for each iteration".

It is impossible to specify a work dir for a previous iteration...

> 
> -- Pavel
Pavel Tikhomirov March 29, 2018, 4:51 a.m.

Pavel Emelianov March 29, 2018, 7:03 p.m.
On 03/28/2018 10:45 PM, Andrew Vagin wrote:
> On Wed, Mar 28, 2018 at 10:00:04PM +0300, Pavel Emelyanov wrote:
>>
>>>>> 1) When stats image is in images dir we can latter analize stats for 
>>>>> each iteration if we want it. Now doing migration in VZ7 we have only 
>>>>> stats of last dump in the end.
>>>>
>>>> The same problem's for log files (I hope nobody has moved them into images dir
>>>> so far), isn't it? Use separate workdir for each iteration.
>>>
>>> It isn't the same, because a name of a log file is specified in options,
>>> but the name of stats file is hardcoded.
>>
>> But the location for stats file (work-dir) is specified in CLI options. You can
>> have separate work-dirs for separate actions to (quoting) "later alaize stats
>> for each iteration".
> 
> It is impossible to specify a work dir for a previous iteration...

For previous of course, but why would you need it?