seize: Take --timeout option into account when freezing processes

Submitted by Cyrill Gorcunov on May 26, 2016, 1:46 p.m.

Details

Message ID 1464270415-20988-1-git-send-email-gorcunov@virtuozzo.com
State Rejected
Series "seize: Take --timeout option into account when freezing processes"
Headers show

Commit Message

Cyrill Gorcunov May 26, 2016, 1:46 p.m.
When we're freezing processes we don't count on anything but
rather do 5 attempts with constantly increasing delay.

Lets rather do the following:

 - take --timeout option into account (which is 5 seconds
   by default) and split it into 100 ms steps;

 - when frezing processes check freezer status every 100 ms.

Same time it looks that 5 seconds by default is too small
for high loaded containers. Lets increase it to 10 seconds
by default.

Reported-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@virtuozzo.com>
---
 criu/include/cr_options.h |  2 +-
 criu/seize.c              | 30 ++++++++++++++++++++----------
 2 files changed, 21 insertions(+), 11 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
index b6ae3a1..a9ca1f8 100644
--- a/criu/include/cr_options.h
+++ b/criu/include/cr_options.h
@@ -38,7 +38,7 @@  struct cg_root_opt {
  */
 #define DEFAULT_GHOST_LIMIT	(1 << 20)
 
-#define DEFAULT_TIMEOUT		5
+#define DEFAULT_TIMEOUT		10
 
 struct irmap;
 
diff --git a/criu/seize.c b/criu/seize.c
index 0ea7a28..863a21e 100644
--- a/criu/seize.c
+++ b/criu/seize.c
@@ -244,10 +244,27 @@  static int freezer_detach(void)
 
 static int freeze_processes(void)
 {
-	int i, fd, exit_code = -1;
+	int fd, exit_code = -1;
 	char path[PATH_MAX];
 	const char *state = thawed;
 
+	static const unsigned long step_ms = 100;
+	unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms;
+	unsigned long i;
+
+	const struct timespec req = {
+		.tv_nsec	= step_ms * 1000000,
+		.tv_sec		= 0,
+	};
+
+	if (unlikely(!nr_attempts)) {
+		/*
+		 * If timeout is turned off, lets
+		 * wait for at least 10 seconds.
+		 */
+		nr_attempts = (10 * 1000000) / step_ms;
+	}
+
 	snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
 	fd = open(path, O_RDWR);
 	if (fd < 0) {
@@ -275,10 +292,7 @@  static int freeze_processes(void)
 	 * freezer.state.
 	 * Here is one extra attempt to check that everything are frozen.
 	 */
-	for (i = 0; i <= NR_ATTEMPTS; i++) {
-		struct timespec req = {};
-		u64 timeout;
-
+	for (i = 0; i <= nr_attempts; i++) {
 		if (seize_cgroup_tree(opts.freeze_cgroup, state) < 0)
 			goto err;
 
@@ -300,14 +314,10 @@  static int freeze_processes(void)
 
 		if (alarm_timeouted())
 			goto err;
-
-		timeout = 100000000 * (i + 1); /* 100 msec */
-		req.tv_nsec = timeout % 1000000000;
-		req.tv_sec = timeout / 1000000000;
 		nanosleep(&req, NULL);
 	}
 
-	if (i > NR_ATTEMPTS) {
+	if (i > nr_attempts) {
 		pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup);
 		goto err;
 	}

Comments

Andrey Vagin May 27, 2016, 6:56 p.m.
It's false alarm.

On Thu, May 26, 2016 at 02:27:21PM +0000, Patchwork wrote:
> == Series Details ==
> 
> Series: seize: Take --timeout option into account when freezing processes
> URL   : https://zdtm.openvz.org/series/181/
> State : failure
> 
> == Logs ==
> 
> For more details see: https://travis-ci.org/criupatchwork/criu/builds/133116843
> _______________________________________________
> CRIU mailing list
> CRIU@openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
Pavel Emelianov May 27, 2016, 7:18 p.m.
On 05/26/2016 04:46 PM, Cyrill Gorcunov wrote:
> When we're freezing processes we don't count on anything but
> rather do 5 attempts with constantly increasing delay.
> 
> Lets rather do the following:
> 
>  - take --timeout option into account (which is 5 seconds
>    by default) and split it into 100 ms steps;

Increasing delays make sense I would say. Would you keep them?

>  - when frezing processes check freezer status every 100 ms.
> 
> Same time it looks that 5 seconds by default is too small
> for high loaded containers. Lets increase it to 10 seconds
> by default.
> 
> Reported-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
> Signed-off-by: Cyrill Gorcunov <gorcunov@virtuozzo.com>
> ---
>  criu/include/cr_options.h |  2 +-
>  criu/seize.c              | 30 ++++++++++++++++++++----------
>  2 files changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
> index b6ae3a1..a9ca1f8 100644
> --- a/criu/include/cr_options.h
> +++ b/criu/include/cr_options.h
> @@ -38,7 +38,7 @@ struct cg_root_opt {
>   */
>  #define DEFAULT_GHOST_LIMIT	(1 << 20)
>  
> -#define DEFAULT_TIMEOUT		5
> +#define DEFAULT_TIMEOUT		10
>  
>  struct irmap;
>  
> diff --git a/criu/seize.c b/criu/seize.c
> index 0ea7a28..863a21e 100644
> --- a/criu/seize.c
> +++ b/criu/seize.c
> @@ -244,10 +244,27 @@ static int freezer_detach(void)
>  
>  static int freeze_processes(void)
>  {
> -	int i, fd, exit_code = -1;
> +	int fd, exit_code = -1;
>  	char path[PATH_MAX];
>  	const char *state = thawed;
>  
> +	static const unsigned long step_ms = 100;
> +	unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms;
> +	unsigned long i;
> +
> +	const struct timespec req = {
> +		.tv_nsec	= step_ms * 1000000,
> +		.tv_sec		= 0,
> +	};
> +
> +	if (unlikely(!nr_attempts)) {

"If timeout if off" means if (!opts.timeout) ;)

> +		/*
> +		 * If timeout is turned off, lets
> +		 * wait for at least 10 seconds.
> +		 */
> +		nr_attempts = (10 * 1000000) / step_ms;

nr_attempts = DEFAULT_TIMEOUT?

> +	}
> +
>  	snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
>  	fd = open(path, O_RDWR);
>  	if (fd < 0) {
> @@ -275,10 +292,7 @@ static int freeze_processes(void)
>  	 * freezer.state.
>  	 * Here is one extra attempt to check that everything are frozen.
>  	 */
> -	for (i = 0; i <= NR_ATTEMPTS; i++) {
> -		struct timespec req = {};
> -		u64 timeout;
> -
> +	for (i = 0; i <= nr_attempts; i++) {
>  		if (seize_cgroup_tree(opts.freeze_cgroup, state) < 0)
>  			goto err;
>  
> @@ -300,14 +314,10 @@ static int freeze_processes(void)
>  
>  		if (alarm_timeouted())
>  			goto err;
> -
> -		timeout = 100000000 * (i + 1); /* 100 msec */
> -		req.tv_nsec = timeout % 1000000000;
> -		req.tv_sec = timeout / 1000000000;
>  		nanosleep(&req, NULL);
>  	}
>  
> -	if (i > NR_ATTEMPTS) {
> +	if (i > nr_attempts) {
>  		pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup);
>  		goto err;
>  	}
>
Kirill Gorkunov May 27, 2016, 7:37 p.m.
On Fri, May 27, 2016 at 10:18:17PM +0300, Pavel Emelyanov wrote:
> On 05/26/2016 04:46 PM, Cyrill Gorcunov wrote:
> > When we're freezing processes we don't count on anything but
> > rather do 5 attempts with constantly increasing delay.
> > 
> > Lets rather do the following:
> > 
> >  - take --timeout option into account (which is 5 seconds
> >    by default) and split it into 100 ms steps;
> 
> Increasing delays make sense I would say. Would you keep them?

Not right now. Constant step makes computations easier here.
Note that we're taking into account the argument right now
and can calcs step in one pass instead of adding some
exponential aproximation (we could of course but I think
as a first step bette to keep it simplier).

> >  
> >  static int freeze_processes(void)
> >  {
> > -	int i, fd, exit_code = -1;
> > +	int fd, exit_code = -1;
> >  	char path[PATH_MAX];
> >  	const char *state = thawed;
> >  
> > +	static const unsigned long step_ms = 100;
> > +	unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms;
> > +	unsigned long i;
> > +
> > +	const struct timespec req = {
> > +		.tv_nsec	= step_ms * 1000000,
> > +		.tv_sec		= 0,
> > +	};
> > +
> > +	if (unlikely(!nr_attempts)) {
> 
> "If timeout if off" means if (!opts.timeout) ;)

One can increase @step_ms one day and integer arithmetics
end up with nr_attempts = 0 but opts.timeout != 0, so
@nr_attempts should be used here. But true, comment is
misleading a bit ;)

> > +		/*
> > +		 * If timeout is turned off, lets
> > +		 * wait for at least 10 seconds.
> > +		 */
> > +		nr_attempts = (10 * 1000000) / step_ms;
> 
> nr_attempts = DEFAULT_TIMEOUT?

No :) @nr_attempts are cummulative value, this is done
for sake to vary @step_ms if needed.

	Cyrill
Stanislav Kinsburskiy May 29, 2016, 6:35 p.m.
27.05.2016 21:18, Pavel Emelyanov пишет:
> On 05/26/2016 04:46 PM, Cyrill Gorcunov wrote:
>> When we're freezing processes we don't count on anything but
>> rather do 5 attempts with constantly increasing delay.
>>
>> Lets rather do the following:
>>
>>   - take --timeout option into account (which is 5 seconds
>>     by default) and split it into 100 ms steps;
> Increasing delays make sense I would say. Would you keep them?
>

Could you elaborate, what's the sense in this particular case?
(BTW, current implementation, without this patch, always waste 0,5 for 
the last iteration without check for freezer state).

>>   - when frezing processes check freezer status every 100 ms.
>>
>> Same time it looks that 5 seconds by default is too small
>> for high loaded containers. Lets increase it to 10 seconds
>> by default.
>>
>> Reported-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
>> Signed-off-by: Cyrill Gorcunov <gorcunov@virtuozzo.com>
>> ---
>>   criu/include/cr_options.h |  2 +-
>>   criu/seize.c              | 30 ++++++++++++++++++++----------
>>   2 files changed, 21 insertions(+), 11 deletions(-)
>>
>> diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
>> index b6ae3a1..a9ca1f8 100644
>> --- a/criu/include/cr_options.h
>> +++ b/criu/include/cr_options.h
>> @@ -38,7 +38,7 @@ struct cg_root_opt {
>>    */
>>   #define DEFAULT_GHOST_LIMIT	(1 << 20)
>>   
>> -#define DEFAULT_TIMEOUT		5
>> +#define DEFAULT_TIMEOUT		10
>>   
>>   struct irmap;
>>   
>> diff --git a/criu/seize.c b/criu/seize.c
>> index 0ea7a28..863a21e 100644
>> --- a/criu/seize.c
>> +++ b/criu/seize.c
>> @@ -244,10 +244,27 @@ static int freezer_detach(void)
>>   
>>   static int freeze_processes(void)
>>   {
>> -	int i, fd, exit_code = -1;
>> +	int fd, exit_code = -1;
>>   	char path[PATH_MAX];
>>   	const char *state = thawed;
>>   
>> +	static const unsigned long step_ms = 100;
>> +	unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms;
>> +	unsigned long i;
>> +
>> +	const struct timespec req = {
>> +		.tv_nsec	= step_ms * 1000000,
>> +		.tv_sec		= 0,
>> +	};
>> +
>> +	if (unlikely(!nr_attempts)) {
> "If timeout if off" means if (!opts.timeout) ;)
>
>> +		/*
>> +		 * If timeout is turned off, lets
>> +		 * wait for at least 10 seconds.
>> +		 */
>> +		nr_attempts = (10 * 1000000) / step_ms;
> nr_attempts = DEFAULT_TIMEOUT?
>
>> +	}
>> +
>>   	snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
>>   	fd = open(path, O_RDWR);
>>   	if (fd < 0) {
>> @@ -275,10 +292,7 @@ static int freeze_processes(void)
>>   	 * freezer.state.
>>   	 * Here is one extra attempt to check that everything are frozen.
>>   	 */
>> -	for (i = 0; i <= NR_ATTEMPTS; i++) {
>> -		struct timespec req = {};
>> -		u64 timeout;
>> -
>> +	for (i = 0; i <= nr_attempts; i++) {
>>   		if (seize_cgroup_tree(opts.freeze_cgroup, state) < 0)
>>   			goto err;
>>   
>> @@ -300,14 +314,10 @@ static int freeze_processes(void)
>>   
>>   		if (alarm_timeouted())
>>   			goto err;
>> -
>> -		timeout = 100000000 * (i + 1); /* 100 msec */
>> -		req.tv_nsec = timeout % 1000000000;
>> -		req.tv_sec = timeout / 1000000000;
>>   		nanosleep(&req, NULL);
>>   	}
>>   
>> -	if (i > NR_ATTEMPTS) {
>> +	if (i > nr_attempts) {
>>   		pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup);
>>   		goto err;
>>   	}
>>