[Devel,libvzctl,v2] Suppress SUNRPC traffic on "fast stop".

Submitted by Stanislav Kinsburskiy on June 26, 2017, 3:04 p.m.

Details

Message ID 20170626150418.23760.17969.stgit@skinsbursky-vz7.qa.sw.ru
State New
Series "Suppress SUNRPC traffic on "fast stop"."
Headers show

Commit Message

Stanislav Kinsburskiy June 26, 2017, 3:04 p.m.
This is needed to break an infinite loop in the SUNRPC state machine, which
leaves the container unstoppable when the network is unreachable.

https://jira.sw.ru/browse/PSBM-67544

Note: needs kernel rh7-3.10.0-514.16.1.vz7.32.12 or newer

v2:
1) The per-net SUNRPC "kill-tasks" handle of the container init process is now
used instead of the VE cgroup handle.

Signed-off-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
---
 lib/env_nsops.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 48 insertions(+), 4 deletions(-)

Patch hide | download patch | download mbox

diff --git a/lib/env_nsops.c b/lib/env_nsops.c
index d62dff4..74d6402 100644
--- a/lib/env_nsops.c
+++ b/lib/env_nsops.c
@@ -1067,27 +1067,71 @@  static int ns_env_kill(struct vzctl_env_handle *h)
 	return 0;
 }
 
+static int write_sunrpc_kill(struct vzctl_env_handle *h, unsigned value)
+{
+	pid_t pid;
+	int fd;
+	ssize_t res;
+	char path[PATH_MAX];
+	char *val = value ? "1" : "0";
+
+	if (cg_env_get_init_pid(h->ctid, &pid))
+		return -1;
+
+	snprintf(path, sizeof(path), "/proc/%d/net/rpc/kill-tasks", pid);
+
+	if (access(path, F_OK))
+		return 0;
+
+	fd = open(path, O_WRONLY);
+	if (fd == -1) {
+		vzctl_err(-1, errno, "Failed to open %s: %s", path, strerror(errno));
+		return -1;
+	}
+
+	res = write(fd, val, strlen(val) + 1);
+	close(fd);
+
+	if (res != strlen(val) + 1) {
+		 vzctl_err(-1, errno, "Unable to %s SUNRPC traffic",
+				 value ? "suppress" : "release");
+		 return -1;
+	}
+
+
+	return 0;
+}
+
 static int ns_env_stop_force(struct vzctl_env_handle *h)
 {
-	int ret, rc;
+	int ret, rc, sunrpc_suppressed;
+
+	sunrpc_suppressed = write_sunrpc_kill(h, 1);
 
 	logger(0, 0, "Forcibly stop the Container...");
 
 	ret = cg_freezer_cmd(EID(h), VZCTL_CMD_FREEZE);
 	if (ret)
-		return ret;
+		goto release_sunrpc;
 
 	rc = ns_env_kill(h);
 
 	/* Unfreeze unconditionally */
 	ret = cg_freezer_cmd(EID(h), VZCTL_CMD_RESUME);
-	if (ret || rc)
-		return ret ?: rc;
+	if (ret || rc) {
+		ret = ret ?: rc;
+		goto release_sunrpc;
+	}
 
 	if (wait_env_state(h, VZCTL_ENV_STOPPED, MAX_SHTD_TM))
 		return vzctl_err(-1, 0, "Failed to stop Container:"
 				" operation timed out");
 	return 0;
+
+release_sunrpc:
+	if (sunrpc_suppressed > 0)
+		(void) write_sunrpc_kill(h, 0);
+	return ret;
 }
 
 static int ns_env_cleanup(struct vzctl_env_handle *h, int flags)

Comments

Stanislav Kinsburskiy July 4, 2017, 3:01 p.m.
Igor, could you please review?


26.06.2017 17:04, Stanislav Kinsburskiy пишет:
> This is needed to break infinite loop in SUNRPC state machine, leading to
> unstoppable container in case of unreachable network.
>
> https://jira.sw.ru/browse/PSBM-67544
>
> Note: needs kernel rh7-3.10.0-514.16.1.vz7.32.12 or newer
>
> v2:
> 1) Now per-net SUNRPC "kill-tasks" handle for container init process is used
> rather than VE cgroup handle.
>
> Signed-off-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
> ---
>   lib/env_nsops.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++----
>   1 file changed, 48 insertions(+), 4 deletions(-)
>
> diff --git a/lib/env_nsops.c b/lib/env_nsops.c
> index d62dff4..74d6402 100644
> --- a/lib/env_nsops.c
> +++ b/lib/env_nsops.c
> @@ -1067,27 +1067,71 @@ static int ns_env_kill(struct vzctl_env_handle *h)
>   	return 0;
>   }
>   
> +static int write_sunrpc_kill(struct vzctl_env_handle *h, unsigned value)
> +{
> +	pid_t pid;
> +	int fd;
> +	ssize_t res;
> +	char path[PATH_MAX];
> +	char *val = value ? "1" : "0";
> +
> +	if (cg_env_get_init_pid(h->ctid, &pid))
> +		return -1;
> +
> +	snprintf(path, sizeof(path), "/proc/%d/net/rpc/kill-tasks", pid);
> +
> +	if (access(path, F_OK))
> +		return 0;
> +
> +	fd = open(path, O_WRONLY);
> +	if (fd == -1) {
> +		vzctl_err(-1, errno, "Failed to open %s: %s", path, strerror(errno));
> +		return -1;
> +	}
> +
> +	res = write(fd, val, strlen(val) + 1);
> +	close(fd);
> +
> +	if (res != strlen(val) + 1) {
> +		 vzctl_err(-1, errno, "Unable to %s SUNRPC traffic",
> +				 value ? "suppress" : "release");
> +		 return -1;
> +	}
> +
> +
> +	return 0;
> +}
> +
>   static int ns_env_stop_force(struct vzctl_env_handle *h)
>   {
> -	int ret, rc;
> +	int ret, rc, sunrpc_suppressed;
> +
> +	sunrpc_suppressed = write_sunrpc_kill(h, 1);
>   
>   	logger(0, 0, "Forcibly stop the Container...");
>   
>   	ret = cg_freezer_cmd(EID(h), VZCTL_CMD_FREEZE);
>   	if (ret)
> -		return ret;
> +		goto release_sunrpc;
>   
>   	rc = ns_env_kill(h);
>   
>   	/* Unfreeze unconditionally */
>   	ret = cg_freezer_cmd(EID(h), VZCTL_CMD_RESUME);
> -	if (ret || rc)
> -		return ret ?: rc;
> +	if (ret || rc) {
> +		ret = ret ?: rc;
> +		goto release_sunrpc;
> +	}
>   
>   	if (wait_env_state(h, VZCTL_ENV_STOPPED, MAX_SHTD_TM))
>   		return vzctl_err(-1, 0, "Failed to stop Container:"
>   				" operation timed out");
>   	return 0;
> +
> +release_sunrpc:
> +	if (sunrpc_suppressed > 0)
> +		(void) write_sunrpc_kill(h, 0);
> +	return ret;
>   }
>   
>   static int ns_env_cleanup(struct vzctl_env_handle *h, int flags)
>
> _______________________________________________
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
Igor Sukhih July 5, 2017, 11:09 a.m.
On 07/04/2017 06:01 PM, Stanislav Kinsburskiy wrote:
> Igor, could you please review?
>
>
> 26.06.2017 17:04, Stanislav Kinsburskiy пишет:
>> This is needed to break infinite loop in SUNRPC state machine, 
>> leading to
>> unstoppable container in case of unreachable network.
>>
>> https://jira.sw.ru/browse/PSBM-67544
>>
>> Note: needs kernel rh7-3.10.0-514.16.1.vz7.32.12 or newer
>>
>> v2:
>> 1) Now per-net SUNRPC "kill-tasks" handle for container init process 
>> is used
>> rather than VE cgroup handle.
>>
>> Signed-off-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
>> ---
>>   lib/env_nsops.c |   52 
>> ++++++++++++++++++++++++++++++++++++++++++++++++----
>>   1 file changed, 48 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/env_nsops.c b/lib/env_nsops.c
>> index d62dff4..74d6402 100644
>> --- a/lib/env_nsops.c
>> +++ b/lib/env_nsops.c
>> @@ -1067,27 +1067,71 @@ static int ns_env_kill(struct 
>> vzctl_env_handle *h)
>>       return 0;
>>   }
>>   +static int write_sunrpc_kill(struct vzctl_env_handle *h, unsigned 
>> value)
>> +{
>> +    pid_t pid;
>> +    int fd;
>> +    ssize_t res;
>> +    char path[PATH_MAX];
>> +    char *val = value ? "1" : "0";
>> +
>> +    if (cg_env_get_init_pid(h->ctid, &pid))
>> +        return -1;
>> +
>> +    snprintf(path, sizeof(path), "/proc/%d/net/rpc/kill-tasks", pid);
>> +
>> +    if (access(path, F_OK))
>> +        return 0;
>> +
>> +    fd = open(path, O_WRONLY);
>> +    if (fd == -1) {
>> +        vzctl_err(-1, errno, "Failed to open %s: %s", path, 
>> strerror(errno));
>> +        return -1;
>> +    }
>> +
>> +    res = write(fd, val, strlen(val) + 1);
>> +    close(fd);
>> +
>> +    if (res != strlen(val) + 1) {
>> +         vzctl_err(-1, errno, "Unable to %s SUNRPC traffic",
>> +                 value ? "suppress" : "release");
>> +         return -1;
>> +    }
>> +
>> +
>> +    return 0;
       ^^^^^  return 1;

>>
>> +
>> +release_sunrpc:
>> +    if (sunrpc_suppressed > 0)
>> +        (void) write_sunrpc_kill(h, 0);
>> +    return ret;
>>   }
>>     static int ns_env_cleanup(struct vzctl_env_handle *h, int flags)