criu: raise the task file limit for specific commands

Submitted by Andrei Vagin on April 16, 2019, 5:29 a.m.

Details

Message ID 20190416052955.874-1-avagin@gmail.com
State New
Series "criu: raise the task file limit for specific commands"

Commit Message

Andrei Vagin April 16, 2019, 5:29 a.m.
We don't need to do this from early_init(); the limit can be raised directly in the dump, pre-dump and restore code paths instead.

Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
---
 criu/cr-dump.c    | 11 +++++++++++
 criu/cr-restore.c | 11 +++++++++++
 criu/crtools.c    | 49 -------------------------------------------------
 3 files changed, 22 insertions(+), 49 deletions(-)


diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index d4ed40b91..001dd2a6e 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1557,6 +1557,11 @@ int cr_pre_dump_tasks(pid_t pid)
 	struct pstree_item *item;
 	int ret = -1;
 
+	/*
+	 * We might need a lot of pipes to fetch a huge number of pages to dump.
+	 */
+	rlimit_unlimit_nofile();
+
 	root_item = alloc_pstree_item();
 	if (!root_item)
 		goto err;
@@ -1754,6 +1759,12 @@ int cr_dump_tasks(pid_t pid)
 	pr_info("Dumping processes (pid: %d)\n", pid);
 	pr_info("========================================\n");
 
+	/*
+	 * CRIU will fetch all file descriptors of each task. A task can have
+	 * many open files, so we need to raise the file limit.
+	 */
+	rlimit_unlimit_nofile();
+
 	root_item = alloc_pstree_item();
 	if (!root_item)
 		goto err;
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 6a3d62450..c313abf19 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2330,6 +2330,17 @@ int cr_restore_tasks(void)
 {
 	int ret = -1;
 
+	/*
+	 * The service fd engine assumes that the file
+	 * descriptors it uses won't be taken by the rest
+	 * of the code, and the default limit of 1024 is
+	 * not enough for heavily loaded tests/containers.
+	 * Thus, use the kdat engine to fetch the current
+	 * system-level limit on the number of open files
+	 * and lift our own limit up to it.
+	 */
+	rlimit_unlimit_nofile();
+
 	if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE))
 		return -1;
 
diff --git a/criu/crtools.c b/criu/crtools.c
index 6a2955287..883a50317 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -68,55 +68,6 @@ static void rlimit_unlimit_nofile(void)
 
 static int early_init(const char *cmd)
 {
-	static const char *nofile_cmds[] = {
-		"swrk", "service",
-		"dump", "pre-dump",
-		"restore",
-	};
-	size_t i;
-
-	/*
-	 * Service fd engine implies that file descriptors
-	 * used won't be borrowed by the rest of the code
-	 * and default 1024 limit is not enough for high
-	 * loaded test/containers. Thus use kdat engine
-	 * to fetch current system level limit for numbers
-	 * of files allowed to open up and lift up own
-	 * limits.
-	 *
-	 * Note we have to do it before the service fd
-	 * get inited and we dont exit with errors here
-	 * because in worst scenario where clash of fd
-	 * happen we simply exit with explicit error
-	 * during real action stage.
-	 *
-	 * Same time raising limits cause kernel fdtable
-	 * to bloat so we do this only on the @nofile_cmds:
-	 *
-	 *  - on dump criu needs additional files for sfd,
-	 *    thus if container already has many files opened
-	 *    we need to have at least not less space when
-	 *    fetching fds from a target process;
-	 *
-	 *  - on pre-dump we might need a lot of pipes to
-	 *    fetch huge number of pages to dump;
-	 *
-	 *  - on restore we still need to raise limits since
-	 *    there is no guarantee that on dump we've not
-	 *    been hitting fd limit already;
-	 *
-	 *  - swrk and service obtain requests on the fly,
-	 *    thus we don't know if on of above will be
-	 *    there thus raise limits.
-	 */
-	for (i = 0; i < ARRAY_SIZE(nofile_cmds); i++) {
-		if (strcmp(nofile_cmds[i], cmd))
-			continue;
-		if (!kerndat_files_stat(true))
-			rlimit_unlimit_nofile();
-		break;
-	}
-
 	if (init_service_fd())
 		return 1;
 

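For background, the helper this patch moves around, rlimit_unlimit_nofile(), raises the CRIU process's RLIMIT_NOFILE towards the system-wide ceiling. The fragment below is a minimal sketch of that general technique using plain POSIX and procfs calls; it is an illustration only, not CRIU's actual implementation (which goes through the kdat engine), and the function name, fallback value and main() wrapper are made up for the example.

#include <stdio.h>
#include <sys/resource.h>

/*
 * Illustrative sketch only, not CRIU's rlimit_unlimit_nofile():
 * raise this process's RLIMIT_NOFILE as far as the kernel allows.
 */
int unlimit_nofile_sketch(void)
{
	unsigned long nr_open = 1024 * 1024;	/* assumed fallback value */
	struct rlimit rlim;
	FILE *f;

	/* /proc/sys/fs/nr_open is the kernel's upper bound for RLIMIT_NOFILE. */
	f = fopen("/proc/sys/fs/nr_open", "r");
	if (f) {
		if (fscanf(f, "%lu", &nr_open) != 1)
			nr_open = 1024 * 1024;
		fclose(f);
	}

	rlim.rlim_cur = nr_open;
	rlim.rlim_max = nr_open;

	/* Raising the hard limit requires CAP_SYS_RESOURCE; CRIU normally runs as root. */
	if (setrlimit(RLIMIT_NOFILE, &rlim)) {
		perror("setrlimit(RLIMIT_NOFILE)");
		return -1;
	}

	return 0;
}

int main(void)
{
	return unlimit_nofile_sketch() ? 1 : 0;
}

Calling such a helper once at the start of the dump, pre-dump and restore paths, as the patch does with the real function, keeps the kernel fdtable small for commands that never need the larger limit.
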
Comments

Andrei Vagin April 16, 2019, 5:34 a.m.
Ignore this one 
