[RFC,5/5] files: Shrink big fdtable on close in is_pseudosuper mode

Submitted by Kirill Tkhai on Jan. 12, 2018, 3:46 p.m.

Details

Message ID 151577200880.11063.6343492461030114438.stgit@localhost.localdomain
State New
Series "Shrink big fdtable on criu restore"
Headers show

Commit Message

Kirill Tkhai Jan. 12, 2018, 3:46 p.m.
This trick is going to be used for criu restore,
to release excess memory occupied by service files.
We check the fd being closed, and if it is at least half of
the current fdtable size (max_fds), we try to shrink the fdtable
and decrease the amount of memory needed to store the task's fds.
I use the is_pseudosuper state to detect restore, but
using this particular variable is not mandatory, and we
can introduce another one.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 fs/file.c |   51 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 10 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/file.c b/fs/file.c
index f009eb9bf1c8..b85e8ee6143b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -22,6 +22,7 @@ 
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
+#include <linux/ve.h>
 
 int sysctl_nr_open __read_mostly = 1024*1024;
 int sysctl_nr_open_min = BITS_PER_LONG;
@@ -69,19 +70,22 @@  static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt, bool shrink
 {
 	unsigned int cpy, set;
 
-	BUG_ON(nfdt->max_fds < ofdt->max_fds);
+	BUG_ON((nfdt->max_fds < ofdt->max_fds) != shrink);
 
-	cpy = ofdt->max_fds * sizeof(struct file *);
+	cpy = min(ofdt->max_fds, nfdt->max_fds) * sizeof(struct file *);
 	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
 	memcpy(nfdt->fd, ofdt->fd, cpy);
-	memset((char *)(nfdt->fd) + cpy, 0, set);
+	if (!shrink)
+		memset((char *)(nfdt->fd) + cpy, 0, set);
 
-	cpy = ofdt->max_fds / BITS_PER_BYTE;
+	cpy = min(ofdt->max_fds, nfdt->max_fds) / BITS_PER_BYTE;
 	set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE;
 	memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
-	memset((char *)(nfdt->open_fds) + cpy, 0, set);
+	if (!shrink)
+		memset((char *)(nfdt->open_fds) + cpy, 0, set);
 	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
-	memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
+	if (!shrink)
+		memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
 }
 
 static unsigned int fdtable_align(unsigned int nr)
@@ -169,16 +173,26 @@  static int expand_fdtable(struct files_struct *files, int nr, bool shrink)
 	spin_lock(&files->file_lock);
 	if (!new_fdt)
 		return -ENOMEM;
+	cur_fdt = files_fdtable(files);
 	/*
 	 * extremely unlikely race - sysctl_nr_open decreased between the check in
 	 * caller and alloc_fdtable().  Cheaper to catch it here...
 	 */
-	if (unlikely(new_fdt->max_fds <= nr)) {
+	if (unlikely((new_fdt->max_fds <= nr && !shrink) ||
+		     (shrink && new_fdt->max_fds >= cur_fdt->max_fds))) {
 		__free_fdtable(new_fdt);
 		return -EMFILE;
 	}
-	cur_fdt = files_fdtable(files);
-	BUG_ON(nr < cur_fdt->max_fds);
+	if (unlikely(shrink)) {
+		int i;
+		i = find_last_bit(cur_fdt->open_fds, cur_fdt->max_fds);
+		i = fdtable_align(i);
+		if (i == cur_fdt->max_fds) {
+			__free_fdtable(new_fdt);
+			return 1;
+		}
+	}
+	BUG_ON((nr < cur_fdt->max_fds) != shrink);
 	copy_fdtable(new_fdt, cur_fdt, shrink);
 	rcu_assign_pointer(files->fdt, new_fdt);
 	if (cur_fdt != &files->fdtab)
@@ -207,7 +221,7 @@  static int expand_files(struct files_struct *files, int nr, bool shrink)
 	fdt = files_fdtable(files);
 
 	/* Do we need to expand? */
-	if (nr < fdt->max_fds)
+	if (nr < fdt->max_fds && !shrink)
 		return expanded;
 
 	/* Can we expand? */
@@ -222,6 +236,15 @@  static int expand_files(struct files_struct *files, int nr, bool shrink)
 		goto repeat;
 	}
 
+	if (unlikely(shrink)) {
+		unsigned int i;
+		i = find_last_bit(fdt->open_fds, fdt->max_fds);
+		nr = i;
+		i = fdtable_align(i);
+		if (i >= fdt->max_fds)
+			return expanded;
+	}
+
 	/* All good, so we try */
 	files->resize_in_progress = true;
 	expanded = expand_fdtable(files, nr, shrink);
@@ -637,6 +660,14 @@  int __close_fd(struct files_struct *files, unsigned fd)
 	rcu_assign_pointer(fdt->fd[fd], NULL);
 	__clear_close_on_exec(fd, fdt);
 	__put_unused_fd(files, fd);
+
+	/* Try to shrink fdt and to free memory */
+	if (unlikely(fd * 2 >= fdt->max_fds &&
+		     fd > (1024 / sizeof(struct file *))) &&
+		     get_exec_env() != get_ve0() &&
+		     get_exec_env()->is_pseudosuper)
+		expand_files(files, fd, true);
+
 	spin_unlock(&files->file_lock);
 	return filp_close(file, files);