[Devel,RHEL7,COMMIT] ve/fs/fadvise: introduce FADV_DEACTIVATE flag

Submitted by Konstantin Khorenko on Jan. 11, 2017, 3:30 p.m.

Details

Message ID 201701111530.v0BFU39x011992@finist_cl7.x64_64.work.ct
State New
Series "fs/fadvise: introduce FADV_DEACTIVATE flag"
Headers show

Commit Message

Konstantin Khorenko Jan. 11, 2017, 3:30 p.m.
The commit is pushed to "branch-rh7-3.10.0-514.vz7.27.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-514.vz7.27.9
------>
commit 8d30c1ed7eb9c3174eec56a5d0dc29316fd86a39
Author: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date:   Wed Jan 11 19:30:03 2017 +0400

    ve/fs/fadvise: introduce FADV_DEACTIVATE flag
    
    FADV_DEACTIVATE advises kernel to move file pages from active to
    inactive list.
    
    This allows Chunk Servers (CS) to mark particular parts of the page
    cache as candidates to be reclaimed first.
    
    https://jira.sw.ru/browse/PSBM-57915
    
    Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 include/uapi/linux/fadvise.h |  1 +
 mm/fadvise.c                 | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

Patch hide | download patch | download mbox

diff --git a/include/uapi/linux/fadvise.h b/include/uapi/linux/fadvise.h
index a3e0703..b6ade7e 100644
--- a/include/uapi/linux/fadvise.h
+++ b/include/uapi/linux/fadvise.h
@@ -17,6 +17,7 @@ 
 #define POSIX_FADV_DONTNEED	4 /* Don't need these pages.  */
 #define POSIX_FADV_NOREUSE	5 /* Data will be accessed once.  */
 #endif
+#define FADV_DEACTIVATE		32 /* Mark pages as good candidates for reclaim */
 
 #ifdef __KERNEL__
 extern int generic_fadvise(struct file* file, loff_t off, loff_t len, int adv);
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 0b25007..50beef3 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -22,6 +22,43 @@ 
 
 #include <asm/unistd.h>
 
+static void fadvise_deactivate(struct address_space *mapping,
+		pgoff_t start, pgoff_t end)
+{
+	struct pagevec pvec;
+	pgoff_t index = start;
+	int i;
+
+	if (start > end)
+		return;
+
+	/*
+	 * Note: this function may get called on a shmem/tmpfs mapping:
+	 * pagevec_lookup() might then return 0 prematurely (because it
+	 * got a gangful of swap entries); but it's hardly worth worrying
+	 * about - it can rarely have anything to deactivate from such a
+	 * mapping (most pages are dirty), and already skips over any difficulties.
+	 */
+
+	pagevec_init(&pvec, 0);
+	while (index <= end && pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+
+			/* We rely upon deletion not changing page->index */
+			index = page->index;
+			if (index > end)
+				break;
+
+			deactivate_page(page);
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+		index++;
+	}
+}
+
 /*
  * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
  * deactivate the pages and clear PG_Referenced.
@@ -47,6 +84,7 @@  int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 		case POSIX_FADV_WILLNEED:
 		case POSIX_FADV_NOREUSE:
 		case POSIX_FADV_DONTNEED:
+		case FADV_DEACTIVATE:
 			/* no bad return value, but ignore advice */
 			break;
 		default:
@@ -127,6 +165,11 @@  int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 			}
 		}
 		break;
+	case FADV_DEACTIVATE:
+		start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
+		end_index = (endbyte >> PAGE_CACHE_SHIFT);
+		fadvise_deactivate(mapping, start_index, end_index);
+		break;
 	default:
 		ret = -EINVAL;
 	}