[RHEL8,COMMIT] khugepaged: skip collapse if uffd-wp detected

Submitted by Konstantin Khorenko on April 20, 2020, 7:34 a.m.

Details

Message ID 202004200734.03K7Yh35016866@finist_co8.work.ct
State New
Series "Series without cover letter"
Headers show

Commit Message

Konstantin Khorenko April 20, 2020, 7:34 a.m.
The commit is pushed to "branch-rh8-4.18.0-80.1.2.vz8.3.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-80.1.2.vz8.3.6
------>
commit 2078ade991b2b244064b681f6be8a72cf3dfcc38
Author: Peter Xu <peterx@redhat.com>
Date:   Mon Apr 20 10:34:42 2020 +0300

    khugepaged: skip collapse if uffd-wp detected
    
    Don't collapse the huge PMD if there is any userfault write protected
    small PTEs.  The problem is that the write protection is in small page
    granularity and there's no way to keep all these write protection
    information if the small pages are going to be merged into a huge PMD.
    
    The same thing needs to be considered for swap entries and migration
    entries.  So do the check as well disregarding khugepaged_max_ptes_swap.
    
    Signed-off-by: Peter Xu <peterx@redhat.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Reviewed-by: Jerome Glisse <jglisse@redhat.com>
    Reviewed-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
    Cc: Andrea Arcangeli <aarcange@redhat.com>
    Cc: Bobby Powers <bobbypowers@gmail.com>
    Cc: Brian Geffon <bgeffon@google.com>
    Cc: David Hildenbrand <david@redhat.com>
    Cc: Denis Plotnikov <dplotnikov@virtuozzo.com>
    Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com>
    Cc: Hugh Dickins <hughd@google.com>
    Cc: Johannes Weiner <hannes@cmpxchg.org>
    Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
    Cc: Martin Cracauer <cracauer@cons.org>
    Cc: Marty McFadden <mcfadden8@llnl.gov>
    Cc: Maya Gokhale <gokhale2@llnl.gov>
    Cc: Mel Gorman <mgorman@suse.de>
    Cc: Mike Kravetz <mike.kravetz@oracle.com>
    Cc: Pavel Emelyanov <xemul@openvz.org>
    Cc: Rik van Riel <riel@redhat.com>
    Cc: Shaohua Li <shli@fb.com>
    Link: http://lkml.kernel.org/r/20200220163112.11409-12-peterx@redhat.com
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
    
    https://jira.sw.ru/browse/PSBM-102938
    (cherry picked from commit e1e267c7928fe387e5e1cffeafb0de2d0473663a)
    Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 include/trace/events/huge_memory.h |  1 +
 mm/khugepaged.c                    | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)

Patch hide | download patch | download mbox

diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index dd4db334bd63..2d7bad9cb976 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -13,6 +13,7 @@ 
 	EM( SCAN_PMD_NULL,		"pmd_null")			\
 	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\
 	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\
+	EM( SCAN_PTE_UFFD_WP,		"pte_uffd_wp")			\
 	EM( SCAN_PAGE_RO,		"no_writable_page")		\
 	EM( SCAN_LACK_REFERENCED_PAGE,	"lack_referenced_page")		\
 	EM( SCAN_PAGE_NULL,		"page_null")			\
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index dff5c75d5310..66616e73bb00 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -29,6 +29,7 @@  enum scan_result {
 	SCAN_PMD_NULL,
 	SCAN_EXCEED_NONE_PTE,
 	SCAN_PTE_NON_PRESENT,
+	SCAN_PTE_UFFD_WP,
 	SCAN_PAGE_RO,
 	SCAN_LACK_REFERENCED_PAGE,
 	SCAN_PAGE_NULL,
@@ -1124,6 +1125,15 @@  static int khugepaged_scan_pmd(struct mm_struct *mm,
 		pte_t pteval = *_pte;
 		if (is_swap_pte(pteval)) {
 			if (++unmapped <= khugepaged_max_ptes_swap) {
+				/*
+				 * Always be strict with uffd-wp
+				 * enabled swap entries.  Please see
+				 * comment below for pte_uffd_wp().
+				 */
+				if (pte_swp_uffd_wp(pteval)) {
+					result = SCAN_PTE_UFFD_WP;
+					goto out_unmap;
+				}
 				continue;
 			} else {
 				result = SCAN_EXCEED_SWAP_PTE;
@@ -1143,6 +1153,19 @@  static int khugepaged_scan_pmd(struct mm_struct *mm,
 			result = SCAN_PTE_NON_PRESENT;
 			goto out_unmap;
 		}
+		if (pte_uffd_wp(pteval)) {
+			/*
+			 * Don't collapse the page if any of the small
+			 * PTEs are armed with uffd write protection.
+			 * Here we can also mark the new huge pmd as
+			 * write protected if any of the small ones is
+			 * marked but that could bring uknown
+			 * userfault messages that falls outside of
+			 * the registered range.  So, just be simple.
+			 */
+			result = SCAN_PTE_UFFD_WP;
+			goto out_unmap;
+		}
 		if (pte_write(pteval))
 			writable = true;