[RHEL7,COMMIT] ms/huge pagecache: extend mremap pmd rmap lockout to files

Submitted by Konstantin Khorenko on Jan. 10, 2019, 11 a.m.


Message ID 201901101100.x0AB0QeG005464@finist-ce7.sw.ru
State New
Series "ms/huge pagecache: extend mremap pmd rmap lockout to files"
Headers show

Commit Message

Konstantin Khorenko Jan. 10, 2019, 11 a.m.
The commit is pushed to "branch-rh7-3.10.0-957.1.3.vz7.83.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.1.3.vz7.83.4
commit 4fd2d9451ae7d60ce3f7c3d39484b143d428702e
Author: Hugh Dickins <hughd@google.com>
Date:   Thu May 19 17:12:57 2016 -0700

    ms/huge pagecache: extend mremap pmd rmap lockout to files
    Whatever huge pagecache implementation we go with, file rmap locking
    must be added to anon rmap locking, when mremap's move_page_tables()
    finds a pmd_trans_huge pmd entry: a simple change, let's do it now.
    Factor out take_rmap_locks() and drop_rmap_locks() to handle the locking
    for move_ptes() and move_page_tables(), and delete the
    VM_BUG_ON_VMA which rejected vm_file and required anon_vma.
    Signed-off-by: Hugh Dickins <hughd@google.com>
    Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
    Cc: Andrea Arcangeli <aarcange@redhat.com>
    Cc: Andres Lagar-Cavilla <andreslc@google.com>
    Cc: Yang Shi <yang.shi@linaro.org>
    Cc: Ning Qu <quning@gmail.com>
    Cc: Mel Gorman <mgorman@techsingularity.net>
    Cc: Andres Lagar-Cavilla <andreslc@google.com>
    Cc: Konstantin Khlebnikov <koct9i@gmail.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
    (cherry picked from commit 1d069b7dd56728a0eb6acb138dce0d37600dee00)
    Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
    ms commit 1d069b7dd567
    Porting w/o removing VM_BUG_ON_VMA() check -- we need it,
    since hugepages are for anon mapping only in our kernel.
    Patchset description:
    Patch "mm: speed up mremap by 20x on large regions" introduces
    optimization: when a moved region has source and destination addresses
    and size equal to a multiple of PMD_SIZE; PTEs are not really copied.
    Instead of this, new PMD pointer is changed to point to old PTEs, while
    old PMD is cleared.
    This may be useful, when CRIU remaps large memory areas on restore (but
    really, alignment to PMD_SIZE is not very often, though possible).
 mm/mremap.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

Patch hide | download patch | download mbox

diff --git a/mm/mremap.c b/mm/mremap.c
index 4e705fe3c747..9e65d144853f 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -74,6 +74,22 @@  static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 	return pmd;
 }
 
+static void take_rmap_locks(struct vm_area_struct *vma)
+{
+	if (vma->vm_file)
+		mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
+	if (vma->anon_vma)
+		anon_vma_lock_write(vma->anon_vma);
+}
+
+static void drop_rmap_locks(struct vm_area_struct *vma)
+{
+	if (vma->anon_vma)
+		anon_vma_unlock_write(vma->anon_vma);
+	if (vma->vm_file)
+		mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+}
+
 static pte_t move_soft_dirty_pte(pte_t pte)
@@ -94,8 +110,6 @@  static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		struct vm_area_struct *new_vma, pmd_t *new_pmd,
 		unsigned long new_addr, bool need_rmap_locks, bool *need_flush)
 {
-	struct address_space *mapping = NULL;
-	struct anon_vma *anon_vma = NULL;
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
 	spinlock_t *old_ptl, *new_ptl;
@@ -120,16 +134,8 @@  static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	 *   serialize access to individual ptes, but only rmap traversal
 	 *   order guarantees that we won't miss both the old and new ptes).
 	 */
-	if (need_rmap_locks) {
-		if (vma->vm_file) {
-			mapping = vma->vm_file->f_mapping;
-			mutex_lock(&mapping->i_mmap_mutex);
-		}
-		if (vma->anon_vma) {
-			anon_vma = vma->anon_vma;
-			anon_vma_lock_write(anon_vma);
-		}
-	}
+	if (need_rmap_locks)
+		take_rmap_locks(vma);
 
	/*
 	 * We don't have to worry about the ordering of src and dst
@@ -172,10 +178,8 @@  static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		*need_flush = true;
 	pte_unmap_unlock(old_pte - 1, old_ptl);
-	if (anon_vma)
-		anon_vma_unlock_write(anon_vma);
-	if (mapping)
-		mutex_unlock(&mapping->i_mmap_mutex);
+	if (need_rmap_locks)
+		drop_rmap_locks(vma);
 }
@@ -217,13 +221,13 @@  unsigned long move_page_tables(struct vm_area_struct *vma,
 				VM_BUG_ON(vma->vm_file || !vma->anon_vma);
 				/* See comment in move_ptes() */
 				if (need_rmap_locks)
-					anon_vma_lock_write(vma->anon_vma);
+					take_rmap_locks(vma);
 				err = move_huge_pmd(vma, new_vma, old_addr,
 						    new_addr, old_end,
 						    old_pmd, new_pmd,
 				if (need_rmap_locks)
-					anon_vma_unlock_write(vma->anon_vma);
+					drop_rmap_locks(vma);
 			if (err > 0)