[Devel,RHEL7,COMMIT] x86/vdso/prctl: fail if vdso present in ARCH_MAP_VDSO_32

Submitted by Konstantin Khorenko on May 31, 2017, 11:59 a.m.

Details

Message ID 201705311159.v4VBxoNU016463@finist_cl7.x64_64.work.ct
State New
Series "x86: C/R for ia32 tasks"
Headers show

Commit Message

Konstantin Khorenko May 31, 2017, 11:59 a.m.
The commit is pushed to "branch-rh7-3.10.0-514.16.1.vz7.32.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-514.16.1.vz7.32.5
------>
commit 5d77ed0bc0b800682a1afca94c1a982f87457af7
Author: Dmitry Safonov <dsafonov@virtuozzo.com>
Date:   Wed May 31 15:59:49 2017 +0400

    x86/vdso/prctl: fail if vdso present in ARCH_MAP_VDSO_32
    
    Patchset description:
    x86: C/R for ia32 tasks
    
    Here are:
    - Adjustments to ms API for arch_prctl(ARCH_MAP_VDSO_32).
    - Rework of my ms kernel patch for mremapping vdso to vzkernel
      (vdso part was rewritten in ~v3.16, so here is a bicycle for the older kernel)
    - Fix for mmap() bug which resulted in returning pointer over 4GB for
      ia32 syscalls (the largest part of the set).
    
    With those patches + 1 additional patch to vz-criu (u5 branch),
    it's possible to C/R ia32 CTs.
    I've sent the patch to criu-ml, it's trivial and affects only ia32 C/R,
    so soon I'll apply it to the vz7-u5 branch.
    
    There are two issues known to me:
    - zdtm autofs test (it hangs, so no C/R was tested).
    - vsyscall page appears on all 32-bit tasks after C/R.
      It's emulated and placed above 4GB, so the only side-effect is
      for applications that read /proc/.../maps and are not prepared
      to encounter such a mapping (no such applications are known to me).
    
    I've tested C/R on centos-6-x86 ia32 container with vzctl,
    all looks fine to me.
    
    Dmitry Safonov (9):
      x86/vdso/prctl: fail if vdso present in ARCH_MAP_VDSO_32
      x86/mm: Support mremap()'ing vdso vma
      x86/vdso/prctl: Return size of vdso blob for ARCH_MAP_VDSO_32
      ms/x86/mm: Introduce arch_rnd() to compute 32/64 mmap random base
      ms/x86/mm: Add task_size parameter to mmap_base()
      ms/x86/mm: Introduce mmap_compat_base() for 32-bit mmap()
      ms/x86/mm: Make mmap(MAP_32BIT) work correctly
      ms/x86/hugetlb: Adjust to the new native/compat mmap bases
      x86/mm: Make in_compat_syscall() work during exec
    
    Kees Cook (2):
      ms/x86: standardize mmap_rnd() usage
      ms/mm: expose arch_mmap_rnd when available
    
    https://jira.sw.ru/browse/PSBM-66485
    
    ================================================
    This patch description:
    
    Vanilla API to map vdso with prctl() checks if vdso/vvar area is
    already present and returns EEXIST in that case.
    
    With the current API, the previous vdso/vvar blob is unmapped during
    prctl() - but (1) that's less flexible and (2) CRIU calls
    ARCH_MAP_VDSO_32 to check if it can do 32-bit C/R and expects
    EEXIST.
    
    Adjust API to be the same as in ms.
    
    Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
 arch/x86/vdso/vdso32-setup.c | 29 +++++++++++++++--------------
 include/linux/mm.h           |  2 ++
 mm/mmap.c                    | 20 ++++++++++++++++++++
 3 files changed, 37 insertions(+), 14 deletions(-)

Patch hide | download patch | download mbox

diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 5056d0e..59ec0e2 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -494,11 +494,20 @@  up_fail:
 
 #ifdef CONFIG_X86_64
 
+static bool vdso_or_vvar_present(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next)
+		if (vma_is_vdso_or_vvar(vma, mm))
+			return true;
+	return false;
+}
+
 int do_map_compat_vdso(unsigned long req_addr)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long vdso_addr;
-	struct vm_area_struct *vdso_vma;
 	int ret;
 	bool compat;
 
@@ -514,7 +523,11 @@  int do_map_compat_vdso(unsigned long req_addr)
 		goto up_fail;
 	}
 
-	/* Don't wanna copy security checks like security_mmap_addr() */
+	if (vdso_or_vvar_present(mm)) {
+		ret = -EEXIST;
+		goto up_fail;
+	}
+
 	vdso_addr = get_unmapped_area(NULL, req_addr, PAGE_SIZE, 0, 0);
 	if (IS_ERR_VALUE(vdso_addr)) {
 		ret = vdso_addr;
@@ -526,18 +539,6 @@  int do_map_compat_vdso(unsigned long req_addr)
 		goto up_fail;
 	}
 
-	/*
-	 * Firstly, unmap old vdso - as install_special_mapping may not
-	 * do rlimit/cgroup accounting right - get rid of the old one by
-	 * remove_vma().
-	 */
-	vdso_vma = find_vma_intersection(mm, (unsigned long)mm->context.vdso,
-			(unsigned long)mm->context.vdso +
-			PAGE_SIZE*init_uts_ns.vdso.nr_pages);
-	if (vdso_vma)
-		do_munmap(mm, vdso_vma->vm_start,
-			vdso_vma->vm_end - vdso_vma->vm_start);
-
 	ret = __arch_setup_additional_pages(req_addr, compat);
 	if (ret)
 		current->mm->context.vdso = NULL;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a8f5630..c7a1acd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1848,6 +1848,8 @@  extern struct file *get_mm_exe_file(struct mm_struct *mm);
 extern struct file *get_task_exe_file(struct task_struct *task);
 
 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
+extern bool vma_is_vdso_or_vvar(const struct vm_area_struct *vma,
+				   const struct mm_struct *mm);
 extern int install_special_mapping(struct mm_struct *mm,
 				   unsigned long addr, unsigned long len,
 				   unsigned long flags, struct page **pages);
diff --git a/mm/mmap.c b/mm/mmap.c
index 54c188e..33d3244 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3173,6 +3173,26 @@  out:
 	return ret;
 }
 
+bool vma_is_vdso_or_vvar(const struct vm_area_struct *vma,
+		   const struct mm_struct *mm)
+{
+	/*
+	 * As we have uts name virtualization, we can't tell if area
+	 * is VVAR/VDSO the same way as in mainline: vma->vm_private_data
+	 * is different, allocated in uts_prep_vdso_pages_locked().
+	 * As install_special_mapping() can be used currently only by
+	 * uprobes (besides vdso and vvar), check if special mapping
+	 * is related to uprobes, if not - it's vdso/vvar.
+	 */
+	struct page *xol_page = NULL;
+
+	if (mm->uprobes_state.xol_area)
+		xol_page = mm->uprobes_state.xol_area->page;
+
+	return (vma->vm_ops == &special_mapping_vmops) &&
+		(vma->vm_private_data != xol_page);
+}
+
 static DEFINE_MUTEX(mm_all_locks_mutex);
 
 static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)