[Devel] x86/vdso: Add 64-bit vdso map API

Submitted by Stanislav Kinsburskiy on July 12, 2017, 9:30 a.m.

Details

Message ID 4e0653fa-70b5-4064-9302-18bb71ecda31@email.android.com
State New
Series "x86/vdso: Add 64-bit vdso map API"
Headers show

Commit Message

Stanislav Kinsburskiy July 12, 2017, 9:30 a.m.
I'm sorry, but this patch can and has to be split into series of patches.

11 июля 2017 г. 20:48 пользователь Dmitry Safonov <dsafonov@virtuozzo.com> написал:
Mainstream already has arch_prctl(MAP_VDSO_64), but this was
omitted for simplicity and we only have arch_prctl(MAP_VDSO_32).
This was not a problem as previously we needed MAP_VDSO_32 only
for ia32 applications C/R.

But as we've made vdso pages to be per-UTS-ns, pages differ between
host and uts-ns. As CRIU restore starts from init-ns, the vdso that's
being preserved into the restored application belongs to the host and
thus has the host's ve_time_spec.

Using this API we can map vdso in restored CT and it'll belong
to uts-ns of CT.

https://jira.sw.ru/browse/PSBM-67017

Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>

---
 arch/x86/include/asm/elf.h   |  6 +++++-
 arch/x86/kernel/process_64.c | 14 ++++++-------
 arch/x86/vdso/vdso32-setup.c | 12 +----------
 arch/x86/vdso/vma.c          | 48 ++++++++++++++++++++++++++++++++++++--------
 4 files changed, 53 insertions(+), 27 deletions(-)

--
2.13.1

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Patch hide | download patch | download mbox

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 8b0f63910b06..920690b3a5d5 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -353,7 +353,11 @@  extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
 #define compat_arch_setup_additional_pages      syscall32_setup_pages

 #ifdef CONFIG_X86_64
-extern int do_map_compat_vdso(unsigned long addr);
+extern bool vdso_or_vvar_present(struct mm_struct *mm);
+extern int do_map_vdso_64(unsigned long addr);
+# ifdef CONFIG_COMPAT
+extern int do_map_vdso_32(unsigned long addr);
+# endif
 #endif

 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index d2e444cb7209..252f9f0ecc0f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -580,16 +580,16 @@  long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
         }

 #ifdef CONFIG_CHECKPOINT_RESTORE
+# ifdef CONFIG_COMPAT
         case ARCH_MAP_VDSO_32:
-               return do_map_compat_vdso(addr);
+               return do_map_vdso_32(addr);
+# endif

-       /*
-        * x32 and 64 vDSO remap API is omitted for simplicity.
-        * We do need 32-bit vDSO blob mapping for compatible
-        * applications Restore, but not x32/64 (at least, for now).
-        */
-       case ARCH_MAP_VDSO_X32:
         case ARCH_MAP_VDSO_64:
+               return do_map_vdso_64(addr);
+
+       /* x32 vDSO remap API is omitted for simplicity. */
+       case ARCH_MAP_VDSO_X32:
 #endif

         default:
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 81a16c803f11..30b99959daed 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -512,17 +512,7 @@  up_fail:

 #ifdef CONFIG_X86_64

-static bool vdso_or_vvar_present(struct mm_struct *mm)
-{
-       struct vm_area_struct *vma;
-
-       for (vma = mm->mmap; vma; vma = vma->vm_next)
-               if (vma_is_vdso_or_vvar(vma, mm))
-                       return true;
-       return false;
-}
-
-int do_map_compat_vdso(unsigned long req_addr)
+int do_map_vdso_32(unsigned long req_addr)
 {
         struct mm_struct *mm = current->mm;
         unsigned long vdso_addr;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index ad0e0ac14f83..accca8edc62b 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -171,28 +171,52 @@  static unsigned long vdso_addr(unsigned long start, unsigned len)
         return addr;
 }

+bool vdso_or_vvar_present(struct mm_struct *mm)
+{
+       struct vm_area_struct *vma;
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next)
+               if (vma_is_vdso_or_vvar(vma, mm))
+                       return true;
+       return false;
+}
+
 /* Setup a VMA at program startup for the vsyscall page.
    Not called for compat tasks */
 static int setup_additional_pages(struct linux_binprm *bprm,
                                   int uses_interp,
                                   struct page **pages,
-                                 unsigned size)
+                                 unsigned size,
+                                 unsigned long req_addr)
 {
         struct mm_struct *mm = current->mm;
-       unsigned long addr;
+       unsigned long addr = req_addr;
         int ret;

         if (!vdso_enabled)
                 return 0;

         down_write(&mm->mmap_sem);
-       addr = vdso_addr(mm->start_stack, size);
+
+       if (vdso_or_vvar_present(mm)) {
+               ret = -EEXIST;
+               goto up_fail;
+       }
+
+       if (!req_addr)
+               addr = vdso_addr(mm->start_stack, size);
+
         addr = get_unmapped_area(NULL, addr, size, 0, 0);
         if (IS_ERR_VALUE(addr)) {
                 ret = addr;
                 goto up_fail;
         }

+       if (req_addr && req_addr != addr) {
+               ret = -EFAULT;
+               goto up_fail;
+       }
+
         current->mm->context.vdso = (void *)addr;

         ret = install_special_mapping(mm, addr, size,
@@ -211,7 +235,8 @@  up_fail:

 static DEFINE_MUTEX(vdso_mutex);

-static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+static int uts_arch_setup_additional_pages(struct linux_binprm *bprm,
+               int uses_interp, unsigned long addr)
 {
         struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
         struct ve_struct *ve = get_exec_env();
@@ -303,9 +328,11 @@  static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i
                  LINUX_VERSION_CODE, new_version, ve->veid);

 map_uts:
-       return setup_additional_pages(bprm, uses_interp, uts_ns->vdso.pages, uts_ns->vdso.size);
+       return setup_additional_pages(bprm, uses_interp, uts_ns->vdso.pages,
+               uts_ns->vdso.size, addr);
 map_init_uts:
-       return setup_additional_pages(bprm, uses_interp, init_uts_ns.vdso.pages, init_uts_ns.vdso.size);
+       return setup_additional_pages(bprm, uses_interp, init_uts_ns.vdso.pages,
+               init_uts_ns.vdso.size, addr);
 out_unlock:
         mutex_unlock(&vdso_mutex);
         return -ENOMEM;
@@ -313,14 +340,19 @@  out_unlock:

 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
-       return uts_arch_setup_additional_pages(bprm, uses_interp);
+       return uts_arch_setup_additional_pages(bprm, uses_interp, 0);
+}
+
+int do_map_vdso_64(unsigned long req_addr)
+{
+       return uts_arch_setup_additional_pages(0, 0, req_addr);
 }

 #ifdef CONFIG_X86_X32_ABI
 int x32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
         return setup_additional_pages(bprm, uses_interp, vdsox32_pages,
-                                     vdsox32_size);
+                                     vdsox32_size, 0);
 }
 #endif


Comments

Konstantin Khorenko July 12, 2017, 9:38 a.m.
On 07/12/2017 12:30 PM, Stanislav Kinsburskiy wrote:
> I'm sorry, but this patch can and has to be split into series of patches.

Dima, please do.

i've applied the patch as is for now without waiting for review and split - in order to get a build with NFS client migration faster.
Will revert and apply the split, reviewed series when available.


> 11 июля 2017 г. 20:48 пользователь Dmitry Safonov <dsafonov@virtuozzo.com> написал:
>
>     Mainstream already has arch_prctl(MAP_VDSO_64), but this was
>     omitted for simplicity and we only have arch_prctl(MAP_VDSO_32).
>     This was not a problem as previously we needed MAP_VDSO_32 only
>     for ia32 applications C/R.
>
>     But as we've made vdso pages to be per-UTS-ns, pages differ between
>     host and uts-ns. As CRIU restore starts from init-ns, the vdso that's
>     being preserved into the restored application belongs to the host and
>     thus has the host's ve_time_spec.
>
>     Using this API we can map vdso in restored CT and it'll belong
>     to uts-ns of CT.
>
>     https://jira.sw.ru/browse/PSBM-67017
>
>     Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
>     Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
>     ---
>      arch/x86/include/asm/elf.h   |  6 +++++-
>      arch/x86/kernel/process_64.c | 14 ++++++-------
>      arch/x86/vdso/vdso32-setup.c | 12 +----------
>      arch/x86/vdso/vma.c          | 48 ++++++++++++++++++++++++++++++++++++--------
>      4 files changed, 53 insertions(+), 27 deletions(-)
>
>     diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
>     index 8b0f63910b06..920690b3a5d5 100644
>     --- a/arch/x86/include/asm/elf.h
>     +++ b/arch/x86/include/asm/elf.h
>     @@ -353,7 +353,11 @@ extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
>      #define compat_arch_setup_additional_pages      syscall32_setup_pages
>
>      #ifdef CONFIG_X86_64
>     -extern int do_map_compat_vdso(unsigned long addr);
>     +extern bool vdso_or_vvar_present(struct mm_struct *mm);
>     +extern int do_map_vdso_64(unsigned long addr);
>     +# ifdef CONFIG_COMPAT
>     +extern int do_map_vdso_32(unsigned long addr);
>     +# endif
>      #endif
>
>      extern unsigned long arch_randomize_brk(struct mm_struct *mm);
>     diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
>     index d2e444cb7209..252f9f0ecc0f 100644
>     --- a/arch/x86/kernel/process_64.c
>     +++ b/arch/x86/kernel/process_64.c
>     @@ -580,16 +580,16 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
>              }
>
>      #ifdef CONFIG_CHECKPOINT_RESTORE
>     +# ifdef CONFIG_COMPAT
>              case ARCH_MAP_VDSO_32:
>     -               return do_map_compat_vdso(addr);
>     +               return do_map_vdso_32(addr);
>     +# endif
>
>     -       /*
>     -        * x32 and 64 vDSO remap API is omitted for simplicity.
>     -        * We do need 32-bit vDSO blob mapping for compatible
>     -        * applications Restore, but not x32/64 (at least, for now).
>     -        */
>     -       case ARCH_MAP_VDSO_X32:
>              case ARCH_MAP_VDSO_64:
>     +               return do_map_vdso_64(addr);
>     +
>     +       /* x32 vDSO remap API is omitted for simplicity. */
>     +       case ARCH_MAP_VDSO_X32:
>      #endif
>
>              default:
>     diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
>     index 81a16c803f11..30b99959daed 100644
>     --- a/arch/x86/vdso/vdso32-setup.c
>     +++ b/arch/x86/vdso/vdso32-setup.c
>     @@ -512,17 +512,7 @@ up_fail:
>
>      #ifdef CONFIG_X86_64
>
>     -static bool vdso_or_vvar_present(struct mm_struct *mm)
>     -{
>     -       struct vm_area_struct *vma;
>     -
>     -       for (vma = mm->mmap; vma; vma = vma->vm_next)
>     -               if (vma_is_vdso_or_vvar(vma, mm))
>     -                       return true;
>     -       return false;
>     -}
>     -
>     -int do_map_compat_vdso(unsigned long req_addr)
>     +int do_map_vdso_32(unsigned long req_addr)
>      {
>              struct mm_struct *mm = current->mm;
>              unsigned long vdso_addr;
>     diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
>     index ad0e0ac14f83..accca8edc62b 100644
>     --- a/arch/x86/vdso/vma.c
>     +++ b/arch/x86/vdso/vma.c
>     @@ -171,28 +171,52 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
>              return addr;
>      }
>
>     +bool vdso_or_vvar_present(struct mm_struct *mm)
>     +{
>     +       struct vm_area_struct *vma;
>     +
>     +       for (vma = mm->mmap; vma; vma = vma->vm_next)
>     +               if (vma_is_vdso_or_vvar(vma, mm))
>     +                       return true;
>     +       return false;
>     +}
>     +
>      /* Setup a VMA at program startup for the vsyscall page.
>         Not called for compat tasks */
>      static int setup_additional_pages(struct linux_binprm *bprm,
>                                        int uses_interp,
>                                        struct page **pages,
>     -                                 unsigned size)
>     +                                 unsigned size,
>     +                                 unsigned long req_addr)
>      {
>              struct mm_struct *mm = current->mm;
>     -       unsigned long addr;
>     +       unsigned long addr = req_addr;
>              int ret;
>
>              if (!vdso_enabled)
>                      return 0;
>
>              down_write(&mm->mmap_sem);
>     -       addr = vdso_addr(mm->start_stack, size);
>     +
>     +       if (vdso_or_vvar_present(mm)) {
>     +               ret = -EEXIST;
>     +               goto up_fail;
>     +       }
>     +
>     +       if (!req_addr)
>     +               addr = vdso_addr(mm->start_stack, size);
>     +
>              addr = get_unmapped_area(NULL, addr, size, 0, 0);
>              if (IS_ERR_VALUE(addr)) {
>                      ret = addr;
>                      goto up_fail;
>              }
>
>     +       if (req_addr && req_addr != addr) {
>     +               ret = -EFAULT;
>     +               goto up_fail;
>     +       }
>     +
>              current->mm->context.vdso = (void *)addr;
>
>              ret = install_special_mapping(mm, addr, size,
>     @@ -211,7 +235,8 @@ up_fail:
>
>      static DEFINE_MUTEX(vdso_mutex);
>
>     -static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
>     +static int uts_arch_setup_additional_pages(struct linux_binprm *bprm,
>     +               int uses_interp, unsigned long addr)
>      {
>              struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
>              struct ve_struct *ve = get_exec_env();
>     @@ -303,9 +328,11 @@ static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i
>                       LINUX_VERSION_CODE, new_version, ve->veid);
>
>      map_uts:
>     -       return setup_additional_pages(bprm, uses_interp, uts_ns->vdso.pages, uts_ns->vdso.size);
>     +       return setup_additional_pages(bprm, uses_interp, uts_ns->vdso.pages,
>     +               uts_ns->vdso.size, addr);
>      map_init_uts:
>     -       return setup_additional_pages(bprm, uses_interp, init_uts_ns.vdso.pages, init_uts_ns.vdso.size);
>     +       return setup_additional_pages(bprm, uses_interp, init_uts_ns.vdso.pages,
>     +               init_uts_ns.vdso.size, addr);
>      out_unlock:
>              mutex_unlock(&vdso_mutex);
>              return -ENOMEM;
>     @@ -313,14 +340,19 @@ out_unlock:
>
>      int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
>      {
>     -       return uts_arch_setup_additional_pages(bprm, uses_interp);
>     +       return uts_arch_setup_additional_pages(bprm, uses_interp, 0);
>     +}
>     +
>     +int do_map_vdso_64(unsigned long req_addr)
>     +{
>     +       return uts_arch_setup_additional_pages(0, 0, req_addr);
>      }
>
>      #ifdef CONFIG_X86_X32_ABI
>      int x32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
>      {
>              return setup_additional_pages(bprm, uses_interp, vdsox32_pages,
>     -                                     vdsox32_size);
>     +                                     vdsox32_size, 0);
>      }
>      #endif
>
>     --
>     2.13.1
>
>     _______________________________________________
>     Devel mailing list
>     Devel@openvz.org
>     https://lists.openvz.org/mailman/listinfo/devel
>
>
>
>
> _______________________________________________
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
>