[RHEL8,COMMIT] ve, x86_64: add per-ve vdso mapping.

Submitted by Konstantin Khorenko on Oct. 29, 2020, 11:17 a.m.

Details

Message ID 202010291117.09TBH9Fv1545592@finist-co8.sw.ru
State New
Series "Series without cover letter"
Headers show

Commit Message

Konstantin Khorenko Oct. 29, 2020, 11:17 a.m.
The commit is pushed to "branch-rh8-4.18.0-193.6.3.vz8.4.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-193.6.3.vz8.4.14
------>
commit cc46e73a3bf96224128be5f6f1001a5ad75df658
Author: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date:   Thu Oct 29 14:17:09 2020 +0300

    ve, x86_64: add per-ve vdso mapping.
    
    Make the vdso mapping per-ve. This will allow per-container modification
    of the Linux version in the .note section of the vdso and of monotonic time.
    
    https://jira.sw.ru/browse/PSBM-121668
    Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 arch/x86/entry/vdso/vma.c    |  3 ++-
 arch/x86/kernel/process_64.c |  2 +-
 include/linux/ve.h           |  2 ++
 kernel/ve/ve.c               | 43 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 48 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index eb3d85f87884..c48deffc1473 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -291,7 +291,8 @@  int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (!vdso64_enabled)
 		return 0;
 
-	return map_vdso_randomized(&vdso_image_64);
+
+	return map_vdso_randomized(get_exec_env()->vdso_64);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c1c8d66cbe70..a010d4b9d126 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -689,7 +689,7 @@  long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 		return prctl_map_vdso(&vdso_image_32, arg2);
 # endif
 	case ARCH_MAP_VDSO_64:
-		return prctl_map_vdso(&vdso_image_64, arg2);
+		return prctl_map_vdso(get_exec_env()->vdso_64, arg2);
 #endif
 
 	default:
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 0341bb915923..1c37b81321df 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -15,6 +15,7 @@ 
 #include <linux/kmapset.h>
 #include <linux/kthread.h>
 #include <linux/binfmts.h>
+#include <asm/vdso.h>
 
 struct nsproxy;
 struct veip_struct;
@@ -93,6 +94,7 @@  struct ve_struct {
 #ifdef CONFIG_CONNECTOR
 	struct cn_private	*cn;
 #endif
+	struct vdso_image	*vdso_64;
 };
 
 #define VE_MEMINFO_DEFAULT	1	/* default behaviour */
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 193fdb95daab..6cc31fae5b1d 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -57,6 +57,7 @@  struct ve_struct ve0 = {
 	.netns_avail_nr		= ATOMIC_INIT(INT_MAX),
 	.netns_max_nr		= INT_MAX,
 	.meminfo_val		= VE_MEMINFO_SYSTEM,
+	.vdso_64		= (struct vdso_image*)&vdso_image_64,
 };
 EXPORT_SYMBOL(ve0);
 
@@ -539,6 +540,33 @@  static __u64 ve_setup_iptables_mask(__u64 init_mask)
 }
 #endif
 
+/*
+ * Give @ve its own copy of the 64-bit vDSO image (descriptor and data).
+ * Returns 0 on success or if @ve already has an image, else -ENOMEM.
+ */
+static int copy_vdso(struct ve_struct *ve)
+{
+	const struct vdso_image *vdso_src = &vdso_image_64;
+	struct vdso_image *vdso;
+
+	if (ve->vdso_64)
+		return 0;
+
+	vdso = kmemdup(vdso_src, sizeof(*vdso), GFP_KERNEL);
+	if (!vdso)
+		return -ENOMEM;
+
+	/* Duplicate the image bytes too, so the copy no longer aliases the source. */
+	vdso->data = kmemdup(vdso_src->data, vdso_src->size, GFP_KERNEL);
+	if (!vdso->data) {
+		kfree(vdso);
+		return -ENOMEM;
+	}
+
+	ve->vdso_64 = vdso;
+	return 0;
+}
+
 static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_css)
 {
 	struct ve_struct *ve = &ve0;
@@ -564,6 +592,9 @@  static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 	if (err)
 		goto err_log;
 
+	if (copy_vdso(ve))
+		goto err_vdso;
+
 	ve->features = VE_FEATURES_DEF;
 	ve->_randomize_va_space = ve0._randomize_va_space;
 
@@ -587,6 +618,8 @@  static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 
 	return &ve->css;
 
+err_vdso:
+	ve_log_destroy(ve);
 err_log:
 	free_percpu(ve->sched_lat_ve.cur);
 err_lat:
@@ -625,12 +658,22 @@  static void ve_offline(struct cgroup_subsys_state *css)
 	ve->ve_name = NULL;
 }
 
+static void ve_free_vdso(struct ve_struct *ve)
+{
+	/* Nothing to free: no image was set (NULL), or it is the static one. */
+	if (!ve->vdso_64 || ve->vdso_64 == &vdso_image_64)
+		return;
+	kfree(ve->vdso_64->data);
+	kfree(ve->vdso_64);
+}
+
 static void ve_destroy(struct cgroup_subsys_state *css)
 {
 	struct ve_struct *ve = css_to_ve(css);
 
 	kmapset_unlink(&ve->sysfs_perms_key, &sysfs_ve_perms_set);
 	ve_log_destroy(ve);
+	ve_free_vdso(ve);
 #if IS_ENABLED(CONFIG_BINFMT_MISC)
 	kfree(ve->binfmt_misc);
 #endif