[Devel,rh7,1/2] vdso: virtualized monotonic gettime through vdso

Submitted by Andrey Ryabinin on May 24, 2017, 3:21 p.m.

Details

Message ID 20170524152143.28441-1-aryabinin@virtuozzo.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Andrey Ryabinin May 24, 2017, 3:21 p.m.
We already have infrastructure for virtualized vdso, however we use
it only to change LINUX_VERSION_NAME in container. Simply store container's
start time - ve->start_timespec in vdso variable - VDSO64_ve_start_timespec,
and use it in __vdso_clock_gettime() to calculate container's monotonic time.

Make uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked() always
set up a new vdso, since the previous policy of setting up the vdso only if
uts_ns->name.release differs wouldn't work for the virtualized
__vdso_clock_gettime()

https://jira.sw.ru/browse/PSBM-66451
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 arch/x86/vdso/vclock_gettime.c | 45 +++++++++++++++++++++++++++++++++++++++++-
 arch/x86/vdso/vdso32-setup.c   | 10 ++++------
 arch/x86/vdso/vma.c            |  7 +++----
 3 files changed, 51 insertions(+), 11 deletions(-)

Patch hide | download patch | download mbox

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index f079e1fd5633..3a5b319984c7 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -27,6 +27,10 @@ 
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
+struct timespec VDSO64_ve_start_timespec;
+extern struct timespec VDSO32_ve_start_timespec
+	__attribute__((weak, alias("VDSO64_ve_start_timespec")));
+
 notrace static cycle_t vread_tsc(void)
 {
 	cycle_t ret = (cycle_t)rdtsc_ordered();
@@ -175,6 +179,43 @@  notrace static int __always_inline do_realtime(struct timespec *ts)
 	return mode;
 }
 
+notrace static struct timespec *get_ve_timespec(void)
+{
+	struct timespec *ret;
+	asm volatile ("lea VDSO64_ve_start_timespec(%%rip),%0\n": "=r"(ret));
+	return ret;
+}
+
+notrace static void vdso_set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
+{
+	while (nsec >= NSEC_PER_SEC) {
+		/*
+		 * The following asm() prevents the compiler from
+		 * optimising this loop into a modulo operation. See
+		 * also __iter_div_u64_rem() in include/linux/time.h
+		 */
+		asm("" : "+rm"(nsec));
+		nsec -= NSEC_PER_SEC;
+		++sec;
+	}
+	while (nsec < 0) {
+		asm("" : "+rm"(nsec));
+		nsec += NSEC_PER_SEC;
+		--sec;
+	}
+	ts->tv_sec = sec;
+	ts->tv_nsec = nsec;
+}
+
+static void monotonic_time_to_ve(struct timespec *ts)
+{
+	struct timespec *ve_timespec = get_ve_timespec();
+
+	vdso_set_normalized_timespec(ts,
+		ts->tv_sec - ve_timespec->tv_sec,
+		ts->tv_nsec - ve_timespec->tv_nsec);
+}
+
 notrace static int do_monotonic(struct timespec *ts)
 {
 	unsigned long seq;
@@ -190,8 +231,9 @@  notrace static int do_monotonic(struct timespec *ts)
 		ns += vgetsns(&mode);
 		ns >>= gtod->clock.shift;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-	timespec_add_ns(ts, ns);
 
+	timespec_add_ns(ts, ns);
+	monotonic_time_to_ve(ts);
 	return mode;
 }
 
@@ -215,6 +257,7 @@  notrace static int do_monotonic_coarse(struct timespec *ts)
 		ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 
+	monotonic_time_to_ve(ts);
 	return 0;
 }
 
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 5056d0ec9ab7..a082f1541f3c 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -344,10 +344,8 @@  static struct page **uts_prep_vdso_pages_locked(int map)
 		 * preallocated one.
 		 */
 		new_version = KERNEL_VERSION(n1, n2, n3);
-		if (new_version == LINUX_VERSION_CODE)
-			goto out;
 #ifdef CONFIG_X86_32
-		else {
+		{
 			/*
 			 * Native x86-32 mode requires vDSO runtime
 			 * relocations applied which is not supported
@@ -370,8 +368,8 @@  static struct page **uts_prep_vdso_pages_locked(int map)
 		 * better than walk out with error.
 		 */
 		pr_warn_once("Wrong release uts name format detected."
-			     " Ignoring vDSO virtualization.\n");
-		goto out;
+			     " Using host's uts name.\n");
+		new_version = LINUX_VERSION_CODE;
 	}
 
 	mutex_lock(&vdso32_mutex);
@@ -402,6 +400,7 @@  static struct page **uts_prep_vdso_pages_locked(int map)
 
 	addr = page_address(new_pages[0]);
 	*((int *)(addr + uts_ns->vdso32.version_off)) = new_version;
+	*((struct timespec*)(VDSO32_SYMBOL(uts_ns->vdso.addr, ve_start_timespec))) = ve->start_timespec;
 	smp_wmb();
 
 	pages = uts_ns->vdso32.pages = new_pages;
@@ -411,7 +410,6 @@  static struct page **uts_prep_vdso_pages_locked(int map)
 
 out_unlock:
 	mutex_unlock(&vdso32_mutex);
-out:
 	down_write(&mm->mmap_sem);
 	return pages;
 }
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index a862c79e2e91..ad0e0ac14f83 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -244,8 +244,6 @@  static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i
 		 * preallocated one.
 		 */
 		new_version = KERNEL_VERSION(n1, n2, n3);
-		if (new_version == LINUX_VERSION_CODE)
-			goto map_init_uts;
 	} else {
 		/*
 		 * If admin is passed malformed string here
@@ -254,8 +252,8 @@  static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i
 		 * better than walk out with error.
 		 */
 		pr_warn_once("Wrong release uts name format detected."
-			     " Ignoring vDSO virtualization.\n");
-		goto map_init_uts;
+			     " Using host's uts name.\n");
+		new_version = LINUX_VERSION_CODE;
 	}
 
 	mutex_lock(&vdso_mutex);
@@ -296,6 +294,7 @@  static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i
 	}
 
 	*((int *)(uts_ns->vdso.addr + uts_ns->vdso.version_off)) = new_version;
+	*((struct timespec*)(VDSO64_SYMBOL(uts_ns->vdso.addr, ve_start_timespec))) = ve->start_timespec;
 	smp_wmb();
 	uts_ns->vdso.pages = new_pages;
 	mutex_unlock(&vdso_mutex);

Comments

Dmitry Safonov May 24, 2017, 4:29 p.m.
On 05/24/2017 06:21 PM, Andrey Ryabinin wrote:
> We already have infrastructure for virtualized vdso, however we use
> it only to change LINUX_VERSION_NAME in container. Simply store container's
> start time - ve->start_timespec in vdso variable - VDSO64_ve_start_timespec,
> and use it in __vdso_clock_gettime() to calculate container's monotonic time.
> 
> Make uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked() to always
> setup new vdso, since previous policy to setup vdso only if uts_ns->name.release
> wouldn't work for virtualized __vdso_clock_gettime()
> 
> https://jira.sw.ru/browse/PSBM-66451
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>

Well, that's all looks good, but I've two questions:

1. Where VDSO64_ve_start_timespec is set? I do not see that part.

2. The part with unconditional creation of vdso pages array in
    uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked()
    will result in each exec() creating new vdso for all tasks in CT.
    So, that will result in n*8kB additional memory, where n is
    number of exec()'ed tasks in CTs.
    I'm not sure how much can be n, so that may be OK.
    But can we find a way to make it p*8kB, where p - is nr. of CTs?

    As far as I can see, vdso pages array was also copied for UTS
    virtualization where it was needed previously, so (2) question may be
    not related to this patches set, but for the way how it is done
    already. FWIW, what I mean here - it would be worth to have one copy
    of vdso pages per-CT.

    May be I'm trying to rearrange the deck chairs on the Titanic and
    that increase is just insignificant.

> ---
>   arch/x86/vdso/vclock_gettime.c | 45 +++++++++++++++++++++++++++++++++++++++++-
>   arch/x86/vdso/vdso32-setup.c   | 10 ++++------
>   arch/x86/vdso/vma.c            |  7 +++----
>   3 files changed, 51 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
> index f079e1fd5633..3a5b319984c7 100644
> --- a/arch/x86/vdso/vclock_gettime.c
> +++ b/arch/x86/vdso/vclock_gettime.c
> @@ -27,6 +27,10 @@
>   
>   #define gtod (&VVAR(vsyscall_gtod_data))
>   
> +struct timespec VDSO64_ve_start_timespec;
> +extern struct timespec VDSO32_ve_start_timespec
> +	__attribute__((weak, alias("VDSO64_ve_start_timespec")));
> +
>   notrace static cycle_t vread_tsc(void)
>   {
>   	cycle_t ret = (cycle_t)rdtsc_ordered();
> @@ -175,6 +179,43 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
>   	return mode;
>   }
>   
> +notrace static struct timespec *get_ve_timespec(void)
> +{
> +	struct timespec *ret;
> +	asm volatile ("lea VDSO64_ve_start_timespec(%%rip),%0\n": "=r"(ret));
> +	return ret;
> +}
> +
> +notrace static void vdso_set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
> +{
> +	while (nsec >= NSEC_PER_SEC) {
> +		/*
> +		 * The following asm() prevents the compiler from
> +		 * optimising this loop into a modulo operation. See
> +		 * also __iter_div_u64_rem() in include/linux/time.h
> +		 */
> +		asm("" : "+rm"(nsec));
> +		nsec -= NSEC_PER_SEC;
> +		++sec;
> +	}
> +	while (nsec < 0) {
> +		asm("" : "+rm"(nsec));
> +		nsec += NSEC_PER_SEC;
> +		--sec;
> +	}
> +	ts->tv_sec = sec;
> +	ts->tv_nsec = nsec;
> +}
> +
> +static void monotonic_time_to_ve(struct timespec *ts)
> +{
> +	struct timespec *ve_timespec = get_ve_timespec();
> +
> +	vdso_set_normalized_timespec(ts,
> +		ts->tv_sec - ve_timespec->tv_sec,
> +		ts->tv_nsec - ve_timespec->tv_nsec);
> +}
> +
>   notrace static int do_monotonic(struct timespec *ts)
>   {
>   	unsigned long seq;
> @@ -190,8 +231,9 @@ notrace static int do_monotonic(struct timespec *ts)
>   		ns += vgetsns(&mode);
>   		ns >>= gtod->clock.shift;
>   	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
> -	timespec_add_ns(ts, ns);
>   
> +	timespec_add_ns(ts, ns);
> +	monotonic_time_to_ve(ts);
>   	return mode;
>   }
>   
> @@ -215,6 +257,7 @@ notrace static int do_monotonic_coarse(struct timespec *ts)
>   		ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
>   	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
>   
> +	monotonic_time_to_ve(ts);
>   	return 0;
>   }
>   
> diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
> index 5056d0ec9ab7..a082f1541f3c 100644
> --- a/arch/x86/vdso/vdso32-setup.c
> +++ b/arch/x86/vdso/vdso32-setup.c
> @@ -344,10 +344,8 @@ static struct page **uts_prep_vdso_pages_locked(int map)
>   		 * preallocated one.
>   		 */
>   		new_version = KERNEL_VERSION(n1, n2, n3);
> -		if (new_version == LINUX_VERSION_CODE)
> -			goto out;
>   #ifdef CONFIG_X86_32
> -		else {
> +		{
>   			/*
>   			 * Native x86-32 mode requires vDSO runtime
>   			 * relocations applied which is not supported
> @@ -370,8 +368,8 @@ static struct page **uts_prep_vdso_pages_locked(int map)
>   		 * better than walk out with error.
>   		 */
>   		pr_warn_once("Wrong release uts name format detected."
> -			     " Ignoring vDSO virtualization.\n");
> -		goto out;
> +			     " Using host's uts name.\n");
> +		new_version = LINUX_VERSION_CODE;
>   	}
>   
>   	mutex_lock(&vdso32_mutex);
> @@ -402,6 +400,7 @@ static struct page **uts_prep_vdso_pages_locked(int map)
>   
>   	addr = page_address(new_pages[0]);
>   	*((int *)(addr + uts_ns->vdso32.version_off)) = new_version;
> +	*((struct timespec*)(VDSO32_SYMBOL(uts_ns->vdso.addr, ve_start_timespec))) = ve->start_timespec;
>   	smp_wmb();
>   
>   	pages = uts_ns->vdso32.pages = new_pages;
> @@ -411,7 +410,6 @@ static struct page **uts_prep_vdso_pages_locked(int map)
>   
>   out_unlock:
>   	mutex_unlock(&vdso32_mutex);
> -out:
>   	down_write(&mm->mmap_sem);
>   	return pages;
>   }
> diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
> index a862c79e2e91..ad0e0ac14f83 100644
> --- a/arch/x86/vdso/vma.c
> +++ b/arch/x86/vdso/vma.c
> @@ -244,8 +244,6 @@ static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i
>   		 * preallocated one.
>   		 */
>   		new_version = KERNEL_VERSION(n1, n2, n3);
> -		if (new_version == LINUX_VERSION_CODE)
> -			goto map_init_uts;
>   	} else {
>   		/*
>   		 * If admin is passed malformed string here
> @@ -254,8 +252,8 @@ static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i
>   		 * better than walk out with error.
>   		 */
>   		pr_warn_once("Wrong release uts name format detected."
> -			     " Ignoring vDSO virtualization.\n");
> -		goto map_init_uts;
> +			     " Using host's uts name.\n");
> +		new_version = LINUX_VERSION_CODE;
>   	}
>   
>   	mutex_lock(&vdso_mutex);
> @@ -296,6 +294,7 @@ static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_i
>   	}
>   
>   	*((int *)(uts_ns->vdso.addr + uts_ns->vdso.version_off)) = new_version;
> +	*((struct timespec*)(VDSO64_SYMBOL(uts_ns->vdso.addr, ve_start_timespec))) = ve->start_timespec;
>   	smp_wmb();
>   	uts_ns->vdso.pages = new_pages;
>   	mutex_unlock(&vdso_mutex);
>
Andrey Ryabinin May 24, 2017, 4:34 p.m.
On 05/24/2017 07:29 PM, Dmitry Safonov wrote:
> On 05/24/2017 06:21 PM, Andrey Ryabinin wrote:
>> We already have infrastructure for virtualized vdso, however we use
>> it only to change LINUX_VERSION_NAME in container. Simply store container's
>> start time - ve->start_timespec in vdso variable - VDSO64_ve_start_timespec,
>> and use it in __vdso_clock_gettime() to calculate container's monotonic time.
>>
>> Make uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked() to always
>> setup new vdso, since previous policy to setup vdso only if uts_ns->name.release
>> wouldn't work for virtualized __vdso_clock_gettime()
>>
>> https://jira.sw.ru/browse/PSBM-66451
>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> 
> Well, that's all looks good, but I've two questions:
> 
> 1. Where VDSO64_ve_start_timespec is set? I do not see that part.
> 

in uts_arch_setup_additional_pages() and uts_prep_vdso_pages_locked()

> 2. The part with unconditional creation of vdso pages array in
>    uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked()
>    will result in each exec() creating new vdso for all tasks in CT.
>    So, that will result in n*8kB additional memory, where n is
>    number of exec()'ed tasks in CTs.
>    I'm not sure how much can be n, so that may be OK.
>    But can we find a way to make it p*8kB, where p - is nr. of CTs?

Nope, vdso pages created only once, and attached to uts_ns. Once pages created
all follow up execs will reuse them. So it's one vdso per uts_ns.

> 
>    As far as I can see, vdso pages array was also copied for UTS
>    virtualization where it was needed previously, so (2) question may be
>    not related to this patches set, but for the way how it is done
>    already. FWIW, what I mean here - it would be worth to have one copy
>    of vdso pages per-CT.
> 
>    May be I'm trying to rearrange the deck chairs on the Titanic and
>    that increase is just insignificant.
> 
>> ---
Dmitry Safonov May 24, 2017, 4:38 p.m.
On 05/24/2017 07:34 PM, Andrey Ryabinin wrote:
> 
> 
> On 05/24/2017 07:29 PM, Dmitry Safonov wrote:
>> On 05/24/2017 06:21 PM, Andrey Ryabinin wrote:
>>> We already have infrastructure for virtualized vdso, however we use
>>> it only to change LINUX_VERSION_NAME in container. Simply store container's
>>> start time - ve->start_timespec in vdso variable - VDSO64_ve_start_timespec,
>>> and use it in __vdso_clock_gettime() to calculate container's monotonic time.
>>>
>>> Make uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked() to always
>>> setup new vdso, since previous policy to setup vdso only if uts_ns->name.release
>>> wouldn't work for virtualized __vdso_clock_gettime()
>>>
>>> https://jira.sw.ru/browse/PSBM-66451
>>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>>
>> Well, that's all looks good, but I've two questions:
>>
>> 1. Where VDSO64_ve_start_timespec is set? I do not see that part.
>>
> 
> in uts_arch_setup_additional_pages() and uts_prep_vdso_pages_locked()

Uhm, yes, but that means that ve_ts_write() is broken for
running CT.

> 
>> 2. The part with unconditional creation of vdso pages array in
>>     uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked()
>>     will result in each exec() creating new vdso for all tasks in CT.
>>     So, that will result in n*8kB additional memory, where n is
>>     number of exec()'ed tasks in CTs.
>>     I'm not sure how much can be n, so that may be OK.
>>     But can we find a way to make it p*8kB, where p - is nr. of CTs?
> 
> Nope, vdso pages created only once, and attached to uts_ns. Once pages created
> all follow up execs will reuse them. So it's one vdso per uts_ns.

Oh, good - I've just misread that code.
So, that should be fine.

> 
>>
>>     As far as I can see, vdso pages array was also copied for UTS
>>     virtualization where it was needed previously, so (2) question may be
>>     not related to this patches set, but for the way how it is done
>>     already. FWIW, what I mean here - it would be worth to have one copy
>>     of vdso pages per-CT.
>>
>>     May be I'm trying to rearrange the deck chairs on the Titanic and
>>     that increase is just insignificant.
>>
>>> ---
Andrey Ryabinin May 24, 2017, 4:45 p.m.
On 05/24/2017 07:38 PM, Dmitry Safonov wrote:
> On 05/24/2017 07:34 PM, Andrey Ryabinin wrote:
>>
>>
>> On 05/24/2017 07:29 PM, Dmitry Safonov wrote:
>>> On 05/24/2017 06:21 PM, Andrey Ryabinin wrote:
>>>> We already have infrastructure for virtualized vdso, however we use
>>>> it only to change LINUX_VERSION_NAME in container. Simply store container's
>>>> start time - ve->start_timespec in vdso variable - VDSO64_ve_start_timespec,
>>>> and use it in __vdso_clock_gettime() to calculate container's monotonic time.
>>>>
>>>> Make uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked() to always
>>>> setup new vdso, since previous policy to setup vdso only if uts_ns->name.release
>>>> wouldn't work for virtualized __vdso_clock_gettime()
>>>>
>>>> https://jira.sw.ru/browse/PSBM-66451
>>>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>>>
>>> Well, that's all looks good, but I've two questions:
>>>
>>> 1. Where VDSO64_ve_start_timespec is set? I do not see that part.
>>>
>>
>> in uts_arch_setup_additional_pages() and uts_prep_vdso_pages_locked()
> 
> Uhm, yes, but that means that ve_ts_write() is broken for
> running CT.
> 

Do we care? Changing monotonic time on the fly doesn't sound like a brilliant idea.
Dmitry Safonov May 24, 2017, 4:45 p.m.
On 05/24/2017 07:45 PM, Andrey Ryabinin wrote:
> 
> 
> On 05/24/2017 07:38 PM, Dmitry Safonov wrote:
>> On 05/24/2017 07:34 PM, Andrey Ryabinin wrote:
>>>
>>>
>>> On 05/24/2017 07:29 PM, Dmitry Safonov wrote:
>>>> On 05/24/2017 06:21 PM, Andrey Ryabinin wrote:
>>>>> We already have infrastructure for virtualized vdso, however we use
>>>>> it only to change LINUX_VERSION_NAME in container. Simply store container's
>>>>> start time - ve->start_timespec in vdso variable - VDSO64_ve_start_timespec,
>>>>> and use it in __vdso_clock_gettime() to calculate container's monotonic time.
>>>>>
>>>>> Make uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked() to always
>>>>> setup new vdso, since previous policy to setup vdso only if uts_ns->name.release
>>>>> wouldn't work for virtualized __vdso_clock_gettime()
>>>>>
>>>>> https://jira.sw.ru/browse/PSBM-66451
>>>>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>>>>
>>>> Well, that's all looks good, but I've two questions:
>>>>
>>>> 1. Where VDSO64_ve_start_timespec is set? I do not see that part.
>>>>
>>>
>>> in uts_arch_setup_additional_pages() and uts_prep_vdso_pages_locked()
>>
>> Uhm, yes, but that means that ve_ts_write() is broken for
>> running CT.
>>
> 
> Do we care? Changing monotonic time on the fly doesn't sound like a brilliant idea.

Ok, if that's not used by vz tools, then:
Reviewed-by: Dmitry Safonov <dsafonov@virtuozzo.com>
Dmitry Safonov May 25, 2017, 8:42 a.m.
On 05/24/2017 07:45 PM, Dmitry Safonov wrote:
> On 05/24/2017 07:45 PM, Andrey Ryabinin wrote:
>>
>>
>> On 05/24/2017 07:38 PM, Dmitry Safonov wrote:
>>> On 05/24/2017 07:34 PM, Andrey Ryabinin wrote:
>>>>
>>>>
>>>> On 05/24/2017 07:29 PM, Dmitry Safonov wrote:
>>>>> On 05/24/2017 06:21 PM, Andrey Ryabinin wrote:
>>>>>> We already have infrastructure for virtualized vdso, however we use
>>>>>> it only to change LINUX_VERSION_NAME in container. Simply store 
>>>>>> container's
>>>>>> start time - ve->start_timespec in vdso variable - 
>>>>>> VDSO64_ve_start_timespec,
>>>>>> and use it in __vdso_clock_gettime() to calculate container's 
>>>>>> monotonic time.
>>>>>>
>>>>>> Make 
>>>>>> uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked() to 
>>>>>> always
>>>>>> setup new vdso, since previous policy to setup vdso only if 
>>>>>> uts_ns->name.release
>>>>>> wouldn't work for virtualized __vdso_clock_gettime()
>>>>>>
>>>>>> https://jira.sw.ru/browse/PSBM-66451
>>>>>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>>>>>
>>>>> Well, that's all looks good, but I've two questions:
>>>>>
>>>>> 1. Where VDSO64_ve_start_timespec is set? I do not see that part.
>>>>>
>>>>
>>>> in uts_arch_setup_additional_pages() and uts_prep_vdso_pages_locked()
>>>
>>> Uhm, yes, but that means that ve_ts_write() is broken for
>>> running CT.
>>>
>>
>> Do we care? Changing monotonic time on the fly doesn't sound like a 
>> brilliant idea.
> 
> Ok, if that's not used by vz tools, then:
> Reviewed-by: Dmitry Safonov <dsafonov@virtuozzo.com>

What I suggest - may we add a third patch that changes ve_ts_write()
to return -EACCES or something if the vdso pages array already exists for
the UTS ns?
Andrey Ryabinin May 25, 2017, 8:49 a.m.
On 05/25/2017 11:42 AM, Dmitry Safonov wrote:
> On 05/24/2017 07:45 PM, Dmitry Safonov wrote:
>> On 05/24/2017 07:45 PM, Andrey Ryabinin wrote:
>>>
>>>
>>> On 05/24/2017 07:38 PM, Dmitry Safonov wrote:
>>>> On 05/24/2017 07:34 PM, Andrey Ryabinin wrote:
>>>>>
>>>>>
>>>>> On 05/24/2017 07:29 PM, Dmitry Safonov wrote:
>>>>>> On 05/24/2017 06:21 PM, Andrey Ryabinin wrote:
>>>>>>> We already have infrastructure for virtualized vdso, however we use
>>>>>>> it only to change LINUX_VERSION_NAME in container. Simply store container's
>>>>>>> start time - ve->start_timespec in vdso variable - VDSO64_ve_start_timespec,
>>>>>>> and use it in __vdso_clock_gettime() to calculate container's monotonic time.
>>>>>>>
>>>>>>> Make uts_arch_setup_additional_pages()/uts_prep_vdso_pages_locked() to always
>>>>>>> setup new vdso, since previous policy to setup vdso only if uts_ns->name.release
>>>>>>> wouldn't work for virtualized __vdso_clock_gettime()
>>>>>>>
>>>>>>> https://jira.sw.ru/browse/PSBM-66451
>>>>>>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>>>>>>
>>>>>> Well, that's all looks good, but I've two questions:
>>>>>>
>>>>>> 1. Where VDSO64_ve_start_timespec is set? I do not see that part.
>>>>>>
>>>>>
>>>>> in uts_arch_setup_additional_pages() and uts_prep_vdso_pages_locked()
>>>>
>>>> Uhm, yes, but that means that ve_ts_write() is broken for
>>>> running CT.
>>>>
>>>
>>> Do we care? Changing monotonic time on the fly doesn't sound like a brilliant idea.
>>
>> Ok, if that's not used by vz tools, then:
>> Reviewed-by: Dmitry Safonov <dsafonov@virtuozzo.com>
> 
> What I suggest - may we add a third patch that changes ve_ts_write()
> to return -EACCES or something if the vdso pages array already exists for
> the UTS ns?
> 

Go ahead.