[Devel,RHEL7,COMMIT] ms/KVM: x86: drop error recovery in em_jmp_far and em_ret_far

Submitted by Konstantin Khorenko on Jan. 11, 2017, 2:56 p.m.

Details

Message ID 201701111456.v0BEuH0h011857@finist_cl7.x64_64.work.ct
State New
Series "Fresh KVM CVE fixes for update 4 kernel"
Headers show

Commit Message

Konstantin Khorenko Jan. 11, 2017, 2:56 p.m.
The commit is pushed to "branch-rh7-3.10.0-514.vz7.27.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-514.vz7.27.9
------>
commit 55d41d1ae7e2e9bc4911e47615e7dbb67cbadb2b
Author: Radim Krčmář <rkrcmar@redhat.com>
Date:   Wed Jan 11 18:56:17 2017 +0400

    ms/KVM: x86: drop error recovery in em_jmp_far and em_ret_far
    
    em_jmp_far and em_ret_far assumed that setting IP can only fail in 64
    bit mode, but syzkaller proved otherwise (and SDM agrees).
    Code segment was restored upon failure, but it was left uninitialized
    outside of long mode, which could lead to a leak of host kernel stack.
    We could have fixed that by always saving and restoring the CS, but we
    take a simpler approach and just break any guest that manages to fail
    as the error recovery is error-prone and modern CPUs don't need emulator
    for this.
    
    Found by syzkaller:
    
      WARNING: CPU: 2 PID: 3668 at arch/x86/kvm/emulate.c:2217 em_ret_far+0x428/0x480
      Kernel panic - not syncing: panic_on_warn set ...
    
      CPU: 2 PID: 3668 Comm: syz-executor Not tainted 4.9.0-rc4+ #49
      Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
       [...]
      Call Trace:
       [...] __dump_stack lib/dump_stack.c:15
       [...] dump_stack+0xb3/0x118 lib/dump_stack.c:51
       [...] panic+0x1b7/0x3a3 kernel/panic.c:179
       [...] __warn+0x1c4/0x1e0 kernel/panic.c:542
       [...] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585
       [...] em_ret_far+0x428/0x480 arch/x86/kvm/emulate.c:2217
       [...] em_ret_far_imm+0x17/0x70 arch/x86/kvm/emulate.c:2227
       [...] x86_emulate_insn+0x87a/0x3730 arch/x86/kvm/emulate.c:5294
       [...] x86_emulate_instruction+0x520/0x1ba0 arch/x86/kvm/x86.c:5545
       [...] emulate_instruction arch/x86/include/asm/kvm_host.h:1116
       [...] complete_emulated_io arch/x86/kvm/x86.c:6870
       [...] complete_emulated_mmio+0x4e9/0x710 arch/x86/kvm/x86.c:6934
       [...] kvm_arch_vcpu_ioctl_run+0x3b7a/0x5a90 arch/x86/kvm/x86.c:6978
       [...] kvm_vcpu_ioctl+0x61e/0xdd0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2557
       [...] vfs_ioctl fs/ioctl.c:43
       [...] do_vfs_ioctl+0x18c/0x1040 fs/ioctl.c:679
       [...] SYSC_ioctl fs/ioctl.c:694
       [...] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:685
       [...] entry_SYSCALL_64_fastpath+0x1f/0xc2
    
    Reported-by: Dmitry Vyukov <dvyukov@google.com>
    Cc: stable@vger.kernel.org
    Fixes: d1442d85cc30 ("KVM: x86: Handle errors when RIP is set during far jumps")
    Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
    
    Backport of ms commit 2117d5398c81554fbf803f5fd1dc55eb78216c0c
    Fixes CVE-2016-9756
    https://vulners.com/cve/CVE-2016-9756
    
    https://jira.sw.ru/browse/PSBM-58195
    
    Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
---
 arch/x86/kvm/emulate.c | 36 +++++++++++-------------------------
 1 file changed, 11 insertions(+), 25 deletions(-)

Patch hide | download patch | download mbox

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index bce2c74..f9da33c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2125,16 +2125,10 @@  static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned short sel, old_sel;
-	struct desc_struct old_desc, new_desc;
-	const struct x86_emulate_ops *ops = ctxt->ops;
+	unsigned short sel;
+	struct desc_struct new_desc;
 	u8 cpl = ctxt->ops->cpl(ctxt);
 
-	/* Assignment of RIP may only fail in 64-bit mode */
-	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
-				 VCPU_SREG_CS);
-
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
 	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
@@ -2144,12 +2138,10 @@  static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 		return rc;
 
 	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
-	if (rc != X86EMUL_CONTINUE) {
-		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
-		/* assigning eip failed; restore the old cs */
-		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
-		return rc;
-	}
+	/* Error handling is not implemented. */
+	if (rc != X86EMUL_CONTINUE)
+		return X86EMUL_UNHANDLEABLE;
+
 	return rc;
 }
 
@@ -2209,14 +2201,8 @@  static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
 	unsigned long eip, cs;
-	u16 old_cs;
 	int cpl = ctxt->ops->cpl(ctxt);
-	struct desc_struct old_desc, new_desc;
-	const struct x86_emulate_ops *ops = ctxt->ops;
-
-	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
-				 VCPU_SREG_CS);
+	struct desc_struct new_desc;
 
 	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
@@ -2233,10 +2219,10 @@  static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rc = assign_eip_far(ctxt, eip, &new_desc);
-	if (rc != X86EMUL_CONTINUE) {
-		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
-		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
-	}
+	/* Error handling is not implemented. */
+	if (rc != X86EMUL_CONTINUE)
+		return X86EMUL_UNHANDLEABLE;
+
 	return rc;
 }
 

Comments

Vasily Averin Jan. 11, 2017, 3:29 p.m.
According to Red Hat this issue have low impact,
I do not see here any Virtuozzo specific, it isn't critical for us.
so we're not going to force fix and will wait until it will be fixed in RHEL7 kernels.

On 2017-01-11 18:01, Konstantin Khorenko wrote:
> Please consider to create a ReadyKernel live patch for this issue.
> 
> -- 
> Best regards,
> 
> Konstantin Khorenko,
> Virtuozzo Linux Kernel Team
> 
> On 01/11/2017 05:56 PM, Konstantin Khorenko wrote:
>> The commit is pushed to "branch-rh7-3.10.0-514.vz7.27.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
>> after rh7-3.10.0-514.vz7.27.9
>> ------>
>> commit 55d41d1ae7e2e9bc4911e47615e7dbb67cbadb2b
>> Author: Radim Krčmář <rkrcmar@redhat.com>
>> Date:   Wed Jan 11 18:56:17 2017 +0400
>>
>>     ms/KVM: x86: drop error recovery in em_jmp_far and em_ret_far
>>
>>     em_jmp_far and em_ret_far assumed that setting IP can only fail in 64
>>     bit mode, but syzkaller proved otherwise (and SDM agrees).
>>     Code segment was restored upon failure, but it was left uninitialized
>>     outside of long mode, which could lead to a leak of host kernel stack.
>>     We could have fixed that by always saving and restoring the CS, but we
>>     take a simpler approach and just break any guest that manages to fail
>>     as the error recovery is error-prone and modern CPUs don't need emulator
>>     for this.
>>
>>     Found by syzkaller:
>>
>>       WARNING: CPU: 2 PID: 3668 at arch/x86/kvm/emulate.c:2217 em_ret_far+0x428/0x480
>>       Kernel panic - not syncing: panic_on_warn set ...
>>
>>       CPU: 2 PID: 3668 Comm: syz-executor Not tainted 4.9.0-rc4+ #49
>>       Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>>        [...]
>>       Call Trace:
>>        [...] __dump_stack lib/dump_stack.c:15
>>        [...] dump_stack+0xb3/0x118 lib/dump_stack.c:51
>>        [...] panic+0x1b7/0x3a3 kernel/panic.c:179
>>        [...] __warn+0x1c4/0x1e0 kernel/panic.c:542
>>        [...] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585
>>        [...] em_ret_far+0x428/0x480 arch/x86/kvm/emulate.c:2217
>>        [...] em_ret_far_imm+0x17/0x70 arch/x86/kvm/emulate.c:2227
>>        [...] x86_emulate_insn+0x87a/0x3730 arch/x86/kvm/emulate.c:5294
>>        [...] x86_emulate_instruction+0x520/0x1ba0 arch/x86/kvm/x86.c:5545
>>        [...] emulate_instruction arch/x86/include/asm/kvm_host.h:1116
>>        [...] complete_emulated_io arch/x86/kvm/x86.c:6870
>>        [...] complete_emulated_mmio+0x4e9/0x710 arch/x86/kvm/x86.c:6934
>>        [...] kvm_arch_vcpu_ioctl_run+0x3b7a/0x5a90 arch/x86/kvm/x86.c:6978
>>        [...] kvm_vcpu_ioctl+0x61e/0xdd0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2557
>>        [...] vfs_ioctl fs/ioctl.c:43
>>        [...] do_vfs_ioctl+0x18c/0x1040 fs/ioctl.c:679
>>        [...] SYSC_ioctl fs/ioctl.c:694
>>        [...] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:685
>>        [...] entry_SYSCALL_64_fastpath+0x1f/0xc2
>>
>>     Reported-by: Dmitry Vyukov <dvyukov@google.com>
>>     Cc: stable@vger.kernel.org
>>     Fixes: d1442d85cc30 ("KVM: x86: Handle errors when RIP is set during far jumps")
>>     Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
>>
>>     Backport of ms commit 2117d5398c81554fbf803f5fd1dc55eb78216c0c
>>     Fixes CVE-2016-9756
>>     https://vulners.com/cve/CVE-2016-9756
>>
>>     https://jira.sw.ru/browse/PSBM-58195
>>
>>     Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
>> ---
>>  arch/x86/kvm/emulate.c | 36 +++++++++++-------------------------
>>  1 file changed, 11 insertions(+), 25 deletions(-)
>>
>> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
>> index bce2c74..f9da33c 100644
>> --- a/arch/x86/kvm/emulate.c
>> +++ b/arch/x86/kvm/emulate.c
>> @@ -2125,16 +2125,10 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
>>  static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
>>  {
>>      int rc;
>> -    unsigned short sel, old_sel;
>> -    struct desc_struct old_desc, new_desc;
>> -    const struct x86_emulate_ops *ops = ctxt->ops;
>> +    unsigned short sel;
>> +    struct desc_struct new_desc;
>>      u8 cpl = ctxt->ops->cpl(ctxt);
>>
>> -    /* Assignment of RIP may only fail in 64-bit mode */
>> -    if (ctxt->mode == X86EMUL_MODE_PROT64)
>> -        ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
>> -                 VCPU_SREG_CS);
>> -
>>      memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
>>
>>      rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
>> @@ -2144,12 +2138,10 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
>>          return rc;
>>
>>      rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
>> -    if (rc != X86EMUL_CONTINUE) {
>> -        WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
>> -        /* assigning eip failed; restore the old cs */
>> -        ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
>> -        return rc;
>> -    }
>> +    /* Error handling is not implemented. */
>> +    if (rc != X86EMUL_CONTINUE)
>> +        return X86EMUL_UNHANDLEABLE;
>> +
>>      return rc;
>>  }
>>
>> @@ -2209,14 +2201,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
>>  {
>>      int rc;
>>      unsigned long eip, cs;
>> -    u16 old_cs;
>>      int cpl = ctxt->ops->cpl(ctxt);
>> -    struct desc_struct old_desc, new_desc;
>> -    const struct x86_emulate_ops *ops = ctxt->ops;
>> -
>> -    if (ctxt->mode == X86EMUL_MODE_PROT64)
>> -        ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
>> -                 VCPU_SREG_CS);
>> +    struct desc_struct new_desc;
>>
>>      rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
>>      if (rc != X86EMUL_CONTINUE)
>> @@ -2233,10 +2219,10 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
>>      if (rc != X86EMUL_CONTINUE)
>>          return rc;
>>      rc = assign_eip_far(ctxt, eip, &new_desc);
>> -    if (rc != X86EMUL_CONTINUE) {
>> -        WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
>> -        ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
>> -    }
>> +    /* Error handling is not implemented. */
>> +    if (rc != X86EMUL_CONTINUE)
>> +        return X86EMUL_UNHANDLEABLE;
>> +
>>      return rc;
>>  }
>>
>> .
>>
>