[Devel,6/6] KVM: async_pf: Let guest support delivery of async_pf from guest mode

Submitted by Denis Plotnikov on Sept. 20, 2017, 2:31 p.m.

Details

Message ID 1505917865-26405-7-git-send-email-dplotnikov@virtuozzo.com
State New
Series "backporting async_pf injection functionality"
Headers show

Commit Message

Denis Plotnikov Sept. 20, 2017, 2:31 p.m.
From: Wanpeng Li <wanpeng.li@hotmail.com>

Adds another flag bit (bit 2) to MSR_KVM_ASYNC_PF_EN. If bit 2 is 1,
async page faults are delivered to L1 as #PF vmexits; if bit 2 is 0,
kvm_can_do_async_pf returns 0 if in guest mode.

This is similar to what svm.c wanted to do all along, but it is only
enabled for Linux as L1 hypervisor.  Foreign hypervisors must never
receive async page faults as vmexits, because they'd probably be very
confused about that.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
(cherry picked from commit 52a5c155cf79f1f059bffebf4d06d0249573e659)
fix #PSBM-56498
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
---
 Documentation/virtual/kvm/msr.txt    | 5 +++--
 arch/x86/include/asm/kvm_host.h      | 1 +
 arch/x86/include/uapi/asm/kvm_para.h | 1 +
 arch/x86/kernel/kvm.c                | 7 ++++++-
 arch/x86/kvm/mmu.c                   | 2 +-
 arch/x86/kvm/vmx.c                   | 2 +-
 arch/x86/kvm/x86.c                   | 5 +++--
 7 files changed, 16 insertions(+), 7 deletions(-)

Patch hide | download patch | download mbox

diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 2a71c8f..2a3452f 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -166,10 +166,11 @@  MSR_KVM_SYSTEM_TIME: 0x12
 MSR_KVM_ASYNC_PF_EN: 0x4b564d02
 	data: Bits 63-6 hold 64-byte aligned physical address of a
 	64 byte memory area which must be in guest RAM and must be
-	zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
+	zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
 	when asynchronous page faults are enabled on the vcpu 0 when
 	disabled. Bit 1 is 1 if asynchronous page faults can be injected
-	when vcpu is in cpl == 0.
+	when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
+	are delivered to L1 as #PF vmexits.
 
 	First 4 byte of 64 byte memory location will be written to by
 	the hypervisor at the time of asynchronous page fault (APF)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ac1a7c1..e8056a7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -596,6 +596,7 @@  struct kvm_vcpu_arch {
 		bool send_user_only;
 		u32 host_apf_reason;
 		unsigned long nested_apf_token;
+		bool delivery_as_pf_vmexit;
 	} apf;
 
 	/* OSVW MSRs (AMD only) */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 23f966fe..47bee05 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -65,6 +65,7 @@  struct kvm_clock_pairing {
 
 #define KVM_ASYNC_PF_ENABLED			(1 << 0)
 #define KVM_ASYNC_PF_SEND_ALWAYS		(1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT	(1 << 2)
 
 /* Operations for KVM_HC_MMU_OP */
 #define KVM_MMU_OP_WRITE_PTE            1
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 14d04e7..32d5f5a 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -333,7 +333,12 @@  static void kvm_guest_cpu_init(void)
 #ifdef CONFIG_PREEMPT
 		pa |= KVM_ASYNC_PF_SEND_ALWAYS;
 #endif
-		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
+		pa |= KVM_ASYNC_PF_ENABLED;
+
+		/* Async page fault support for L1 hypervisor is optional */
+		if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
+			(pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
+			wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
 		__this_cpu_write(apf_reason.enabled, 1);
 		printk(KERN_INFO"KVM setup async PF for cpu %d\n",
 		       smp_processor_id());
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e199f38..bb15151 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3489,7 +3489,7 @@  bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
 		     kvm_event_needs_reinjection(vcpu)))
 		return false;
 
-	if (is_guest_mode(vcpu))
+	if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
 		return false;
 
 	return kvm_x86_ops->interrupt_allowed(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2c14d1b..9bdb7e7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7737,7 +7737,7 @@  static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		if (is_nmi(intr_info))
 			return false;
 		else if (is_page_fault(intr_info))
-			return enable_ept;
+			return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
 		else if (is_no_device(intr_info) &&
 			 !(vmcs12->guest_cr0 & X86_CR0_TS))
 			return false;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b67a745..478faf1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2033,8 +2033,8 @@  static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
 	gpa_t gpa = data & ~0x3f;
 
-	/* Bits 2:5 are reserved, Should be zero */
-	if (data & 0x3c)
+	/* Bits 3:5 are reserved, Should be zero */
+	if (data & 0x38)
 		return 1;
 
 	vcpu->arch.apf.msr_val = data;
@@ -2050,6 +2050,7 @@  static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 		return 1;
 
 	vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
+	vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
 	kvm_async_pf_wakeup_all(vcpu);
 	return 0;
 }