[Devel,RHEL7,COMMIT] ms/KVM: i8254: use atomic_t instead of pit.inject_lock

Submitted by Konstantin Khorenko on Dec. 9, 2016, 1:23 p.m.

Details

Message ID 201612091323.uB9DNOpD024064@finist_cl7.x64_64.work.ct
State New
Series "KVM-PIT discard mode fixes"
Headers show

Commit Message

Konstantin Khorenko Dec. 9, 2016, 1:23 p.m.
The commit is pushed to "branch-rh7-3.10.0-327.36.1.vz7.20.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.36.1.vz7.20.14
------>
commit 4bdc4c285fc92a0a029ef7b9a285a61b23650597
Author: Radim Krčmář <rkrcmar@redhat.com>
Date:   Fri Dec 9 17:23:24 2016 +0400

    ms/KVM: i8254: use atomic_t instead of pit.inject_lock
    
    The lock was overkill; the same can be done with atomics.
    
    An mb() was added in kvm_pit_ack_irq to pair with the implicit barrier
    between pit_timer_fn and pit_do_work.  The mb() prevents a race that
    could happen if pending == 0 and irq_ack == 0:
    
      kvm_pit_ack_irq:                | pit_timer_fn:
       p = atomic_read(&ps->pending); |
                                      |  atomic_inc(&ps->pending);
                                      |  queue_work(pit_do_work);
                                      | pit_do_work:
                                      |  atomic_xchg(&ps->irq_ack, 0);
                                      |  return;
       atomic_set(&ps->irq_ack, 1);   |
       if (p == 0) return;            |
    
    where the interrupt would not be delivered in this tick of pit_timer_fn.
    PIT would have eventually delivered the interrupt, but we sacrifice
    performance to make sure that interrupts are not needlessly delayed.
    
    sfence isn't enough: atomic_dec_if_positive does atomic_read first and
    x86 can reorder loads before stores.  lfence isn't enough: store can
    pass lfence, turning it into a nop.  A compiler barrier would be more
    than enough as CPU needs to stall for unbelievably long to use fences.
    
    This patch doesn't do anything in kvm_pit_reset_reinject, because any
    order of resets can race, but the result differs by at most one
    interrupt, which is ok, because it's the same result as if the reset
    happened at a slightly different time.  (Original code didn't protect
    the reset path with a proper lock, so users have to be robust.)
    
    Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
    
    ms commit ddf54503e2bbed01958cf5fb16ad6378971d2468
    ("KVM: i8254: use atomic_t instead of pit.inject_lock")
    
    Fixes https://jira.sw.ru/browse/PSBM-56972
    Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
---
 arch/x86/kvm/i8254.c | 56 +++++++++++++++++++++-------------------------------
 arch/x86/kvm/i8254.h |  3 +--
 2 files changed, 24 insertions(+), 35 deletions(-)

Patch hide | download patch | download mbox

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4dedeb0..28c3830 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -236,11 +236,13 @@  static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
 						 irq_ack_notifier);
 
-	spin_lock(&ps->inject_lock);
+	atomic_set(&ps->irq_ack, 1);
+	/* irq_ack should be set before pending is read.  Order accesses with
+	 * inc(pending) in pit_timer_fn and xchg(irq_ack, 0) in pit_do_work.
+	 */
+	smp_mb();
 	if (atomic_dec_if_positive(&ps->pending) > 0 && ps->reinject)
 		queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
-	ps->irq_ack = 1;
-	spin_unlock(&ps->inject_lock);
 }
 
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -271,36 +273,25 @@  static void pit_do_work(struct kthread_work *work)
 	struct kvm_vcpu *vcpu;
 	int i;
 	struct kvm_kpit_state *ps = &pit->pit_state;
-	int inject = 0;
 
-	/* Try to inject pending interrupts when
-	 * last one has been acked.
+	if (ps->reinject && !atomic_xchg(&ps->irq_ack, 0))
+		return;
+
+	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
+	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
+
+	/*
+	 * Provides NMI watchdog support via Virtual Wire mode.
+	 * The route is: PIT -> LVT0 in NMI mode.
+	 *
+	 * Note: Our Virtual Wire implementation does not follow
+	 * the MP specification.  We propagate a PIT interrupt to all
+	 * VCPUs and only when LVT0 is in NMI mode.  The interrupt can
+	 * also be simultaneously delivered through PIC and IOAPIC.
 	 */
-	spin_lock(&ps->inject_lock);
-	if (!ps->reinject)
-		inject = 1;
-	else if (ps->irq_ack) {
-		ps->irq_ack = 0;
-		inject = 1;
-	}
-	spin_unlock(&ps->inject_lock);
-	if (inject) {
-		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
-		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
-
-		/*
-		 * Provides NMI watchdog support via Virtual Wire mode.
-		 * The route is: PIT -> PIC -> LVT0 in NMI mode.
-		 *
-		 * Note: Our Virtual Wire implementation is simplified, only
-		 * propagating PIT interrupts to all VCPUs when they have set
-		 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
-		 * VCPU0, and only if its LVT0 is in EXTINT mode.
-		 */
-		if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
-			kvm_for_each_vcpu(i, vcpu, kvm)
-				kvm_apic_nmi_wd_deliver(vcpu);
-	}
+	if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			kvm_apic_nmi_wd_deliver(vcpu);
 }
 
 static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
@@ -323,7 +314,7 @@  static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 static inline void kvm_pit_reset_reinject(struct kvm_pit *pit)
 {
 	atomic_set(&pit->pit_state.pending, 0);
-	pit->pit_state.irq_ack = 1;
+	atomic_set(&pit->pit_state.irq_ack, 1);
 }
 
 static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
@@ -684,7 +675,6 @@  struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 
 	mutex_init(&pit->pit_state.lock);
 	mutex_lock(&pit->pit_state.lock);
-	spin_lock_init(&pit->pit_state.inject_lock);
 
 	pid = get_pid(task_tgid(current));
 	pid_nr = pid_vnr(pid);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index dd1b16b..fa5f907 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -33,8 +33,7 @@  struct kvm_kpit_state {
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
-	spinlock_t inject_lock;
-	unsigned long irq_ack;
+	atomic_t irq_ack;
 	struct kvm_irq_ack_notifier irq_ack_notifier;
 };