
[Xen-devel] [PATCH v5 15/17] vmx: Add some scheduler hooks for VT-d posted interrupts



This patch adds the following arch hooks to the scheduler:
- vmx_pre_ctx_switch_pi():
It is called before the context switch; we update the posted-interrupt
descriptor when the vCPU is preempted, goes to sleep, or is blocked.

- vmx_post_ctx_switch_pi():
It is called after the context switch; we update the posted-interrupt
descriptor when the vCPU is about to run.

- arch_vcpu_wake_prepare():
It is called when the vCPU is woken up; we update the posted-interrupt
descriptor when the vCPU is unblocked.
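
A rough summary of the descriptor updates these hooks perform (an
illustrative sketch only, not part of the patch; the field names mirror
struct pi_desc in vmcs.h, and the real code manipulates them with
pi_set_sn()/pi_clear_sn(), write_atomic() and cmpxchg() on the 'control'
word rather than with plain assignments):

#include <stdint.h>

/* Simplified view of the posted-interrupt descriptor fields touched here. */
struct pi_desc_sketch {
    uint8_t  sn;    /* Suppress Notification                     */
    uint8_t  nv;    /* Notification Vector                       */
    uint32_t ndst;  /* Notification Destination (target APIC ID) */
};

/* vmx_pre_ctx_switch_pi(), runnable case: the vCPU was preempted or is
 * going to sleep, so suppress notification events until it runs again. */
static void pi_preempted_or_sleeping(struct pi_desc_sketch *pi)
{
    pi->sn = 1;
}

/* vmx_pre_ctx_switch_pi(), blocked case: route wake-up notifications to
 * the pCPU whose blocking list the vCPU was added to (pi_block_cpu),
 * using the dedicated wake-up vector. */
static void pi_blocked(struct pi_desc_sketch *pi, uint32_t block_cpu_apic_id,
                       uint8_t pi_wakeup_vector)
{
    pi->ndst = block_cpu_apic_id;
    pi->sn   = 0;
    pi->nv   = pi_wakeup_vector;
}

/* arch_vcpu_wake_prepare(): the vCPU is being unblocked; no notification
 * is needed until it actually runs, but restore the normal vector. */
static void pi_unblocked(struct pi_desc_sketch *pi, uint8_t posted_intr_vector)
{
    pi->sn = 1;
    pi->nv = posted_intr_vector;
}

/* vmx_post_ctx_switch_pi(): the vCPU is about to run on v->processor, so
 * point notifications at that pCPU and stop suppressing them. */
static void pi_about_to_run(struct pi_desc_sketch *pi, uint32_t cpu_apic_id)
{
    pi->ndst = cpu_apic_id;
    pi->sn   = 0;
}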

CC: Keir Fraser <keir@xxxxxxx>
CC: Jan Beulich <jbeulich@xxxxxxxx>
CC: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Kevin Tian <kevin.tian@xxxxxxxxx>
CC: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
CC: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
Suggested-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
---
v5:
- Rename arch_vcpu_wake to arch_vcpu_wake_prepare
- Make arch_vcpu_wake_prepare() inline for ARM
- Merge the ARM dummy hooks together
- Changes to some code comments
- Leave 'pi_ctxt_switch_from' and 'pi_ctxt_switch_to' NULL if
  PI is disabled or the vCPU is not an HVM vCPU
- Coding style

v4:
- Newly added

 xen/arch/x86/domain.c              |  11 +++
 xen/arch/x86/hvm/vmx/vmx.c         | 147 +++++++++++++++++++++++++++++++++++++
 xen/common/schedule.c              |   2 +
 xen/include/asm-arm/domain.h       |   2 +
 xen/include/asm-x86/domain.h       |   3 +
 xen/include/asm-x86/hvm/hvm.h      |   2 +
 xen/include/asm-x86/hvm/vmx/vmcs.h |   8 ++
 7 files changed, 175 insertions(+)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 045f6ff..130f859 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1605,9 +1605,20 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
 
     set_current(next);
 
+    /*
+     * When switching from non-idle to idle, we only do a lazy context switch.
+     * However, in order for posted interrupts (if available and enabled) to
+     * work properly, we at least need to update the descriptors.
+     */
+    if ( prev->arch.pi_ctxt_switch_from && !is_idle_vcpu(prev) )
+        prev->arch.pi_ctxt_switch_from(prev);
+
     if ( (per_cpu(curr_vcpu, cpu) == next) ||
          (is_idle_domain(nextd) && cpu_online(cpu)) )
     {
+        if ( next->arch.pi_ctxt_switch_to && !is_idle_vcpu(next) )
+            next->arch.pi_ctxt_switch_to(next);
+
         local_irq_enable();
     }
     else
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c8a4371..758809a 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -67,6 +67,8 @@ enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
 
 static void vmx_ctxt_switch_from(struct vcpu *v);
 static void vmx_ctxt_switch_to(struct vcpu *v);
+static void vmx_pre_ctx_switch_pi(struct vcpu *v);
+static void vmx_post_ctx_switch_pi(struct vcpu *v);
 
 static int  vmx_alloc_vlapic_mapping(struct domain *d);
 static void vmx_free_vlapic_mapping(struct domain *d);
@@ -117,10 +119,20 @@ static int vmx_vcpu_initialise(struct vcpu *v)
     INIT_LIST_HEAD(&v->arch.hvm_vmx.pi_blocked_vcpu_list);
     INIT_LIST_HEAD(&v->arch.hvm_vmx.pi_vcpu_on_set_list);
 
+    v->arch.hvm_vmx.pi_block_cpu = -1;
+
+    spin_lock_init(&v->arch.hvm_vmx.pi_lock);
+
     v->arch.schedule_tail    = vmx_do_resume;
     v->arch.ctxt_switch_from = vmx_ctxt_switch_from;
     v->arch.ctxt_switch_to   = vmx_ctxt_switch_to;
 
+    if ( iommu_intpost && is_hvm_vcpu(v) )
+    {
+        v->arch.pi_ctxt_switch_from = vmx_pre_ctx_switch_pi;
+        v->arch.pi_ctxt_switch_to = vmx_post_ctx_switch_pi;
+    }
+
     if ( (rc = vmx_create_vmcs(v)) != 0 )
     {
         dprintk(XENLOG_WARNING,
@@ -718,6 +730,140 @@ static void vmx_fpu_leave(struct vcpu *v)
     }
 }
 
+void arch_vcpu_wake_prepare(struct vcpu *v)
+{
+    unsigned long gflags;
+
+    if ( !iommu_intpost || !is_hvm_vcpu(v) || !has_arch_pdevs(v->domain) )
+        return;
+
+    spin_lock_irqsave(&v->arch.hvm_vmx.pi_lock, gflags);
+
+    if ( likely(vcpu_runnable(v)) ||
+         !test_bit(_VPF_blocked, &v->pause_flags) )
+    {
+        struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+        unsigned long flags;
+
+        /*
+         * We don't need to send a notification event to a non-running
+         * vCPU; the interrupt information will be delivered to it before
+         * VM-ENTRY, when the vCPU is scheduled to run next time.
+         */
+        pi_set_sn(pi_desc);
+
+        /*
+         * Set the 'NV' field back to posted_intr_vector, so that
+         * posted interrupts can be delivered to the vCPU by the
+         * VT-d hardware after it is scheduled to run.
+         */
+        write_atomic((uint8_t*)&pi_desc->nv, posted_intr_vector);
+
+        /*
+         * Delete the vCPU from the related blocking list
+         * if we are resuming from the blocked state.
+         */
+        if ( v->arch.hvm_vmx.pi_block_cpu != -1 )
+        {
+            spin_lock_irqsave(&per_cpu(pi_blocked_vcpu_lock,
+                              v->arch.hvm_vmx.pi_block_cpu), flags);
+            list_del_init(&v->arch.hvm_vmx.pi_blocked_vcpu_list);
+            spin_unlock_irqrestore(&per_cpu(pi_blocked_vcpu_lock,
+                                    v->arch.hvm_vmx.pi_block_cpu), flags);
+        }
+    }
+
+    spin_unlock_irqrestore(&v->arch.hvm_vmx.pi_lock, gflags);
+}
+
+static void vmx_pre_ctx_switch_pi(struct vcpu *v)
+{
+    struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+    struct pi_desc old, new;
+    unsigned long flags, gflags;
+
+    if ( !has_arch_pdevs(v->domain) )
+        return;
+
+    spin_lock_irqsave(&v->arch.hvm_vmx.pi_lock, gflags);
+
+    if ( vcpu_runnable(v) || !test_bit(_VPF_blocked, &v->pause_flags) )
+    {
+        /*
+         * The vCPU has been preempted or gone to sleep. We don't need to
+         * send a notification event to a non-running vCPU; the interrupt
+         * information will be delivered to it before VM-ENTRY, when the
+         * vCPU is scheduled to run next time.
+         */
+        pi_set_sn(pi_desc);
+
+    }
+    else if ( test_bit(_VPF_blocked, &v->pause_flags) )
+    {
+        /*
+         * The vCPU is blocking; we need to add it to one of the per-pCPU lists.
+         * We save v->processor to v->arch.hvm_vmx.pi_block_cpu and use it for
+         * the per-CPU list; we also store it in the posted-interrupt descriptor
+         * and make it the destination of the wake-up notification event.
+         */
+        v->arch.hvm_vmx.pi_block_cpu = v->processor;
+        spin_lock_irqsave(&per_cpu(pi_blocked_vcpu_lock,
+                          v->arch.hvm_vmx.pi_block_cpu), flags);
+        list_add_tail(&v->arch.hvm_vmx.pi_blocked_vcpu_list,
+                      &per_cpu(pi_blocked_vcpu, v->arch.hvm_vmx.pi_block_cpu));
+        spin_unlock_irqrestore(&per_cpu(pi_blocked_vcpu_lock,
+                           v->arch.hvm_vmx.pi_block_cpu), flags);
+
+        do {
+            old.control = new.control = pi_desc->control;
+
+            /* Should not block the vCPU if an interrupt was posted for it */
+            if ( pi_test_on(&old) )
+            {
+                spin_unlock_irqrestore(&v->arch.hvm_vmx.pi_lock, gflags);
+                vcpu_unblock(v);
+                return;
+            }
+
+            /*
+             * Change the 'NDST' field to v->arch.hvm_vmx.pi_block_cpu,
+             * so that when an external interrupt from an assigned device
+             * arrives, the wake-up notification event is sent to
+             * v->arch.hvm_vmx.pi_block_cpu, and pi_wakeup_interrupt()
+             * can then find the vCPU in the right list to wake it up.
+             */
+            if ( x2apic_enabled )
+                new.ndst = cpu_physical_id(v->arch.hvm_vmx.pi_block_cpu);
+            else
+                new.ndst = MASK_INSR(cpu_physical_id(
+                                 v->arch.hvm_vmx.pi_block_cpu),
+                                 PI_xAPIC_NDST_MASK);
+            pi_clear_sn(&new);
+            new.nv = pi_wakeup_vector;
+        } while ( cmpxchg(&pi_desc->control, old.control, new.control)
+                  != old.control );
+    }
+
+    spin_unlock_irqrestore(&v->arch.hvm_vmx.pi_lock, gflags);
+}
+
+static void vmx_post_ctx_switch_pi(struct vcpu *v)
+{
+    struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+
+    if ( !has_arch_pdevs(v->domain) )
+        return;
+
+    if ( x2apic_enabled )
+        write_atomic(&pi_desc->ndst, cpu_physical_id(v->processor));
+    else
+        write_atomic(&pi_desc->ndst,
+                     MASK_INSR(cpu_physical_id(v->processor),
+                     PI_xAPIC_NDST_MASK));
+
+    pi_clear_sn(pi_desc);
+}
+
 static void vmx_ctxt_switch_from(struct vcpu *v)
 {
     /*
@@ -756,6 +902,7 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
 
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
+    vmx_post_ctx_switch_pi(v);
 }
 
 
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 3eefed7..bc49098 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -412,6 +412,8 @@ void vcpu_wake(struct vcpu *v)
     unsigned long flags;
     spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags);
 
+    arch_vcpu_wake_prepare(v);
+
     if ( likely(vcpu_runnable(v)) )
     {
         if ( v->runstate.state >= RUNSTATE_blocked )
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 56aa208..cffe2c6 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -301,6 +301,8 @@ static inline register_t vcpuid_to_vaffinity(unsigned int vcpuid)
     return vaff;
 }
 
+static inline void arch_vcpu_wake_prepare(struct vcpu *v) {}
+
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 0fce09e..979210a 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -481,6 +481,9 @@ struct arch_vcpu
     void (*ctxt_switch_from) (struct vcpu *);
     void (*ctxt_switch_to) (struct vcpu *);
 
+    void (*pi_ctxt_switch_from) (struct vcpu *);
+    void (*pi_ctxt_switch_to) (struct vcpu *);
+
     struct vpmu_struct vpmu;
 
     /* Virtual Machine Extensions */
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 3cac64f..95f5357 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -545,6 +545,8 @@ static inline bool_t hvm_altp2m_supported(void)
     return hvm_funcs.altp2m_supported;
 }
 
+void arch_vcpu_wake_prepare(struct vcpu *v);
+
 #ifndef NDEBUG
 /* Permit use of the Forced Emulation Prefix in HVM guests */
 extern bool_t opt_hvm_fep;
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 9a986d0..209fb39 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -164,6 +164,14 @@ struct arch_vmx_struct {
 
     struct list_head     pi_blocked_vcpu_list;
     struct list_head     pi_vcpu_on_set_list;
+
+    /*
+     * Before the vCPU is blocked, it is added to the blocking list of the
+     * pCPU recorded in 'pi_block_cpu', so that the VT-d engine can send a
+     * wake-up notification event to that pCPU and wake up the vCPU.
+     */
+    int                  pi_block_cpu;
+    spinlock_t           pi_lock;
 };
 
 int vmx_create_vmcs(struct vcpu *v);
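
For context only (an assumption about a later patch in this series, not code
contained in this one): the per-CPU blocking list and 'pi_block_cpu' field
above are meant to be consumed by the wake-up notification handler. A hedged
sketch of such a consumer, reusing the pi_blocked_vcpu / pi_blocked_vcpu_lock
per-CPU variables and pi_test_on() already referenced in this patch, could
look roughly like this:

/*
 * Illustrative sketch only -- the real handler is introduced elsewhere in
 * this series.  It runs on the pCPU recorded in pi_block_cpu when the VT-d
 * hardware sends the wake-up notification, and wakes every blocked vCPU
 * that has an interrupt posted ('ON' set in its descriptor).
 */
static void pi_wakeup_interrupt_sketch(void)
{
    struct arch_vmx_struct *vmx, *tmp;
    unsigned int cpu = smp_processor_id();
    unsigned long flags;

    spin_lock_irqsave(&per_cpu(pi_blocked_vcpu_lock, cpu), flags);

    list_for_each_entry_safe(vmx, tmp, &per_cpu(pi_blocked_vcpu, cpu),
                             pi_blocked_vcpu_list)
    {
        if ( pi_test_on(&vmx->pi_desc) )
        {
            /* Remove from the blocking list and unblock the vCPU. */
            list_del_init(&vmx->pi_blocked_vcpu_list);
            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
        }
    }

    spin_unlock_irqrestore(&per_cpu(pi_blocked_vcpu_lock, cpu), flags);
}
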
-- 
2.1.0

