[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC v2 13/15] Update Posted-Interrupts Descriptor during vCPU scheduling



The basic idea here is:
1. When vCPU's state is RUNSTATE_running,
        - set 'NV' to 'Notification Vector'.
        - Clear 'SN' to accept PI.
        - set 'NDST' to the right pCPU.
2. When vCPU's state is RUNSTATE_blocked,
        - set 'NV' to 'Wake-up Vector', so we can wake up the
          related vCPU when posted-interrupt happens for it.
        - Clear 'SN' to accept PI.
3. When vCPU's state is RUNSTATE_runnable/RUNSTATE_offline,
        - Set 'SN' to suppress non-urgent interrupts.
          (Currently, we only support non-urgent interrupts)
        - Set 'NV' back to 'Notification Vector' if needed.

Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
---
 xen/arch/x86/hvm/vmx/vmx.c | 130 +++++++++++++++++++++++++++++++++++++++++++++
 xen/common/schedule.c      |   5 ++
 xen/include/xen/sched.h    |   2 +
 3 files changed, 137 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 556a584..cdcc012 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1711,6 +1711,131 @@ static void vmx_handle_eoi(u8 vector)
     __vmwrite(GUEST_INTR_STATUS, status);
 }
 
+static void vmx_pi_desc_update(struct vcpu *v, int old_state)
+{
+    struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+    struct pi_desc old, new;
+    unsigned long flags;
+
+    if ( !iommu_intpost )
+        return;
+
+    switch ( v->runstate.state )
+    {
+    case RUNSTATE_runnable:
+    case RUNSTATE_offline:
+        /*
+         * We don't need to send a notification event to a non-running
+         * vcpu; the interrupt information will be delivered to it before
+         * VM-ENTRY the next time the vcpu is scheduled to run.
+         */
+        pi_desc->sn = 1;
+
+        /*
+         * If the state is transitioning from RUNSTATE_blocked,
+         * we should set the 'NV' field back to posted_intr_vector,
+         * so the Posted-Interrupts can be delivered to the vCPU
+         * by VT-d HW after it is scheduled to run.
+         */
+        if ( old_state == RUNSTATE_blocked )
+        {
+            do
+            {
+                old.control = new.control = pi_desc->control;
+                new.nv = posted_intr_vector;
+            }
+            while ( cmpxchg(&pi_desc->control, old.control, new.control)
+                    != old.control );
+
+           /*
+            * Delete the vCPU from the related blocked list
+            * if we are resuming from the blocked state.
+            */
+           spin_lock_irqsave(&per_cpu(blocked_vcpu_lock,
+                             v->pre_pcpu), flags);
+           list_del(&v->blocked_vcpu_list);
+           spin_unlock_irqrestore(&per_cpu(blocked_vcpu_lock,
+                                  v->pre_pcpu), flags);
+        }
+        break;
+
+    case RUNSTATE_blocked:
+        /*
+         * The vCPU is being blocked.
+         * Add the blocked vCPU to the list of
+         * vcpu->pre_pcpu, which is the destination
+         * of the wake-up notification event.
+         */
+        v->pre_pcpu = v->processor;
+        spin_lock_irqsave(&per_cpu(blocked_vcpu_lock,
+                          v->pre_pcpu), flags);
+        list_add_tail(&v->blocked_vcpu_list,
+                      &per_cpu(blocked_vcpu, v->pre_pcpu));
+        spin_unlock_irqrestore(&per_cpu(blocked_vcpu_lock,
+                               v->pre_pcpu), flags);
+
+        do
+        {
+            old.control = new.control = pi_desc->control;
+
+            /*
+             * We should not block the vCPU if
+             * an interrupt was posted for it.
+             */
+
+            if ( old.on == 1 )
+            {
+                /*
+                 * The vCPU will be removed from the blocked list
+                 * during its state transition from RUNSTATE_blocked
+                 * to RUNSTATE_runnable after the following tasklet
+                 * is scheduled to run.
+                 */
+                tasklet_schedule(&v->vcpu_wakeup_tasklet);
+                return;
+            }
+
+            /*
+             * Change the 'NDST' field to v->pre_pcpu, so when
+             * external interrupts from assigned devices happen,
+             * the wakeup notification event will go to v->pre_pcpu,
+             * then in pi_wakeup_interrupt() we can find the
+             * vCPU in the right list to wake up.
+             */
+            if ( x2apic_enabled )
+                new.ndst = cpu_physical_id(v->pre_pcpu);
+            else
+                new.ndst = MASK_INSR(cpu_physical_id(v->pre_pcpu),
+                                     PI_xAPIC_NDST_MASK);
+            new.sn = 0;
+            new.nv = pi_wakeup_vector;
+        }
+        while ( cmpxchg(&pi_desc->control, old.control, new.control)
+                != old.control );
+        break;
+
+    case RUNSTATE_running:
+        ASSERT( pi_desc->sn == 1 );
+
+        do
+        {
+            old.control = new.control = pi_desc->control;
+            if ( x2apic_enabled )
+                new.ndst = cpu_physical_id(v->processor);
+            else
+                new.ndst = (cpu_physical_id(v->processor) << 8) & 0xFF00;
+
+            new.sn = 0;
+        }
+        while ( cmpxchg(&pi_desc->control, old.control, new.control)
+                != old.control );
+        break;
+
+    default:
+        break;
+    }
+}
+
 void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx,
                                uint32_t *eax, uint32_t *ebx,
                                uint32_t *ecx, uint32_t *edx)
@@ -1842,7 +1967,12 @@ const struct hvm_function_table * __init start_vmx(void)
         alloc_direct_apic_vector(&posted_intr_vector, pi_notification_interrupt);
 
         if ( iommu_intpost )
+        {
             alloc_direct_apic_vector(&pi_wakeup_vector, pi_wakeup_interrupt);
+            vmx_function_table.pi_desc_update = vmx_pi_desc_update;
+        }
+        else
+            vmx_function_table.pi_desc_update = NULL;
     }
     else
     {
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 88770c6..3c6b2e1 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -142,6 +142,7 @@ static inline void vcpu_runstate_change(
     struct vcpu *v, int new_state, s_time_t new_entry_time)
 {
     s_time_t delta;
+    int old_state;
 
     ASSERT(v->runstate.state != new_state);
     ASSERT(spin_is_locked(per_cpu(schedule_data,v->processor).schedule_lock));
@@ -157,7 +158,11 @@ static inline void vcpu_runstate_change(
         v->runstate.state_entry_time = new_entry_time;
     }
 
+    old_state = v->runstate.state;
     v->runstate.state = new_state;
+
+    if ( is_hvm_vcpu(v) && hvm_funcs.pi_desc_update )
+        hvm_funcs.pi_desc_update(v, old_state);
 }
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 4a7e6b3..71d228a 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -142,6 +142,8 @@ struct vcpu
 
     int              processor;
 
+    int              pre_pcpu;
+
     vcpu_info_t     *vcpu_info;
 
     struct domain   *domain;
-- 
2.1.0


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.