[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [for-xen-4.5 PATCH v2 2/2] dpci: Add ZOMBIE state to allow the softirq to finish with the dpci_pirq.



When we want to cancel an outstanding 'struct hvm_pirq_dpci' we perform
and cmpxch on the state to set it to zero. That is OK on the teardown
paths as it is guarnateed that the do_IRQ action handler has been removed.
Hence no more interrupts can be scheduled. But with the introduction
of "dpci: Fix list corruption if INTx device is used and an IRQ timeout is 
invoked."
we now utilize the pt_pirq_softirq_cancel when we want to cancel
outstanding operations. However once we cancel them the do_IRQ is
free to schedule them back in - even if said 'struct hvm_pirq_dpci'
is still on the dpci_list.

The code base before this patch could follow this race:

\-timer_softirq_action
        pt_irq_time_out calls pt_pirq_softirq_cancel which cmpxchg the state to 
0.
        pirq_dpci is still on dpci_list.
\- dpci_sofitrq
        while (!list_emptry(&our_list))
                list_del, but has not yet done 'entry->next = LIST_POISON1;'
[interrupt happens]
        raise_softirq checks state which is zero. Adds pirq_dpci to the 
dpci_list.
[interrupt is done, back to dpci_softirq]
        finishes the entry->next = LIST_POISON1;
                .. test STATE_SCHED returns true, so executes the 
hvm_dirq_assist.
        ends the loop, exits.

\- dpci_softirq
        while (!list_emtpry)
                list_del, but ->next already has LIST_POISON1 and we blow up.

This patch in combination adds two extra paths:

1) in raise_softirq, we do delay scheduling of dcpi_pirq until STATE_ZOMBIE is 
cleared.
2) dpci_softirq will pick up the cancelled dpci_pirq and then clear the 
STATE_ZOMBIE.

Using the example above the code-paths would be now:
\- timer_softirq_action
        pt_irq_time_out calls pt_pirq_softirq_cancel which cmpxchg the state to 
STATE_ZOMBIE.
        pirq_dpci is still on dpci_list.
\- dpci_sofitrq
        while (!list_emptry(&our_list))
                list_del, but has not yet done 'entry->next = LIST_POISON1;'
[interrupt happens]
        raise_softirq checks state, it is STATE_ZOMBIE so returns.
[interrupt is done, back to dpci_softirq]
        finishes the entry->next = LIST_POISON1;
                .. test STATE_SCHED returns true, so executes the 
hvm_dirq_assist.
        ends the loop, exits.

Reported-and-Tested-by: Sander Eikelenboom <linux@xxxxxxxxxxxxxx>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
---
 xen/drivers/passthrough/io.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 2039d31..1a26973 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -50,20 +50,26 @@ static DEFINE_PER_CPU(struct list_head, dpci_list);
 
 enum {
     STATE_SCHED,
-    STATE_RUN
+    STATE_RUN,
+    STATE_ZOMBIE
 };
 
 /*
  * This can be called multiple times, but the softirq is only raised once.
- * That is until the STATE_SCHED state has been cleared. The state can be
- * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
+ * That is until the STATE_SCHED and STATE_ZOMBIE state has been cleared. The
+ * STATE_SCHED and STATE_ZOMBIE state can be cleared by the 'dpci_softirq'
  * or by 'pt_pirq_softirq_cancel' (which will try to clear the state before
- * the softirq had a chance to run).
+ * (when it has executed 'hvm_dirq_assist'). The STATE_SCHED can be cleared
+ * by 'pt_pirq_softirq_cancel' (which will try to clear the state before the
+ * softirq had a chance to run).
  */
 static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
 {
     unsigned long flags;
 
+    if ( test_bit(STATE_ZOMBIE, &pirq_dpci->state) )
+        return;
+
     if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
         return;
 
@@ -85,7 +91,7 @@ static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
  */
 bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
 {
-    if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) )
+    if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED) | (1 << 
STATE_ZOMBIE) ) )
         return 1;
 
     /*
@@ -111,7 +117,7 @@ static void pt_pirq_softirq_cancel(struct hvm_pirq_dpci 
*pirq_dpci,
 
     ASSERT(spin_is_locked(&d->event_lock));
 
-    switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) )
+    switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 1 << STATE_ZOMBIE ) )
     {
     case (1 << STATE_SCHED):
         /*
@@ -122,6 +128,7 @@ static void pt_pirq_softirq_cancel(struct hvm_pirq_dpci 
*pirq_dpci,
         /* fallthrough. */
     case (1 << STATE_RUN):
     case (1 << STATE_RUN) | (1 << STATE_SCHED):
+    case (1 << STATE_RUN) | (1 << STATE_SCHED) | (1 << STATE_ZOMBIE):
         /*
          * The reason it is OK to reset 'dom' when STATE_RUN bit is set is due
          * to a shortcut the 'dpci_softirq' implements. It stashes the 'dom'
@@ -786,6 +793,7 @@ unlock:
 static void dpci_softirq(void)
 {
     unsigned int cpu = smp_processor_id();
+    unsigned int reset = 0;
     LIST_HEAD(our_list);
 
     local_irq_disable();
@@ -812,7 +820,15 @@ static void dpci_softirq(void)
             hvm_dirq_assist(d, pirq_dpci);
             put_domain(d);
         }
+        else
+            reset = 1;
+
         clear_bit(STATE_RUN, &pirq_dpci->state);
+        if ( reset )
+        {
+            clear_bit(STATE_ZOMBIE, &pirq_dpci->state);
+            reset = 0;
+        }
     }
 }
 
-- 
1.9.3


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.