|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [RFC PATCH] dpci: Put the dpci back on the list if running on another CPU.
> >> Additionally I think it should be considered whether the bitmap
> >> approach of interpreting ->state is the right one, and we don't
> >> instead want a clean 3-state (idle, sched, run) model.
> >
> > Could you elaborate a bit more please? As in three different unsigned int
> > (or bool_t) that set in what state we are in?
>
> An enum { STATE_IDLE, STATE_SCHED, STATE_RUN }. Especially
> if my comment above turns out to be wrong, you'd have no real
> need for the SCHED and RUN flags to be set at the same time.
I cobbled together what I believe is what you were thinking of.
As you can see to preserve the existing functionality such as
being able to schedule N amount of interrupt injections
for the N interrupts we might get - I modified '->masked'
to be an atomic counter.
The end result is that we can still live-lock. Unless we:
- Drop on the floor the injection of N interrupts and
just deliver at most one per VMX_EXIT (and not bother
with interrupts arriving when we are in the VMX handler).
- Alter the softirq code slightly - to have a variant
which will only iterate once over the pending softirq
bits per call. (so save a copy of the bitmap on the
stack when entering the softirq handler - and use that.
We could also xor it against the current to catch any
non-duplicate bits being set that we should deal with).
Here is the compile, but not run-time tested patch.
From e7d8bcd7c5d32c520554a4ad69c4716246036002 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Date: Tue, 17 Mar 2015 13:31:52 -0400
Subject: [RFC PATCH] dpci: Switch to tristate instead of bitmap
*TODO*:
- Writeup.
- Tests
Suggested-by: Jan Beulich <jbeulich@xxxxxxxx>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
---
xen/drivers/passthrough/io.c | 140 ++++++++++++++++++++++++-------------------
xen/include/xen/hvm/irq.h | 4 +-
2 files changed, 82 insertions(+), 62 deletions(-)
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index ae050df..663e104 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -30,42 +30,28 @@
static DEFINE_PER_CPU(struct list_head, dpci_list);
/*
- * These two bit states help to safely schedule, deschedule, and wait until
- * the softirq has finished.
- *
- * The semantics behind these two bits is as follow:
- * - STATE_SCHED - whoever modifies it has to ref-count the domain (->dom).
- * - STATE_RUN - only softirq is allowed to set and clear it. If it has
- * been set hvm_dirq_assist will RUN with a saved value of the
- * 'struct domain' copied from 'pirq_dpci->dom' before STATE_RUN was set.
- *
- * The usual states are: STATE_SCHED(set) -> STATE_RUN(set) ->
- * STATE_SCHED(unset) -> STATE_RUN(unset).
- *
- * However the states can also diverge such as: STATE_SCHED(set) ->
- * STATE_SCHED(unset) -> STATE_RUN(set) -> STATE_RUN(unset). That means
- * the 'hvm_dirq_assist' never run and that the softirq did not do any
- * ref-counting.
- */
-
-enum {
- STATE_SCHED,
- STATE_RUN
-};
-
-/*
* This can be called multiple times, but the softirq is only raised once.
- * That is until the STATE_SCHED state has been cleared. The state can be
- * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
- * or by 'pt_pirq_softirq_reset' (which will try to clear the state before
+ * That is until state is in init. The state can be changed by:
+ * the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
+ * or by 'pt_pirq_softirq_reset' (which will try to init the state before
* the softirq had a chance to run).
*/
static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
{
unsigned long flags;
- if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
+ switch ( cmpxchg(&pirq_dpci->state, STATE_INIT, STATE_SCHED) )
+ {
+ case STATE_RUN:
+ case STATE_SCHED:
+ /*
+ * The pirq_dpci->masked count has been incremented to let us know
+ * how many injections we have left to do.
+ */
return;
+ case STATE_INIT:
+ break;
+ }
get_knownalive_domain(pirq_dpci->dom);
@@ -85,7 +71,7 @@ static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
*/
bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
{
- if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) )
+ if ( pirq_dpci->state != STATE_INIT )
return 1;
/*
@@ -109,22 +95,22 @@ static void pt_pirq_softirq_reset(struct hvm_pirq_dpci
*pirq_dpci)
ASSERT(spin_is_locked(&d->event_lock));
- switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) )
+ switch ( cmpxchg(&pirq_dpci->state, STATE_SCHED, STATE_INIT) )
{
- case (1 << STATE_SCHED):
+ case STATE_SCHED:
/*
- * We are going to try to de-schedule the softirq before it goes in
- * STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'.
+ * We are going to try to de-schedule the softirq before it goes to
+ * running state. Whoever moves from sched MUST refcount the 'dom'.
*/
put_domain(d);
/* fallthrough. */
- case (1 << STATE_RUN):
- case (1 << STATE_RUN) | (1 << STATE_SCHED):
+ case STATE_RUN:
+ case STATE_INIT:
/*
- * The reason it is OK to reset 'dom' when STATE_RUN bit is set is due
+ * The reason it is OK to reset 'dom' when init or running is due
* to a shortcut the 'dpci_softirq' implements. It stashes the 'dom'
- * in local variable before it sets STATE_RUN - and therefore will not
- * dereference '->dom' which would crash.
+ * in local variable before it sets state to running - and therefore
+ * will not dereference '->dom' which would crash.
*/
pirq_dpci->dom = NULL;
break;
@@ -135,7 +121,7 @@ static void pt_pirq_softirq_reset(struct hvm_pirq_dpci
*pirq_dpci)
* or never initialized it). Note that we hold the lock that
* 'hvm_dirq_assist' could be spinning on.
*/
- pirq_dpci->masked = 0;
+ atomic_set(&pirq_dpci->masked, 0);
}
bool_t pt_irq_need_timer(uint32_t flags)
@@ -149,7 +135,8 @@ static int pt_irq_guest_eoi(struct domain *d, struct
hvm_pirq_dpci *pirq_dpci,
if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT,
&pirq_dpci->flags) )
{
- pirq_dpci->masked = 0;
+ ASSERT(atomic_read(&pirq_dpci->masked) <= 1);
+ atomic_set(&pirq_dpci->masked, 0);
pirq_dpci->pending = 0;
pirq_guest_eoi(dpci_pirq(pirq_dpci));
}
@@ -278,6 +265,8 @@ int pt_irq_create_bind(
* As such on every 'pt_irq_create_bind' call we MUST set it.
*/
pirq_dpci->dom = d;
+ atomic_set(&pirq_dpci->masked, 0);
+ pirq_dpci->state = STATE_INIT;
/* bind after hvm_irq_dpci is setup to avoid race with irq
handler*/
rc = pirq_guest_bind(d->vcpu[0], info, 0);
if ( rc == 0 && pt_irq_bind->u.msi.gtable )
@@ -398,6 +387,8 @@ int pt_irq_create_bind(
if ( pt_irq_need_timer(pirq_dpci->flags) )
init_timer(&pirq_dpci->timer, pt_irq_time_out, pirq_dpci, 0);
/* Deal with gsi for legacy devices */
+ atomic_set(&pirq_dpci->masked, 0);
+ pirq_dpci->state = STATE_INIT;
rc = pirq_guest_bind(d->vcpu[0], info, share);
if ( unlikely(rc) )
{
@@ -576,6 +567,7 @@ bool_t pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci)
if ( !dpci->flags && !pt_pirq_softirq_active(dpci) )
{
dpci->dom = NULL;
+ dpci->state = STATE_INIT;
return 1;
}
return 0;
@@ -617,7 +609,7 @@ int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
!(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
return 0;
- pirq_dpci->masked = 1;
+ atomic_inc(&pirq_dpci->masked);
raise_softirq_for(pirq_dpci);
return 1;
}
@@ -672,12 +664,13 @@ void hvm_dpci_msi_eoi(struct domain *d, int vector)
spin_unlock(&d->event_lock);
}
-static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
+static bool_t hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci
*pirq_dpci)
{
+ int more;
ASSERT(d->arch.hvm_domain.irq.dpci);
spin_lock(&d->event_lock);
- if ( test_and_clear_bool(pirq_dpci->masked) )
+ while ( (more = !atomic_dec_and_test(&pirq_dpci->masked)) )
{
struct pirq *pirq = dpci_pirq(pirq_dpci);
const struct dev_intx_gsi_link *digl;
@@ -687,17 +680,13 @@ static void hvm_dirq_assist(struct domain *d, struct
hvm_pirq_dpci *pirq_dpci)
send_guest_pirq(d, pirq);
if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
- {
- spin_unlock(&d->event_lock);
- return;
- }
+ break;
}
if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
{
vmsi_deliver_pirq(d, pirq_dpci);
- spin_unlock(&d->event_lock);
- return;
+ break;
}
list_for_each_entry ( digl, &pirq_dpci->digl_list, list )
@@ -710,8 +699,7 @@ static void hvm_dirq_assist(struct domain *d, struct
hvm_pirq_dpci *pirq_dpci)
{
/* for translated MSI to INTx interrupt, eoi as early as possible
*/
__msi_pirq_eoi(pirq_dpci);
- spin_unlock(&d->event_lock);
- return;
+ break;
}
/*
@@ -725,6 +713,8 @@ static void hvm_dirq_assist(struct domain *d, struct
hvm_pirq_dpci *pirq_dpci)
set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT);
}
spin_unlock(&d->event_lock);
+
+ return more;
}
static void __hvm_dpci_eoi(struct domain *d,
@@ -781,7 +771,7 @@ unlock:
}
/*
- * Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to
+ * Note: 'pt_pirq_softirq_reset' can move the state to init before we get to
* doing it. If that is the case we let 'pt_pirq_softirq_reset' do
ref-counting.
*/
static void dpci_softirq(void)
@@ -797,23 +787,53 @@ static void dpci_softirq(void)
{
struct hvm_pirq_dpci *pirq_dpci;
struct domain *d;
+ bool_t again = 0;
pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci,
softirq_list);
list_del(&pirq_dpci->softirq_list);
d = pirq_dpci->dom;
smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */
- if ( test_and_set_bit(STATE_RUN, &pirq_dpci->state) )
- BUG();
- /*
- * The one who clears STATE_SCHED MUST refcount the domain.
- */
- if ( test_and_clear_bit(STATE_SCHED, &pirq_dpci->state) )
+
+ switch ( cmpxchg(&pirq_dpci->state, STATE_SCHED, STATE_RUN) )
+ {
+ case STATE_INIT:
+ /* pt_pirq_softirq_reset cleared it. Let it do the ref-counting.
*/
+ continue;
+ case STATE_RUN:
+ again = 1;
+ /* Fall through. */
+ case STATE_SCHED:
+ break;
+ }
+ if ( !again )
{
- hvm_dirq_assist(d, pirq_dpci);
+ /*
+ * The one who changes sched to running MUST refcount the domain.
+ */
+ again = hvm_dirq_assist(d, pirq_dpci);
put_domain(d);
+ switch ( cmpxchg(&pirq_dpci->state, STATE_RUN, STATE_INIT) )
+ {
+ case STATE_SCHED:
+ case STATE_INIT:
+ BUG();
+ case STATE_RUN:
+ break;
+ }
+ }
+ if ( again )
+ {
+ unsigned long flags;
+
+ /* Put back on the list and retry. */
+ local_irq_save(flags);
+ list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list));
+ local_irq_restore(flags);
+
+ raise_softirq(HVM_DPCI_SOFTIRQ);
+ continue;
}
- clear_bit(STATE_RUN, &pirq_dpci->state);
}
}
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index 3996f1f..48dbc51 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -93,8 +93,8 @@ struct hvm_irq_dpci {
/* Machine IRQ to guest device/intx mapping. */
struct hvm_pirq_dpci {
uint32_t flags;
- unsigned int state;
- bool_t masked;
+ enum { STATE_INIT, STATE_SCHED, STATE_RUN } state;
+ atomic_t masked;
uint16_t pending;
struct list_head digl_list;
struct domain *dom;
--
2.1.0
>
> Jan
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |