|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v7 for-xen-4.5 1/2] dpci: Move from an hvm_irq_dpci (and struct domain) to an hvm_dirq_dpci model.
When an interrupt for a PCI (or PCIe) passthrough device
is to be sent to a guest, we find the appropriate
'hvm_dirq_dpci' structure for the interrupt (PIRQ), set
a bit, and schedule a tasklet.
Then the 'hvm_dirq_assist' tasklet gets called with the 'struct
domain' from which it iterates over all of the 'hvm_dirq_dpci'
which are mapped to the guest. However, it is important to
note that when we setup the 'hvm_dirq_dpci' we have a field
for the 'struct domain' that we can use instead of
having to pass in the 'struct domain'.
As such this patch moves the tasklet to the 'struct hvm_dirq_dpci'
and sets the 'dom' field to the domain. We also double-check
that the '->dom' is not reset before using it.
We have to be careful with this as that means we MUST have
'dom' set before pirq_guest_bind() is called. As such
we add the 'pirq_dpci->dom = d;' to cover for all such
cases.
The mechanism to tear it down is more complex as there
are two ways it can be executed:
a) pci_clean_dpci_irq. This gets called when the guest is
being destroyed. We end up calling 'tasklet_kill'.
The scenarios in which the 'struct pirq' (and subsequently
the 'hvm_pirq_dpci') gets destroyed is when:
- guest did not use the pirq at all after setup.
- guest did use pirq, but decided to mask and left it in that
state.
- guest did use pirq, but crashed.
In all of those scenarios we end up calling 'tasklet_kill'
which will spin on the tasklet if it is running.
b) pt_irq_destroy_bind (guest disables the MSI). We double-check
that the softirq has run by piggy-backing on the existing
'pirq_cleanup_check' mechanism which calls 'pt_pirq_cleanup_check'.
We add the extra call to 'pt_pirq_softirq_active' in
'pt_pirq_cleanup_check'.
NOTE: Guests that use event channels unbind first the
event channel from PIRQs, so the 'pt_pirq_cleanup_check'
won't be called as eventch is set to zero. In that case
we clean it up via the a) mechanism. It is OK
to re-use the tasklet when 'pt_irq_create_bind' is called
afterwards.
There is an extra scenario regardless of eventch being
set or not: the guest did 'pt_irq_destroy_bind' while an
interrupt was triggered and tasklet was scheduled (but had not
been run). It is OK to still run the tasklet as
hvm_dirq_assist won't do anything (as the flags are
set to zero). As such we can exit out of hvm_dirq_assist
without doing anything.
We also stop using in hvm_dirq_assist the pt_pirq_iterate.
When an interrupt for a PCI (or PCIe) passthrough device
is to be sent to a guest, we find the appropriate
'hvm_dirq_dpci' structure for the interrupt (PIRQ), set
a bit, and schedule a tasklet.
As we are now called from dpci_softirq with the outstanding
'struct hvm_pirq_dpci' there is no need to call pt_pirq_iterate
which will iterate over all of the PIRQs and call us with every
one that is mapped. And then _hvm_dirq_assist figuring out
which one to execute.
This is inefficient and unfair as:
- We iterate starting at PIRQ 0 and up every time. That means
the PCIe devices that have lower PIRQs get to be called
first.
- If we have many PCIe devices passed in with many PIRQs and
most of the time only the highest numbered PIRQ gets an
interrupt - we iterate over many PIRQs.
Since we know which 'hvm_pirq_dpci' to check - as the tasklet is
called for a specific 'struct hvm_pirq_dpci' - we do that
in this patch.
Suggested-by: Jan Beulich <jbeulich@xxxxxxxx>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
---
xen/drivers/passthrough/io.c | 76 +++++++++++++++++++++++++++++--------------
xen/drivers/passthrough/pci.c | 4 +--
xen/include/xen/hvm/irq.h | 2 +-
3 files changed, 55 insertions(+), 27 deletions(-)
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 4cd32b5..8534d63 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -27,7 +27,7 @@
#include <xen/hvm/irq.h>
#include <xen/tasklet.h>
-static void hvm_dirq_assist(unsigned long _d);
+static void hvm_dirq_assist(unsigned long arg);
bool_t pt_irq_need_timer(uint32_t flags)
{
@@ -114,9 +114,6 @@ int pt_irq_create_bind(
spin_unlock(&d->event_lock);
return -ENOMEM;
}
- softirq_tasklet_init(
- &hvm_irq_dpci->dirq_tasklet,
- hvm_dirq_assist, (unsigned long)d);
for ( i = 0; i < NR_HVM_IRQS; i++ )
INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);
@@ -130,6 +127,18 @@ int pt_irq_create_bind(
return -ENOMEM;
}
pirq_dpci = pirq_dpci(info);
+ /*
+ * The 'pt_irq_create_bind' can be called right after 'pt_irq_destroy_bind'
+ * was called. The 'pirq_cleanup_check' which would free the structure
+ * is only called if the event channel for the PIRQ is active. However
+ * OS-es that use event channels usually bind the PIRQ to an event channel
+ * and also unbind it before 'pt_irq_destroy_bind' is called which means
+ * we end up re-using the 'dpci' structure. This can be easily reproduced
+ * with unloading and loading the driver for the device.
+ *
+ * As such on every 'pt_irq_create_bind' call we MUST reset the values.
+ */
+ pirq_dpci->dom = d;
switch ( pt_irq_bind->irq_type )
{
@@ -156,6 +165,7 @@ int pt_irq_create_bind(
{
pirq_dpci->gmsi.gflags = 0;
pirq_dpci->gmsi.gvec = 0;
+ pirq_dpci->dom = NULL;
pirq_dpci->flags = 0;
pirq_cleanup_check(info, d);
spin_unlock(&d->event_lock);
@@ -232,7 +242,6 @@ int pt_irq_create_bind(
{
unsigned int share;
- pirq_dpci->dom = d;
if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
{
pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
@@ -391,6 +400,7 @@ int pt_irq_destroy_bind(
msixtbl_pt_unregister(d, pirq);
if ( pt_irq_need_timer(pirq_dpci->flags) )
kill_timer(&pirq_dpci->timer);
+ /* hvm_dirq_assist can handle this. */
pirq_dpci->dom = NULL;
pirq_dpci->flags = 0;
pirq_cleanup_check(pirq, d);
@@ -415,11 +425,18 @@ void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci
*dpci)
{
INIT_LIST_HEAD(&dpci->digl_list);
dpci->gmsi.dest_vcpu_id = -1;
+ softirq_tasklet_init(&dpci->tasklet, hvm_dirq_assist, (unsigned long)dpci);
}
bool_t pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci)
{
- return !dpci->flags;
+ if ( !dpci->flags )
+ {
+ tasklet_kill(&dpci->tasklet);
+ dpci->dom = NULL;
+ return 1;
+ }
+ return 0;
}
int pt_pirq_iterate(struct domain *d,
@@ -459,7 +476,7 @@ int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
return 0;
pirq_dpci->masked = 1;
- tasklet_schedule(&dpci->dirq_tasklet);
+ tasklet_schedule(&pirq_dpci->tasklet);
return 1;
}
@@ -513,9 +530,27 @@ void hvm_dpci_msi_eoi(struct domain *d, int vector)
spin_unlock(&d->event_lock);
}
-static int _hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
- void *arg)
+static void hvm_dirq_assist(unsigned long arg)
{
+ struct hvm_pirq_dpci *pirq_dpci = (struct hvm_pirq_dpci *)arg;
+ struct domain *d = pirq_dpci->dom;
+
+ /*
+ * We can be racing with 'pt_irq_destroy_bind' - with us being scheduled
+ * right before 'pirq_guest_unbind' gets called - but us not yet executed.
+ *
+ * And '->dom' gets cleared later in the destroy path. We exit and clear
+ * 'mapping' - which is OK as later in this code we would
+ * do nothing except clear the ->masked field anyhow.
+ */
+ if ( !d )
+ {
+ pirq_dpci->masked = 0;
+ return;
+ }
+ ASSERT(d->arch.hvm_domain.irq.dpci);
+
+ spin_lock(&d->event_lock);
if ( test_and_clear_bool(pirq_dpci->masked) )
{
struct pirq *pirq = dpci_pirq(pirq_dpci);
@@ -526,13 +561,17 @@ static int _hvm_dirq_assist(struct domain *d, struct
hvm_pirq_dpci *pirq_dpci,
send_guest_pirq(d, pirq);
if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
- return 0;
+ {
+ spin_unlock(&d->event_lock);
+ return;
+ }
}
if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
{
vmsi_deliver_pirq(d, pirq_dpci);
- return 0;
+ spin_unlock(&d->event_lock);
+ return;
}
list_for_each_entry ( digl, &pirq_dpci->digl_list, list )
@@ -545,7 +584,8 @@ static int _hvm_dirq_assist(struct domain *d, struct
hvm_pirq_dpci *pirq_dpci,
{
/* for translated MSI to INTx interrupt, eoi as early as possible
*/
__msi_pirq_eoi(pirq_dpci);
- return 0;
+ spin_unlock(&d->event_lock);
+ return;
}
/*
@@ -558,18 +598,6 @@ static int _hvm_dirq_assist(struct domain *d, struct
hvm_pirq_dpci *pirq_dpci,
ASSERT(pt_irq_need_timer(pirq_dpci->flags));
set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT);
}
-
- return 0;
-}
-
-static void hvm_dirq_assist(unsigned long _d)
-{
- struct domain *d = (struct domain *)_d;
-
- ASSERT(d->arch.hvm_domain.irq.dpci);
-
- spin_lock(&d->event_lock);
- pt_pirq_iterate(d, _hvm_dirq_assist, NULL);
spin_unlock(&d->event_lock);
}
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 1eba833..81e8a3a 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -767,6 +767,8 @@ static int pci_clean_dpci_irq(struct domain *d,
xfree(digl);
}
+ tasklet_kill(&pirq_dpci->tasklet);
+
return 0;
}
@@ -784,8 +786,6 @@ static void pci_clean_dpci_irqs(struct domain *d)
hvm_irq_dpci = domain_get_irq_dpci(d);
if ( hvm_irq_dpci != NULL )
{
- tasklet_kill(&hvm_irq_dpci->dirq_tasklet);
-
pt_pirq_iterate(d, pci_clean_dpci_irq, NULL);
d->arch.hvm_domain.irq.dpci = NULL;
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index c89f4b1..94a550a 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -88,7 +88,6 @@ struct hvm_irq_dpci {
DECLARE_BITMAP(isairq_map, NR_ISAIRQS);
/* Record of mapped Links */
uint8_t link_cnt[NR_LINK];
- struct tasklet dirq_tasklet;
};
/* Machine IRQ to guest device/intx mapping. */
@@ -100,6 +99,7 @@ struct hvm_pirq_dpci {
struct domain *dom;
struct hvm_gmsi_info gmsi;
struct timer timer;
+ struct tasklet tasklet;
};
void pt_pirq_init(struct domain *, struct hvm_pirq_dpci *);
--
1.9.3
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |