Re: [Xen-devel] [RFC PATCH 08/24] ARM: GICv3: introduce separate pending_irq structs for LPIs
On Thu, 12 Jan 2017, Andre Przywara wrote:
> Hi Stefano,
>
> as just mentioned in my last reply, I missed that email last time. Sorry
> for that.
>
> Replying to the comments that still apply to the new drop ...
>
> On 28/10/16 02:04, Stefano Stabellini wrote:
> > On Wed, 28 Sep 2016, Andre Przywara wrote:
> >> For the same reason that allocating a struct irq_desc for each
> >> possible LPI is not an option, having a struct pending_irq for each LPI
> >> is also not feasible. However we actually only need those when an
> >> interrupt is on a vCPU (or is about to be injected).
> >> Maintain a list of those structs that we can use for the lifecycle of
> >> a guest LPI. We allocate new entries if necessary, however reuse
> >> pre-owned entries whenever possible.
> >> Teach the existing VGIC functions to find the right pointer when being
> >> given a virtual LPI number.
> >>
> >> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
> >> ---
> >> xen/arch/arm/gic.c | 3 +++
> >> xen/arch/arm/vgic-v3.c | 2 ++
> >> xen/arch/arm/vgic.c | 56 ++++++++++++++++++++++++++++++++++++++++---
> >> xen/include/asm-arm/domain.h | 1 +
> >> xen/include/asm-arm/gic-its.h | 10 ++++++++
> >> xen/include/asm-arm/vgic.h | 9 +++++++
> >> 6 files changed, 78 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> >> index 63c744a..ebe4035 100644
> >> --- a/xen/arch/arm/gic.c
> >> +++ b/xen/arch/arm/gic.c
> >> @@ -506,6 +506,9 @@ static void gic_update_one_lr(struct vcpu *v, int i)
> >> struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
> >> irq_set_affinity(p->desc,
> >> cpumask_of(v_target->processor));
> >> }
> >> + /* If this was an LPI, mark this struct as available again. */
> >> + if ( p->irq >= 8192 )
> >> + p->irq = 0;
> >
> > I believe that 0 is a valid irq number, we need to come up with a
> > different invalid_irq value, and we should #define it. We could also
> > consider checking if the irq is inflight (linked to the inflight list)
> > instead of using irq == 0 to understand if it is reusable.
>
> But those pending_irqs here are used by LPIs only, where everything
> below 8192 is invalid. So that seemed like an easy and straightforward
> value to use. The other, statically allocated pending_irqs would never
> read an IRQ number above 8192. When searching for an empty pending_irq
> for a new LPI, we would never touch any of the statically allocated
> structs, so this is safe, isn't it?
I think you are right. Still, please #define it.
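Something like this minimal sketch would do (the INVALID_LPI name is only
illustrative, it is not in the patch):

    /* 0 is never a valid LPI number, so it can mark a free struct. */
    #define INVALID_LPI     0

    /* If this was an LPI, mark this struct as available again. */
    if ( p->irq >= 8192 )
        p->irq = INVALID_LPI;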
> >> }
> >> }
> >> }
> >> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> >> index ec038a3..e9b6490 100644
> >> --- a/xen/arch/arm/vgic-v3.c
> >> +++ b/xen/arch/arm/vgic-v3.c
> >> @@ -1388,6 +1388,8 @@ static int vgic_v3_vcpu_init(struct vcpu *v)
> >> if ( v->vcpu_id == last_cpu || (v->vcpu_id == (d->max_vcpus - 1)) )
> >> v->arch.vgic.flags |= VGIC_V3_RDIST_LAST;
> >>
> >> + INIT_LIST_HEAD(&v->arch.vgic.pending_lpi_list);
> >> +
> >> return 0;
> >> }
> >>
> >> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> >> index 0965119..b961551 100644
> >> --- a/xen/arch/arm/vgic.c
> >> +++ b/xen/arch/arm/vgic.c
> >> @@ -31,6 +31,8 @@
> >> #include <asm/mmio.h>
> >> #include <asm/gic.h>
> >> #include <asm/vgic.h>
> >> +#include <asm/gic_v3_defs.h>
> >> +#include <asm/gic-its.h>
> >>
> >> static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v, int rank)
> >> {
> >> @@ -61,7 +63,7 @@ struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq)
> >> return vgic_get_rank(v, rank);
> >> }
> >>
> >> -static void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
> >> +void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
> >> {
> >> INIT_LIST_HEAD(&p->inflight);
> >> INIT_LIST_HEAD(&p->lr_queue);
> >> @@ -244,10 +246,14 @@ struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq)
> >>
> >> static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
> >> {
> >> - struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
> >> + struct vgic_irq_rank *rank;
> >> unsigned long flags;
> >> int priority;
> >>
> >> + if ( virq >= 8192 )
> >
> > Please introduce a convenience static inline function such as:
> >
> > bool is_lpi(unsigned int irq)
>
> Sure.
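For reference, a minimal sketch of the helper I have in mind (LPI_OFFSET is
an assumed name; LPI IDs start at 8192 on GICv3):

    #define LPI_OFFSET      8192

    /* LPIs occupy the interrupt ID space from 8192 upwards. */
    static inline bool is_lpi(unsigned int irq)
    {
        return irq >= LPI_OFFSET;
    }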
>
> >> + return gicv3_lpi_get_priority(v->domain, virq);
> >> +
> >> + rank = vgic_rank_irq(v, virq);
> >> vgic_lock_rank(v, rank, flags);
> >> priority = rank->priority[virq & INTERRUPT_RANK_MASK];
> >> vgic_unlock_rank(v, rank, flags);
> >> @@ -446,13 +452,55 @@ int vgic_to_sgi(struct vcpu *v, register_t sgir, enum gic_sgi_mode irqmode, int
> >> return 1;
> >> }
> >>
> >> +/*
> >> + * Holding struct pending_irq's for each possible virtual LPI in each domain
> >> + * requires too much Xen memory, also a malicious guest could potentially
> >> + * spam Xen with LPI map requests. We cannot cover those with (guest allocated)
> >> + * ITS memory, so we use a dynamic scheme of allocating struct pending_irq's
> >> + * on demand.
> >> + */
> >> +struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int lpi,
> >> +                                   bool allocate)
> >> +{
> >> + struct lpi_pending_irq *lpi_irq, *empty = NULL;
> >> +
> >> + /* TODO: locking! */
> >
> > Yeah, this needs to be fixed in v1 :-)
>
> I fixed that in the RFC v2 post.
>
> >
> >> + list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
> >> + {
> >> + if ( lpi_irq->pirq.irq == lpi )
> >> + return &lpi_irq->pirq;
> >> +
> >> + if ( lpi_irq->pirq.irq == 0 && !empty )
> >> + empty = lpi_irq;
> >> + }
> >
> > This is another one of those cases where a list is too slow for the hot
> > path. The idea of allocating pending_irq struct on demand is good, but
> > storing them in a linked list would kill performance. Probably the best
> > thing we could do is an hashtable and we should preallocate the initial
> > array of elements. I don't know what the size of the initial array
> > should be, but we can start around 50, and change it in the future once
> > we do tests with real workloads. Of course the other key parameter is
> > the hash function, not sure which one is the right one, but ideally we
> > would never have to allocate new pending_irq struct for LPIs because the
> > preallocated set would suffice.
>
> As I mentioned in the last post, I expect this number to be really low
> (less than 5).
Are you able to check this assumption in a real scenario? If not you,
could somebody else?
> Let's face it: If you have multiple interrupts pending
> for a significant amount of time you won't make any actual progress in
> the guest, because it's busy handling interrupts.
> So my picture of LPI handling is:
> 1) A device triggers an MSI, so the host receives the LPI. Ideally this
> will be handled by the pCPU where the right VCPU is running atm, so it
> will exit to EL2. Xen will handle the LPI by assigning one struct
> pending_irq to it and will inject it into the guest.
> 2) The VCPU gets to run again and calls the interrupt handler, because
> the (virtual) LPI is pending.
> 3) The (Linux) IRQ handler reads the ICC_IAR register to learn the IRQ
> number, and will get the virtual LPI number.
> => At this point the LPI is done when it comes to the VGIC. The LR state
> will be set to 0 (neither pending nor active). This is independent of the
> EOI the handler will execute soon (or later).
> 4) On the next exit the VGIC code will discover that the IRQ is done
> (LR.state == 0) and will discard the struct pending_irq (set the LPI
> number to 0 to make it available to the next LPI).
I am following you so far.
> Even if there would be multiple LPIs pending at the same time (because
> the guest had interrupts disabled, for instance), I believe they can be
> all handled without exiting. Upon EOIing (priority-dropping, really) the
> first LPI, the next virtual LPI would fire, calling the interrupt
> handler again, and so on. Unless the kernel decides to do something that
> exits (even accessing the hardware normally wouldn't, I believe), we can
> clear all pending LPIs in one go.
>
> So I have a hard time imagining how we can really have many LPIs
> pending and thus struct pending_irqs allocated.
> Note that this may differ from SPIs, for instance, because the IRQ life
> cycle is more complex there (extending till the EOI).
>
> Does that make some sense? Or am I missing something here?
In my tests with much smaller platforms than the ones existing today, I
could easily have 2-3 interrupts pending at the same time without much
load and without any SR-IOV NICs or any other fancy PCIE hardware. It
would be nice to test on Cavium ThunderX for example. It's also easy to
switch to rbtrees.
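To give an idea, a rough sketch of the hash table variant (everything below
is hypothetical, including the per-vCPU pending_lpi_hash array of list
heads):

    #define LPI_HASH_BITS   6
    #define LPI_HASH_SIZE   (1U << LPI_HASH_BITS)

    /* Consecutive LPI numbers land in consecutive buckets. */
    static inline unsigned int lpi_hash(unsigned int lpi)
    {
        return lpi & (LPI_HASH_SIZE - 1);
    }

    static struct pending_irq *lpi_to_pending_hashed(struct vcpu *v,
                                                     unsigned int lpi)
    {
        struct lpi_pending_irq *p;

        /* Walk one short bucket instead of the whole list. */
        list_for_each_entry(p, &v->arch.vgic.pending_lpi_hash[lpi_hash(lpi)],
                            entry)
            if ( p->pirq.irq == lpi )
                return &p->pirq;

        return NULL;    /* caller allocates and adds a new entry */
    }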
> > I could be convinced that a list is sufficient if we do some real
> > benchmarking and it turns out that lpi_to_pending always resolves in less
> > than ~5 steps.
>
> I can try to do this once I get it running on some silicon ...
>
> >> + if ( !allocate )
> >> + return NULL;
> >> +
> >> + if ( !empty )
> >> + {
> >> + empty = xzalloc(struct lpi_pending_irq);
> >> + vgic_init_pending_irq(&empty->pirq, lpi);
> >> + list_add_tail(&empty->entry, &v->arch.vgic.pending_lpi_list);
> >> + } else
> >> + {
> >> + empty->pirq.status = 0;
> >> + empty->pirq.irq = lpi;
> >> + }
> >> +
> >> + return &empty->pirq;
> >> +}
> >> +
> >> struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq)
> >> {
> >> struct pending_irq *n;
> >> +
> >
> > spurious change
> >
> >
> >> /* Pending irqs allocation strategy: the first vgic.nr_spis irqs
> >> * are used for SPIs; the rests are used for per cpu irqs */
> >> if ( irq < 32 )
> >> n = &v->arch.vgic.pending_irqs[irq];
> >> + else if ( irq >= 8192 )
> >
> > Use the new static inline
> >
> >
> >> + n = lpi_to_pending(v, irq, true);
> >> else
> >> n = &v->domain->arch.vgic.pending_irqs[irq - 32];
> >> return n;
> >> @@ -480,7 +528,7 @@ void vgic_clear_pending_irqs(struct vcpu *v)
> >> void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
> >> {
> >> uint8_t priority;
> >> - struct pending_irq *iter, *n = irq_to_pending(v, virq);
> >> + struct pending_irq *iter, *n;
> >> unsigned long flags;
> >> bool_t running;
> >>
> >> @@ -488,6 +536,8 @@ void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
> >>
> >> spin_lock_irqsave(&v->arch.vgic.lock, flags);
> >>
> >> + n = irq_to_pending(v, virq);
> >
> > Why this change?
>
> Because we now need to hold the lock before calling irq_to_pending(),
> which now may call lpi_to_pending().
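To spell out the resulting ordering, a simplified sketch (not the actual
vgic_vcpu_inject_irq body):

    unsigned long flags;
    struct pending_irq *n;

    spin_lock_irqsave(&v->arch.vgic.lock, flags);

    /* May call lpi_to_pending() and allocate from pending_lpi_list,
     * so it must run with the per-vCPU vgic lock already held. */
    n = irq_to_pending(v, virq);

    /* ... queue n on the vCPU ... */

    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);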
>
> >> /* vcpu offline */
> >> if ( test_bit(_VPF_down, &v->pause_flags) )
> >> {
> >> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> >> index 9452fcd..ae8a9de 100644
> >> --- a/xen/include/asm-arm/domain.h
> >> +++ b/xen/include/asm-arm/domain.h
> >> @@ -249,6 +249,7 @@ struct arch_vcpu
> >> paddr_t rdist_base;
> >> #define VGIC_V3_RDIST_LAST (1 << 0) /* last vCPU of the rdist */
> >> uint8_t flags;
> >> + struct list_head pending_lpi_list;
> >> } vgic;
> >>
> >> /* Timer registers */
> >> diff --git a/xen/include/asm-arm/gic-its.h b/xen/include/asm-arm/gic-its.h
> >> index 4e9841a..1f881c0 100644
> >> --- a/xen/include/asm-arm/gic-its.h
> >> +++ b/xen/include/asm-arm/gic-its.h
> >> @@ -136,6 +136,12 @@ int gicv3_lpi_allocate_host_lpi(struct host_its *its,
> >> int gicv3_lpi_drop_host_lpi(struct host_its *its,
> >> uint32_t devid, uint32_t eventid,
> >> uint32_t host_lpi);
> >> +
> >> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
> >> +{
> >> + return GIC_PRI_IRQ;
> >> +}
> >
> > Does it mean that we don't allow changes to LPI priorities?
>
> This is placeholder code for now, until we learn about the virtual
> property table in patch 11/24 (where this function gets amended).
> The new code drop gets away without this function here entirely.
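For readers following along, a rough sketch of that direction (the proptable
field is an assumption, not the patch's code; per the GICv3 spec the LPI
property table holds one byte per LPI, with the priority in bits [7:2] and
the enable bit in bit [0]):

    #define LPI_PROP_PRIO_MASK  0xfc

    static inline int vgic_lpi_get_priority(struct domain *d, uint32_t lpi)
    {
        uint8_t prop = d->arch.vgic.proptable[lpi - 8192];

        return prop & LPI_PROP_PRIO_MASK;
    }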
>
> Cheers,
> Andre.
>
> >> #else
> >>
> >> static inline void gicv3_its_dt_init(const struct dt_device_node *node)
> >> @@ -175,6 +181,10 @@ static inline int gicv3_lpi_drop_host_lpi(struct host_its *its,
> >> {
> >> return 0;
> >> }
> >> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
> >> +{
> >> + return GIC_PRI_IRQ;
> >> +}
> >>
> >> #endif /* CONFIG_HAS_ITS */
> >>
> >> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
> >> index 300f461..4e29ba6 100644
> >> --- a/xen/include/asm-arm/vgic.h
> >> +++ b/xen/include/asm-arm/vgic.h
> >> @@ -83,6 +83,12 @@ struct pending_irq
> >> struct list_head lr_queue;
> >> };
> >>
> >> +struct lpi_pending_irq
> >> +{
> >> + struct list_head entry;
> >> + struct pending_irq pirq;
> >> +};
> >> +
> >> #define NR_INTERRUPT_PER_RANK 32
> >> #define INTERRUPT_RANK_MASK (NR_INTERRUPT_PER_RANK - 1)
> >>
> >> @@ -296,8 +302,11 @@ extern struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq);
> >> extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq);
> >> extern void vgic_vcpu_inject_spi(struct domain *d, unsigned int virq);
> >> extern void vgic_clear_pending_irqs(struct vcpu *v);
> >> +extern void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq);
> >> extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
> >> extern struct pending_irq *spi_to_pending(struct domain *d, unsigned int irq);
> >> +extern struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int irq,
> >> +                                          bool allocate);
> >> extern struct vgic_irq_rank *vgic_rank_offset(struct vcpu *v, int b, int n, int s);
> >> extern struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq);
> >> extern int vgic_emulate(struct cpu_user_regs *regs, union hsr hsr);
> >> --
> >> 2.9.0
> >>
> >
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel