[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH 08/24] ARM: GICv3: introduce separate pending_irq structs for LPIs



Hi Stefano,

as just mentioned in my last reply, I missed that email last time. Sorry
for that.

Replying to the comments that still apply to the new drop ...

On 28/10/16 02:04, Stefano Stabellini wrote:
> On Wed, 28 Sep 2016, Andre Przywara wrote:
>> For the same reason that allocating a struct irq_desc for each
>> possible LPI is not an option, having a struct pending_irq for each LPI
>> is also not feasible. However we actually only need those when an
>> interrupt is on a vCPU (or is about to be injected).
>> Maintain a list of those structs that we can use for the lifecycle of
>> a guest LPI. We allocate new entries if necessary, however reuse
>> pre-owned entries whenever possible.
>> Teach the existing VGIC functions to find the right pointer when being
>> given a virtual LPI number.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
>> ---
>>  xen/arch/arm/gic.c            |  3 +++
>>  xen/arch/arm/vgic-v3.c        |  2 ++
>>  xen/arch/arm/vgic.c           | 56 
>> ++++++++++++++++++++++++++++++++++++++++---
>>  xen/include/asm-arm/domain.h  |  1 +
>>  xen/include/asm-arm/gic-its.h | 10 ++++++++
>>  xen/include/asm-arm/vgic.h    |  9 +++++++
>>  6 files changed, 78 insertions(+), 3 deletions(-)
>>
>> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
>> index 63c744a..ebe4035 100644
>> --- a/xen/arch/arm/gic.c
>> +++ b/xen/arch/arm/gic.c
>> @@ -506,6 +506,9 @@ static void gic_update_one_lr(struct vcpu *v, int i)
>>                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
>>                  irq_set_affinity(p->desc, cpumask_of(v_target->processor));
>>              }
>> +            /* If this was an LPI, mark this struct as available again. */
>> +            if ( p->irq >= 8192 )
>> +                p->irq = 0;
> 
> I believe that 0 is a valid irq number, we need to come up with a
> different invalid_irq value, and we should #define it. We could also
> consider checking if the irq is inflight (linked to the inflight list)
> instead of using irq == 0 to understand if it is reusable.

But those pending_irqs here are used by LPIs only, where everything
below 8192 is invalid. So that seemed like an easy and straightforward
value to use. The other, statically allocated pending_irqs would never
read an IRQ number above 8192. When searching for an empty pending_irq
for a new LPI, we would never touch any of the statically allocated
structs, so this is safe, isn't it?

>>          }
>>      }
>>  }
>> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
>> index ec038a3..e9b6490 100644
>> --- a/xen/arch/arm/vgic-v3.c
>> +++ b/xen/arch/arm/vgic-v3.c
>> @@ -1388,6 +1388,8 @@ static int vgic_v3_vcpu_init(struct vcpu *v)
>>      if ( v->vcpu_id == last_cpu || (v->vcpu_id == (d->max_vcpus - 1)) )
>>          v->arch.vgic.flags |= VGIC_V3_RDIST_LAST;
>>  
>> +    INIT_LIST_HEAD(&v->arch.vgic.pending_lpi_list);
>> +
>>      return 0;
>>  }
>>  
>> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
>> index 0965119..b961551 100644
>> --- a/xen/arch/arm/vgic.c
>> +++ b/xen/arch/arm/vgic.c
>> @@ -31,6 +31,8 @@
>>  #include <asm/mmio.h>
>>  #include <asm/gic.h>
>>  #include <asm/vgic.h>
>> +#include <asm/gic_v3_defs.h>
>> +#include <asm/gic-its.h>
>>  
>>  static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v, int rank)
>>  {
>> @@ -61,7 +63,7 @@ struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, 
>> unsigned int irq)
>>      return vgic_get_rank(v, rank);
>>  }
>>  
>> -static void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
>> +void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
>>  {
>>      INIT_LIST_HEAD(&p->inflight);
>>      INIT_LIST_HEAD(&p->lr_queue);
>> @@ -244,10 +246,14 @@ struct vcpu *vgic_get_target_vcpu(struct vcpu *v, 
>> unsigned int virq)
>>  
>>  static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
>>  {
>> -    struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
>> +    struct vgic_irq_rank *rank;
>>      unsigned long flags;
>>      int priority;
>>  
>> +    if ( virq >= 8192 )
> 
> Please introduce a convenience static inline function such as:
> 
>   bool is_lpi(unsigned int irq)

Sure.

>> +        return gicv3_lpi_get_priority(v->domain, virq);
>> +
>> +    rank = vgic_rank_irq(v, virq);
>>      vgic_lock_rank(v, rank, flags);
>>      priority = rank->priority[virq & INTERRUPT_RANK_MASK];
>>      vgic_unlock_rank(v, rank, flags);
>> @@ -446,13 +452,55 @@ int vgic_to_sgi(struct vcpu *v, register_t sgir, enum 
>> gic_sgi_mode irqmode, int
>>      return 1;
>>  }
>>  
>> +/*
>> + * Holding struct pending_irq's for each possible virtual LPI in each domain
>> + * requires too much Xen memory, also a malicious guest could potentially
>> + * spam Xen with LPI map requests. We cannot cover those with (guest 
>> allocated)
>> + * ITS memory, so we use a dynamic scheme of allocating struct pending_irq's
>> + * on demand.
>> + */
>> +struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int lpi,
>> +                                   bool allocate)
>> +{
>> +    struct lpi_pending_irq *lpi_irq, *empty = NULL;
>> +
>> +    /* TODO: locking! */
> 
> Yeah, this needs to be fixed in v1 :-)

I fixed that in the RFC v2 post.

> 
>> +    list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
>> +    {
>> +        if ( lpi_irq->pirq.irq == lpi )
>> +            return &lpi_irq->pirq;
>> +
>> +        if ( lpi_irq->pirq.irq == 0 && !empty )
>> +            empty = lpi_irq;
>> +    }
> 
> This is another one of those cases where a list is too slow for the hot
> path. The idea of allocating pending_irq struct on demand is good, but
> storing them in a linked list would kill performance. Probably the best
> thing we could do is an hashtable and we should preallocate the initial
> array of elements. I don't know what the size of the initial array
> should be, but we can start around 50, and change it in the future once
> we do tests with real workloads. Of course the other key parameter is
> the hash function, not sure which one is the right one, but ideally we
> would never have to allocate new pending_irq struct for LPIs because the
> preallocated set would suffice.

As I mentioned in the last post, I expect this number to be really low
(less than 5). Let's face it: If you have multiple interrupts pending
for a significant amount of time you won't make any actual progress in
the guest, because it's busy with handling interrupts.
So my picture of LPI handling is:
1) A device triggers an MSI, so the host receives the LPI. Ideally this
will be handled by the pCPU where the right VCPU is running atm, so it
will exit to EL2. Xen will handle the LPI by assigning one struct
pending_irq to it and will inject it into the guest.
2) The VCPU gets to run again and calls the interrupt handler, because
the (virtual) LPI is pending.
3) The (Linux) IRQ handler reads the ICC_IAR register to learn the IRQ
number, and will get the virtual LPI number.
=> At this point the LPI is done when it comes to the VGIC. The LR state
will be set to 0 (neither pending or active). This is independent of the
EOI the handler will execute soon (or later).
4) On the next exit the VGIC code will discover that the IRQ is done
(LR.state == 0) and will discard the struct pending_irq (set the LPI
number to 0 to make it available to the next LPI).

Even if there would be multiple LPIs pending at the same time (because
the guest had interrupts disabled, for instance), I believe they can be
all handled without exiting. Upon EOIing (priority-dropping, really) the
first LPI, the next virtual LPI would fire, calling the interrupt
handler again, and so no. Unless the kernel decides to do something that
exits (even accessing the hardware normally wouldn't, I believe), we can
clear all pending LPIs in one go.

So I have a hard time to imagine how we can really have many LPIs
pending and thus struct pending_irqs allocated.
Note that this may differ from SPIs, for instance, because the IRQ life
cycle is more complex there (extending till the EOI).

Does that make some sense? Or am I missing something here?

> I could be convinced that a list is sufficient if we do some real
> benchmarking and it turns out that lpi_to_pending always resolve in less
> than ~5 steps.

I can try to do this once I get it running on some silicon ...

>> +    if ( !allocate )
>> +        return NULL;
>> +
>> +    if ( !empty )
>> +    {
>> +        empty = xzalloc(struct lpi_pending_irq);
>> +        vgic_init_pending_irq(&empty->pirq, lpi);
>> +        list_add_tail(&empty->entry, &v->arch.vgic.pending_lpi_list);
>> +    } else
>> +    {
>> +        empty->pirq.status = 0;
>> +        empty->pirq.irq = lpi;
>> +    }
>> +
>> +    return &empty->pirq;
>> +}
>> +
>>  struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq)
>>  {
>>      struct pending_irq *n;
>> +
> 
> spurious change
> 
> 
>>      /* Pending irqs allocation strategy: the first vgic.nr_spis irqs
>>       * are used for SPIs; the rests are used for per cpu irqs */
>>      if ( irq < 32 )
>>          n = &v->arch.vgic.pending_irqs[irq];
>> +    else if ( irq >= 8192 )
> 
> Use the new static inline
> 
> 
>> +        n = lpi_to_pending(v, irq, true);
>>      else
>>          n = &v->domain->arch.vgic.pending_irqs[irq - 32];
>>      return n;
>> @@ -480,7 +528,7 @@ void vgic_clear_pending_irqs(struct vcpu *v)
>>  void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
>>  {
>>      uint8_t priority;
>> -    struct pending_irq *iter, *n = irq_to_pending(v, virq);
>> +    struct pending_irq *iter, *n;
>>      unsigned long flags;
>>      bool_t running;
>>  
>> @@ -488,6 +536,8 @@ void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int 
>> virq)
>>  
>>      spin_lock_irqsave(&v->arch.vgic.lock, flags);
>>  
>> +    n = irq_to_pending(v, virq);
> 
> Why this change?

Because we now need to hold the lock before calling irq_to_pending(),
which now may call lpi_to_pending().

>>      /* vcpu offline */
>>      if ( test_bit(_VPF_down, &v->pause_flags) )
>>      {
>> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
>> index 9452fcd..ae8a9de 100644
>> --- a/xen/include/asm-arm/domain.h
>> +++ b/xen/include/asm-arm/domain.h
>> @@ -249,6 +249,7 @@ struct arch_vcpu
>>          paddr_t rdist_base;
>>  #define VGIC_V3_RDIST_LAST  (1 << 0)        /* last vCPU of the rdist */
>>          uint8_t flags;
>> +        struct list_head pending_lpi_list;
>>      } vgic;
>>  
>>      /* Timer registers  */
>> diff --git a/xen/include/asm-arm/gic-its.h b/xen/include/asm-arm/gic-its.h
>> index 4e9841a..1f881c0 100644
>> --- a/xen/include/asm-arm/gic-its.h
>> +++ b/xen/include/asm-arm/gic-its.h
>> @@ -136,6 +136,12 @@ int gicv3_lpi_allocate_host_lpi(struct host_its *its,
>>  int gicv3_lpi_drop_host_lpi(struct host_its *its,
>>                              uint32_t devid, uint32_t eventid,
>>                              uint32_t host_lpi);
>> +
>> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
>> +{
>> +    return GIC_PRI_IRQ;
>> +}
> 
> Does it mean that we don't allow changes to LPI priorities?

This is placeholder code for now, until we learn about the virtual
property table in patch 11/24 (where this function gets amended).
The new code drop gets away without this function here entirely.

Cheers,
Andre.

>>  #else
>>  
>>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
>> @@ -175,6 +181,10 @@ static inline int gicv3_lpi_drop_host_lpi(struct 
>> host_its *its,
>>  {
>>      return 0;
>>  }
>> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
>> +{
>> +    return GIC_PRI_IRQ;
>> +}
>>  
>>  #endif /* CONFIG_HAS_ITS */
>>  
>> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
>> index 300f461..4e29ba6 100644
>> --- a/xen/include/asm-arm/vgic.h
>> +++ b/xen/include/asm-arm/vgic.h
>> @@ -83,6 +83,12 @@ struct pending_irq
>>      struct list_head lr_queue;
>>  };
>>  
>> +struct lpi_pending_irq
>> +{
>> +    struct list_head entry;
>> +    struct pending_irq pirq;
>> +};
>> +
>>  #define NR_INTERRUPT_PER_RANK   32
>>  #define INTERRUPT_RANK_MASK (NR_INTERRUPT_PER_RANK - 1)
>>  
>> @@ -296,8 +302,11 @@ extern struct vcpu *vgic_get_target_vcpu(struct vcpu 
>> *v, unsigned int virq);
>>  extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq);
>>  extern void vgic_vcpu_inject_spi(struct domain *d, unsigned int virq);
>>  extern void vgic_clear_pending_irqs(struct vcpu *v);
>> +extern void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq);
>>  extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
>>  extern struct pending_irq *spi_to_pending(struct domain *d, unsigned int 
>> irq);
>> +extern struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int irq,
>> +                                          bool allocate);
>>  extern struct vgic_irq_rank *vgic_rank_offset(struct vcpu *v, int b, int n, 
>> int s);
>>  extern struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int 
>> irq);
>>  extern int vgic_emulate(struct cpu_user_regs *regs, union hsr hsr);
>> -- 
>> 2.9.0
>>
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.