[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH 08/24] ARM: GICv3: introduce separate pending_irq structs for LPIs



On Thu, 12 Jan 2017, Andre Przywara wrote:
> Hi Stefano,
> 
> as just mentioned in my last reply, I missed that email last time. Sorry
> for that.
> 
> Replying to the comments that still apply to the new drop ...
> 
> On 28/10/16 02:04, Stefano Stabellini wrote:
> > On Wed, 28 Sep 2016, Andre Przywara wrote:
> >> For the same reason that allocating a struct irq_desc for each
> >> possible LPI is not an option, having a struct pending_irq for each LPI
> >> is also not feasible. However we actually only need those when an
> >> interrupt is on a vCPU (or is about to be injected).
> >> Maintain a list of those structs that we can use for the lifecycle of
> >> a guest LPI. We allocate new entries if necessary, however reuse
> >> pre-owned entries whenever possible.
> >> Teach the existing VGIC functions to find the right pointer when being
> >> given a virtual LPI number.
> >>
> >> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
> >> ---
> >>  xen/arch/arm/gic.c            |  3 +++
> >>  xen/arch/arm/vgic-v3.c        |  2 ++
> >>  xen/arch/arm/vgic.c           | 56 
> >> ++++++++++++++++++++++++++++++++++++++++---
> >>  xen/include/asm-arm/domain.h  |  1 +
> >>  xen/include/asm-arm/gic-its.h | 10 ++++++++
> >>  xen/include/asm-arm/vgic.h    |  9 +++++++
> >>  6 files changed, 78 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> >> index 63c744a..ebe4035 100644
> >> --- a/xen/arch/arm/gic.c
> >> +++ b/xen/arch/arm/gic.c
> >> @@ -506,6 +506,9 @@ static void gic_update_one_lr(struct vcpu *v, int i)
> >>                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
> >>                  irq_set_affinity(p->desc, 
> >> cpumask_of(v_target->processor));
> >>              }
> >> +            /* If this was an LPI, mark this struct as available again. */
> >> +            if ( p->irq >= 8192 )
> >> +                p->irq = 0;
> > 
> > I believe that 0 is a valid irq number, we need to come up with a
> > different invalid_irq value, and we should #define it. We could also
> > consider checking if the irq is inflight (linked to the inflight list)
> > instead of using irq == 0 to understand if it is reusable.
> 
> But those pending_irqs here are used by LPIs only, where everything
> below 8192 is invalid. So that seemed like an easy and straightforward
> value to use. The other, statically allocated pending_irqs would never
> read an IRQ number above 8192. When searching for an empty pending_irq
> for a new LPI, we would never touch any of the statically allocated
> structs, so this is safe, isn't it?

I think you are right. Still, please #define it.


> >>          }
> >>      }
> >>  }
> >> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> >> index ec038a3..e9b6490 100644
> >> --- a/xen/arch/arm/vgic-v3.c
> >> +++ b/xen/arch/arm/vgic-v3.c
> >> @@ -1388,6 +1388,8 @@ static int vgic_v3_vcpu_init(struct vcpu *v)
> >>      if ( v->vcpu_id == last_cpu || (v->vcpu_id == (d->max_vcpus - 1)) )
> >>          v->arch.vgic.flags |= VGIC_V3_RDIST_LAST;
> >>  
> >> +    INIT_LIST_HEAD(&v->arch.vgic.pending_lpi_list);
> >> +
> >>      return 0;
> >>  }
> >>  
> >> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> >> index 0965119..b961551 100644
> >> --- a/xen/arch/arm/vgic.c
> >> +++ b/xen/arch/arm/vgic.c
> >> @@ -31,6 +31,8 @@
> >>  #include <asm/mmio.h>
> >>  #include <asm/gic.h>
> >>  #include <asm/vgic.h>
> >> +#include <asm/gic_v3_defs.h>
> >> +#include <asm/gic-its.h>
> >>  
> >>  static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v, int 
> >> rank)
> >>  {
> >> @@ -61,7 +63,7 @@ struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, 
> >> unsigned int irq)
> >>      return vgic_get_rank(v, rank);
> >>  }
> >>  
> >> -static void vgic_init_pending_irq(struct pending_irq *p, unsigned int 
> >> virq)
> >> +void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
> >>  {
> >>      INIT_LIST_HEAD(&p->inflight);
> >>      INIT_LIST_HEAD(&p->lr_queue);
> >> @@ -244,10 +246,14 @@ struct vcpu *vgic_get_target_vcpu(struct vcpu *v, 
> >> unsigned int virq)
> >>  
> >>  static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
> >>  {
> >> -    struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
> >> +    struct vgic_irq_rank *rank;
> >>      unsigned long flags;
> >>      int priority;
> >>  
> >> +    if ( virq >= 8192 )
> > 
> > Please introduce a convenience static inline function such as:
> > 
> >   bool is_lpi(unsigned int irq)
> 
> Sure.
> 
> >> +        return gicv3_lpi_get_priority(v->domain, virq);
> >> +
> >> +    rank = vgic_rank_irq(v, virq);
> >>      vgic_lock_rank(v, rank, flags);
> >>      priority = rank->priority[virq & INTERRUPT_RANK_MASK];
> >>      vgic_unlock_rank(v, rank, flags);
> >> @@ -446,13 +452,55 @@ int vgic_to_sgi(struct vcpu *v, register_t sgir, 
> >> enum gic_sgi_mode irqmode, int
> >>      return 1;
> >>  }
> >>  
> >> +/*
> >> + * Holding struct pending_irq's for each possible virtual LPI in each 
> >> domain
> >> + * requires too much Xen memory, also a malicious guest could potentially
> >> + * spam Xen with LPI map requests. We cannot cover those with (guest 
> >> allocated)
> >> + * ITS memory, so we use a dynamic scheme of allocating struct 
> >> pending_irq's
> >> + * on demand.
> >> + */
> >> +struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int lpi,
> >> +                                   bool allocate)
> >> +{
> >> +    struct lpi_pending_irq *lpi_irq, *empty = NULL;
> >> +
> >> +    /* TODO: locking! */
> > 
> > Yeah, this needs to be fixed in v1 :-)
> 
> I fixed that in the RFC v2 post.
> 
> > 
> >> +    list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
> >> +    {
> >> +        if ( lpi_irq->pirq.irq == lpi )
> >> +            return &lpi_irq->pirq;
> >> +
> >> +        if ( lpi_irq->pirq.irq == 0 && !empty )
> >> +            empty = lpi_irq;
> >> +    }
> > 
> > This is another one of those cases where a list is too slow for the hot
> > path. The idea of allocating pending_irq struct on demand is good, but
> > storing them in a linked list would kill performance. Probably the best
> > thing we could do is an hashtable and we should preallocate the initial
> > array of elements. I don't know what the size of the initial array
> > should be, but we can start around 50, and change it in the future once
> > we do tests with real workloads. Of course the other key parameter is
> > the hash function, not sure which one is the right one, but ideally we
> > would never have to allocate new pending_irq struct for LPIs because the
> > preallocated set would suffice.
> 
> As I mentioned in the last post, I expect this number to be really low
> (less than 5). 

Are you able to check this assumption in a real scenario? If not you,
somebody else?


> Let's face it: If you have multiple interrupts pending
> for a significant amount of time you won't make any actual progress in
> the guest, because it's busy with handling interrupts.
> So my picture of LPI handling is:
> 1) A device triggers an MSI, so the host receives the LPI. Ideally this
> will be handled by the pCPU where the right VCPU is running atm, so it
> will exit to EL2. Xen will handle the LPI by assigning one struct
> pending_irq to it and will inject it into the guest.
> 2) The VCPU gets to run again and calls the interrupt handler, because
> the (virtual) LPI is pending.
> 3) The (Linux) IRQ handler reads the ICC_IAR register to learn the IRQ
> number, and will get the virtual LPI number.
> => At this point the LPI is done when it comes to the VGIC. The LR state
> will be set to 0 (neither pending nor active). This is independent of the
> EOI the handler will execute soon (or later).
> 4) On the next exit the VGIC code will discover that the IRQ is done
> (LR.state == 0) and will discard the struct pending_irq (set the LPI
> number to 0 to make it available to the next LPI).

I am following


> Even if there would be multiple LPIs pending at the same time (because
> the guest had interrupts disabled, for instance), I believe they can be
> all handled without exiting. Upon EOIing (priority-dropping, really) the
> first LPI, the next virtual LPI would fire, calling the interrupt
> handler again, and so on. Unless the kernel decides to do something that
> exits (even accessing the hardware normally wouldn't, I believe), we can
> clear all pending LPIs in one go.
> 
> So I have a hard time to imagine how we can really have many LPIs
> pending and thus struct pending_irqs allocated.
> Note that this may differ from SPIs, for instance, because the IRQ life
> cycle is more complex there (extending till the EOI).
> 
> Does that make some sense? Or am I missing something here?

In my tests with much smaller platforms than the ones existing today, I
could easily have 2-3 interrupts pending at the same time without much
load and without any SR-IOV NICs or any other fancy PCIE hardware. It
would be nice to test on Cavium ThunderX for example. It's also easy to
switch to rbtrees.


> > I could be convinced that a list is sufficient if we do some real
> > benchmarking and it turns out that lpi_to_pending always resolve in less
> > than ~5 steps.
> 
> I can try to do this once I get it running on some silicon ...
> 
> >> +    if ( !allocate )
> >> +        return NULL;
> >> +
> >> +    if ( !empty )
> >> +    {
> >> +        empty = xzalloc(struct lpi_pending_irq);
> >> +        vgic_init_pending_irq(&empty->pirq, lpi);
> >> +        list_add_tail(&empty->entry, &v->arch.vgic.pending_lpi_list);
> >> +    } else
> >> +    {
> >> +        empty->pirq.status = 0;
> >> +        empty->pirq.irq = lpi;
> >> +    }
> >> +
> >> +    return &empty->pirq;
> >> +}
> >> +
> >>  struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq)
> >>  {
> >>      struct pending_irq *n;
> >> +
> > 
> > spurious change
> > 
> > 
> >>      /* Pending irqs allocation strategy: the first vgic.nr_spis irqs
> >>       * are used for SPIs; the rests are used for per cpu irqs */
> >>      if ( irq < 32 )
> >>          n = &v->arch.vgic.pending_irqs[irq];
> >> +    else if ( irq >= 8192 )
> > 
> > Use the new static inline
> > 
> > 
> >> +        n = lpi_to_pending(v, irq, true);
> >>      else
> >>          n = &v->domain->arch.vgic.pending_irqs[irq - 32];
> >>      return n;
> >> @@ -480,7 +528,7 @@ void vgic_clear_pending_irqs(struct vcpu *v)
> >>  void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
> >>  {
> >>      uint8_t priority;
> >> -    struct pending_irq *iter, *n = irq_to_pending(v, virq);
> >> +    struct pending_irq *iter, *n;
> >>      unsigned long flags;
> >>      bool_t running;
> >>  
> >> @@ -488,6 +536,8 @@ void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int 
> >> virq)
> >>  
> >>      spin_lock_irqsave(&v->arch.vgic.lock, flags);
> >>  
> >> +    n = irq_to_pending(v, virq);
> > 
> > Why this change?
> 
> Because we now need to hold the lock before calling irq_to_pending(),
> which now may call lpi_to_pending().
> 
> >>      /* vcpu offline */
> >>      if ( test_bit(_VPF_down, &v->pause_flags) )
> >>      {
> >> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> >> index 9452fcd..ae8a9de 100644
> >> --- a/xen/include/asm-arm/domain.h
> >> +++ b/xen/include/asm-arm/domain.h
> >> @@ -249,6 +249,7 @@ struct arch_vcpu
> >>          paddr_t rdist_base;
> >>  #define VGIC_V3_RDIST_LAST  (1 << 0)        /* last vCPU of the rdist */
> >>          uint8_t flags;
> >> +        struct list_head pending_lpi_list;
> >>      } vgic;
> >>  
> >>      /* Timer registers  */
> >> diff --git a/xen/include/asm-arm/gic-its.h b/xen/include/asm-arm/gic-its.h
> >> index 4e9841a..1f881c0 100644
> >> --- a/xen/include/asm-arm/gic-its.h
> >> +++ b/xen/include/asm-arm/gic-its.h
> >> @@ -136,6 +136,12 @@ int gicv3_lpi_allocate_host_lpi(struct host_its *its,
> >>  int gicv3_lpi_drop_host_lpi(struct host_its *its,
> >>                              uint32_t devid, uint32_t eventid,
> >>                              uint32_t host_lpi);
> >> +
> >> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
> >> +{
> >> +    return GIC_PRI_IRQ;
> >> +}
> > 
> > Does it mean that we don't allow changes to LPI priorities?
> 
> This is placeholder code for now, until we learn about the virtual
> property table in patch 11/24 (where this function gets amended).
> The new code drop gets away without this function here entirely.
> 
> Cheers,
> Andre.
> 
> >>  #else
> >>  
> >>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
> >> @@ -175,6 +181,10 @@ static inline int gicv3_lpi_drop_host_lpi(struct 
> >> host_its *its,
> >>  {
> >>      return 0;
> >>  }
> >> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
> >> +{
> >> +    return GIC_PRI_IRQ;
> >> +}
> >>  
> >>  #endif /* CONFIG_HAS_ITS */
> >>  
> >> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
> >> index 300f461..4e29ba6 100644
> >> --- a/xen/include/asm-arm/vgic.h
> >> +++ b/xen/include/asm-arm/vgic.h
> >> @@ -83,6 +83,12 @@ struct pending_irq
> >>      struct list_head lr_queue;
> >>  };
> >>  
> >> +struct lpi_pending_irq
> >> +{
> >> +    struct list_head entry;
> >> +    struct pending_irq pirq;
> >> +};
> >> +
> >>  #define NR_INTERRUPT_PER_RANK   32
> >>  #define INTERRUPT_RANK_MASK (NR_INTERRUPT_PER_RANK - 1)
> >>  
> >> @@ -296,8 +302,11 @@ extern struct vcpu *vgic_get_target_vcpu(struct vcpu 
> >> *v, unsigned int virq);
> >>  extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq);
> >>  extern void vgic_vcpu_inject_spi(struct domain *d, unsigned int virq);
> >>  extern void vgic_clear_pending_irqs(struct vcpu *v);
> >> +extern void vgic_init_pending_irq(struct pending_irq *p, unsigned int 
> >> virq);
> >>  extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int 
> >> irq);
> >>  extern struct pending_irq *spi_to_pending(struct domain *d, unsigned int 
> >> irq);
> >> +extern struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int 
> >> irq,
> >> +                                          bool allocate);
> >>  extern struct vgic_irq_rank *vgic_rank_offset(struct vcpu *v, int b, int 
> >> n, int s);
> >>  extern struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int 
> >> irq);
> >>  extern int vgic_emulate(struct cpu_user_regs *regs, union hsr hsr);
> >> -- 
> >> 2.9.0
> >>
> > 
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.