
Re: [Xen-devel] [PATCH v6 07/36] ARM: GICv3 ITS: introduce host LPI array



On Fri, 7 Apr 2017, Andre Przywara wrote:
> The number of LPIs on a host can be potentially huge (millions),
> although in practice it will mostly be reasonable. So prematurely allocating
> an array of struct irq_desc's for each LPI is not an option.
> However Xen itself does not care about LPIs, as every LPI will be injected
> into a guest (Dom0 for now).
> Create a dense data structure (8 bytes) for each LPI which holds just
> enough information to determine the virtual IRQ number and the VCPU into
> which the LPI needs to be injected.
> Also, to avoid artificially limiting the number of LPIs, we create a
> two-level table for holding those structures.
> This patch introduces functions to initialize these tables and to
> create, lookup and destroy entries for a given LPI.
> By using the natural atomicity of accesses to the native uint64_t data
> type, we allocate and access LPI information in a way that does not
> require a lock.
> 
> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>

Reviewed-by: Stefano Stabellini <sstabellini@xxxxxxxxxx>
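
Just to put numbers on the memory-usage claim in the commit message
(assuming 4K pages, so HOST_LPIS_PER_PAGE = 4096 / 8 = 512, and the
default max_lpi_bits of 20):

    first level:  (2^20 - 8192) / 512 = 2032 pointers  ->  ~16KB, allocated upfront
    second level: one 4K page per 512 host LPIs, allocated on demand

So even with all 2^20 LPI IDs in use the second level tops out around
8MB, while an otherwise idle system only pays the ~16KB pointer array
instead of a struct irq_desc per potential LPI.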


> ---
>  xen/arch/arm/gic-v3-lpi.c        | 230 +++++++++++++++++++++++++++++++++++++++
>  xen/include/asm-arm/gic_v3_its.h |   6 +
>  xen/include/asm-arm/irq.h        |   8 ++
>  3 files changed, 244 insertions(+)
> 
> diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
> index d8a4f5a..292f2d0 100644
> --- a/xen/arch/arm/gic-v3-lpi.c
> +++ b/xen/arch/arm/gic-v3-lpi.c
> @@ -20,14 +20,37 @@
>  
>  #include <xen/lib.h>
>  #include <xen/mm.h>
> +#include <xen/sched.h>
>  #include <xen/sizes.h>
>  #include <xen/warning.h>
> +#include <asm/atomic.h>
> +#include <asm/domain.h>
>  #include <asm/gic.h>
>  #include <asm/gic_v3_defs.h>
>  #include <asm/gic_v3_its.h>
>  #include <asm/io.h>
>  #include <asm/page.h>
>  
> +/*
> + * There could be a lot of LPIs on the host side, and they always go to
> + * a guest. So having a struct irq_desc for each of them would be wasteful
> + * and useless.
> + * Instead just store enough information to find the right VCPU to inject
> + * those LPIs into, which just requires the virtual LPI number.
> + * To avoid a global lock on this data structure, we use a lockless
> + * approach relying on the architectural atomicity of native data types:
> + * we read or write the "data" view of this union atomically, and can
> + * then access the broken-down fields in our local copy.
> + */
> +union host_lpi {
> +    uint64_t data;
> +    struct {
> +        uint32_t virt_lpi;
> +        uint16_t dom_id;
> +        uint16_t vcpu_id;
> +    };
> +};
> +
>  #define LPI_PROPTABLE_NEEDS_FLUSHING    (1U << 0)
>  
>  /* Global state */
> @@ -35,12 +58,23 @@ static struct {
>      /* The global LPI property table, shared by all redistributors. */
>      uint8_t *lpi_property;
>      /*
> +     * A two-level table to translate LPIs firing on the host into the
> +     * VCPU and virtual LPI number to inject.
> +     */
> +    union host_lpi **host_lpis;
> +    /*
>       * Number of physical LPIs the host supports. This is a property of
>       * the GIC hardware. We depart from the habit of naming these things
>       * "physical" in Xen, as the GICv3/4 spec uses the term "physical LPI"
>       * in a different context to differentiate them from "virtual LPIs".
>       */
>      unsigned long int max_host_lpi_ids;
> +    /*
> +     * Protects allocation and deallocation of host LPIs and next_free_lpi,
> +     * but not the actual data stored in the host_lpi entry.
> +     */
> +    spinlock_t host_lpis_lock;
> +    uint32_t next_free_lpi;
>      unsigned int flags;
>  } lpi_data;
>  
> @@ -53,6 +87,28 @@ struct lpi_redist_data {
>  static DEFINE_PER_CPU(struct lpi_redist_data, lpi_redist);
>  
>  #define MAX_NR_HOST_LPIS   (lpi_data.max_host_lpi_ids - LPI_OFFSET)
> +#define HOST_LPIS_PER_PAGE      (PAGE_SIZE / sizeof(union host_lpi))
> +
> +static union host_lpi *gic_get_host_lpi(uint32_t plpi)
> +{
> +    union host_lpi *block;
> +
> +    if ( !is_lpi(plpi) || plpi >= MAX_NR_HOST_LPIS + LPI_OFFSET )
> +        return NULL;
> +
> +    ASSERT(plpi >= LPI_OFFSET);
> +
> +    plpi -= LPI_OFFSET;
> +
> +    block = lpi_data.host_lpis[plpi / HOST_LPIS_PER_PAGE];
> +    if ( !block )
> +        return NULL;
> +
> +    /* Matches the write barrier in allocation code. */
> +    smp_rmb();
> +
> +    return &block[plpi % HOST_LPIS_PER_PAGE];
> +}
>  
>  /*
>   * An ITS can refer to redistributors in two ways: either by an ID (possibly
> @@ -220,8 +276,18 @@ int gicv3_lpi_init_rdist(void __iomem * rdist_base)
>  static unsigned int max_lpi_bits = 20;
>  integer_param("max_lpi_bits", max_lpi_bits);
>  
> +/*
> + * Allocate the pointer array (the first level of the two-level table)
> + * for host LPIs; it holds pointers to the pages with the actual
> + * "union host_lpi" entries, which get allocated on demand. Our LPI
> + * limit avoids excessive memory usage.
> + */
>  int gicv3_lpi_init_host_lpis(unsigned int host_lpi_bits)
>  {
> +    unsigned int nr_lpi_ptrs;
> +
> +    /* We rely on the data structure being atomically accessible. */
> +    BUILD_BUG_ON(sizeof(union host_lpi) > sizeof(unsigned long));
> +
>      /*
>       * An implementation needs to support at least 14 bits of LPI IDs.
>       * Tell the user about it, the actual number is reported below.
> @@ -240,11 +306,175 @@ int gicv3_lpi_init_host_lpis(unsigned int host_lpi_bits)
>      if ( lpi_data.max_host_lpi_ids > BIT(24) )
>          warning_add("Using high number of LPIs, limit memory usage with max_lpi_bits\n");
>  
> +    spin_lock_init(&lpi_data.host_lpis_lock);
> +    lpi_data.next_free_lpi = 0;
> +
> +    nr_lpi_ptrs = MAX_NR_HOST_LPIS / HOST_LPIS_PER_PAGE;
> +    lpi_data.host_lpis = xzalloc_array(union host_lpi *, nr_lpi_ptrs);
> +    if ( !lpi_data.host_lpis )
> +        return -ENOMEM;
> +
>      printk("GICv3: using at most %lu LPIs on the host.\n", MAX_NR_HOST_LPIS);
>  
>      return 0;
>  }
>  
> +static int find_unused_host_lpi(uint32_t start, uint32_t *index)
> +{
> +    unsigned int chunk;
> +    uint32_t i = *index;
> +
> +    ASSERT(spin_is_locked(&lpi_data.host_lpis_lock));
> +
> +    for ( chunk = start;
> +          chunk < MAX_NR_HOST_LPIS / HOST_LPIS_PER_PAGE;
> +          chunk++ )
> +    {
> +        /* If we hit an unallocated chunk, use entry 0 in that one. */
> +        if ( !lpi_data.host_lpis[chunk] )
> +        {
> +            *index = 0;
> +            return chunk;
> +        }
> +
> +        /* Find an unallocated entry in this chunk. */
> +        for ( ; i < HOST_LPIS_PER_PAGE; i += LPI_BLOCK )
> +        {
> +            if ( lpi_data.host_lpis[chunk][i].dom_id == DOMID_INVALID )
> +            {
> +                *index = i;
> +                return chunk;
> +            }
> +        }
> +        i = 0;
> +    }
> +
> +    return -1;
> +}
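
Note that the inner loop above only probes i = 0, 32, 64, ...: blocks
are only ever handed out and freed whole and LPI_BLOCK-aligned, so the
dom_id of a block's first entry is representative for the entire block.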
> +
> +/*
> + * Allocate a block of 32 host LPIs for the given domain, mark them as
> + * belonging to it and enable them in the property table. The number of
> + * the first LPI in the block is returned in "first_lpi".
> + */
> +int gicv3_allocate_host_lpi_block(struct domain *d, uint32_t *first_lpi)
> +{
> +    uint32_t lpi, lpi_idx;
> +    int chunk;
> +    int i;
> +
> +    spin_lock(&lpi_data.host_lpis_lock);
> +    lpi_idx = lpi_data.next_free_lpi % HOST_LPIS_PER_PAGE;
> +    chunk = find_unused_host_lpi(lpi_data.next_free_lpi / HOST_LPIS_PER_PAGE,
> +                                 &lpi_idx);
> +
> +    if ( chunk == -1 )           /* rescan for a hole from the beginning */
> +    {
> +        lpi_idx = 0;
> +        chunk = find_unused_host_lpi(0, &lpi_idx);
> +        if ( chunk == -1 )
> +        {
> +            spin_unlock(&lpi_data.host_lpis_lock);
> +            return -ENOSPC;
> +        }
> +    }
> +
> +    /* If we hit an unallocated chunk, we initialize it and use entry 0. */
> +    if ( !lpi_data.host_lpis[chunk] )
> +    {
> +        union host_lpi *new_chunk;
> +
> +        /* TODO: NUMA locality for quicker IRQ path? */
> +        new_chunk = alloc_xenheap_page();
> +        if ( !new_chunk )
> +        {
> +            spin_unlock(&lpi_data.host_lpis_lock);
> +            return -ENOMEM;
> +        }
> +
> +        for ( i = 0; i < HOST_LPIS_PER_PAGE; i += LPI_BLOCK )
> +            new_chunk[i].dom_id = DOMID_INVALID;
> +
> +        /*
> +         * Make sure all slots are really marked empty before publishing the
> +         * new chunk.
> +         */
> +        smp_wmb();
> +
> +        lpi_data.host_lpis[chunk] = new_chunk;
> +        lpi_idx = 0;
> +    }
> +
> +    lpi = chunk * HOST_LPIS_PER_PAGE + lpi_idx;
> +
> +    for ( i = 0; i < LPI_BLOCK; i++ )
> +    {
> +        union host_lpi hlpi;
> +
> +        /*
> +         * Mark this host LPI as belonging to the domain, but don't assign
> +         * any virtual LPI or a VCPU yet.
> +         */
> +        hlpi.virt_lpi = INVALID_LPI;
> +        hlpi.dom_id = d->domain_id;
> +        hlpi.vcpu_id = INVALID_VCPU_ID;
> +        write_u64_atomic(&lpi_data.host_lpis[chunk][lpi_idx + i].data,
> +                         hlpi.data);
> +
> +        /*
> +         * Enable this host LPI, so we don't have to do this during the
> +         * guest's runtime.
> +         */
> +        lpi_data.lpi_property[lpi + i] |= LPI_PROP_ENABLED;
> +    }
> +
> +    lpi_data.next_free_lpi = lpi + LPI_BLOCK;
> +
> +    /*
> +     * We have allocated and initialized the host LPI entries, so it's safe
> +     * to drop the lock now. Access to the structures can be done concurrently
> +     * as it involves only an atomic uint64_t access.
> +     */
> +    spin_unlock(&lpi_data.host_lpis_lock);
> +
> +    if ( lpi_data.flags & LPI_PROPTABLE_NEEDS_FLUSHING )
> +        clean_and_invalidate_dcache_va_range(&lpi_data.lpi_property[lpi],
> +                                             LPI_BLOCK);
> +
> +    *first_lpi = lpi + LPI_OFFSET;
> +
> +    return 0;
> +}
> +
> +void gicv3_free_host_lpi_block(uint32_t first_lpi)
> +{
> +    union host_lpi *hlpi, empty_lpi = { .dom_id = DOMID_INVALID };
> +    int i;
> +
> +    /* This should only be called with the beginning of a block. */
> +    ASSERT((first_lpi % LPI_BLOCK) == 0);
> +
> +    hlpi = gic_get_host_lpi(first_lpi);
> +    if ( !hlpi )
> +        return;         /* Nothing to free here. */
> +
> +    spin_lock(&lpi_data.host_lpis_lock);
> +
> +    for ( i = 0; i < LPI_BLOCK; i++ )
> +        write_u64_atomic(&hlpi[i].data, empty_lpi.data);
> +
> +    /*
> +     * Make sure the next allocation can reuse this block, as we do only
> +     * forward scanning when finding an unused block.
> +     */
> +    if ( lpi_data.next_free_lpi > first_lpi )
> +        lpi_data.next_free_lpi = first_lpi;
> +
> +    spin_unlock(&lpi_data.host_lpis_lock);
> +}
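
For completeness, a sketch of how a caller would use this pair (the
calling context is an assumption, e.g. the ITS device mapping code from
a later patch; the two functions and LPI_BLOCK are from this patch):

    uint32_t first_lpi;
    int ret;

    ret = gicv3_allocate_host_lpi_block(d, &first_lpi);
    if ( ret )
        return ret;

    /* Host LPIs first_lpi .. first_lpi + LPI_BLOCK - 1 now belong to d. */

    /* On the teardown or error path the whole block goes back at once: */
    gicv3_free_host_lpi_block(first_lpi);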
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
> index 13794e0..a96c9dc 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h
> @@ -103,6 +103,9 @@
>  #define HOST_ITS_FLUSH_CMD_QUEUE        (1U << 0)
>  #define HOST_ITS_USES_PTA               (1U << 1)
>  
> +/* We allocate LPIs on the host in chunks of 32 to reduce handling overhead. */
> +#define LPI_BLOCK                       32U
> +
>  /* data structure for each hardware ITS */
>  struct host_its {
>      struct list_head entry;
> @@ -141,6 +144,9 @@ uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta);
>  /* Map a collection for this host CPU to each host ITS. */
>  int gicv3_its_setup_collection(unsigned int cpu);
>  
> +int gicv3_allocate_host_lpi_block(struct domain *d, uint32_t *first_lpi);
> +void gicv3_free_host_lpi_block(uint32_t first_lpi);
> +
>  #else
>  
>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
> diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
> index f940092..7c76626 100644
> --- a/xen/include/asm-arm/irq.h
> +++ b/xen/include/asm-arm/irq.h
> @@ -28,6 +28,9 @@ struct arch_irq_desc {
>  
>  #define LPI_OFFSET      8192
>  
> +/* LPIs are always numbered starting at 8192, so 0 is a good invalid case. */
> +#define INVALID_LPI     0
> +
>  #define nr_irqs NR_IRQS
>  #define nr_static_irqs NR_IRQS
>  #define arch_hwdom_irqs(domid) NR_IRQS
> @@ -41,6 +44,11 @@ struct irq_desc *__irq_to_desc(int irq);
>  
>  void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq);
>  
> +static inline bool is_lpi(unsigned int irq)
> +{
> +    return irq >= LPI_OFFSET;
> +}
> +
>  #define domain_pirq_to_irq(d, pirq) (pirq)
>  
>  bool_t is_assignable_irq(unsigned int irq);
> -- 
> 2.9.0
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 

