
Re: [Xen-devel] [PATCH RFC] xen: arm: context switch vtimer PPI state.



Hi Ian,

On 26/01/15 15:55, Ian Campbell wrote:
> ... instead of artificially masking the timer interrupt in the timer
> state and relying on the guest to unmask (which it isn't required to
> do per the h/w spec, although Linux does)
> 
> To do this introduce the concept of routing a PPI to the currently
> running VCPU. For such interrupts irq_get_domain returns NULL.

> Then we make use of the GICD I[SC]ACTIVER registers to save and
> restore the active state of the interrupt, which prevents the nested
> invocations which the current masking works around.
> 
> For edge triggered interrupts we also need to context switch the
> pending bit via I[SC]PENDR. Note that for level triggered interrupts
> SPENDR sets a latch which is only cleared by ICPENDR (and not by h/w
> state changes), therefore we do not want to context switch the pending
> state for level PPIs -- instead we rely on the context switch of the
> peripheral to restore the correct level.
> 
> RFC Only:
>  - Not implemented for GIC v3 yet.
>  - Lightly tested with hackbench on systems with level and edge
>    triggered vtimer PPI.
>  - Is irq_get_domain == NULL to indicate route-to-current-vcpu the
>    best idea? Any alternatives?

I have introduced an irq_guest structure in my platform device
passthrough series (https://patches.linaro.org/43012/).

Maybe you could extend it to avoid things like irq_get_domain == NULL.
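For instance, something along these lines (a rough sketch; the
route_to_current field and the accessor are hypothetical, not what my
series currently carries):

    struct irq_guest
    {
        struct domain *d;
        /* Hypothetical: the PPI follows whichever vcpu is running. */
        bool route_to_current;
    };

    static inline bool irq_routed_to_current_vcpu(struct irq_desc *desc)
    {
        struct irq_guest *info = irq_get_guest_info(desc); /* hypothetical */

        return info && info->route_to_current;
    }

That would keep irq_get_domain == NULL meaning "not routed to a guest
at all".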

> Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> ---
>  xen/arch/arm/gic-v2.c        |   84 ++++++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/gic.c           |   32 +++++++++++++---
>  xen/arch/arm/irq.c           |   48 ++++++++++++++++++++++--
>  xen/arch/arm/time.c          |   26 +------------
>  xen/arch/arm/vtimer.c        |   24 ++++++++++--
>  xen/include/asm-arm/domain.h |   11 ++++++
>  xen/include/asm-arm/gic.h    |   14 +++++++
>  xen/include/asm-arm/irq.h    |    1 +
>  8 files changed, 204 insertions(+), 36 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
> index 31fb81a..9074aca 100644
> --- a/xen/arch/arm/gic-v2.c
> +++ b/xen/arch/arm/gic-v2.c
> @@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
>      writel_gich(0, GICH_HCR);
>  }
>  
> +static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,
> +                                      struct hwppi_state *s)
> +{
> +    struct pending_irq *p = irq_to_pending(v, virq);
> +    const unsigned int offs = virq / 32;
> +    const unsigned int mask = (1u << (virq % 32));
> +    const unsigned int pendingr = readl_gicd(GICD_ISPENDR + offs*4);
> +    const unsigned int activer = readl_gicd(GICD_ISACTIVER + offs*4);
> +    const unsigned int enabler = readl_gicd(GICD_ISENABLER + offs*4);
> +    const bool is_edge = !!(p->desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);

"For each supported PPI, it is IMPLEMENTATION DEFINED whether software
can program the corresponding Int_config field."

So I would not rely on arch.type as it may not be in sync with the register.

This will become more problematic with the upcoming patch that lets the
guest configure ICFGR0.
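
If you do need the trigger type in this code, reading it back from the
distributor would at least stay in sync with the hardware (a sketch; the
helper name is made up):

    /* GICD_ICFGR holds 2 bits per interrupt; the top bit of each field
     * is 1 for edge-triggered and 0 for level-sensitive. */
    static bool gicv2_irq_is_edge(unsigned int irq)
    {
        uint32_t cfg = readl_gicd(GICD_ICFGR + (irq / 16) * 4);

        return (cfg >> ((irq % 16) * 2 + 1)) & 1;
    }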

> +
> +    ASSERT(!is_idle_vcpu(v));
> +
> +    s->active = !!(activer & mask);
> +    s->enabled = !!(enabler & mask);
> +    s->pending = !!(pendingr & mask);
> +    s->inprogress = test_and_clear_bit(_IRQ_INPROGRESS, &p->desc->status);
> +
> +    /* Write a 1 to IC...R to clear the corresponding bit of state */
> +    if ( s->active )
> +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> +    /*
> +     * For an edge interrupt clear the pending state; for a level interrupt
> +     * this would clear the latch, and there is no need since saving the
> +     * peripheral state (and/or restoring the next VCPU) will cause the
> +     * correct action.
> +     */
> +    if ( is_edge && s->pending )
> +        writel_gicd(mask, GICD_ICPENDR + offs*4);
> +
> +    if ( s->enabled )
> +    {
> +        writel_gicd(mask, GICD_ICENABLER + offs*4);
> +        set_bit(_IRQ_DISABLED, &p->desc->status);

I would prefer it if you used gicv2_irq_disable rather than open-coding
this.
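
I.e. something like (sketch; keep the set_bit if gicv2_irq_disable does
not manage _IRQ_DISABLED itself):

    if ( s->enabled )
    {
        gicv2_irq_disable(p->desc);
        set_bit(_IRQ_DISABLED, &p->desc->status);
    }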

> +    }
> +
> +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> +}
> +
>  static void gicv2_restore_state(const struct vcpu *v)
>  {
>      int i;
> @@ -168,6 +207,49 @@ static void gicv2_restore_state(const struct vcpu *v)
>      writel_gich(GICH_HCR_EN, GICH_HCR);
>  }
>  
> +static void gicv2_restore_hwppi(struct vcpu *v, const unsigned virq,
> +                                const struct hwppi_state *s)
> +{
> +    struct pending_irq *p = irq_to_pending(v, virq);
> +    const unsigned int offs = virq / 32;
> +    const unsigned int mask = (1u << (virq % 32));
> +    struct irq_desc *desc = irq_to_desc(virq);
> +    const bool is_edge = !!(desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
> +    struct pending_irq *pending = irq_to_pending(v, virq);
> +
> +    pending->desc = desc; /* Migrate to new physical processor */
> +
> +    /*
> +     * The IRQ must always have been set inactive and masked etc by
> +     * the saving of the previous state via save_and_mask_hwppi.
> +     */
> +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> +
> +    if ( s->active )
> +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> +
> +    /*
> +     * Restore pending state for edge triggered interrupts only. For
> +     * level triggered interrupts the level will be restored as
> +     * necessary by restoring the state of the relevant peripheral.
> +     *
> +     * For a level triggered interrupt ISPENDR acts as a *latch* which
> +     * is only cleared by ICPENDR (i.e. the input level is no longer
> +     * relevant). We certainly do not want that here.
> +     */
> +    if ( is_edge && s->pending )
> +        writel_gicd(mask, GICD_ISPENDR + offs*4);
> +    if ( s->enabled )
> +    {
> +        clear_bit(_IRQ_DISABLED, &p->desc->status);
> +        dsb(sy);
> +        writel_gicd(mask, GICD_ISENABLER + offs*4);
> +    }
> +    if ( s->inprogress )
> +        set_bit(_IRQ_INPROGRESS, &p->desc->status);
> +}
> +
>  static void gicv2_dump_state(const struct vcpu *v)
>  {
>      int i;
> @@ -660,7 +742,9 @@ const static struct gic_hw_operations gicv2_ops = {
>      .info                = &gicv2_info,
>      .secondary_init      = gicv2_secondary_cpu_init,
>      .save_state          = gicv2_save_state,
> +    .save_and_mask_hwppi = gicv2_save_and_mask_hwppi,
>      .restore_state       = gicv2_restore_state,
> +    .restore_hwppi       = gicv2_restore_hwppi,
>      .dump_state          = gicv2_dump_state,
>      .gicv_setup          = gicv2v_setup,
>      .gic_host_irq_type   = &gicv2_host_irq_type,
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 7d4ee19..7ea980d 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -81,6 +81,12 @@ void gic_save_state(struct vcpu *v)
>      isb();
>  }
>  
> +void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
> +                             struct hwppi_state *s)
> +{

ASSERT(virq >= 16 && virq < 32);

> +    gic_hw_ops->save_and_mask_hwppi(v, virq, s);
> +}
> +
>  void gic_restore_state(struct vcpu *v)
>  {
>      ASSERT(!local_irq_is_enabled());
> @@ -94,6 +100,12 @@ void gic_restore_state(struct vcpu *v)
>      gic_restore_pending_irqs(v);
>  }
>  
> +void gic_restore_hwppi(struct vcpu *v, const unsigned virq,
> +                       const struct hwppi_state *s)
> +{

Ditto

> +    gic_hw_ops->restore_hwppi(v, virq, s);
> +}
> +
>  /*
>   * needs to be called with a valid cpu_mask, ie each cpu in the mask has
>   * already called gic_cpu_init
> @@ -125,11 +137,15 @@ void gic_route_irq_to_xen(struct irq_desc *desc, const cpumask_t *cpu_mask,
>  
>  /* Program the GIC to route an interrupt to a guest
>   *   - desc.lock must be held
> + *   - d may be NULL, in which case the interrupt *must* be a PPI and is
> + *     routed to the vcpu currently running when that PPI fires. In this
> + *     case the code responsible for the related hardware must save and
> + *     restore the PPI with gic_save_and_mask_hwppi/gic_restore_hwppi.
> + *   - if d is non-NULL then the interrupt *must* be an SPI.
>   */

The d == NULL convention sounds like a hackish way to specify that this
IRQ is routed to any guest. I would prefer it if you introduced a
separate function and duplicated the relevant bits.
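
I.e. something along these lines (the function name and exact shape are
hypothetical):

    /* Route a PPI to whichever vcpu is running when it fires. */
    void gic_route_hwppi_to_current_vcpu(struct irq_desc *desc,
                                         unsigned int priority)
    {
        ASSERT(spin_is_locked(&desc->lock));

        desc->handler = gic_hw_ops->gic_guest_irq_type;
        set_bit(_IRQ_GUEST, &desc->status);

        gic_set_irq_properties(desc, cpumask_of(smp_processor_id()),
                               priority);

        /* p->desc is initialised on context restore in gic_restore_hwppi. */
    }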

>  void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
>                              const cpumask_t *cpu_mask, unsigned int priority)
>  {
> -    struct pending_irq *p;
>      ASSERT(spin_is_locked(&desc->lock));
>  
>      desc->handler = gic_hw_ops->gic_guest_irq_type;
> @@ -137,10 +153,16 @@ void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
>  
>      gic_set_irq_properties(desc, cpumask_of(smp_processor_id()), GIC_PRI_IRQ);
>  
> -    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> -     * route SPIs to guests, it doesn't make any difference. */
> -    p = irq_to_pending(d->vcpu[0], desc->irq);
> -    p->desc = desc;
> +    if ( d )
> +    {
> +        struct pending_irq *p;
> +
> +        /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> +         * route SPIs to guests, it doesn't make any difference. */
> +        p = irq_to_pending(d->vcpu[0], desc->irq);
> +        p->desc = desc;
> +    }
> +    /* else p->desc init'd on ctxt restore in gic_restore_hwppi */
>  }
>  
>  int gic_irq_xlate(const u32 *intspec, unsigned int intsize,
> diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
> index ebdf19a..93c38ff 100644
> --- a/xen/arch/arm/irq.c
> +++ b/xen/arch/arm/irq.c
> @@ -202,6 +202,19 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
>          goto out;
>      }
>  
> +    if ( irq == current->arch.virt_timer.irq )

You can't compare irq with current->arch.virt_timer.irq: the former is a
physical IRQ and the latter a vIRQ.

For a guest, the virtual timer IRQ may differ from the platform's
physical one.
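
If you want to catch the vtimer PPI here you would have to compare
against the physical PPI, e.g. (sketch; phys_irq is a hypothetical field
recording the hardware PPI when it is routed):

    if ( irq == current->arch.virt_timer.phys_irq ) /* hypothetical */
    {
        /* ... */
        vgic_vcpu_inject_irq(current, current->arch.virt_timer.irq);
        goto out_no_end;
    }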

> +    {
> +        ASSERT(!irq_get_domain(desc));
> +
> +        desc->handler->end(desc);
> +
> +        set_bit(_IRQ_INPROGRESS, &desc->status);
> +        desc->arch.eoi_cpu = smp_processor_id();

This will become wrong when the vCPU is migrated.

But it looks like nobody uses desc->arch.eoi_cpu, so we should drop it.

> +
> +        vgic_vcpu_inject_irq(current, irq);
> +        goto out_no_end;
> +    }
> +
>      if ( test_bit(_IRQ_GUEST, &desc->status) )
>      {
>          struct domain *d = irq_get_domain(desc);
> @@ -346,8 +359,12 @@ int setup_irq(unsigned int irq, unsigned int irqflags, struct irqaction *new)
>          struct domain *d = irq_get_domain(desc);
>  
>          spin_unlock_irqrestore(&desc->lock, flags);
> -        printk(XENLOG_ERR "ERROR: IRQ %u is already in use by the domain %u\n",
> -               irq, d->domain_id);
> +        if ( d )
> +            printk(XENLOG_ERR "ERROR: IRQ %u is already in use by domain %u\n",
> +                   irq, d->domain_id);
> +        else
> +            printk(XENLOG_ERR
> +                   "ERROR: IRQ %u is already in use by <current-vcpu>\n", 
> irq);
>          return -EBUSY;
>      }
>  
> @@ -378,6 +395,10 @@ err:
>      return rc;
>  }
>  
> +/*
> + * If d == NULL then the IRQ is routed to the current vcpu at the time it
> + * is received, and must be a PPI.
> + */
>  int route_irq_to_guest(struct domain *d, unsigned int irq,
>                         const char * devname)
>  {
> @@ -386,6 +407,8 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
>      unsigned long flags;
>      int retval = 0;
>  
> +    ASSERT( d || ( irq >= 16 && irq < 32 ) );
> +

Please don't ASSERT in this function. It will soon be exposed to the
guest (see my device passthrough series).
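
Returning an error would be safer, e.g.:

    if ( !d && (irq < 16 || irq >= 32) )
        return -EINVAL;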

Regards,

-- 
Julien Grall
