
Re: [Xen-devel] [PATCH v4 11/28] x86/vvtd: Process interrupt remapping request



On Fri, Feb 09, 2018 at 05:44:17PM +0000, Roger Pau Monné wrote:
>On Fri, Nov 17, 2017 at 02:22:18PM +0800, Chao Gao wrote:
>> When a remapping interrupt request arrives, remapping hardware computes the
>> interrupt_index per the algorithm described in the VT-d spec section
>> "Interrupt Remapping Table", interprets the IRTE and generates a remapped
>> interrupt request.
>> 
>> This patch introduces viommu_handle_irq_request() to emulate how remapping
>> hardware handles a remapping interrupt request. This patch also introduces
>> a counter, inflight_intr, which is used to count the number of interrupts
>> being handled. The reason we need this counter is that VT-d hardware should
>> drain in-flight interrupts before setting flags to indicate that certain
>> operations have completed. These operations include enabling interrupt
>> remapping and performing certain kinds of invalidation requests. In vvtd,
>> we also try to drain in-flight interrupts by waiting until inflight_intr
>> drops to 0.
>> 
>> Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
>> Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
>> 
>> ---
>> v4:
>>  - use "#define" to define interrupt remapping transition faults
>>  rather than using an enum
>>  - use switch-case rather than if-else in irq_remapping_request_index()
>>  and vvtd_irq_request_sanity_check()
>>  - introduce a counter inflight_intr
>> 
>> v3:
>>  - Encode map_guest_page()'s error into void* to avoid using another parameter
>> ---
>>  xen/drivers/passthrough/vtd/iommu.h |  15 +++
>>  xen/drivers/passthrough/vtd/vvtd.c  | 219 ++++++++++++++++++++++++++++++++++++
>>  2 files changed, 234 insertions(+)
>> 
>> diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
>> index 9c59aeb..82edd2a 100644
>> --- a/xen/drivers/passthrough/vtd/iommu.h
>> +++ b/xen/drivers/passthrough/vtd/iommu.h
>> @@ -216,6 +216,15 @@
>>  #define dma_frcd_source_id(c) (c & 0xffff)
>>  #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
>>  
>> +/* Interrupt remapping transition faults */
>> +#define VTD_FR_IR_REQ_RSVD      0x20
>> +#define VTD_FR_IR_INDEX_OVER    0x21
>> +#define VTD_FR_IR_ENTRY_P       0x22
>> +#define VTD_FR_IR_ROOT_INVAL    0x23
>> +#define VTD_FR_IR_IRTE_RSVD     0x24
>> +#define VTD_FR_IR_REQ_COMPAT    0x25
>> +#define VTD_FR_IR_SID_ERR       0x26
>> +
>>  /*
>>   * 0: Present
>>   * 1-11: Reserved
>> @@ -356,6 +365,12 @@ struct iremap_entry {
>>  };
>>  
>>  /*
>> + * When VT-d doesn't enable extended interrupt mode, hardware interprets only
>> + * 8 bits ([15:8]) of the Destination-ID field in the IRTEs.
>> + */
>> +#define IRTE_xAPIC_DEST_MASK 0xff00
>> +
>> +/*
>>   * Posted-interrupt descriptor address is 64 bits and 64-byte aligned; only
>>   * the upper 26 bits of the least significant 32 bits are available.
>>   */
>> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
>> index 06e522a..927e715 100644
>> --- a/xen/drivers/passthrough/vtd/vvtd.c
>> +++ b/xen/drivers/passthrough/vtd/vvtd.c
>> @@ -22,11 +22,15 @@
>>  #include <xen/types.h>
>>  #include <xen/viommu.h>
>>  #include <xen/xmalloc.h>
>> +#include <asm/apic.h>
>>  #include <asm/current.h>
>> +#include <asm/event.h>
>> +#include <asm/io_apic.h>
>>  #include <asm/hvm/domain.h>
>>  #include <asm/p2m.h>
>>  
>>  #include "iommu.h"
>> +#include "vtd.h"
>>  
>>  /* Supported capabilities by vvtd */
>>  #define VVTD_MAX_CAPS VIOMMU_CAP_IRQ_REMAPPING
>> @@ -52,6 +56,8 @@ struct vvtd {
>>      uint64_t base_addr;
>>      /* Point back to the owner domain */
>>      struct domain *domain;
>> +    /* # of in-flight interrupts */
>> +    atomic_t inflight_intr;
>>  
>>      struct hvm_hw_vvtd hw;
>>      void *irt_base;
>> @@ -181,6 +187,109 @@ static void unmap_guest_pages(void *va, uint32_t nr)
>>          put_page_and_type(mfn_to_page(mfn[i]));
>>  }
>>  
>> +static int vvtd_delivery(struct domain *d, uint8_t vector,
>> +                         uint32_t dest, bool dest_mode,
>> +                         uint8_t delivery_mode, uint8_t trig_mode)
>> +{
>> +    struct vlapic *target;
>> +    struct vcpu *v;
>> +
>> +    switch ( delivery_mode )
>> +    {
>> +    case dest_LowestPrio:
>> +        target = vlapic_lowest_prio(d, NULL, 0, dest, dest_mode);
>> +        if ( target != NULL )
>> +        {
>> +            vvtd_debug("d%d: dest=v%d dlm=%x vector=%d trig_mode=%d\n",
>> +                       vlapic_domain(target)->domain_id,
>> +                       vlapic_vcpu(target)->vcpu_id,
>> +                       delivery_mode, vector, trig_mode);
>> +            vlapic_set_irq(target, vector, trig_mode);
>> +            break;
>> +        }
>> +        vvtd_debug("d%d: null round robin: vector=%02x\n",
>> +                   d->domain_id, vector);
>> +        break;
>> +
>> +    case dest_Fixed:
>> +        for_each_vcpu ( d, v )
>> +            if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
>> +            {
>> +                vvtd_debug("d%d: dest=v%d dlm=%x vector=%d trig_mode=%d\n",
>> +                           v->domain->domain_id, v->vcpu_id,
>> +                           delivery_mode, vector, trig_mode);
>> +                vlapic_set_irq(vcpu_vlapic(v), vector, trig_mode);
>> +            }
>> +        break;
>> +
>> +    case dest_NMI:
>> +        for_each_vcpu ( d, v )
>> +            if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) &&
>> +                 !test_and_set_bool(v->nmi_pending) )
>> +                vcpu_kick(v);
>
>Doing these loops here seems quite bad from a performance PoV,
>especially taking into account that this code is going to be used with
>> 128 vCPUs.

Maybe. But I prefer not to do optimizations at this early stage.
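
If/when we get to optimizing this, a cheap first step might look roughly like
the following (only a sketch, and it ignores the broadcast destination case):

    case dest_Fixed:
        for_each_vcpu ( d, v )
            if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
            {
                vlapic_set_irq(vcpu_vlapic(v), vector, trig_mode);
                /* A physical-mode unicast destination matches at most one vCPU. */
                if ( !dest_mode )
                    break;
            }
        break;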

>
>> +        break;
>> +
>> +    default:
>> +        gdprintk(XENLOG_WARNING, "Unsupported VTD delivery mode %d\n",
>> +                 delivery_mode);
>> +        return -EINVAL;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/*
>> + * Compute the IRTE index for a given interrupt request. On success, return
>> + * 0 and set index to reference the corresponding IRTE. Otherwise, return
>> + * < 0, i.e. -1 when the irq request isn't in remapping format.
>> + */
>> +static int irq_remapping_request_index(
>> +    const struct arch_irq_remapping_request *irq, uint32_t *index)
>> +{
>> +    switch ( irq->type )
>> +    {
>> +    case VIOMMU_REQUEST_IRQ_MSI:
>> +    {
>> +        struct msi_msg_remap_entry msi_msg =
>> +        {
>> +            .address_lo = { .val = irq->msg.msi.addr },
>
>Can't you just use .address_lo.val = irq->...

Will do.
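
Something like this, I suppose (same structure, just using the nested
designator directly):

    struct msi_msg_remap_entry msi_msg =
    {
        .address_lo.val = irq->msg.msi.addr,
        .data = irq->msg.msi.data,
    };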

>
>> +            .data = irq->msg.msi.data,
>> +        };
>> +
>> +        if ( !msi_msg.address_lo.format )
>> +            return -1;
>
>In all the other functions you already return some kind of meaningful
>error code, please do so here also.

Ok.
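
For example (assuming -EINVAL is an acceptable choice here):

    if ( !msi_msg.address_lo.format )
        return -EINVAL;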

>
>> +
>> +        *index = (msi_msg.address_lo.index_15 << 15) +
>> +                msi_msg.address_lo.index_0_14;
>> +        if ( msi_msg.address_lo.SHV )
>> +            *index += (uint16_t)msi_msg.data;
>> +        break;
>> +    }
>> +
>> +    case VIOMMU_REQUEST_IRQ_APIC:
>> +    {
>> +        struct IO_APIC_route_remap_entry remap_rte = { .val = irq->msg.rte };
>> +
>> +        if ( !remap_rte.format )
>> +            return -1;
>> +
>> +        *index = (remap_rte.index_15 << 15) + remap_rte.index_0_14;
>> +        break;
>> +    }
>> +
>> +    default:
>> +        ASSERT_UNREACHABLE();
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static inline uint32_t irte_dest(struct vvtd *vvtd, uint32_t dest)
>> +{
>> +    /* In xAPIC mode, only 8 bits ([15:8]) are valid */
>> +    return vvtd->hw.eim_enabled ? dest
>> +                                : MASK_EXTR(dest, IRTE_xAPIC_DEST_MASK);
>> +}
>> +
>>  static void write_gcmd_ire(struct vvtd *vvtd, uint32_t val)
>>  {
>>      bool set = val & DMA_GCMD_IRE;
>> @@ -323,6 +432,115 @@ static const struct hvm_mmio_ops vvtd_mmio_ops = {
>>      .write = vvtd_write
>>  };
>>  
>> +static void vvtd_handle_fault(struct vvtd *vvtd,
>> +                              const struct arch_irq_remapping_request *irq,
>> +                              struct iremap_entry *irte,
>> +                              unsigned int fault)
>> +{
>> +    switch ( fault )
>> +    {
>> +    case VTD_FR_IR_SID_ERR:
>> +    case VTD_FR_IR_IRTE_RSVD:
>> +    case VTD_FR_IR_ENTRY_P:
>> +        if ( qinval_fault_disable(*irte) )
>> +            break;
>> +    /* fall through */
>> +    case VTD_FR_IR_REQ_RSVD:
>> +    case VTD_FR_IR_INDEX_OVER:
>> +    case VTD_FR_IR_ROOT_INVAL:
>> +        /* TODO: handle fault (e.g. record and report this fault to the VM) */
>> +        break;
>> +
>> +    default:
>> +        vvtd_debug("d%d can't handle VT-d fault %x\n", vvtd->domain->domain_id,
>> +                   fault);
>> +    }
>> +    return;
>> +}
>> +
>> +static bool vvtd_irq_request_sanity_check(const struct vvtd *vvtd,
>> +                                   const struct arch_irq_remapping_request 
>> *irq)
>> +{
>> +    switch ( irq->type )
>> +    {
>> +    case VIOMMU_REQUEST_IRQ_APIC:
>> +    {
>> +        struct IO_APIC_route_remap_entry rte = { .val = irq->msg.rte };
>> +
>> +        return !rte.reserved;
>> +    }
>> +
>> +    case VIOMMU_REQUEST_IRQ_MSI:
>> +        return true;
>> +    }
>> +
>> +    ASSERT_UNREACHABLE();
>> +    return false;
>> +}
>> +
>> +static int vvtd_get_entry(struct vvtd *vvtd,
>> +                          const struct arch_irq_remapping_request *irq,
>> +                          struct iremap_entry *dest)
>const for both vvtd and dest?

Constifying vvtd is OK. 'dest' is used to store the entry corresponding to the
request, so 'dest' cannot be const.
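
The prototype would then become something like:

    static int vvtd_get_entry(const struct vvtd *vvtd,
                              const struct arch_irq_remapping_request *irq,
                              struct iremap_entry *dest)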

>
>> +{
>> +    uint32_t entry;
>> +    struct iremap_entry irte;
>> +    int ret = irq_remapping_request_index(irq, &entry);
>> +
>> +    ASSERT(!ret);
>> +
>> +    vvtd_debug("d%d: interpret a request with index %x\n",
>> +               vvtd->domain->domain_id, entry);
>> +
>> +    if ( !vvtd_irq_request_sanity_check(vvtd, irq) )
>> +        return VTD_FR_IR_REQ_RSVD;
>> +    else if ( entry > vvtd->hw.irt_max_entry )
>> +        return VTD_FR_IR_INDEX_OVER;
>> +    else if ( !vvtd->irt_base )
>
>No need for the 'else', since you are already using return.
>
>> +        return VTD_FR_IR_ROOT_INVAL;
>> +
>> +    irte = ((struct iremap_entry*)vvtd->irt_base)[entry];
>> +
>> +    if ( !qinval_present(irte) )
>> +        ret = VTD_FR_IR_ENTRY_P;
>> +    else if ( (irte.remap.res_1 || irte.remap.res_2 || irte.remap.res_3 ||
>> +               irte.remap.res_4) )
>> +        ret = VTD_FR_IR_IRTE_RSVD;
>> +
>> +    /* FIXME: We don't check against the source ID */
>> +
>> +    dest->val = irte.val;
>> +
>> +    return ret;
>> +}
>> +
>> +static int vvtd_handle_irq_request(const struct domain *d,
>
>constifying domain here is not the best practice IMHO. In the function
>you are actually modifying vvtd, which is fine because it's a pointer
>but it's conceptually inside of domain.

Ok.
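
i.e. presumably just dropping the const on the domain pointer (adjusting the
viommu hook prototype to match, if needed):

    static int vvtd_handle_irq_request(struct domain *d,
                                       const struct arch_irq_remapping_request *irq)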

>
>> +                                   const struct arch_irq_remapping_request *irq)
>> +{
>> +    struct iremap_entry irte;
>> +    int ret;
>> +    struct vvtd *vvtd = domain_vvtd(d);
>> +
>> +    if ( !vvtd || !vvtd->hw.intremap_enabled )
>> +        return -ENODEV;
>> +
>> +    atomic_inc(&vvtd->inflight_intr);
>> +    ret = vvtd_get_entry(vvtd, irq, &irte);
>> +    if ( ret )
>> +    {
>> +        vvtd_handle_fault(vvtd, irq, &irte, ret);
>> +        goto out;
>> +    }
>> +
>> +    ret = vvtd_delivery(vvtd->domain, irte.remap.vector,
>> +                        irte_dest(vvtd, irte.remap.dst),
>> +                        irte.remap.dm, irte.remap.dlm,
>> +                        irte.remap.tm);
>> +
>> + out:
>> +    atomic_dec(&vvtd->inflight_intr);
>
>So inflight_intr seem to be quite pointless, you only use it in this
>function and it's never read AFAICT.

I will introduce this field when it is first used.
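
For reference, the intended later use is roughly the drain described in the
commit message, e.g. something like (only a sketch; the real code will come
with the patch that first reads the counter):

    /* Drain in-flight interrupts before reporting the operation complete. */
    while ( atomic_read(&vvtd->inflight_intr) )
        cpu_relax();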

Thanks
Chao
