
Re: [Xen-devel] [PATCH V2 24/25] x86/vvtd: Add queued invalidation (QI) support



On Wed, Aug 09, 2017 at 04:34:25PM -0400, Lan Tianyu wrote:
> From: Chao Gao <chao.gao@xxxxxxxxx>
> 
> Queued Invalidation Interface is an expanded invalidation interface with
> extended capabilities. Hardware implementations report support for queued
> invalidation interface through the Extended Capability Register. The queued
> invalidation interface uses an Invalidation Queue (IQ), which is a circular
> buffer in system memory. Software submits commands by writing Invalidation
> Descriptors to the IQ.
> 
> In this patch, a new function vvtd_process_iq() is used for emulating how
> hardware handles invalidation requests through QI.

It seems like this is an extended feature, which is not needed for
basic functionality. Would it be possible to have this series focus on
the bare-minimum functionality, leaving everything else to a separate
series?

> Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
> Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
> ---
>  xen/drivers/passthrough/vtd/iommu.h |  29 ++++-
>  xen/drivers/passthrough/vtd/vvtd.c  | 244 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 272 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
> index a9e905b..eac0fbe 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -204,6 +204,32 @@
>  #define DMA_IRTA_S(val)         (val & 0xf)
>  #define DMA_IRTA_SIZE(val)      (1UL << (DMA_IRTA_S(val) + 1))
>  
> +/* IQH_REG */
> +#define DMA_IQH_QH_SHIFT        4
> +#define DMA_IQH_QH(val)         ((val >> 4) & 0x7fffULL)

Missing parentheses around val (here and below).
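
I.e. something like (also reusing the _SHIFT define instead of the open-coded
4):

    #define DMA_IQH_QH(val)         (((val) >> DMA_IQH_QH_SHIFT) & 0x7fffULL)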

> +
> +/* IQT_REG */
> +#define DMA_IQT_QT_SHIFT        4
> +#define DMA_IQT_QT(val)         ((val >> 4) & 0x7fffULL)
> +#define DMA_IQT_RSVD            0xfffffffffff80007ULL

~(0x7fffULL << DMA_IQT_QT_SHIFT)?
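
That way the reserved mask is derived from the field layout instead of
open-coding the hex value. An untested sketch:

    #define DMA_IQT_QT_MASK         (0x7fffULL << DMA_IQT_QT_SHIFT)
    #define DMA_IQT_RSVD            (~DMA_IQT_QT_MASK)

Note this would make bit 3 reserved, while the value in the patch leaves it
writable, so please double-check against the spec.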

> +/* IQA_REG */
> +#define DMA_MGAW                39  /* Maximum Guest Address Width */

I've got the feeling this is also in the CPUID info, in which case
shouldn't this match what's reported there?

Or is it expected to have the IOMMU report support for address width
different than the processor?

> +#define DMA_IQA_ADDR(val)       (val & ~0xfffULL)
> +#define DMA_IQA_QS(val)         (val & 0x7)
> +#define DMA_IQA_ENTRY_PER_PAGE  (1 << 8)
> +#define DMA_IQA_RSVD            (~((1ULL << DMA_MGAW) -1 ) | 0xff8ULL)

There seems to be a fair amount of magic constants here, shouldn't
those be added as defines?
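
Something like this, maybe (untested; the names are just suggestions, not
taken from the patch):

    #define DMA_IQA_QS_MASK         0x7ULL      /* Queue Size field */
    #define QINVAL_ENTRY_ORDER      4           /* 16-byte descriptors */
    #define DMA_IQA_ENTRY_PER_PAGE  (PAGE_SIZE >> QINVAL_ENTRY_ORDER)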

> +
> +/* IECTL_REG */
> +#define DMA_IECTL_IM_BIT 31

_SHIFT.
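
I.e.:

    #define DMA_IECTL_IM_SHIFT      31
    #define DMA_IECTL_IM            (1U << DMA_IECTL_IM_SHIFT)

(Using 1U also avoids shifting into the sign bit.)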

> +#define DMA_IECTL_IM            (1 << DMA_IECTL_IM_BIT)
> +#define DMA_IECTL_IP_BIT 30
> +#define DMA_IECTL_IP (((u64)1) << DMA_IECTL_IP_BIT)
> +
> +/* ICS_REG */
> +#define DMA_ICS_IWC_BIT         0
> +#define DMA_ICS_IWC             (1 << DMA_ICS_IWC_BIT)
> +
>  /* PMEN_REG */
>  #define DMA_PMEN_EPM    (((u32)1) << 31)
>  #define DMA_PMEN_PRS    (((u32)1) << 0)
> @@ -238,7 +264,8 @@
>  #define DMA_FSTS_PPF (1U << DMA_FSTS_PPF_BIT)
>  #define DMA_FSTS_AFO (1U << 2)
>  #define DMA_FSTS_APF (1U << 3)
> -#define DMA_FSTS_IQE (1U << 4)
> +#define DMA_FSTS_IQE_BIT 4
> +#define DMA_FSTS_IQE (1U << DMA_FSTS_IQE_BIT)
>  #define DMA_FSTS_ICE (1U << 5)
>  #define DMA_FSTS_ITE (1U << 6)
>  #define DMA_FSTS_PRO_BIT 7
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
> index f1e6d01..4f5e28e 100644
> --- a/xen/drivers/passthrough/vtd/vvtd.c
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -428,6 +428,185 @@ static int vvtd_record_fault(struct vvtd *vvtd,
>      return X86EMUL_OKAY;
>  }
>  
> +/*
> + * Process an invalidation descriptor. Currently, only two descriptor types,
> + * the Interrupt Entry Cache Invalidation Descriptor and the Invalidation
> + * Wait Descriptor, are handled.
> + * @vvtd: the virtual vtd instance
> + * @i: the index of the invalidation descriptor to be processed
> + *
> + * Returns 0 on success, -1 on failure.
> + */
> +static int process_iqe(struct vvtd *vvtd, int i)
> +{
> +    uint64_t iqa, addr;
> +    struct qinval_entry *qinval_page;
> +    void *pg;
> +    int ret;
> +
> +    vvtd_get_reg_quad(vvtd, DMAR_IQA_REG, iqa);
> +    ret = map_guest_page(vvtd->domain, DMA_IQA_ADDR(iqa)>>PAGE_SHIFT,
> +                         (void**)&qinval_page);
> +    if ( ret )
> +    {
> +        gdprintk(XENLOG_ERR, "Can't map guest IRT (rc %d)", ret);

VVTD_DEBUG? Also, this maps the IQ, not the IRT, so the message looks stale.

> +        return -1;

return ret;

> +    }
> +
> +    switch ( qinval_page[i].q.inv_wait_dsc.lo.type )
> +    {
> +    case TYPE_INVAL_WAIT:
> +        if ( qinval_page[i].q.inv_wait_dsc.lo.sw )
> +        {
> +            addr = (qinval_page[i].q.inv_wait_dsc.hi.saddr << 2);
> +            ret = map_guest_page(vvtd->domain, addr >> PAGE_SHIFT, &pg);
> +            if ( ret )
> +            {
> +                gdprintk(XENLOG_ERR, "Can't map guest memory to inform guest "
> +                         "IWC completion (rc %d)", ret);
> +                goto error;
> +            }
> +            *(uint32_t *)((uint64_t)pg + (addr & ~PAGE_MASK)) =
                             ^ no need to cast AFAICT

> +                qinval_page[i].q.inv_wait_dsc.lo.sdata;
> +            unmap_guest_page(pg);

Since this is kind of a sporadic usage, maybe you could use
hvm_copy_to_guest_phys?
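
E.g. (untested sketch, and I'm going from memory on the helper's signature,
so please double-check):

    uint32_t sdata = qinval_page[i].q.inv_wait_dsc.lo.sdata;

    if ( hvm_copy_to_guest_phys(addr, &sdata, sizeof(sdata)) != HVMCOPY_okay )
        goto error;

That would drop the map/unmap pair and the cast above altogether.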

> +        }
> +
> +        /*
> +         * The following code generates an invalidation completion event
> +         * indicating the invalidation wait descriptor completion. Note that
> +         * the following code fragment is not tested properly.
> +         */
> +        if ( qinval_page[i].q.inv_wait_dsc.lo.iflag )
> +        {
> +            uint32_t ie_data, ie_addr;
> +            if ( !vvtd_test_and_set_bit(vvtd, DMAR_ICS_REG, DMA_ICS_IWC_BIT) )
> +            {
> +                __vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_BIT);
> +                if ( !vvtd_test_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_BIT) )
> +                {
> +                    ie_data = vvtd_get_reg(vvtd, DMAR_IEDATA_REG);
> +                    ie_addr = vvtd_get_reg(vvtd, DMAR_IEADDR_REG);
> +                    vvtd_generate_interrupt(vvtd, ie_addr, ie_data);
> +                    __vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_BIT);
> +                }
> +            }
> +        }
> +        break;
> +
> +    case TYPE_INVAL_IEC:
> +        /*
> +         * Currently, no cache is preserved in the hypervisor. Only the
> +         * pIRTEs which are modified in the binding process need updating.
> +         */
> +        break;
> +
> +    default:
> +        goto error;
> +    }
> +
> +    unmap_guest_page((void*)qinval_page);
> +    return 0;
> +
> + error:
> +    unmap_guest_page((void*)qinval_page);
> +    gdprintk(XENLOG_ERR, "Internal error in Queue Invalidation.\n");
> +    domain_crash(vvtd->domain);

Is it really needed to crash the guest?

> +    return -1;

return ret;
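
If the answer to the question above is no, something along these lines
(untested) would let vvtd_process_iq() report IQE instead of crashing the
domain:

    default:
        ret = -EINVAL;    /* make sure ret is set on this path too */
        goto error;
    }

    unmap_guest_page(qinval_page);
    return 0;

 error:
    unmap_guest_page(qinval_page);
    return ret;           /* caller raises IQE; no domain_crash() */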

> +}
> +
> +/*
> + * Invalidate all the descriptors in Invalidation Queue.
> + */
> +static void vvtd_process_iq(struct vvtd *vvtd)
> +{
> +    uint64_t iqh, iqt, iqa, max_entry, i;
> +    int ret = 0;
> +
> +    /*
> +     * No new descriptor is fetched from the Invalidation Queue until
> +     * software clears the IQE field in the Fault Status Register
> +     */
> +    if ( vvtd_test_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_IQE_BIT) )
> +        return;
> +
> +    vvtd_get_reg_quad(vvtd, DMAR_IQH_REG, iqh);
> +    vvtd_get_reg_quad(vvtd, DMAR_IQT_REG, iqt);
> +    vvtd_get_reg_quad(vvtd, DMAR_IQA_REG, iqa);
> +
> +    max_entry = DMA_IQA_ENTRY_PER_PAGE << DMA_IQA_QS(iqa);
> +    iqh = DMA_IQH_QH(iqh);
> +    iqt = DMA_IQT_QT(iqt);
> +
> +    ASSERT(iqt < max_entry);

Is the guest allowed to write to DMAR_IQT_REG? Is there a chance it can
write a value that could make the ASSERT trigger? AFAICT vvtd_write_iqt()
validates the tail against the queue size taken from IQA_REG at write time,
but nothing seems to prevent the guest from subsequently writing a smaller
QS to IQA_REG, leaving the stored tail >= max_entry.

> +    if ( iqh == iqt )
> +        return;
> +
> +    i = iqh;
> +    while ( i != iqt )

This looks like it wants to be a for loop.

> +    {
> +        ret = process_iqe(vvtd, i);
> +        if ( ret )
> +            break;
> +        else

Unneeded else.

> +            i = (i + 1) % max_entry;
> +        vvtd_set_reg_quad(vvtd, DMAR_IQH_REG, i << DMA_IQH_QH_SHIFT);

Can't you do this at the end of the loop instead of doing it on every
iteration?
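
Taking the three comments above together, maybe something like (untested):

    for ( i = iqh; i != iqt; i = (i + 1) % max_entry )
    {
        ret = process_iqe(vvtd, i);
        if ( ret )
            break;
    }
    /* On failure this leaves IQH pointing at the offending descriptor. */
    vvtd_set_reg_quad(vvtd, DMAR_IQH_REG, i << DMA_IQH_QH_SHIFT);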

> +    }
> +
> +    /*
> +     * When IQE is set, IQH references the descriptor associated with the error.
> +     */
> +    if ( ret )
> +        vvtd_report_non_recoverable_fault(vvtd, DMA_FSTS_IQE_BIT);
> +}
> +
> +static int vvtd_write_iqt(struct vvtd *vvtd, unsigned long val)
> +{
> +    uint64_t iqa;
> +
> +    if ( val & DMA_IQT_RSVD )
> +    {
> +        VVTD_DEBUG(VVTD_DBG_RW, "Attempt to set reserved bits in "
> +                   "Invalidation Queue Tail.");

Please try not to split the debug messages across multiple lines; it makes
grepping for them harder.
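
I.e. keep the whole format string on one line, even if that makes the line a
bit longer than usual:

    VVTD_DEBUG(VVTD_DBG_RW,
               "Attempt to set reserved bits in Invalidation Queue Tail.");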

> +        return X86EMUL_OKAY;
> +    }
> +
> +    vvtd_get_reg_quad(vvtd, DMAR_IQA_REG, iqa);
> +    if ( DMA_IQT_QT(val) >= DMA_IQA_ENTRY_PER_PAGE << DMA_IQA_QS(iqa) )
> +    {
> +        VVTD_DEBUG(VVTD_DBG_RW, "IQT: Value %lx exceeded supported max "
> +                   "index.", val);

Same here.

> +        return X86EMUL_OKAY;
> +    }
> +
> +    vvtd_set_reg_quad(vvtd, DMAR_IQT_REG, val);
> +    vvtd_process_iq(vvtd);

Newline.

> +    return X86EMUL_OKAY;
> +}
> +
> +static int vvtd_write_iqa(struct vvtd *vvtd, unsigned long val)
> +{
> +    if ( val & DMA_IQA_RSVD )
> +    {
> +        VVTD_DEBUG(VVTD_DBG_RW, "Attempt to set reserved bits in "
> +                   "Invalidation Queue Address.");
> +        return X86EMUL_OKAY;
> +    }
> +
> +    vvtd_set_reg_quad(vvtd, DMAR_IQA_REG, val);
> +    return X86EMUL_OKAY;
> +}
> +
> +static int vvtd_write_ics(struct vvtd *vvtd, uint32_t val)
> +{
> +    if ( val & DMA_ICS_IWC )
> +    {
> +        __vvtd_clear_bit(vvtd, DMAR_ICS_REG, DMA_ICS_IWC_BIT);
> +        /* When the IWC field is cleared, the IP field needs to be cleared. */
> +        __vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_BIT);
> +    }
> +    return X86EMUL_OKAY;
> +}
> +
>  static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t val)
>  {
>      /* Writing a 1 means clear fault */
> @@ -439,6 +618,29 @@ static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t val)
>      return X86EMUL_OKAY;
>  }
>  
> +static int vvtd_write_iectl(struct vvtd *vvtd, uint32_t val)
> +{
> +    /*
> +     * Only the DMA_IECTL_IM bit is writable. Generate pending events on unmask.
> +     */
> +    if ( !(val & DMA_IECTL_IM) )
> +    {
> +        /* Clear IM and clear IP */
> +        __vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_BIT);
> +        if ( vvtd_test_and_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_BIT) )
> +        {
> +            uint32_t ie_data, ie_addr;

Newline.

> +            ie_data = vvtd_get_reg(vvtd, DMAR_IEDATA_REG);
> +            ie_addr = vvtd_get_reg(vvtd, DMAR_IEADDR_REG);
> +            vvtd_generate_interrupt(vvtd, ie_addr, ie_data);
> +        }
> +    }
> +    else
> +        __vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_BIT);
> +
> +    return X86EMUL_OKAY;
> +}
> +
>  static int vvtd_write_fectl(struct vvtd *vvtd, uint32_t val)
>  {
>      /*
> @@ -481,6 +683,10 @@ static int vvtd_write_fsts(struct vvtd *vvtd, uint32_t val)
>      if ( !((vvtd_get_reg(vvtd, DMAR_FSTS_REG) & DMA_FSTS_FAULTS)) )
>          __vvtd_clear_bit(vvtd, DMAR_FECTL_REG, DMA_FECTL_IP_BIT);
>  
> +    /* Continue to handle invalidations when IQE is clear */
> +    if ( !vvtd_test_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_IQE_BIT) )
> +        vvtd_process_iq(vvtd);
> +
>      return X86EMUL_OKAY;
>  }
>  
> @@ -639,6 +845,36 @@ static int vvtd_write(struct vcpu *v, unsigned long addr,
>              ret = vvtd_write_frcd3(vvtd, val);
>              break;
>  
> +        case DMAR_IECTL_REG:
> +            ret = vvtd_write_iectl(vvtd, val);
> +            break;
> +
> +        case DMAR_ICS_REG:
> +            ret = vvtd_write_ics(vvtd, val);
> +            break;
> +
> +        case DMAR_IQT_REG:
> +            ret = vvtd_write_iqt(vvtd, (uint32_t)val);
> +            break;
> +
> +        case DMAR_IQA_REG:
> +        {
> +            uint32_t iqa_hi;
> +
> +            iqa_hi = vvtd_get_reg(vvtd, DMAR_IQA_REG_HI);
> +            ret = vvtd_write_iqa(vvtd, (uint32_t)val | ((uint64_t)iqa_hi << 32));

Line length.
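
E.g.:

    ret = vvtd_write_iqa(vvtd,
                         (uint32_t)val | ((uint64_t)iqa_hi << 32));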

> +            break;
> +        }
> +
> +        case DMAR_IQA_REG_HI:
> +        {
> +            uint32_t iqa_lo;
> +
> +            iqa_lo = vvtd_get_reg(vvtd, DMAR_IQA_REG);
> +            ret = vvtd_write_iqa(vvtd, (val << 32) | iqa_lo);
> +            break;
> +        }
> +
>          case DMAR_IEDATA_REG:
>          case DMAR_IEADDR_REG:
>          case DMAR_IEUADDR_REG:
> @@ -668,6 +904,14 @@ static int vvtd_write(struct vcpu *v, unsigned long addr,
>              ret = vvtd_write_frcd3(vvtd, val >> 32);
>              break;
>  
> +        case DMAR_IQT_REG:
> +            ret = vvtd_write_iqt(vvtd, val);
> +            break;
> +
> +        case DMAR_IQA_REG:
> +            ret = vvtd_write_iqa(vvtd, val);
> +            break;
> +
>          default:
>              ret = X86EMUL_UNHANDLEABLE;
>              break;
> -- 
> 1.8.3.1

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel