[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH V3 26/29] x86/vvtd: Handle interrupt translation faults



On Thu, Sep 21, 2017 at 11:02:07PM -0400, Lan Tianyu wrote:
> From: Chao Gao <chao.gao@xxxxxxxxx>
> 
> Interrupt translation faults are non-recoverable fault. When faults
> are triggered, it needs to populate fault info to Fault Recording
> Registers and inject vIOMMU msi interrupt to notify guest IOMMU driver
> to deal with faults.
> 
> This patch emulates hardware's handling interrupt translation
> faults (more information about the process can be found in VT-d spec,
> chipter "Translation Faults", section "Non-Recoverable Fault
> Reporting" and section "Non-Recoverable Logging").
> Specifically, viommu_record_fault() records the fault information and
> viommu_report_non_recoverable_fault() reports faults to software.
> Currently, only Primary Fault Logging is supported and the Number of
> Fault-recording Registers is 1.
> 
> Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
> Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
> ---
>  xen/drivers/passthrough/vtd/iommu.h |  60 +++++++--
>  xen/drivers/passthrough/vtd/vvtd.c  | 252 
> +++++++++++++++++++++++++++++++++++-
>  2 files changed, 301 insertions(+), 11 deletions(-)
> 
> diff --git a/xen/drivers/passthrough/vtd/iommu.h 
> b/xen/drivers/passthrough/vtd/iommu.h
> index 790384f..e19b045 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -198,26 +198,66 @@
>  #define DMA_CCMD_CAIG_MASK(x) (((u64)x) & ((u64) 0x3 << 59))
>  
>  /* FECTL_REG */
> -#define DMA_FECTL_IM (((u64)1) << 31)
> +#define DMA_FECTL_IM_SHIFT 31
> +#define DMA_FECTL_IM (1U << DMA_FECTL_IM_SHIFT)
> +#define DMA_FECTL_IP_SHIFT 30
> +#define DMA_FECTL_IP (1U << DMA_FECTL_IP_SHIFT)

Is it fine to change those from uint64_t to unsigned int?

>  
>  /* FSTS_REG */
> -#define DMA_FSTS_PFO ((u64)1 << 0)
> -#define DMA_FSTS_PPF ((u64)1 << 1)
> -#define DMA_FSTS_AFO ((u64)1 << 2)
> -#define DMA_FSTS_APF ((u64)1 << 3)
> -#define DMA_FSTS_IQE ((u64)1 << 4)
> -#define DMA_FSTS_ICE ((u64)1 << 5)
> -#define DMA_FSTS_ITE ((u64)1 << 6)
> -#define DMA_FSTS_FAULTS    DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_AFO | 
> DMA_FSTS_APF | DMA_FSTS_IQE | DMA_FSTS_ICE | DMA_FSTS_ITE
> +#define DMA_FSTS_PFO_SHIFT 0
> +#define DMA_FSTS_PFO (1U << DMA_FSTS_PFO_SHIFT)
> +#define DMA_FSTS_PPF_SHIFT 1
> +#define DMA_FSTS_PPF (1U << DMA_FSTS_PPF_SHIFT)
> +#define DMA_FSTS_AFO (1U << 2)
> +#define DMA_FSTS_APF (1U << 3)
> +#define DMA_FSTS_IQE (1U << 4)
> +#define DMA_FSTS_ICE (1U << 5)
> +#define DMA_FSTS_ITE (1U << 6)

This seemingly non-functional changes should be done in a separate
patch.

> +#define DMA_FSTS_PRO_SHIFT 7
> +#define DMA_FSTS_PRO (1U << DMA_FSTS_PRO_SHIFT)
> +#define DMA_FSTS_FAULTS    (DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_AFO | \
> +                            DMA_FSTS_APF | DMA_FSTS_IQE | DMA_FSTS_ICE | \
> +                            DMA_FSTS_ITE | DMA_FSTS_PRO)
> +#define DMA_FSTS_RW1CS     (DMA_FSTS_PFO | DMA_FSTS_AFO | DMA_FSTS_APF | \
> +                            DMA_FSTS_IQE | DMA_FSTS_ICE | DMA_FSTS_ITE | \
> +                            DMA_FSTS_PRO)
>  #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
>  
>  /* FRCD_REG, 32 bits access */
> -#define DMA_FRCD_F (((u64)1) << 31)
> +#define DMA_FRCD_LEN            0x10
> +#define DMA_FRCD2_OFFSET        0x8
> +#define DMA_FRCD3_OFFSET        0xc
> +#define DMA_FRCD_F_SHIFT        31
> +#define DMA_FRCD_F ((u64)1 << DMA_FRCD_F_SHIFT)
>  #define dma_frcd_type(d) ((d >> 30) & 1)
>  #define dma_frcd_fault_reason(c) (c & 0xff)
>  #define dma_frcd_source_id(c) (c & 0xffff)
>  #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
>  
> +struct vtd_fault_record_register
> +{
> +    union {
> +        struct {
> +            uint64_t lo;
> +            uint64_t hi;
> +        } bits;
> +        struct {
> +            uint64_t rsvd0          :12,
> +                     fault_info     :52;
> +            uint64_t source_id      :16,
> +                     rsvd1          :9,
> +                     pmr            :1,  /* Privilege Mode Requested */
> +                     exe            :1,  /* Execute Permission Requested */
> +                     pasid_p        :1,  /* PASID Present */
> +                     fault_reason   :8,  /* Fault Reason */
> +                     pasid_val      :20, /* PASID Value */
> +                     addr_type      :2,  /* Address Type */
> +                     type           :1,  /* Type. (0) Write (1) 
> Read/AtomicOp */
> +                     fault          :1;  /* Fault */
> +        } fields;
> +    };
> +};
> +
>  enum VTD_FAULT_TYPE
>  {
>      /* Interrupt remapping transition faults */
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c 
> b/xen/drivers/passthrough/vtd/vvtd.c
> index bd1cadd..745941c 100644
> --- a/xen/drivers/passthrough/vtd/vvtd.c
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -19,6 +19,7 @@
>   */
>  
>  #include <xen/domain_page.h>
> +#include <xen/lib.h>
>  #include <xen/sched.h>
>  #include <xen/types.h>
>  #include <xen/viommu.h>
> @@ -41,6 +42,7 @@ unsigned int vvtd_caps = VIOMMU_CAP_IRQ_REMAPPING;
>  struct hvm_hw_vvtd_status {
>      uint32_t eim_enabled : 1,
>               intremap_enabled : 1;
> +    uint32_t fault_index;
>      uint32_t irt_max_entry;
>      /* Interrupt remapping table base gfn */
>      uint64_t irt;
> @@ -86,6 +88,22 @@ struct vvtd *domain_vvtd(struct domain *d)
>      return (d->viommu) ? d->viommu->priv : NULL;
>  }
>  
> +static inline int vvtd_test_and_set_bit(struct vvtd *vvtd, uint32_t reg, int 
> nr)
> +{
> +    return test_and_set_bit(nr, &vvtd->regs->data32[reg/sizeof(uint32_t)]);
> +}
> +
> +static inline int vvtd_test_and_clear_bit(struct vvtd *vvtd, uint32_t reg,
> +                                          int nr)
> +{
> +    return test_and_clear_bit(nr, &vvtd->regs->data32[reg/sizeof(uint32_t)]);
> +}
> +
> +static inline int vvtd_test_bit(struct vvtd *vvtd, uint32_t reg, int nr)
> +{
> +    return test_bit(nr, &vvtd->regs->data32[reg/sizeof(uint32_t)]);
> +}
> +
>  static inline void vvtd_set_bit(struct vvtd *vvtd, uint32_t reg, int nr)
>  {
>      __set_bit(nr, &vvtd->regs->data32[reg/sizeof(uint32_t)]);

You seem to use a mix of __ (locked) and non-locked bitopts, as said
before, please get your locking straight, and use the bitops that you
need accordingly.

> @@ -206,6 +224,23 @@ static int vvtd_delivery(struct domain *d, uint8_t 
> vector,
>      return 0;
>  }
>  
> +void vvtd_generate_interrupt(const struct vvtd *vvtd, uint32_t addr,
> +                             uint32_t data)
> +{
> +    uint8_t dest, dm, dlm, tm, vector;
> +
> +    vvtd_debug("Sending interrupt %x %x to d%d",
> +               addr, data, vvtd->domain->domain_id);
> +
> +    dest = MASK_EXTR(addr, MSI_ADDR_DEST_ID_MASK);
> +    dm = !!(addr & MSI_ADDR_DESTMODE_MASK);

dm wants to be bool instead.

> +    dlm = MASK_EXTR(data, MSI_DATA_DELIVERY_MODE_MASK);
> +    tm = MASK_EXTR(data, MSI_DATA_TRIGGER_MASK);
> +    vector = data & MSI_DATA_VECTOR_MASK;

Please use MASK_EXTR.

You can also initialize all of them at declaration.

> +
> +    vvtd_delivery(vvtd->domain, vector, dest, dm, dlm, tm);
> +}
> +
>  static uint32_t irq_remapping_request_index(
>      const struct arch_irq_remapping_request *irq)
>  {
> @@ -243,6 +278,207 @@ static inline uint32_t irte_dest(struct vvtd *vvtd, 
> uint32_t dest)
>             MASK_EXTR(dest, IRTE_xAPIC_DEST_MASK);
>  }
>  
> +static void vvtd_report_non_recoverable_fault(struct vvtd *vvtd, int reason)
> +{
> +    uint32_t fsts;
> +
> +    fsts = vvtd_get_reg(vvtd, DMAR_FSTS_REG);

Initialize at declaration time.

> +    vvtd_set_bit(vvtd, DMAR_FSTS_REG, reason);
> +
> +    /*
> +     * Accoroding to VT-d spec "Non-Recoverable Fault Event" chapter, if
> +     * there are any previously reported interrupt conditions that are yet to
> +     * be sevices by software, the Fault Event interrrupt is not generated.
> +     */
> +    if ( fsts & DMA_FSTS_FAULTS )
> +        return;
> +
> +    vvtd_set_bit(vvtd, DMAR_FECTL_REG, DMA_FECTL_IP_SHIFT);
> +    if ( !vvtd_test_bit(vvtd, DMAR_FECTL_REG, DMA_FECTL_IM_SHIFT) )
> +    {
> +        uint32_t fe_data, fe_addr;

Missing newline.

> +        fe_data = vvtd_get_reg(vvtd, DMAR_FEDATA_REG);
> +        fe_addr = vvtd_get_reg(vvtd, DMAR_FEADDR_REG);

Initialize at declaration.

> +        vvtd_generate_interrupt(vvtd, fe_addr, fe_data);
> +        vvtd_clear_bit(vvtd, DMAR_FECTL_REG, DMA_FECTL_IP_SHIFT);
> +    }
> +}
> +
> +static void vvtd_update_ppf(struct vvtd *vvtd)
> +{
> +    int i;
> +    uint64_t cap = vvtd_get_reg(vvtd, DMAR_CAP_REG);

This returns a uint32_t. But I see cap is used only once, so there's
no point in having a local variable for it.

> +    unsigned int base = cap_fault_reg_offset(cap);
> +
> +    for ( i = 0; i < cap_num_fault_regs(cap); i++ )
> +    {
> +        if ( vvtd_test_bit(vvtd, base + i * DMA_FRCD_LEN + DMA_FRCD3_OFFSET,
> +                           DMA_FRCD_F_SHIFT) )
> +        {
> +            vvtd_report_non_recoverable_fault(vvtd, DMA_FSTS_PPF_SHIFT);
> +            return;
> +        }
> +    }
> +    /*
> +     * No Primary Fault is in Fault Record Registers, thus clear PPF bit in
> +     * FSTS.
> +     */
> +    vvtd_clear_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_PPF_SHIFT);
> +
> +    /* If no fault is in FSTS, clear pending bit in FECTL. */
> +    if ( !(vvtd_get_reg(vvtd, DMAR_FSTS_REG) & DMA_FSTS_FAULTS) )
> +        vvtd_clear_bit(vvtd, DMAR_FECTL_REG, DMA_FECTL_IP_SHIFT);
> +}
> +
> +/*
> + * Commit a fault to emulated Fault Record Registers.
> + */
> +static void vvtd_commit_frcd(struct vvtd *vvtd, int idx,
> +                             struct vtd_fault_record_register *frcd)

frcd wants to be const.

> +{
> +    uint64_t cap = vvtd_get_reg(vvtd, DMAR_CAP_REG);
> +    unsigned int base = cap_fault_reg_offset(cap);

Same here.

> +
> +    vvtd_set_reg_quad(vvtd, base + idx * DMA_FRCD_LEN, frcd->bits.lo);
> +    vvtd_set_reg_quad(vvtd, base + idx * DMA_FRCD_LEN + 8, frcd->bits.hi);
> +    vvtd_update_ppf(vvtd);
> +}
> +
> +/*
> + * Allocate a FRCD for the caller. If success, return the FRI. Or, return -1
> + * when failure.
> + */
> +static int vvtd_alloc_frcd(struct vvtd *vvtd)
> +{
> +    int prev;
> +    uint64_t cap = vvtd_get_reg(vvtd, DMAR_CAP_REG);
> +    unsigned int base = cap_fault_reg_offset(cap);
> +
> +    /* Set the F bit to indicate the FRCD is in use. */
> +    if ( !vvtd_test_and_set_bit(vvtd,
> +                                base + vvtd->status.fault_index * 
> DMA_FRCD_LEN +
> +                                DMA_FRCD3_OFFSET, DMA_FRCD_F_SHIFT) )
> +    {
> +        prev = vvtd->status.fault_index;
> +        vvtd->status.fault_index = (prev + 1) % cap_num_fault_regs(cap);
> +        return vvtd->status.fault_index;

I would prefer that you return the index as an unsigned int parameter
passed by reference rather than as the return value of the function,
but that might not be the preference of others.

> +    }
> +    return -ENOMEM;
> +}
> +
> +static void vvtd_free_frcd(struct vvtd *vvtd, int i)
> +{
> +    uint64_t cap = vvtd_get_reg(vvtd, DMAR_CAP_REG);
> +    unsigned int base = cap_fault_reg_offset(cap);
> +
> +    vvtd_clear_bit(vvtd, base + i * DMA_FRCD_LEN + DMA_FRCD3_OFFSET,
> +                   DMA_FRCD_F_SHIFT);
> +}
> +
> +static int vvtd_record_fault(struct vvtd *vvtd,
> +                             struct arch_irq_remapping_request *request,
> +                             int reason)
> +{
> +    struct vtd_fault_record_register frcd;
> +    int fault_index;
> +
> +    switch(reason)
> +    {
> +    case VTD_FR_IR_REQ_RSVD:
> +    case VTD_FR_IR_INDEX_OVER:
> +    case VTD_FR_IR_ENTRY_P:
> +    case VTD_FR_IR_ROOT_INVAL:
> +    case VTD_FR_IR_IRTE_RSVD:
> +    case VTD_FR_IR_REQ_COMPAT:
> +    case VTD_FR_IR_SID_ERR:
> +        if ( vvtd_test_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_PFO_SHIFT) )
> +            return X86EMUL_OKAY;
> +
> +        /* No available Fault Record means Fault overflowed */
> +        fault_index = vvtd_alloc_frcd(vvtd);
> +        if ( fault_index == -1 )

Erm, wouldn't vvtd_alloc_frcd return -ENOMEM in case of error? Ie: you
should check if ( fault_index < 0 ).

> +        {
> +            vvtd_report_non_recoverable_fault(vvtd, DMA_FSTS_PFO_SHIFT);
> +            return X86EMUL_OKAY;
> +        }
> +        memset(&frcd, 0, sizeof(frcd));
> +        frcd.fields.fault_reason = (uint8_t)reason;
> +        frcd.fields.fault_info = 
> ((uint64_t)irq_remapping_request_index(request)) << 36;

This line is clearly too long.

> +        frcd.fields.source_id = (uint16_t)request->source_id;

Why do you need the casting for reason and source_id?

> +        frcd.fields.fault = 1;
> +        vvtd_commit_frcd(vvtd, fault_index, &frcd);
> +        return X86EMUL_OKAY;
> +
> +    default:
> +        ASSERT_UNREACHABLE();
> +        break;
> +    }
> +
> +    gdprintk(XENLOG_ERR, "Can't handle vVTD Fault (reason 0x%x).", reason);
> +    domain_crash(vvtd->domain);
> +    return X86EMUL_OKAY;
> +}
> +
> +static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t val)
> +{
> +    /* Writing a 1 means clear fault */
> +    if ( val & DMA_FRCD_F )
> +    {
> +        vvtd_free_frcd(vvtd, 0);
> +        vvtd_update_ppf(vvtd);
> +    }
> +    return X86EMUL_OKAY;
> +}
> +
> +static int vvtd_write_fectl(struct vvtd *vvtd, uint32_t val)
> +{
> +    /*
> +     * Only DMA_FECTL_IM bit is writable. Generate pending event when unmask.
> +     */
> +    if ( !(val & DMA_FECTL_IM) )
> +    {
> +        /* Clear IM */
> +        vvtd_clear_bit(vvtd, DMAR_FECTL_REG, DMA_FECTL_IM_SHIFT);
> +        if ( vvtd_test_and_clear_bit(vvtd, DMAR_FECTL_REG, 
> DMA_FECTL_IP_SHIFT) )
> +        {
> +            uint32_t fe_data, fe_addr;
> +
> +            fe_data = vvtd_get_reg(vvtd, DMAR_FEDATA_REG);
> +            fe_addr = vvtd_get_reg(vvtd, DMAR_FEADDR_REG);
> +            vvtd_generate_interrupt(vvtd, fe_addr, fe_data);

You don't need all this local variables, just put the calls to
vvtd_get_reg at vvtd_generate_interrupt.

> +        }
> +    }
> +    else
> +        vvtd_set_bit(vvtd, DMAR_FECTL_REG, DMA_FECTL_IM_SHIFT);
> +
> +    return X86EMUL_OKAY;
> +}
> +
> +static int vvtd_write_fsts(struct vvtd *vvtd, uint32_t val)
> +{
> +    int i, max_fault_index = DMA_FSTS_PRO_SHIFT;
> +    uint64_t bits_to_clear = val & DMA_FSTS_RW1CS;
> +
> +    if ( bits_to_clear )
> +    {

i wants to be unsigned int and declared here, inside of the if.

> +        i = find_first_bit(&bits_to_clear, max_fault_index / 8 + 1);
> +        while ( i <= max_fault_index )
> +        {
> +            vvtd_clear_bit(vvtd, DMAR_FSTS_REG, i);
> +            i = find_next_bit(&bits_to_clear, max_fault_index / 8 + 1, i + 
> 1);
> +        }

A for would be more suitable for this loop.

> +    }
> +
> +    /*
> +     * Clear IP field when all status fields in the Fault Status Register
> +     * being clear.
> +     */
> +    if ( !((vvtd_get_reg(vvtd, DMAR_FSTS_REG) & DMA_FSTS_FAULTS)) )
> +        vvtd_clear_bit(vvtd, DMAR_FECTL_REG, DMA_FECTL_IP_SHIFT);
> +
> +    return X86EMUL_OKAY;
> +}
> +
>  static void vvtd_handle_gcmd_ire(struct vvtd *vvtd, uint32_t val)
>  {
>      vvtd_info("%sable Interrupt Remapping",
> @@ -336,7 +572,9 @@ static int vvtd_write(struct vcpu *v, unsigned long addr,
>                        unsigned int len, unsigned long val)
>  {
>      struct vvtd *vvtd = domain_vvtd(v->domain);
> +    uint64_t cap = vvtd_get_reg(vvtd, DMAR_CAP_REG);
>      unsigned int offset = addr - vvtd->base_addr;
> +    unsigned int fault_offset = cap_fault_reg_offset(cap);

Again vvtd_get_reg return a uint32_t, and you don't seem to use it
elsewhere apart from cap_fault_reg_offset so please ditch it.

>      vvtd_info("Write offset %x len %d val %lx\n", offset, len, val);
>  
> @@ -350,6 +588,12 @@ static int vvtd_write(struct vcpu *v, unsigned long addr,
>          case DMAR_GCMD_REG:
>              return vvtd_write_gcmd(vvtd, val);
>  
> +        case DMAR_FSTS_REG:
> +            return vvtd_write_fsts(vvtd, val);
> +
> +        case DMAR_FECTL_REG:
> +            return vvtd_write_fectl(vvtd, val);
> +
>          case DMAR_IEDATA_REG:
>          case DMAR_IEADDR_REG:
>          case DMAR_IEUADDR_REG:
> @@ -362,6 +606,9 @@ static int vvtd_write(struct vcpu *v, unsigned long addr,
>              break;
>  
>          default:
> +            if ( offset == fault_offset + DMA_FRCD3_OFFSET )

Parentheses around the addition.

Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.