[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH V3 11/29] x86/hvm: Introduce a emulated VTD for HVM



On Thu, Sep 21, 2017 at 11:01:52PM -0400, Lan Tianyu wrote:
> From: Chao Gao <chao.gao@xxxxxxxxx>
> 
> This patch adds create/destroy function for the emulated VTD
> and adapts it to the common VIOMMU abstraction.
> 
> Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
> Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
> ---
>  xen/drivers/passthrough/vtd/Makefile |   7 +-
>  xen/drivers/passthrough/vtd/iommu.h  |  23 +++++-
>  xen/drivers/passthrough/vtd/vvtd.c   | 147 
> +++++++++++++++++++++++++++++++++++
>  3 files changed, 170 insertions(+), 7 deletions(-)
>  create mode 100644 xen/drivers/passthrough/vtd/vvtd.c
> 
> diff --git a/xen/drivers/passthrough/vtd/Makefile 
> b/xen/drivers/passthrough/vtd/Makefile
> index f302653..163c7fe 100644
> --- a/xen/drivers/passthrough/vtd/Makefile
> +++ b/xen/drivers/passthrough/vtd/Makefile
> @@ -1,8 +1,9 @@
>  subdir-$(CONFIG_X86) += x86
>  
> -obj-y += iommu.o
>  obj-y += dmar.o
> -obj-y += utils.o
> -obj-y += qinval.o
>  obj-y += intremap.o
> +obj-y += iommu.o
> +obj-y += qinval.o
>  obj-y += quirks.o
> +obj-y += utils.o

Why do you need to shuffle the list above?

Also I'm not sure the Intel vIOMMU implementation should live here. As
you can see the path is:

xen/drivers/passthrough/vtd/

The vIOMMU is not tied to passthrough at all, so I would rather place
it in:

xen/drivers/vvtd/

Or maybe you can create something like:

xen/drivers/viommu/

So that all vIOMMU implementations can share some code.

> +obj-$(CONFIG_VIOMMU) += vvtd.o
> diff --git a/xen/drivers/passthrough/vtd/iommu.h 
> b/xen/drivers/passthrough/vtd/iommu.h
> index d7e433e..ef038c9 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -66,6 +66,12 @@
>  #define VER_MAJOR(v)        (((v) & 0xf0) >> 4)
>  #define VER_MINOR(v)        ((v) & 0x0f)
>  
> +/* Supported Adjusted Guest Address Widths */
> +#define DMA_CAP_SAGAW_SHIFT         8
> + /* 39-bit AGAW, 3-level page-table */
> +#define DMA_CAP_SAGAW_39bit         (0x2ULL << DMA_CAP_SAGAW_SHIFT)
> +#define DMA_CAP_ND_64K              6ULL
> +
>  /*
>   * Decoding Capability Register
>   */
> @@ -74,6 +80,7 @@
>  #define cap_write_drain(c)     (((c) >> 54) & 1)
>  #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
>  #define cap_num_fault_regs(c)  ((((c) >> 40) & 0xff) + 1)
> +#define cap_set_num_fault_regs(c)  ((((c) - 1) & 0xff) << 40)
>  #define cap_pgsel_inv(c)       (((c) >> 39) & 1)
>  
>  #define cap_super_page_val(c)  (((c) >> 34) & 0xf)
> @@ -85,11 +92,13 @@
>  #define cap_sps_1tb(c)         ((c >> 37) & 1)
>  
>  #define cap_fault_reg_offset(c)    ((((c) >> 24) & 0x3ff) * 16)
> +#define cap_set_fault_reg_offset(c) ((((c) / 16) & 0x3ff) << 24 )
>  
>  #define cap_isoch(c)        (((c) >> 23) & 1)
>  #define cap_qos(c)        (((c) >> 22) & 1)
>  #define cap_mgaw(c)        ((((c) >> 16) & 0x3f) + 1)
> -#define cap_sagaw(c)        (((c) >> 8) & 0x1f)
> +#define cap_set_mgaw(c)     ((((c) - 1) & 0x3f) << 16)
> +#define cap_sagaw(c)        (((c) >> DMA_CAP_SAGAW_SHIFT) & 0x1f)
>  #define cap_caching_mode(c)    (((c) >> 7) & 1)
>  #define cap_phmr(c)        (((c) >> 6) & 1)
>  #define cap_plmr(c)        (((c) >> 5) & 1)
> @@ -104,10 +113,16 @@
>  #define ecap_niotlb_iunits(e)    ((((e) >> 24) & 0xff) + 1)
>  #define ecap_iotlb_offset(e)     ((((e) >> 8) & 0x3ff) * 16)
>  #define ecap_coherent(e)         ((e >> 0) & 0x1)
> -#define ecap_queued_inval(e)     ((e >> 1) & 0x1)
> +#define DMA_ECAP_QI_SHIFT        1
> +#define DMA_ECAP_QI              (1ULL << DMA_ECAP_QI_SHIFT)
> +#define ecap_queued_inval(e)     ((e >> DMA_ECAP_QI_SHIFT) & 0x1)

Looks like this could be based on MASK_EXTR instead, but seeing how
the file is full of open-coded mask extracts I'm not sure it's worth
it anymore.

>  #define ecap_dev_iotlb(e)        ((e >> 2) & 0x1)
> -#define ecap_intr_remap(e)       ((e >> 3) & 0x1)
> -#define ecap_eim(e)              ((e >> 4) & 0x1)
> +#define DMA_ECAP_IR_SHIFT        3
> +#define DMA_ECAP_IR              (1ULL << DMA_ECAP_IR_SHIFT)
> +#define ecap_intr_remap(e)       ((e >> DMA_ECAP_IR_SHIFT) & 0x1)
> +#define DMA_ECAP_EIM_SHIFT       4
> +#define DMA_ECAP_EIM             (1ULL << DMA_ECAP_EIM_SHIFT)
> +#define ecap_eim(e)              ((e >> DMA_ECAP_EIM_SHIFT) & 0x1)

Maybe worth placing all the DMA_ECAP_* defines in a separate section?
Seems like how it's done for other features like DMA_FSTS or
DMA_CCMD.

>  #define ecap_cache_hints(e)      ((e >> 5) & 0x1)
>  #define ecap_pass_thru(e)        ((e >> 6) & 0x1)
>  #define ecap_snp_ctl(e)          ((e >> 7) & 0x1)
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c 
> b/xen/drivers/passthrough/vtd/vvtd.c
> new file mode 100644
> index 0000000..c851ec7
> --- /dev/null
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -0,0 +1,147 @@
> +/*
> + * vvtd.c
> + *
> + * virtualize VTD for HVM.
> + *
> + * Copyright (C) 2017 Chao Gao, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms and conditions of the GNU General Public
> + * License, version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public
> + * License along with this program; If not, see 
> <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <xen/domain_page.h>
> +#include <xen/sched.h>
> +#include <xen/types.h>
> +#include <xen/viommu.h>
> +#include <xen/xmalloc.h>
> +#include <asm/current.h>
> +#include <asm/hvm/domain.h>
> +#include <asm/page.h>
> +
> +#include "iommu.h"
> +
> +/* Supported capabilities by vvtd */
> +unsigned int vvtd_caps = VIOMMU_CAP_IRQ_REMAPPING;

static?

Or even better, why is this not a define like VIOMMU_MAX_CAPS or
similar.

> +
> +union hvm_hw_vvtd_regs {
> +    uint32_t data32[256];
> +    uint64_t data64[128];
> +};

Do you really need to store all the register space instead of only
storing specific registers?

> +
> +struct vvtd {
> +    /* Address range of remapping hardware register-set */
> +    uint64_t base_addr;
> +    uint64_t length;

The length field doesn't seem to be used below.

> +    /* Point back to the owner domain */
> +    struct domain *domain;
> +    union hvm_hw_vvtd_regs *regs;

Does this need to be a pointer?

> +    struct page_info *regs_page;
> +};
> +
> +static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg, uint32_t 
> value)
> +{
> +    vtd->regs->data32[reg/sizeof(uint32_t)] = value;
> +}
> +
> +static inline uint32_t vvtd_get_reg(struct vvtd *vtd, uint32_t reg)
> +{
> +    return vtd->regs->data32[reg/sizeof(uint32_t)];
> +}
> +
> +static inline void vvtd_set_reg_quad(struct vvtd *vtd, uint32_t reg,
> +                                     uint64_t value)
> +{
> +    vtd->regs->data64[reg/sizeof(uint64_t)] = value;
> +}
> +
> +static inline uint64_t vvtd_get_reg_quad(struct vvtd *vtd, uint32_t reg)
> +{
> +    return vtd->regs->data64[reg/sizeof(uint64_t)];
> +}
> +
> +static void vvtd_reset(struct vvtd *vvtd, uint64_t capability)
> +{
> +    uint64_t cap = cap_set_num_fault_regs(1ULL) |
> +                   cap_set_fault_reg_offset(0x220ULL) |
> +                   cap_set_mgaw(39ULL) | DMA_CAP_SAGAW_39bit |
> +                   DMA_CAP_ND_64K;
> +    uint64_t ecap = DMA_ECAP_IR | DMA_ECAP_EIM | DMA_ECAP_QI;
> +
> +    vvtd_set_reg(vvtd, DMAR_VER_REG, 0x10UL);
> +    vvtd_set_reg_quad(vvtd, DMAR_CAP_REG, cap);
> +    vvtd_set_reg_quad(vvtd, DMAR_ECAP_REG, ecap);
> +    vvtd_set_reg(vvtd, DMAR_FECTL_REG, 0x80000000UL);
> +    vvtd_set_reg(vvtd, DMAR_IECTL_REG, 0x80000000UL);
> +}
> +
> +static int vvtd_create(struct domain *d, struct viommu *viommu)
> +{
> +    struct vvtd *vvtd;
> +    int ret;
> +
> +    if ( !is_hvm_domain(d) || (viommu->base_address & (PAGE_SIZE - 1)) ||
> +        (~vvtd_caps & viommu->caps) )
> +        return -EINVAL;
> +
> +    ret = -ENOMEM;
> +    vvtd = xzalloc_bytes(sizeof(struct vvtd));
> +    if ( !vvtd )
> +        return ret;
> +
> +    vvtd->regs_page = alloc_domheap_page(d, MEMF_no_owner);
> +    if ( !vvtd->regs_page )
> +        goto out1;
> +
> +    vvtd->regs = __map_domain_page_global(vvtd->regs_page);
> +    if ( !vvtd->regs )
> +        goto out2;
> +    clear_page(vvtd->regs);

Not sure why vvtd->regs needs to be a pointer, and why it needs to use
a full page. AFAICT the size of hvm_hw_vvtd_regs is 1024B, so you are
wasting 3/4 of a page.

> +
> +    vvtd_reset(vvtd, viommu->caps);
> +    vvtd->base_addr = viommu->base_address;
> +    vvtd->domain = d;
> +
> +    viommu->priv = vvtd;
> +
> +    return 0;
> +
> + out2:
> +    free_domheap_page(vvtd->regs_page);
> + out1:
> +    xfree(vvtd);
> +    return ret;

You should try to avoid using labels. I think this can be solved by
not allocating a separate page for regs.

> +}
> +
> +static int vvtd_destroy(struct viommu *viommu)
> +{
> +    struct vvtd *vvtd = viommu->priv;
> +
> +    if ( vvtd )
> +    {
> +        unmap_domain_page_global(vvtd->regs);
> +        free_domheap_page(vvtd->regs_page);
> +        xfree(vvtd);
> +    }
> +    return 0;
> +}
> +
> +struct viommu_ops vvtd_hvm_vmx_ops = {
> +    .create = vvtd_create,
> +    .destroy = vvtd_destroy
> +};
> +
> +static int vvtd_register(void)
> +{
> +    viommu_register_type(VIOMMU_TYPE_INTEL_VTD, &vvtd_hvm_vmx_ops);
> +    return 0;
> +}
> +__initcall(vvtd_register);

As commented in another patch I think the vIOMMU types should be
registered using a method similar to REGISTER_SCHEDULER.

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.