[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC for-4.5 08/12] xen/passthrough: iommu: Split generic IOMMU code
The generic IOMMU framework code (xen/drivers/passthrough/iommu.c) contains functions specific to x86 and PCI. Split the framework into 3 distinct files: - iommu.c: contains generic functions shared between x86 and ARM (once it is supported) - iommu_pci.c: contains specific functions for PCI passthrough - iommu_x86.c: contains specific functions for x86 iommu_pci.c will only be compiled when PCI is supported by the architecture (i.e. when HAS_PCI is defined). This patch is mostly code movement into new files. Signed-off-by: Julien Grall <julien.grall@xxxxxxxxxx> Cc: Xiantao Zhang <xiantao.zhang@xxxxxxxxx> Cc: Jan Beulich <jbeulich@xxxxxxxx> --- xen/drivers/passthrough/Makefile | 6 +- xen/drivers/passthrough/iommu.c | 473 +---------------------------------- xen/drivers/passthrough/iommu_pci.c | 468 ++++++++++++++++++++++++++++++++++ xen/drivers/passthrough/iommu_x86.c | 65 +++++ xen/drivers/passthrough/vtd/iommu.c | 42 ++-- xen/include/asm-x86/iommu.h | 46 ++++ xen/include/xen/hvm/iommu.h | 1 + xen/include/xen/iommu.h | 42 ++-- 8 files changed, 625 insertions(+), 518 deletions(-) create mode 100644 xen/drivers/passthrough/iommu_pci.c create mode 100644 xen/drivers/passthrough/iommu_x86.c create mode 100644 xen/include/asm-x86/iommu.h diff --git a/xen/drivers/passthrough/Makefile b/xen/drivers/passthrough/Makefile index 7c40fa5..51e0a0d 100644 --- a/xen/drivers/passthrough/Makefile +++ b/xen/drivers/passthrough/Makefile @@ -3,5 +3,7 @@ subdir-$(x86) += amd subdir-$(x86_64) += x86 obj-y += iommu.o -obj-y += io.o -obj-y += pci.o +obj-$(x86) += iommu_x86.o +obj-$(HAS_PCI) += iommu_pci.o +obj-$(x86) += io.o +obj-$(HAS_PCI) += pci.o diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c index 0a26956..d733878 100644 --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -24,7 +24,6 @@ #include <xsm/xsm.h> static void parse_iommu_param(char *s); -static int iommu_populate_page_table(struct domain *d); static void 
iommu_dump_p2m_table(unsigned char key); /* @@ -180,86 +179,7 @@ void __init iommu_dom0_init(struct domain *d) return hd->platform_ops->dom0_init(d); } -int iommu_add_device(struct pci_dev *pdev) -{ - struct hvm_iommu *hd; - int rc; - u8 devfn; - - if ( !pdev->domain ) - return -EINVAL; - - ASSERT(spin_is_locked(&pcidevs_lock)); - - hd = domain_hvm_iommu(pdev->domain); - if ( !iommu_enabled || !hd->platform_ops ) - return 0; - - rc = hd->platform_ops->add_device(pdev->devfn, pdev); - if ( rc || !pdev->phantom_stride ) - return rc; - - for ( devfn = pdev->devfn ; ; ) - { - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) - return 0; - rc = hd->platform_ops->add_device(devfn, pdev); - if ( rc ) - printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n", - pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc); - } -} - -int iommu_enable_device(struct pci_dev *pdev) -{ - struct hvm_iommu *hd; - - if ( !pdev->domain ) - return -EINVAL; - - ASSERT(spin_is_locked(&pcidevs_lock)); - - hd = domain_hvm_iommu(pdev->domain); - if ( !iommu_enabled || !hd->platform_ops || - !hd->platform_ops->enable_device ) - return 0; - - return hd->platform_ops->enable_device(pdev); -} - -int iommu_remove_device(struct pci_dev *pdev) -{ - struct hvm_iommu *hd; - u8 devfn; - - if ( !pdev->domain ) - return -EINVAL; - - hd = domain_hvm_iommu(pdev->domain); - if ( !iommu_enabled || !hd->platform_ops ) - return 0; - - for ( devfn = pdev->devfn ; pdev->phantom_stride; ) - { - int rc; - - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) - break; - rc = hd->platform_ops->remove_device(devfn, pdev); - if ( !rc ) - continue; - - printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n", - pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc); - return rc; - } - - return hd->platform_ops->remove_device(pdev->devfn, pdev); -} - -static void iommu_teardown(struct domain *d) +void iommu_teardown(struct domain *d) 
{ const struct hvm_iommu *hd = domain_hvm_iommu(d); @@ -268,151 +188,6 @@ static void iommu_teardown(struct domain *d) tasklet_schedule(&iommu_pt_cleanup_tasklet); } -/* - * If the device isn't owned by dom0, it means it already - * has been assigned to other domain, or it doesn't exist. - */ -static int device_assigned(u16 seg, u8 bus, u8 devfn) -{ - struct pci_dev *pdev; - - spin_lock(&pcidevs_lock); - pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn); - spin_unlock(&pcidevs_lock); - - return pdev ? 0 : -EBUSY; -} - -static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - struct pci_dev *pdev; - int rc = 0; - - if ( !iommu_enabled || !hd->platform_ops ) - return 0; - - /* Prevent device assign if mem paging or mem sharing have been - * enabled for this domain */ - if ( unlikely(!need_iommu(d) && - (d->arch.hvm_domain.mem_sharing_enabled || - d->mem_event->paging.ring_page)) ) - return -EXDEV; - - if ( !spin_trylock(&pcidevs_lock) ) - return -ERESTART; - - if ( need_iommu(d) <= 0 ) - { - if ( !iommu_use_hap_pt(d) ) - { - rc = iommu_populate_page_table(d); - if ( rc ) - { - spin_unlock(&pcidevs_lock); - return rc; - } - } - d->need_iommu = 1; - } - - pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn); - if ( !pdev ) - { - rc = pci_get_pdev(seg, bus, devfn) ? 
-EBUSY : -ENODEV; - goto done; - } - - pdev->fault.count = 0; - - if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) ) - goto done; - - for ( ; pdev->phantom_stride; rc = 0 ) - { - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) - break; - rc = hd->platform_ops->assign_device(d, devfn, pdev); - if ( rc ) - printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n", - d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), - rc); - } - - done: - if ( !has_arch_pdevs(d) && need_iommu(d) ) - iommu_teardown(d); - spin_unlock(&pcidevs_lock); - - return rc; -} - -static int iommu_populate_page_table(struct domain *d) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - struct page_info *page; - int rc = 0, n = 0; - - d->need_iommu = -1; - - this_cpu(iommu_dont_flush_iotlb) = 1; - spin_lock(&d->page_alloc_lock); - - if ( unlikely(d->is_dying) ) - rc = -ESRCH; - - while ( !rc && (page = page_list_remove_head(&d->page_list)) ) - { - if ( is_hvm_domain(d) || - (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page ) - { - BUG_ON(SHARED_M2P(mfn_to_gmfn(d, page_to_mfn(page)))); - rc = hd->platform_ops->map_page( - d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page), - IOMMUF_readable|IOMMUF_writable); - if ( rc ) - { - page_list_add(page, &d->page_list); - break; - } - } - page_list_add_tail(page, &d->arch.relmem_list); - if ( !(++n & 0xff) && !page_list_empty(&d->page_list) && - hypercall_preempt_check() ) - rc = -ERESTART; - } - - if ( !rc ) - { - /* - * The expectation here is that generally there are many normal pages - * on relmem_list (the ones we put there) and only few being in an - * offline/broken state. The latter ones are always at the head of the - * list. Hence we first move the whole list, and then move back the - * first few entries. 
- */ - page_list_move(&d->page_list, &d->arch.relmem_list); - while ( (page = page_list_first(&d->page_list)) != NULL && - (page->count_info & (PGC_state|PGC_broken)) ) - { - page_list_del(page, &d->page_list); - page_list_add_tail(page, &d->arch.relmem_list); - } - } - - spin_unlock(&d->page_alloc_lock); - this_cpu(iommu_dont_flush_iotlb) = 0; - - if ( !rc ) - iommu_iotlb_flush_all(d); - else if ( rc != -ERESTART ) - iommu_teardown(d); - - return rc; -} - - void iommu_domain_destroy(struct domain *d) { struct hvm_iommu *hd = domain_hvm_iommu(d); @@ -499,53 +274,6 @@ void iommu_iotlb_flush_all(struct domain *d) hd->platform_ops->iotlb_flush_all(d); } -/* caller should hold the pcidevs_lock */ -int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - struct pci_dev *pdev = NULL; - int ret = 0; - - if ( !iommu_enabled || !hd->platform_ops ) - return -EINVAL; - - ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev_by_domain(d, seg, bus, devfn); - if ( !pdev ) - return -ENODEV; - - while ( pdev->phantom_stride ) - { - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) - break; - ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev); - if ( !ret ) - continue; - - printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n", - d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret); - return ret; - } - - devfn = pdev->devfn; - ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev); - if ( ret ) - { - dprintk(XENLOG_G_ERR, - "d%d: deassign device (%04x:%02x:%02x.%u) failed\n", - d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - return ret; - } - - pdev->fault.count = 0; - - if ( !has_arch_pdevs(d) && need_iommu(d) ) - iommu_teardown(d); - - return ret; -} - int __init iommu_setup(void) { int rc = -ENODEV; @@ -586,86 +314,6 @@ int __init iommu_setup(void) return rc; } -static int iommu_get_device_group( - struct domain *d, u16 seg, u8 
bus, u8 devfn, - XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs) -{ - struct hvm_iommu *hd = domain_hvm_iommu(d); - struct pci_dev *pdev; - int group_id, sdev_id; - u32 bdf; - int i = 0; - const struct iommu_ops *ops = hd->platform_ops; - - if ( !iommu_enabled || !ops || !ops->get_device_group_id ) - return 0; - - group_id = ops->get_device_group_id(seg, bus, devfn); - - spin_lock(&pcidevs_lock); - for_each_pdev( d, pdev ) - { - if ( (pdev->seg != seg) || - ((pdev->bus == bus) && (pdev->devfn == devfn)) ) - continue; - - if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) ) - continue; - - sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn); - if ( (sdev_id == group_id) && (i < max_sdevs) ) - { - bdf = 0; - bdf |= (pdev->bus & 0xff) << 16; - bdf |= (pdev->devfn & 0xff) << 8; - - if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) ) - { - spin_unlock(&pcidevs_lock); - return -1; - } - i++; - } - } - spin_unlock(&pcidevs_lock); - - return i; -} - -void iommu_update_ire_from_apic( - unsigned int apic, unsigned int reg, unsigned int value) -{ - const struct iommu_ops *ops = iommu_get_ops(); - ops->update_ire_from_apic(apic, reg, value); -} - -int iommu_update_ire_from_msi( - struct msi_desc *msi_desc, struct msi_msg *msg) -{ - const struct iommu_ops *ops = iommu_get_ops(); - return iommu_intremap ? ops->update_ire_from_msi(msi_desc, msg) : 0; -} - -void iommu_read_msi_from_ire( - struct msi_desc *msi_desc, struct msi_msg *msg) -{ - const struct iommu_ops *ops = iommu_get_ops(); - if ( iommu_intremap ) - ops->read_msi_from_ire(msi_desc, msg); -} - -unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg) -{ - const struct iommu_ops *ops = iommu_get_ops(); - return ops->read_apic_from_ire(apic, reg); -} - -int __init iommu_setup_hpet_msi(struct msi_desc *msi) -{ - const struct iommu_ops *ops = iommu_get_ops(); - return ops->setup_hpet_msi ? 
ops->setup_hpet_msi(msi) : -ENODEV; -} - void iommu_resume() { const struct iommu_ops *ops = iommu_get_ops(); @@ -696,125 +344,6 @@ void iommu_crash_shutdown(void) iommu_enabled = iommu_intremap = 0; } -int iommu_do_domctl( - struct xen_domctl *domctl, struct domain *d, - XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) -{ - u16 seg; - u8 bus, devfn; - int ret = 0; - - if ( !iommu_enabled ) - return -ENOSYS; - - switch ( domctl->cmd ) - { - case XEN_DOMCTL_get_device_group: - { - u32 max_sdevs; - XEN_GUEST_HANDLE_64(uint32) sdevs; - - ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf); - if ( ret ) - break; - - seg = domctl->u.get_device_group.machine_sbdf >> 16; - bus = (domctl->u.get_device_group.machine_sbdf >> 8) & 0xff; - devfn = domctl->u.get_device_group.machine_sbdf & 0xff; - max_sdevs = domctl->u.get_device_group.max_sdevs; - sdevs = domctl->u.get_device_group.sdev_array; - - ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs); - if ( ret < 0 ) - { - dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n"); - ret = -EFAULT; - domctl->u.get_device_group.num_sdevs = 0; - } - else - { - domctl->u.get_device_group.num_sdevs = ret; - ret = 0; - } - if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) ) - ret = -EFAULT; - } - break; - - case XEN_DOMCTL_test_assign_device: - ret = xsm_test_assign_device(XSM_HOOK, domctl->u.assign_device.machine_sbdf); - if ( ret ) - break; - - seg = domctl->u.assign_device.machine_sbdf >> 16; - bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; - devfn = domctl->u.assign_device.machine_sbdf & 0xff; - - if ( device_assigned(seg, bus, devfn) ) - { - printk(XENLOG_G_INFO - "%04x:%02x:%02x.%u already assigned, or non-existent\n", - seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - ret = -EINVAL; - } - break; - - case XEN_DOMCTL_assign_device: - if ( unlikely(d->is_dying) ) - { - ret = -EINVAL; - break; - } - - ret = xsm_assign_device(XSM_HOOK, d, 
domctl->u.assign_device.machine_sbdf); - if ( ret ) - break; - - seg = domctl->u.assign_device.machine_sbdf >> 16; - bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; - devfn = domctl->u.assign_device.machine_sbdf & 0xff; - - ret = device_assigned(seg, bus, devfn) ?: - assign_device(d, seg, bus, devfn); - if ( ret == -ERESTART ) - ret = hypercall_create_continuation(__HYPERVISOR_domctl, - "h", u_domctl); - else if ( ret ) - printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: " - "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n", - seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), - d->domain_id, ret); - - break; - - case XEN_DOMCTL_deassign_device: - ret = xsm_deassign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf); - if ( ret ) - break; - - seg = domctl->u.assign_device.machine_sbdf >> 16; - bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; - devfn = domctl->u.assign_device.machine_sbdf & 0xff; - - spin_lock(&pcidevs_lock); - ret = deassign_device(d, seg, bus, devfn); - spin_unlock(&pcidevs_lock); - if ( ret ) - printk(XENLOG_G_ERR - "deassign %04x:%02x:%02x.%u from dom%d failed (%d)\n", - seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), - d->domain_id, ret); - - break; - - default: - ret = -ENOSYS; - break; - } - - return ret; -} - static void iommu_dump_p2m_table(unsigned char key) { struct domain *d; diff --git a/xen/drivers/passthrough/iommu_pci.c b/xen/drivers/passthrough/iommu_pci.c new file mode 100644 index 0000000..5b9d937 --- /dev/null +++ b/xen/drivers/passthrough/iommu_pci.c @@ -0,0 +1,468 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <xen/sched.h> +#include <xen/iommu.h> +#include <xen/paging.h> +#include <xen/guest_access.h> +#include <xen/event.h> +#include <xen/softirq.h> +#include <xsm/xsm.h> + +static int iommu_populate_page_table(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct page_info *page; + int rc = 0, n = 0; + + d->need_iommu = -1; + + this_cpu(iommu_dont_flush_iotlb) = 1; + spin_lock(&d->page_alloc_lock); + + if ( unlikely(d->is_dying) ) + rc = -ESRCH; + + + while ( !rc && (page = page_list_remove_head(&d->page_list)) ) + { + if ( is_hvm_domain(d) || + (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page ) + { + BUG_ON(SHARED_M2P(mfn_to_gmfn(d, page_to_mfn(page)))); + rc = hd->platform_ops->map_page( + d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page), + IOMMUF_readable|IOMMUF_writable); + if ( rc ) + { + page_list_add(page, &d->page_list); + break; + } + } + page_list_add_tail(page, &d->arch.relmem_list); + if ( !(++n & 0xff) && !page_list_empty(&d->page_list) && + hypercall_preempt_check() ) + rc = -ERESTART; + } + + if ( !rc ) + { + /* + * The expectation here is that generally there are many normal pages + * on relmem_list (the ones we put there) and only few being in an + * offline/broken state. The latter ones are always at the head of the + * list. Hence we first move the whole list, and then move back the + * first few entries. 
+ */ + page_list_move(&d->page_list, &d->arch.relmem_list); + while ( (page = page_list_first(&d->page_list)) != NULL && + (page->count_info & (PGC_state|PGC_broken)) ) + { + page_list_del(page, &d->page_list); + page_list_add_tail(page, &d->arch.relmem_list); + } + } + + spin_unlock(&d->page_alloc_lock); + this_cpu(iommu_dont_flush_iotlb) = 0; + + if ( !rc ) + iommu_iotlb_flush_all(d); + else if ( rc != -ERESTART ) + iommu_teardown(d); + + return rc; +} + +int iommu_add_device(struct pci_dev *pdev) +{ + struct hvm_iommu *hd; + int rc; + u8 devfn; + + if ( !pdev->domain ) + return -EINVAL; + + ASSERT(spin_is_locked(&pcidevs_lock)); + + hd = domain_hvm_iommu(pdev->domain); + if ( !iommu_enabled || !hd->platform_ops ) + return 0; + + rc = hd->platform_ops->add_device(pdev->devfn, pdev); + if ( rc || !pdev->phantom_stride ) + return rc; + + for ( devfn = pdev->devfn ; ; ) + { + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + return 0; + rc = hd->platform_ops->add_device(devfn, pdev); + if ( rc ) + printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n", + pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc); + } +} + +int iommu_enable_device(struct pci_dev *pdev) +{ + struct hvm_iommu *hd; + + if ( !pdev->domain ) + return -EINVAL; + + ASSERT(spin_is_locked(&pcidevs_lock)); + + hd = domain_hvm_iommu(pdev->domain); + if ( !iommu_enabled || !hd->platform_ops || + !hd->platform_ops->enable_device ) + return 0; + + return hd->platform_ops->enable_device(pdev); +} + +int iommu_remove_device(struct pci_dev *pdev) +{ + struct hvm_iommu *hd; + u8 devfn; + + if ( !pdev->domain ) + return -EINVAL; + + hd = domain_hvm_iommu(pdev->domain); + if ( !iommu_enabled || !hd->platform_ops ) + return 0; + + for ( devfn = pdev->devfn ; pdev->phantom_stride; ) + { + int rc; + + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + break; + rc = hd->platform_ops->remove_device(devfn, pdev); + if ( !rc ) + 
continue; + + printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n", + pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc); + return rc; + } + + return hd->platform_ops->remove_device(pdev->devfn, pdev); +} + +/* + * If the device isn't owned by dom0, it means it already + * has been assigned to other domain, or it doesn't exist. + */ +static int device_assigned(u16 seg, u8 bus, u8 devfn) +{ + struct pci_dev *pdev = NULL; + + spin_lock(&pcidevs_lock); + pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn); + spin_unlock(&pcidevs_lock); + + return pdev ? 0 : -EBUSY; +} + +static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct pci_dev *pdev; + int rc = 0; + + if ( !iommu_enabled || !hd->platform_ops ) + return 0; + + /* Prevent device assign if mem paging or mem sharing have been + * enabled for this domain */ + if ( unlikely(!need_iommu(d) && + (mem_sharing_enabled(d) || + d->mem_event->paging.ring_page)) ) + return -EXDEV; + + if ( !spin_trylock(&pcidevs_lock) ) + return -ERESTART; + + if ( need_iommu(d) <= 0 ) + { + if ( !iommu_use_hap_pt(d) ) + { + rc = iommu_populate_page_table(d); + if ( rc ) + { + spin_unlock(&pcidevs_lock); + return rc; + } + } + d->need_iommu = 1; + } + + pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn); + if ( !pdev ) + { + rc = pci_get_pdev(seg, bus, devfn) ? 
-EBUSY : -ENODEV; + goto done; + } + + pdev->fault.count = 0; + + if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) ) + goto done; + + for ( ; pdev->phantom_stride; rc = 0 ) + { + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + break; + rc = hd->platform_ops->assign_device(d, devfn, pdev); + if ( rc ) + printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n", + d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + rc); + } + + done: + if ( !has_arch_pdevs(d) && need_iommu(d) ) + iommu_teardown(d); + spin_unlock(&pcidevs_lock); + + return rc; +} + +/* caller should hold the pcidevs_lock */ +int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct pci_dev *pdev = NULL; + int ret = 0; + + if ( !iommu_enabled || !hd->platform_ops ) + return -EINVAL; + + ASSERT(spin_is_locked(&pcidevs_lock)); + pdev = pci_get_pdev_by_domain(d, seg, bus, devfn); + if ( !pdev ) + return -ENODEV; + + while ( pdev->phantom_stride ) + { + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + break; + ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev); + if ( !ret ) + continue; + + printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n", + d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret); + return ret; + } + + devfn = pdev->devfn; + ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev); + if ( ret ) + { + dprintk(XENLOG_G_ERR, + "d%d: deassign device (%04x:%02x:%02x.%u) failed\n", + d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + return ret; + } + + pdev->fault.count = 0; + + if ( !has_arch_pdevs(d) && need_iommu(d) ) + iommu_teardown(d); + + return ret; +} + +static int iommu_get_device_group( + struct domain *d, u16 seg, u8 bus, u8 devfn, + XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct pci_dev *pdev; + int group_id, 
sdev_id; + u32 bdf; + int i = 0; + const struct iommu_ops *ops = hd->platform_ops; + + if ( !iommu_enabled || !ops || !ops->get_device_group_id ) + return 0; + + group_id = ops->get_device_group_id(seg, bus, devfn); + + spin_lock(&pcidevs_lock); + for_each_pdev( d, pdev ) + { + if ( (pdev->seg != seg) || + ((pdev->bus == bus) && (pdev->devfn == devfn)) ) + continue; + + if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) ) + continue; + + sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn); + if ( (sdev_id == group_id) && (i < max_sdevs) ) + { + bdf = 0; + bdf |= (pdev->bus & 0xff) << 16; + bdf |= (pdev->devfn & 0xff) << 8; + + if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) ) + { + spin_unlock(&pcidevs_lock); + return -1; + } + i++; + } + } + + spin_unlock(&pcidevs_lock); + + return i; +} + +int iommu_do_domctl( + struct xen_domctl *domctl, struct domain *d, + XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) +{ + u16 seg; + u8 bus, devfn; + int ret = 0; + + if ( !iommu_enabled ) + return -ENOSYS; + + switch ( domctl->cmd ) + { + case XEN_DOMCTL_get_device_group: + { + u32 max_sdevs; + XEN_GUEST_HANDLE_64(uint32) sdevs; + + ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf); + if ( ret ) + break; + + seg = domctl->u.get_device_group.machine_sbdf >> 16; + bus = (domctl->u.get_device_group.machine_sbdf >> 8) & 0xff; + devfn = domctl->u.get_device_group.machine_sbdf & 0xff; + max_sdevs = domctl->u.get_device_group.max_sdevs; + sdevs = domctl->u.get_device_group.sdev_array; + + ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs); + if ( ret < 0 ) + { + dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n"); + ret = -EFAULT; + domctl->u.get_device_group.num_sdevs = 0; + } + else + { + domctl->u.get_device_group.num_sdevs = ret; + ret = 0; + } + if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) ) + ret = -EFAULT; + } + break; + + case XEN_DOMCTL_test_assign_device: + 
ret = xsm_test_assign_device(XSM_HOOK, domctl->u.assign_device.machine_sbdf); + if ( ret ) + break; + + seg = domctl->u.assign_device.machine_sbdf >> 16; + bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; + devfn = domctl->u.assign_device.machine_sbdf & 0xff; + + if ( device_assigned(seg, bus, devfn) ) + { + printk(XENLOG_G_INFO + "%04x:%02x:%02x.%u already assigned, or non-existent\n", + seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + ret = -EINVAL; + } + break; + + case XEN_DOMCTL_assign_device: + if ( unlikely(d->is_dying) ) + { + ret = -EINVAL; + break; + } + + ret = xsm_assign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf); + if ( ret ) + break; + + seg = domctl->u.assign_device.machine_sbdf >> 16; + bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; + devfn = domctl->u.assign_device.machine_sbdf & 0xff; + + ret = device_assigned(seg, bus, devfn) ?: + assign_device(d, seg, bus, devfn); + if ( ret == -ERESTART ) + ret = hypercall_create_continuation(__HYPERVISOR_domctl, + "h", u_domctl); + else if ( ret ) + printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: " + "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n", + seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + d->domain_id, ret); + + break; + + case XEN_DOMCTL_deassign_device: + ret = xsm_deassign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf); + if ( ret ) + break; + + seg = domctl->u.assign_device.machine_sbdf >> 16; + bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff; + devfn = domctl->u.assign_device.machine_sbdf & 0xff; + + spin_lock(&pcidevs_lock); + ret = deassign_device(d, seg, bus, devfn); + spin_unlock(&pcidevs_lock); + if ( ret ) + printk(XENLOG_G_ERR + "deassign %04x:%02x:%02x.%u from dom%d failed (%d)\n", + seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + d->domain_id, ret); + + break; + + default: + ret = -ENOSYS; + break; + } + + return ret; +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: 
+ */ diff --git a/xen/drivers/passthrough/iommu_x86.c b/xen/drivers/passthrough/iommu_x86.c new file mode 100644 index 0000000..bd3c23b --- /dev/null +++ b/xen/drivers/passthrough/iommu_x86.c @@ -0,0 +1,65 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <xen/sched.h> +#include <xen/iommu.h> +#include <xen/paging.h> +#include <xen/guest_access.h> +#include <xen/event.h> +#include <xen/softirq.h> +#include <xsm/xsm.h> + +void iommu_update_ire_from_apic( + unsigned int apic, unsigned int reg, unsigned int value) +{ + const struct iommu_ops *ops = iommu_get_ops(); + ops->update_ire_from_apic(apic, reg, value); +} + +int iommu_update_ire_from_msi( + struct msi_desc *msi_desc, struct msi_msg *msg) +{ + const struct iommu_ops *ops = iommu_get_ops(); + return iommu_intremap ? ops->update_ire_from_msi(msi_desc, msg) : 0; +} + +void iommu_read_msi_from_ire( + struct msi_desc *msi_desc, struct msi_msg *msg) +{ + const struct iommu_ops *ops = iommu_get_ops(); + if ( iommu_intremap ) + ops->read_msi_from_ire(msi_desc, msg); +} + +unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg) +{ + const struct iommu_ops *ops = iommu_get_ops(); + return ops->read_apic_from_ire(apic, reg); +} + +int __init iommu_setup_hpet_msi(struct msi_desc *msi) +{ + const struct iommu_ops *ops = iommu_get_ops(); + return ops->setup_hpet_msi ? 
ops->setup_hpet_msi(msi) : -ENODEV; +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c index d5ce5b7..faa794b 100644 --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -1784,31 +1784,31 @@ static int intel_iommu_unmap_page(struct domain *d, unsigned long gfn) void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present) -{ - struct acpi_drhd_unit *drhd; - struct iommu *iommu = NULL; - struct hvm_iommu *hd = domain_hvm_iommu(d); - int flush_dev_iotlb; - int iommu_domid; + { + struct acpi_drhd_unit *drhd; + struct iommu *iommu = NULL; + struct hvm_iommu *hd = domain_hvm_iommu(d); + int flush_dev_iotlb; + int iommu_domid; - iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); + iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); - for_each_drhd_unit ( drhd ) - { - iommu = drhd->iommu; - if ( !test_bit(iommu->index, &hd->iommu_bitmap) ) - continue; + for_each_drhd_unit ( drhd ) + { + iommu = drhd->iommu; + if ( !test_bit(iommu->index, &hd->iommu_bitmap) ) + continue; - flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0; - iommu_domid= domain_iommu_domid(d, iommu); - if ( iommu_domid == -1 ) - continue; - if ( iommu_flush_iotlb_psi(iommu, iommu_domid, - (paddr_t)gfn << PAGE_SHIFT_4K, - order, !present, flush_dev_iotlb) ) - iommu_flush_write_buffer(iommu); + flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 
1 : 0; + iommu_domid= domain_iommu_domid(d, iommu); + if ( iommu_domid == -1 ) + continue; + if ( iommu_flush_iotlb_psi(iommu, iommu_domid, + (paddr_t)gfn << PAGE_SHIFT_4K, + order, !present, flush_dev_iotlb) ) + iommu_flush_write_buffer(iommu); + } } -} static int vtd_ept_page_compatible(struct iommu *iommu) { diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h new file mode 100644 index 0000000..34c1896 --- /dev/null +++ b/xen/include/asm-x86/iommu.h @@ -0,0 +1,46 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. +*/ +#ifndef __ARCH_X86_IOMMU_H__ +#define __ARCH_X86_IOMMU_H__ + +#define MAX_IOMMUS 32 + +#include <asm/msi.h> + +void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value); +int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg); +void iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg); +unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg); +int iommu_setup_hpet_msi(struct msi_desc *); + +void iommu_share_p2m_table(struct domain *d); + +/* While VT-d specific, this must get declared in a generic header. 
*/ +int adjust_vtd_irq_affinities(void); +void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present); +int iommu_supports_eim(void); +int iommu_enable_x2apic_IR(void); +void iommu_disable_x2apic_IR(void); +void iommu_set_dom0_mapping(struct domain *d); + +#endif /* !__ARCH_X86_IOMMU_H__ */ +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/xen/hvm/iommu.h b/xen/include/xen/hvm/iommu.h index 26539e0..2abb4e3 100644 --- a/xen/include/xen/hvm/iommu.h +++ b/xen/include/xen/hvm/iommu.h @@ -21,6 +21,7 @@ #define __XEN_HVM_IOMMU_H__ #include <xen/iommu.h> +#include <asm/hvm/iommu.h> struct g2m_ioport { struct list_head list; diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h index fcbc432..60df9d6 100644 --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -25,6 +25,7 @@ #include <xen/pci.h> #include <public/hvm/ioreq.h> #include <public/domctl.h> +#include <asm/iommu.h> extern bool_t iommu_enable, iommu_enabled; extern bool_t force_iommu, iommu_verbose; @@ -39,17 +40,12 @@ extern bool_t amd_iommu_perdev_intremap; #define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu) -#define MAX_IOMMUS 32 - #define PAGE_SHIFT_4K (12) #define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) #define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) #define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) int iommu_setup(void); -int iommu_supports_eim(void); -int iommu_enable_x2apic_IR(void); -void iommu_disable_x2apic_IR(void); int iommu_add_device(struct pci_dev *pdev); int iommu_enable_device(struct pci_dev *pdev); @@ -59,6 +55,9 @@ void iommu_dom0_init(struct domain *d); void iommu_domain_destroy(struct domain *d); int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn); +/* Function used internally, use iommu_domain_destroy */ +void iommu_teardown(struct domain *d); + /* iommu_map_page() takes flags to direct the mapping operation. 
*/ #define _IOMMUF_readable 0 #define IOMMUF_readable (1u<<_IOMMUF_readable) @@ -67,9 +66,8 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn); int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn, unsigned int flags); int iommu_unmap_page(struct domain *d, unsigned long gfn); -void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present); -void iommu_domain_teardown(struct domain *d); +#ifdef HAS_PCI void pt_pci_init(void); struct pirq; @@ -84,62 +82,60 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci); bool_t pt_irq_need_timer(uint32_t flags); #define PT_IRQ_TIME_OUT MILLISECS(8) +#endif /* HAS_PCI */ +#ifdef CONFIG_X86 struct msi_desc; struct msi_msg; +#endif /* CONFIG_X86 */ + struct page_info; struct iommu_ops { int (*init)(struct domain *d); void (*dom0_init)(struct domain *d); +#ifdef HAS_PCI int (*add_device)(u8 devfn, struct pci_dev *); int (*enable_device)(struct pci_dev *pdev); int (*remove_device)(u8 devfn, struct pci_dev *); int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *); + int (*reassign_device)(struct domain *s, struct domain *t, + u8 devfn, struct pci_dev *); + int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn); +#endif /* HAS_PCI */ void (*teardown)(struct domain *d); int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn, unsigned int flags); int (*unmap_page)(struct domain *d, unsigned long gfn); void (*free_page_table)(struct page_info *); - int (*reassign_device)(struct domain *s, struct domain *t, - u8 devfn, struct pci_dev *); - int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn); +#ifdef CONFIG_X86 void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value); int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg); void (*read_msi_from_ire)(struct msi_desc *msi_desc, struct msi_msg *msg); unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg); int (*setup_hpet_msi)(struct msi_desc 
*); + void (*share_p2m)(struct domain *d); +#endif /* CONFIG_X86 */ void (*suspend)(void); void (*resume)(void); - void (*share_p2m)(struct domain *d); void (*crash_shutdown)(void); void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); void (*iotlb_flush_all)(struct domain *d); void (*dump_p2m_table)(struct domain *d); }; -void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value); -int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg); -void iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg); -unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg); -int iommu_setup_hpet_msi(struct msi_desc *); - void iommu_suspend(void); void iommu_resume(void); void iommu_crash_shutdown(void); -void iommu_set_dom0_mapping(struct domain *d); -void iommu_share_p2m_table(struct domain *d); - +#ifdef HAS_PCI int iommu_do_domctl(struct xen_domctl *, struct domain *d, XEN_GUEST_HANDLE_PARAM(xen_domctl_t)); +#endif void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count); void iommu_iotlb_flush_all(struct domain *d); -/* While VT-d specific, this must get declared in a generic header. */ -int adjust_vtd_irq_affinities(void); - /* * The purpose of the iommu_dont_flush_iotlb optional cpu flag is to * avoid unecessary iotlb_flush in the low level IOMMU code. -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |