IOMMU: make page table deallocation preemptible

This too can take an arbitrary amount of time.

In fact, the bulk of the work is being moved to a tasklet, as handling
the necessary preemption logic inline seems close to impossible given
that the teardown may also be invoked on error paths.

Signed-off-by: Jan Beulich
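For illustration only (not part of the patch): a minimal standalone C sketch of the deferral pattern being introduced. All names below are made up; in the patch itself the queue is iommu_pt_cleanup_list under iommu_pt_cleanup_lock, the worker is a tasklet, and the "stop early" check is softirq_pending().

/*
 * Standalone sketch (not Xen code) of the deferred-free pattern:
 * teardown paths only enqueue page-table pages onto a cleanup list;
 * a separately scheduled worker (a tasklet in the patch) pops and
 * frees entries in bounded batches, asking to be re-armed whenever
 * it stops early.  All identifiers here are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct pt_page {                 /* stand-in for Xen's struct page_info   */
    struct pt_page *next;
    int level;                   /* what the patch stashes in PFN_ORDER() */
};

static struct pt_page *cleanup_list;  /* lock-protected in real code      */

/* Producer side, akin to the new deallocate_next_page_table(): just
 * queue the page; no real work happens in the (possibly long) caller.    */
static void queue_page_table(struct pt_page *pg, int level)
{
    pg->level = level;
    pg->next = cleanup_list;
    cleanup_list = pg;
}

/* Pretend "a softirq became pending"; Xen checks softirq_pending().      */
static bool work_pending(unsigned int batch)
{
    return batch >= 4;           /* arbitrary per-invocation budget       */
}

/* Consumer side, akin to the tasklet body.  Returns true when it stopped
 * early and should be rescheduled (tasklet_schedule_on_cpu() in Xen).    */
static bool drain_cleanup_list(void)
{
    unsigned int done = 0;

    do {
        struct pt_page *pg = cleanup_list;

        if ( !pg )
            return false;        /* list empty: nothing left to do        */
        cleanup_list = pg->next;
        printf("freeing level-%d page table page\n", pg->level);
        free(pg);
        ++done;
    } while ( !work_pending(done) );

    return true;                 /* budget exhausted: caller reschedules  */
}

int main(void)
{
    for ( int i = 0; i < 10; i++ )
    {
        struct pt_page *pg = malloc(sizeof(*pg));

        if ( pg )
            queue_page_table(pg, 1 + i % 3);
    }

    while ( drain_cleanup_list() )
        puts("-- rescheduling cleanup --");

    return 0;
}

Note that when the real tasklet stops early it re-arms itself via tasklet_schedule_on_cpu() with cpumask_cycle(smp_processor_id(), &cpu_online_map), so the remaining cleanup is picked up on the next online CPU instead of repeatedly occupying the current one.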
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -195,6 +195,8 @@ static int __init amd_iommu_setup_dom0_d
     return 0;
 }
 
+static void deallocate_page_tables(unsigned long unused);
+
 int __init amd_iov_detect(void)
 {
     INIT_LIST_HEAD(&amd_iommu_head);
@@ -215,6 +217,7 @@ int __init amd_iov_detect(void)
         return -ENODEV;
     }
 
+    tasklet_init(&iommu_pt_cleanup_tasklet, deallocate_page_tables, 0);
     init_done = 1;
 
     if ( !amd_iommu_perdev_intremap )
@@ -405,11 +408,21 @@ static int amd_iommu_assign_device(struc
     return reassign_device(dom0, d, devfn, pdev);
 }
 
-static void deallocate_next_page_table(struct page_info* pg, int level)
+static void deallocate_next_page_table(struct page_info *pg, int level)
+{
+    spin_lock(&iommu_pt_cleanup_lock);
+    PFN_ORDER(pg) = level;
+    page_list_add_tail(pg, &iommu_pt_cleanup_list);
+    spin_unlock(&iommu_pt_cleanup_lock);
+}
+
+static void deallocate_page_table(struct page_info *pg)
 {
     void *table_vaddr, *pde;
     u64 next_table_maddr;
-    int index, next_level;
+    unsigned int index, level = PFN_ORDER(pg), next_level;
+
+    PFN_ORDER(pg) = 0;
 
     if ( level <= 1 )
     {
@@ -439,6 +452,23 @@ static void deallocate_next_page_table(s
     free_amd_iommu_pgtable(pg);
 }
 
+static void deallocate_page_tables(unsigned long unused)
+{
+    do {
+        struct page_info *pg;
+
+        spin_lock(&iommu_pt_cleanup_lock);
+        pg = page_list_remove_head(&iommu_pt_cleanup_list);
+        spin_unlock(&iommu_pt_cleanup_lock);
+        if ( !pg )
+            return;
+        deallocate_page_table(pg);
+    } while ( !softirq_pending(smp_processor_id()) );
+
+    tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet,
+                            cpumask_cycle(smp_processor_id(), &cpu_online_map));
+}
+
 static void deallocate_iommu_page_tables(struct domain *d)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
@@ -451,6 +481,7 @@ static void deallocate_iommu_page_tables
     {
         deallocate_next_page_table(hd->root_table, hd->paging_mode);
         hd->root_table = NULL;
+        tasklet_schedule(&iommu_pt_cleanup_tasklet);
     }
     spin_unlock(&hd->mapping_lock);
 }
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -58,6 +58,10 @@ bool_t __read_mostly amd_iommu_perdev_in
 
 DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+DEFINE_SPINLOCK(iommu_pt_cleanup_lock);
+PAGE_LIST_HEAD(iommu_pt_cleanup_list);
+struct tasklet iommu_pt_cleanup_tasklet;
+
 static struct keyhandler iommu_p2m_table = {
     .diagnostic = 0,
     .u.fn = iommu_dump_p2m_table,
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -668,13 +668,24 @@ static void dma_pte_clear_one(struct dom
 
 static void iommu_free_pagetable(u64 pt_maddr, int level)
 {
-    int i;
-    struct dma_pte *pt_vaddr, *pte;
-    int next_level = level - 1;
+    struct page_info *pg = maddr_to_page(pt_maddr);
 
     if ( pt_maddr == 0 )
         return;
 
+    spin_lock(&iommu_pt_cleanup_lock);
+    PFN_ORDER(pg) = level;
+    page_list_add_tail(pg, &iommu_pt_cleanup_list);
+    spin_unlock(&iommu_pt_cleanup_lock);
+}
+
+static void iommu_free_page_table(struct page_info *pg)
+{
+    unsigned int i, next_level = PFN_ORDER(pg) - 1;
+    u64 pt_maddr = page_to_maddr(pg);
+    struct dma_pte *pt_vaddr, *pte;
+
+    PFN_ORDER(pg) = 0;
     pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
 
     for ( i = 0; i < PTE_NUM; i++ )
@@ -694,6 +705,23 @@ static void iommu_free_pagetable(u64 pt_
     free_pgtable_maddr(pt_maddr);
 }
 
+static void iommu_free_pagetables(unsigned long unused)
+{
+    do {
+        struct page_info *pg;
+
+        spin_lock(&iommu_pt_cleanup_lock);
+        pg = page_list_remove_head(&iommu_pt_cleanup_list);
+        spin_unlock(&iommu_pt_cleanup_lock);
+        if ( !pg )
+            return;
+        iommu_free_page_table(pg);
+    } while ( !softirq_pending(smp_processor_id()) );
+
+    tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet,
+                            cpumask_cycle(smp_processor_id(), &cpu_online_map));
+}
+
 static int iommu_set_root_entry(struct iommu *iommu)
 {
     u32 sts;
@@ -1704,6 +1732,8 @@ void iommu_domain_teardown(struct domain
     iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
     hd->pgd_maddr = 0;
     spin_unlock(&hd->mapping_lock);
+
+    tasklet_schedule(&iommu_pt_cleanup_tasklet);
 }
 
 static int intel_iommu_map_page(
@@ -2204,6 +2234,7 @@ int __init intel_vtd_setup(void)
         if ( ret )
             goto error;
 
+    tasklet_init(&iommu_pt_cleanup_tasklet, iommu_free_pagetables, 0);
     register_keyhandler('V', &dump_iommu_info_keyhandler);
 
     return 0;
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -151,4 +151,8 @@ int adjust_vtd_irq_affinities(void);
  */
 DECLARE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+extern struct spinlock iommu_pt_cleanup_lock;
+extern struct page_list_head iommu_pt_cleanup_list;
+extern struct tasklet iommu_pt_cleanup_tasklet;
+
 #endif /* _IOMMU_H_ */