[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 1/5] IOMMU: make page table population preemptible
On 10/12/2013 15:45, Jan Beulich wrote: > Since this can take an arbitrary amount of time, the rooting domctl as > well as all involved code must become aware of this requiring a > continuation. > > The subject domain's rel_mem_list is being (ab)used for this, in a way > similar to and compatible with broken page offlining. > > Further, operations get slightly re-ordered in assign_device(): IOMMU > page tables now get set up _before_ the first device gets assigned, at > once closing a small timing window in which the guest may already see > the device but wouldn't be able to access it. > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> > > --- a/xen/arch/x86/domain.c > +++ b/xen/arch/x86/domain.c > @@ -1923,6 +1923,12 @@ int domain_relinquish_resources(struct d > } > > d->arch.relmem = RELMEM_xen; > + > + spin_lock(&d->page_alloc_lock); > + page_list_splice(&d->arch.relmem_list, &d->page_list); > + INIT_PAGE_LIST_HEAD(&d->arch.relmem_list); > + spin_unlock(&d->page_alloc_lock); > + > /* Fallthrough. Relinquish every page of memory. */ > case RELMEM_xen: > ret = relinquish_memory(d, &d->xenpage_list, ~0UL); > --- a/xen/arch/x86/mm/p2m-pod.c > +++ b/xen/arch/x86/mm/p2m-pod.c > @@ -459,7 +459,8 @@ p2m_pod_offline_or_broken_hit(struct pag > > pod_hit: > lock_page_alloc(p2m); > - page_list_add_tail(p, &d->arch.relmem_list); > + /* Insertion must be at list head (see iommu_populate_page_table()). 
*/ > + page_list_add(p, &d->arch.relmem_list); > unlock_page_alloc(p2m); > pod_unlock(p2m); > return 1; > --- a/xen/drivers/passthrough/iommu.c > +++ b/xen/drivers/passthrough/iommu.c > @@ -18,6 +18,7 @@ > #include <asm/hvm/iommu.h> > #include <xen/paging.h> > #include <xen/guest_access.h> > +#include <xen/event.h> > #include <xen/softirq.h> > #include <xen/keyhandler.h> > #include <xsm/xsm.h> > @@ -265,7 +266,23 @@ static int assign_device(struct domain * > d->mem_event->paging.ring_page)) ) > return -EXDEV; > > - spin_lock(&pcidevs_lock); > + if ( !spin_trylock(&pcidevs_lock) ) > + return -ERESTART; > + > + if ( need_iommu(d) <= 0 ) > + { > + if ( !iommu_use_hap_pt(d) ) > + { > + rc = iommu_populate_page_table(d); > + if ( rc ) > + { > + spin_unlock(&pcidevs_lock); > + return rc; > + } > + } > + d->need_iommu = 1; > + } > + > pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn); > if ( !pdev ) > { > @@ -290,15 +307,14 @@ static int assign_device(struct domain * > rc); > } > > - if ( has_arch_pdevs(d) && !need_iommu(d) ) > + done: > + if ( !has_arch_pdevs(d) && need_iommu(d) ) > { > - d->need_iommu = 1; > - if ( !iommu_use_hap_pt(d) ) > - rc = iommu_populate_page_table(d); > - goto done; > + d->need_iommu = 0; > + hd->platform_ops->teardown(d); > } > -done: > spin_unlock(&pcidevs_lock); > + > return rc; > } > > @@ -306,12 +322,17 @@ static int iommu_populate_page_table(str > { > struct hvm_iommu *hd = domain_hvm_iommu(d); > struct page_info *page; > - int rc = 0; > + int rc = 0, n = 0; > + > + d->need_iommu = -1; > > this_cpu(iommu_dont_flush_iotlb) = 1; > spin_lock(&d->page_alloc_lock); > > - page_list_for_each ( page, &d->page_list ) > + if ( unlikely(d->is_dying) ) > + rc = -ESRCH; > + > + while ( !rc && (page = page_list_remove_head(&d->page_list)) ) > { > if ( is_hvm_domain(d) || > (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page ) > @@ -321,7 +342,32 @@ static int iommu_populate_page_table(str > d, mfn_to_gmfn(d, page_to_mfn(page)), 
page_to_mfn(page), > IOMMUF_readable|IOMMUF_writable); > if ( rc ) > + { > + page_list_add(page, &d->page_list); > break; > + } > + } > + page_list_add_tail(page, &d->arch.relmem_list); > + if ( !(++n & 0xff) && !page_list_empty(&d->page_list) && Why the forced restart here? If nothing needs pre-empting, surely it is better to continue? Or is this about equality on the pcidevs_lock? > + hypercall_preempt_check() ) > + rc = -ERESTART; > + } > + > + if ( !rc ) > + { > + /* > + * The expectation here is that generally there are many normal pages > + * on relmem_list (the ones we put there) and only few being in an > + * offline/broken state. The latter ones are always at the head of the > + * list. Hence we first move the whole list, and then move back the > + * first few entries. > + */ > + page_list_move(&d->page_list, &d->arch.relmem_list); > + while ( (page = page_list_first(&d->page_list)) != NULL && > + (page->count_info & (PGC_state|PGC_broken)) ) > + { > + page_list_del(page, &d->page_list); > + page_list_add_tail(page, &d->arch.relmem_list); > } > } > > @@ -330,8 +376,11 @@ static int iommu_populate_page_table(str > > if ( !rc ) > iommu_iotlb_flush_all(d); > - else > + else if ( rc != -ERESTART ) > + { > hd->platform_ops->teardown(d); > + d->need_iommu = 0; > + } > > return rc; > } > @@ -688,7 +737,10 @@ int iommu_do_domctl( > > ret = device_assigned(seg, bus, devfn) ?: > assign_device(d, seg, bus, devfn); > - if ( ret ) > + if ( ret == -ERESTART ) > + ret = hypercall_create_continuation(__HYPERVISOR_domctl, > + "h", u_domctl); > + else if ( ret ) > printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: " > "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n", > seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), > --- a/xen/include/xen/sched.h > +++ b/xen/include/xen/sched.h > @@ -323,7 +323,7 @@ struct domain > > #ifdef HAS_PASSTHROUGH > /* Does this guest need iommu mappings? 
*/ > - bool_t need_iommu; > + s8 need_iommu; I think this change from bool_t to s8 needs a comment explaining that -1 indicates "the iommu mappings are pending creation". Is there any particular reason that -ERESTART is used when -EAGAIN is the prevailing style for hypercall continuations? ~Andrew _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |