Re: [Xen-ia64-devel] [PATCH][RFC] per vcpu VHPT
I sent out the old patches; sorry for that. The newest one is attached. Please discard the old ones. On Mon, Jul 24, 2006 at 09:54:28PM +0900, Isaku Yamahata wrote: > Hi. > > I implemented per-vcpu VHPT for non-VTi domains. > The motivation is to alleviate the vcpu migration cost between physical cpus > with the credit scheduler. > If more than one vcpu of the same domain runs on the same physical cpu, the VHPT needs to be flushed on every > vcpu switch. I'd like to avoid this scenario. > The patch is for discussion and performance evaluation, not for commit. > > > I checked the mailing list archives and found the thread > Xen/ia64 - global or per VP VHPT > http://lists.xensource.com/archives/html/xen-devel/2005-04/msg01002.html > > The discussion at that time did not reach a conclusion. > (At least that is my understanding; the thread was very long to follow, > so I might be wrong. Please correct me.) > With this patch, we can measure the performance and decide to include > this patch or discard the idea. > > > This patch introduces a compile time option, xen_ia64_pervcpu_vhpt=y, > to enable this feature, and a xen boot time option, pervcpu_vhpt=0, > to disable per vcpu vhpt allocation. > The patch depends on the tlb tracking patch which I sent before. > I attached these patches for convenience. > > Thanks > -- > yamahata > # HG changeset patch > # User yamahata@xxxxxxxxxxxxx > # Node ID c654d462c4481685fb2e803e41cb2beba56bee4b > # Parent b2abc70be89e02d0d380674096c8c1fb9e552431 > import linux/include/linux/hash.h. > PATCHNAME: import_linux_hash.h > > Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx> > > diff -r b2abc70be89e -r c654d462c448 xen/include/asm-ia64/linux/README.origin > --- a/xen/include/asm-ia64/linux/README.origin Wed Jul 19 07:17:54 > 2006 -0600 > +++ b/xen/include/asm-ia64/linux/README.origin Mon Jul 24 21:34:37 > 2006 +0900 > @@ -8,6 +8,7 @@ bitmap.h -> linux/include/linux/bitmap. > bitmap.h -> linux/include/linux/bitmap.h > bitops.h -> linux/include/linux/bitops.h > initrd.h -> linux/include/linux/initrd.h > +hash.h -> linux/include/linux/hash.h > jiffies.h -> linux/include/linux/jiffies.h > kmalloc_sizes.h -> linux/include/linux/kmalloc_sizes.h > linkage.h -> linux/include/linux/linkage.h > diff -r b2abc70be89e -r c654d462c448 xen/include/asm-ia64/linux/hash.h > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/xen/include/asm-ia64/linux/hash.h Mon Jul 24 21:34:37 2006 +0900 > @@ -0,0 +1,58 @@ > +#ifndef _LINUX_HASH_H > +#define _LINUX_HASH_H > +/* Fast hashing routine for a long. > + (C) 2002 William Lee Irwin III, IBM */ > + > +/* > + * Knuth recommends primes in approximately golden ratio to the maximum > + * integer representable by a machine word for multiplicative hashing. > + * Chuck Lever verified the effectiveness of this technique: > + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf > + * > + * These primes are chosen to be bit-sparse, that is operations on > + * them can use shifts and additions instead of multiplications for > + * machines where multiplications are slow. > + */ > +#if BITS_PER_LONG == 32 > +/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ > +#define GOLDEN_RATIO_PRIME 0x9e370001UL > +#elif BITS_PER_LONG == 64 > +/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ > +#define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL > +#else > +#error Define GOLDEN_RATIO_PRIME for your wordsize. 
> +#endif > + > +static inline unsigned long hash_long(unsigned long val, unsigned int bits) > +{ > + unsigned long hash = val; > + > +#if BITS_PER_LONG == 64 > + /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ > + unsigned long n = hash; > + n <<= 18; > + hash -= n; > + n <<= 33; > + hash -= n; > + n <<= 3; > + hash += n; > + n <<= 3; > + hash -= n; > + n <<= 4; > + hash += n; > + n <<= 2; > + hash += n; > +#else > + /* On some cpus multiply is faster, on others gcc will do shifts */ > + hash *= GOLDEN_RATIO_PRIME; > +#endif > + > + /* High bits are more random, so use them. */ > + return hash >> (BITS_PER_LONG - bits); > +} > + > +static inline unsigned long hash_ptr(void *ptr, unsigned int bits) > +{ > + return hash_long((unsigned long)ptr, bits); > +} > +#endif /* _LINUX_HASH_H */ > # HG changeset patch > # User yamahata@xxxxxxxxxxxxx > # Node ID cb0aa2b2e180d76d09592ed32338f9cb4ac5b7a0 > # Parent c654d462c4481685fb2e803e41cb2beba56bee4b > add tlb insert tracking to do vTLB flush finer grained virtual address > range when a page is unmapped from a domain. > This is functionality is enabled with a compile time option, > xen_ia64_tlb_track=y. > PATCHNAME: tlb_track > > Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx> > > diff -r c654d462c448 -r cb0aa2b2e180 xen/arch/ia64/Rules.mk > --- a/xen/arch/ia64/Rules.mk Mon Jul 24 21:34:37 2006 +0900 > +++ b/xen/arch/ia64/Rules.mk Mon Jul 24 21:35:16 2006 +0900 > @@ -39,6 +39,9 @@ ifeq ($(xen_ia64_dom0_virtual_physical), > ifeq ($(xen_ia64_dom0_virtual_physical),y) > CFLAGS += -DCONFIG_XEN_IA64_DOM0_VP > endif > +ifeq ($(xen_ia64_tlb_track),y) > +CFLAGS += -DCONFIG_XEN_IA64_TLB_TRACK > +endif > ifeq ($(no_warns),y) > CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized > endif > diff -r c654d462c448 -r cb0aa2b2e180 xen/arch/ia64/xen/Makefile > --- a/xen/arch/ia64/xen/Makefile Mon Jul 24 21:34:37 2006 +0900 > +++ b/xen/arch/ia64/xen/Makefile Mon Jul 24 21:35:16 2006 +0900 > @@ -27,3 +27,4 @@ obj-y += privop_stat.o > obj-y += privop_stat.o > > obj-$(crash_debug) += gdbstub.o > +obj-$(xen_ia64_tlb_track) += tlb_track.o > diff -r c654d462c448 -r cb0aa2b2e180 xen/arch/ia64/xen/domain.c > --- a/xen/arch/ia64/xen/domain.c Mon Jul 24 21:34:37 2006 +0900 > +++ b/xen/arch/ia64/xen/domain.c Mon Jul 24 21:35:16 2006 +0900 > @@ -60,6 +60,9 @@ > #include <asm/regionreg.h> > #include <asm/dom_fw.h> > #include <asm/privop_stat.h> > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > +#include <asm/tlb_track.h> > +#endif > > #ifndef CONFIG_XEN_IA64_DOM0_VP > #define CONFIG_DOMAIN0_CONTIGUOUS > @@ -351,6 +354,10 @@ int arch_domain_create(struct domain *d) > if (is_idle_domain(d)) > return 0; > > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > + if (tlb_track_create(d) < 0) > + goto fail_nomem; > +#endif > d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT)); > if (d->shared_info == NULL) > goto fail_nomem; > @@ -389,6 +396,9 @@ void arch_domain_destroy(struct domain * > if (d->shared_info != NULL) > free_xenheap_pages(d->shared_info, get_order_from_shift(XSI_SHIFT)); > > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > + tlb_track_destroy(d); > +#endif > domain_flush_destroy (d); > > deallocate_rid_range(d); > diff -r c654d462c448 -r cb0aa2b2e180 xen/arch/ia64/xen/faults.c > --- a/xen/arch/ia64/xen/faults.c Mon Jul 24 21:34:37 2006 +0900 > +++ b/xen/arch/ia64/xen/faults.c Mon Jul 24 21:35:16 2006 +0900 > @@ -27,6 +27,7 @@ > #include <asm/debugger.h> > #include <asm/fpswa.h> > #include <asm/bundle.h> > +#include <asm/p2m_entry.h> > #include 
<asm/privop_stat.h> > #include <asm/asm-xsi-offsets.h> > > @@ -202,8 +203,15 @@ void ia64_do_page_fault (unsigned long a > fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha); > if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) { > struct p2m_entry entry; > - pteval = translate_domain_pte(pteval, address, itir, &logps, > &entry); > - vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,logps); > + unsigned long m_pteval; > + m_pteval = translate_domain_pte(pteval, address, itir, &logps, > &entry); > +#ifndef CONFIG_XEN_IA64_TLB_TRACK > + vcpu_itc_no_srlz(current, (is_data? 2: 1) | 4, > + address, m_pteval, pteval, logps); > +#else > + vcpu_itc_no_srlz(current, (is_data? 2: 1) | 4, > + address, m_pteval, pteval, logps, &entry); > +#endif > if ((fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) || > p2m_entry_retry(&entry)) { > /* dtlb has been purged in-between. This dtlb was > diff -r c654d462c448 -r cb0aa2b2e180 xen/arch/ia64/xen/mm.c > --- a/xen/arch/ia64/xen/mm.c Mon Jul 24 21:34:37 2006 +0900 > +++ b/xen/arch/ia64/xen/mm.c Mon Jul 24 21:35:16 2006 +0900 > @@ -170,13 +170,14 @@ > #include <asm/pgalloc.h> > #include <asm/vhpt.h> > #include <asm/vcpu.h> > +#include <asm/p2m_entry.h> > #include <linux/efi.h> > > #ifndef CONFIG_XEN_IA64_DOM0_VP > #define CONFIG_DOMAIN0_CONTIGUOUS > #else > -static void domain_page_flush(struct domain* d, unsigned long mpaddr, > - unsigned long old_mfn, unsigned long new_mfn); > +static void domain_page_flush(struct domain* d, > + volatile pte_t* ptep, pte_t old_pte); > #endif > > static struct domain *dom_xen, *dom_io; > @@ -718,6 +719,19 @@ void *domain_mpa_to_imva(struct domain * > } > #endif > > +static unsigned long > +assign_flags_to_pteflags(unsigned long flags) > +{ > + unsigned long pteflags = > + (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX; > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > + if (flags & ASSIGN_tlb_track) { > + pteflags |= _PAGE_TLB_TRACKING; > + } > +#endif > + return pteflags; > +} > + > /* Allocate a new page for domain and map it to the specified metaphysical > address. */ > static struct page_info * > @@ -811,7 +825,7 @@ assign_new_domain0_page(struct domain *d > } > > /* map a physical address to the specified metaphysical addr */ > -// flags: currently only ASSIGN_readonly > +// flags: ASSIGN_xxx > // This is called by assign_domain_mmio_page(). > // So accessing to pte is racy. > void > @@ -823,13 +837,13 @@ __assign_domain_page(struct domain *d, > pte_t old_pte; > pte_t new_pte; > pte_t ret_pte; > - unsigned long arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: > _PAGE_AR_RWX; > + unsigned long pteflags = assign_flags_to_pteflags(flags); > > pte = lookup_alloc_domain_pte(d, mpaddr); > > old_pte = __pte(0); > new_pte = pfn_pte(physaddr >> PAGE_SHIFT, > - __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags)); > + __pgprot(__DIRTY_BITS | _PAGE_PL_2 | pteflags)); > ret_pte = ptep_cmpxchg_rel(&d->arch.mm, mpaddr, pte, old_pte, new_pte); > if (pte_val(ret_pte) == pte_val(old_pte)) > smp_mb(); > @@ -945,7 +959,7 @@ assign_domain_mach_page(struct domain *d > // caller must call set_gpfn_from_mfn() before call if necessary. > // because set_gpfn_from_mfn() result must be visible before pte xchg > // caller must use memory barrier. NOTE: xchg has acquire semantics. 
> -// flags: currently only ASSIGN_readonly > +// flags: ASSIGN_xxx > static void > assign_domain_page_replace(struct domain *d, unsigned long mpaddr, > unsigned long mfn, unsigned long flags) > @@ -954,11 +968,11 @@ assign_domain_page_replace(struct domain > volatile pte_t* pte; > pte_t old_pte; > pte_t npte; > - unsigned long arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: > _PAGE_AR_RWX; > + unsigned long pteflags = assign_flags_to_pteflags(flags); > pte = lookup_alloc_domain_pte(d, mpaddr); > > // update pte > - npte = pfn_pte(mfn, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags)); > + npte = pfn_pte(mfn, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | pteflags)); > old_pte = ptep_xchg(mm, mpaddr, pte, npte); > if (pte_mem(old_pte)) { > unsigned long old_mfn = pte_pfn(old_pte); > @@ -978,7 +992,7 @@ assign_domain_page_replace(struct domain > set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY); > } > > - domain_page_flush(d, mpaddr, old_mfn, mfn); > + domain_page_flush(d, pte, old_pte); > > try_to_clear_PGC_allocate(d, old_page); > put_page(old_page); > @@ -997,29 +1011,29 @@ assign_domain_page_cmpxchg_rel(struct do > struct mm_struct *mm = &d->arch.mm; > volatile pte_t* pte; > unsigned long old_mfn; > - unsigned long old_arflags; > + unsigned long old_pteflags; > pte_t old_pte; > unsigned long new_mfn; > - unsigned long new_arflags; > + unsigned long new_pteflags; > pte_t new_pte; > pte_t ret_pte; > > pte = lookup_alloc_domain_pte(d, mpaddr); > > again: > - old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK; > + old_pteflags = pte_val(*pte) & ~_PAGE_PPN_MASK; > old_mfn = page_to_mfn(old_page); > - old_pte = pfn_pte(old_mfn, __pgprot(old_arflags)); > + old_pte = pfn_pte(old_mfn, __pgprot(old_pteflags)); > if (!pte_present(old_pte)) { > - DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx\n", > - __func__, pte_val(old_pte), old_arflags, old_mfn); > + DPRINTK("%s: old_pte 0x%lx old_pteflags 0x%lx old_mfn 0x%lx\n", > + __func__, pte_val(old_pte), old_pteflags, old_mfn); > return -EINVAL; > } > > - new_arflags = (flags & ASSIGN_readonly)? 
_PAGE_AR_R: _PAGE_AR_RWX; > + new_pteflags = assign_flags_to_pteflags(flags); > new_mfn = page_to_mfn(new_page); > new_pte = pfn_pte(new_mfn, > - __pgprot(__DIRTY_BITS | _PAGE_PL_2 | new_arflags)); > + __pgprot(__DIRTY_BITS | _PAGE_PL_2 | new_pteflags)); > > // update pte > ret_pte = ptep_cmpxchg_rel(mm, mpaddr, pte, old_pte, new_pte); > @@ -1028,10 +1042,10 @@ assign_domain_page_cmpxchg_rel(struct do > goto again; > } > > - DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx " > + DPRINTK("%s: old_pte 0x%lx old_pteflags 0x%lx old_mfn 0x%lx " > "ret_pte 0x%lx ret_mfn 0x%lx\n", > __func__, > - pte_val(old_pte), old_arflags, old_mfn, > + pte_val(old_pte), old_pteflags, old_mfn, > pte_val(ret_pte), pte_pfn(ret_pte)); > return -EINVAL; > } > @@ -1043,7 +1057,7 @@ assign_domain_page_cmpxchg_rel(struct do > > set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY); > > - domain_page_flush(d, mpaddr, old_mfn, new_mfn); > + domain_page_flush(d, pte, old_pte); > put_page(old_page); > return 0; > } > @@ -1111,7 +1125,7 @@ zap_domain_page_one(struct domain *d, un > set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); > } > > - domain_page_flush(d, mpaddr, mfn, INVALID_MFN); > + domain_page_flush(d, pte, old_pte); > > if (page_get_owner(page) != NULL) { > try_to_clear_PGC_allocate(d, page); > @@ -1199,8 +1213,12 @@ create_grant_host_mapping(unsigned long > BUG_ON(ret == 0); > BUG_ON(page_get_owner(mfn_to_page(mfn)) == d && > get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY); > - assign_domain_page_replace(d, gpaddr, mfn, (flags & GNTMAP_readonly)? > - ASSIGN_readonly: > ASSIGN_writable); > + assign_domain_page_replace(d, gpaddr, mfn, > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > + ASSIGN_tlb_track | > +#endif > + ((flags & GNTMAP_readonly) ? > + ASSIGN_readonly: ASSIGN_writable)); > return GNTST_okay; > } > > @@ -1254,7 +1272,7 @@ destroy_grant_host_mapping(unsigned long > } > BUG_ON(pte_pfn(old_pte) != mfn); > > - domain_page_flush(d, gpaddr, mfn, INVALID_MFN); > + domain_page_flush(d, pte, old_pte); > > page = mfn_to_page(mfn); > BUG_ON(page_get_owner(page) == d);//try_to_clear_PGC_allocate(d, page) > is not needed. > @@ -1418,11 +1436,38 @@ guest_physmap_remove_page(struct domain > > //XXX sledgehammer. > // flush finer range. 
> -void > -domain_page_flush(struct domain* d, unsigned long mpaddr, > - unsigned long old_mfn, unsigned long new_mfn) > -{ > +static void > +domain_page_flush(struct domain* d, volatile pte_t* ptep, pte_t old_pte) > +{ > +#ifndef CONFIG_XEN_IA64_TLB_TRACK > domain_flush_vtlb_all(); > +#else > + struct tlb_track_entry* entry; > + switch (tlb_track_search_and_remove(d->arch.tlb_track, > + ptep, old_pte, &entry)) { > + case TLB_TRACK_NOT_TRACKED: > + //DPRINTK("%s TLB_TRACK_NOT_TRACKED\n", __func__); > + domain_flush_vtlb_all(); > + break; > + case TLB_TRACK_NOT_FOUND: > + // do nothing > + //DPRINTK("%s TLB_TRACK_NOT_FOUND\n", __func__); > + break; > + case TLB_TRACK_FOUND: > + //DPRINTK("%s TLB_TRACK_FOUND\n", __func__); > + domain_flush_vltb_track_entry(d, entry); > + tlb_track_free_entry(d->arch.tlb_track, entry); > + break; > + case TLB_TRACK_MANY: > + DPRINTK("%s TLB_TRACK_MANY\n", __func__); > + domain_flush_vtlb_all(); > + break; > + case TLB_TRACK_AGAIN: > + DPRINTK("%s TLB_TRACK_AGAIN\n", __func__); > + BUG(); > + break; > + } > +#endif > } > > int > diff -r c654d462c448 -r cb0aa2b2e180 xen/arch/ia64/xen/vcpu.c > --- a/xen/arch/ia64/xen/vcpu.c Mon Jul 24 21:34:37 2006 +0900 > +++ b/xen/arch/ia64/xen/vcpu.c Mon Jul 24 21:35:16 2006 +0900 > @@ -22,6 +22,7 @@ > #include <asm/vmx_phy_mode.h> > #include <asm/bundle.h> > #include <asm/privop_stat.h> > +#include <asm/p2m_entry.h> > > /* FIXME: where these declarations should be there ? */ > extern void getreg(unsigned long regnum, unsigned long *val, int *nat, > struct pt_regs *regs); > @@ -2003,7 +2004,11 @@ IA64FAULT vcpu_set_dtr(VCPU *vcpu, u64 s > VCPU translation cache access routines > **************************************************************************/ > > +#ifndef CONFIG_XEN_IA64_TLB_TRACK > void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, > UINT64 mp_pte, UINT64 logps) > +#else > +void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, > UINT64 mp_pte, UINT64 logps, struct p2m_entry* entry) > +#endif > { > unsigned long psr; > unsigned long ps = (vcpu->domain==dom0) ? 
logps : PAGE_SHIFT; > @@ -2017,6 +2022,9 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 > > #ifdef CONFIG_XEN_IA64_DOM0_VP > BUG_ON(logps > PAGE_SHIFT); > +#endif > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > + vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry); > #endif > psr = ia64_clear_ic(); > ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings > @@ -2035,7 +2043,7 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 > // PAGE_SIZE mapping in the vhpt for now, else purging is complicated > else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2); > #endif > - if ((mp_pte == -1UL) || (IorD & 0x4)) // don't place in 1-entry TLB > + if (IorD & 0x4) // don't place in 1-entry TLB > return; > if (IorD & 0x1) { > vcpu_set_tr_entry(&PSCBX(vcpu,itlb),mp_pte,ps<<2,vaddr); > @@ -2060,7 +2068,11 @@ again: > pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry); > if (!pteval) return IA64_ILLOP_FAULT; > if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0])); > +#ifndef CONFIG_XEN_IA64_TLB_TRACK > vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps); > +#else > + vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps,&entry); > +#endif > if (swap_rr0) set_metaphysical_rr0(); > if (p2m_entry_retry(&entry)) { > vcpu_flush_tlb_vhpt_range(ifa, logps); > @@ -2083,7 +2095,11 @@ again: > pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry); > if (!pteval) return IA64_ILLOP_FAULT; > if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0])); > +#ifndef CONFIG_XEN_IA64_TLB_TRACK > vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps); > +#else > + vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps,&entry); > +#endif > if (swap_rr0) set_metaphysical_rr0(); > if (p2m_entry_retry(&entry)) { > vcpu_flush_tlb_vhpt_range(ifa, logps); > diff -r c654d462c448 -r cb0aa2b2e180 xen/arch/ia64/xen/vhpt.c > --- a/xen/arch/ia64/xen/vhpt.c Mon Jul 24 21:34:37 2006 +0900 > +++ b/xen/arch/ia64/xen/vhpt.c Mon Jul 24 21:35:16 2006 +0900 > @@ -227,6 +227,48 @@ void domain_flush_vtlb_range (struct dom > ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT); > } > > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > +#include <asm/tlb_track.h> > +void > +domain_flush_vltb_track_entry(struct domain* d, > + const struct tlb_track_entry* entry) > +{ > + unsigned long old_rid; > + struct vcpu* v; > + int cpu; > + > + //tlb_track_entry_printf(entry); > + vcpu_get_rr(current, 0, &old_rid); > + vcpu_set_rr(current, 0, entry->rid); > + > + for_each_vcpu(d, v) { > + if (!test_bit(_VCPUF_initialised, &v->vcpu_flags)) > + continue; > + if (!vcpu_isset(v->vcpu_id, entry->vcpu_dirty_mask)) > + continue; > + > + /* Purge TC entries. > + FIXME: clear only if match. */ > + vcpu_purge_tr_entry(&PSCBX(v, dtlb)); > + vcpu_purge_tr_entry(&PSCBX(v, itlb)); > + } > + smp_mb(); > + > + for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) { > + //printk("%s:%d cpu %d\n", __func__, __LINE__, cpu); > + /* Invalidate VHPT entries. */ > + cpu_flush_vhpt_range(cpu, entry->vaddr, PAGE_SIZE); > + } > + // ptc.ga has release semantics. > + > + /* ptc.ga */ > + ia64_global_tlb_purge(entry->vaddr, entry->vaddr + PAGE_SIZE, > + PAGE_SHIFT); > + > + vcpu_set_rr(current, 0, old_rid); > +} > +#endif > + > static void flush_tlb_vhpt_all (struct domain *d) > { > /* First VHPT. 
*/ > diff -r c654d462c448 -r cb0aa2b2e180 xen/include/asm-ia64/domain.h > --- a/xen/include/asm-ia64/domain.h Mon Jul 24 21:34:37 2006 +0900 > +++ b/xen/include/asm-ia64/domain.h Mon Jul 24 21:35:16 2006 +0900 > @@ -12,28 +12,10 @@ > #include <xen/cpumask.h> > #include <asm/fpswa.h> > > -struct p2m_entry { > - volatile pte_t* pte; > - pte_t used; > -}; > - > -static inline void > -p2m_entry_set(struct p2m_entry* entry, volatile pte_t* pte, pte_t used) > -{ > - entry->pte = pte; > - entry->used = used; > -} > - > -static inline int > -p2m_entry_retry(struct p2m_entry* entry) > -{ > - //XXX see lookup_domain_pte(). > - // NULL is set for invalid gpaddr for the time being. > - if (entry->pte == NULL) > - return 0; > - > - return (pte_val(*entry->pte) != pte_val(entry->used)); > -} > +struct p2m_entry; > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > +struct tlb_track; > +#endif > > extern void domain_relinquish_resources(struct domain *); > > @@ -118,6 +100,10 @@ struct arch_domain { > void *fpswa_inf; > > struct last_vcpu last_vcpu[NR_CPUS]; > + > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > + struct tlb_track* tlb_track; > +#endif > }; > #define INT_ENABLE_OFFSET(v) \ > (sizeof(vcpu_info_t) * (v)->vcpu_id + \ > diff -r c654d462c448 -r cb0aa2b2e180 xen/include/asm-ia64/tlbflush.h > --- a/xen/include/asm-ia64/tlbflush.h Mon Jul 24 21:34:37 2006 +0900 > +++ b/xen/include/asm-ia64/tlbflush.h Mon Jul 24 21:35:16 2006 +0900 > @@ -22,6 +22,13 @@ void domain_flush_vtlb_all (void); > /* Global range-flush of vTLB. */ > void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range); > > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > +struct tlb_track_entry; > +/* Global entry-flush of vTLB */ > +void domain_flush_vltb_track_entry(struct domain* d, > + const struct tlb_track_entry* entry); > +#endif > + > /* Final vTLB flush on every dirty cpus. */ > void domain_flush_destroy (struct domain *d); > > diff -r c654d462c448 -r cb0aa2b2e180 xen/include/asm-ia64/vcpu.h > --- a/xen/include/asm-ia64/vcpu.h Mon Jul 24 21:34:37 2006 +0900 > +++ b/xen/include/asm-ia64/vcpu.h Mon Jul 24 21:35:16 2006 +0900 > @@ -158,7 +158,12 @@ extern void vcpu_set_next_timer(VCPU *vc > extern void vcpu_set_next_timer(VCPU *vcpu); > extern BOOLEAN vcpu_timer_expired(VCPU *vcpu); > extern UINT64 vcpu_deliverable_interrupts(VCPU *vcpu); > +#ifndef CONFIG_XEN_IA64_TLB_TRACK > extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, > UINT64); > +#else > +struct p2m_entry; > +extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, > UINT64, struct p2m_entry*); > +#endif > extern UINT64 vcpu_get_tmp(VCPU *, UINT64); > extern void vcpu_set_tmp(VCPU *, UINT64, UINT64); > > diff -r c654d462c448 -r cb0aa2b2e180 xen/include/public/arch-ia64.h > --- a/xen/include/public/arch-ia64.h Mon Jul 24 21:34:37 2006 +0900 > +++ b/xen/include/public/arch-ia64.h Mon Jul 24 21:35:16 2006 +0900 > @@ -357,8 +357,14 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte > // address space. > // flags for page assignement to pseudo physical address space > #define _ASSIGN_readonly 0 > +#define _ASSIGN_tlb_track 1 > + > #define ASSIGN_readonly (1UL << _ASSIGN_readonly) > #define ASSIGN_writable (0UL << _ASSIGN_readonly) // dummy > flag > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > +# define ASSIGN_tlb_track (1UL << _ASSIGN_tlb_track) > +#endif > + > > /* This structure has the same layout of struct ia64_boot_param, defined in > <asm/system.h>. It is redefined here to ease use. 
*/ > diff -r c654d462c448 -r cb0aa2b2e180 xen/arch/ia64/xen/tlb_track.c > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/xen/arch/ia64/xen/tlb_track.c Mon Jul 24 21:35:16 2006 +0900 > @@ -0,0 +1,558 @@ > +/****************************************************************************** > + * tlb_track.h > + * > + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> > + * VA Linux Systems Japan K.K. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + * > + */ > + > +#include <asm/tlb_track.h> > +#include <asm/p2m_entry.h> > +#include <asm/vmx_mm_def.h> // for IA64_RR_SHIFT > +#include <asm/vcpu.h> // for PSCB() > + > +#define CONFIG_TLB_TRACK_DEBUG > +#ifdef CONFIG_TLB_TRACK_DEBUG > +# define tlb_track_printd(fmt, ...) \ > + printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__) > +#else > +# define tlb_track_printd(fmt, ...) do { } while (0) > +#endif > + > +#define CONFIG_TLB_TRACK_STAT_KEY_HANDLER > +#ifdef CONFIG_TLB_TRACK_STAT_KEY_HANDLER > +#include <asm/regs.h> > +#include <xen/keyhandler.h> > + > +static void > +dump_tlb_track_stat(unsigned char key) > +{ > + tlb_track_stat_printf(&dom0->arch.tlb_track->stat); > +} > +#endif > + > +static int > +tlb_track_allocate_entries(struct tlb_track* tlb_track) > +{ > + struct page_info* entry_page; > + struct tlb_track_entry* track_entries; > + unsigned int allocated; > + unsigned long i; > + > + BUG_ON(tlb_track->num_free > 0); > + if (tlb_track->num_entries >= tlb_track->limit) { > + DPRINTK("%s: num_entries %d limit %d\n", > + __func__, tlb_track->num_entries, tlb_track->limit); > + return -ENOMEM; > + } > + entry_page = alloc_domheap_page(NULL); > + if (entry_page == NULL) { > + DPRINTK("%s: domheap page failed. 
num_entries %d limit %d\n", > + __func__, tlb_track->num_entries, tlb_track->limit); > + return -ENOMEM; > + } > + > + list_add(&entry_page->list, &tlb_track->page_list); > + track_entries = (struct tlb_track_entry*)page_to_virt(entry_page); > + allocated = PAGE_SIZE / sizeof(track_entries[0]); > + tlb_track->num_entries += allocated; > + tlb_track->num_free += allocated; > + for (i = 0; i < allocated; i++) { > + list_add(&track_entries[i].list, &tlb_track->free_list); > + //tlb_track_printd("track_entries[%ld] 0x%p\n", i, > &track_entries[i]); > + } > + tlb_track_printd("allocated %d num_entries %d num_free %d\n", > + allocated, tlb_track->num_entries, tlb_track->num_free); > + return 0; > +} > + > + > +int > +tlb_track_create(struct domain* d) > +{ > + struct tlb_track* tlb_track = NULL; > + struct page_info* hash_page = NULL; > + unsigned int hash_size; > + unsigned int hash_shift; > + unsigned int i; > + > + tlb_track = xmalloc(struct tlb_track); > + if (tlb_track == NULL) { > + goto out; > + } > + hash_page = alloc_domheap_page(NULL); > + if (hash_page == NULL) { > + goto out; > + } > + > + spin_lock_init(&tlb_track->free_list_lock); > + INIT_LIST_HEAD(&tlb_track->free_list); > + tlb_track->limit = TLB_TRACK_LIMIT_ENTRIES; > + tlb_track->num_entries = 0; > + tlb_track->num_free = 0; > + INIT_LIST_HEAD(&tlb_track->page_list); > + if (tlb_track_allocate_entries(tlb_track) < 0) { > + goto out; > + } > + > + spin_lock_init(&tlb_track->hash_lock); > + //XXX hash size optimization > + hash_size = PAGE_SIZE / sizeof(tlb_track->hash[0]); > + for (hash_shift = 0; (1 << (hash_shift + 1)) < hash_size; hash_shift++) > + /* nothing */; > + tlb_track->hash_size = (1 << hash_shift); > + tlb_track->hash_shift = hash_shift; > + tlb_track->hash_mask = (1 << hash_shift) - 1; > + tlb_track->hash = page_to_virt(hash_page); > + for (i = 0; i < tlb_track->hash_size; i++) { > + INIT_LIST_HEAD(&tlb_track->hash[i]); > + } > + > + memset(&tlb_track->stat, 0, sizeof(tlb_track->stat)); > + > + smp_mb(); // make initialization visible before use. 
> + d->arch.tlb_track = tlb_track; > + printk("%s:%d hash 0x%p hash_size %d \n", > + __func__, __LINE__, tlb_track->hash, tlb_track->hash_size); > + > +#ifdef CONFIG_TLB_TRACK_STAT_KEY_HANDLER > + register_keyhandler( > + 's', dump_tlb_track_stat, "dump dom0 tlb track stats"); > +#endif > + return 0; > + > +out: > + if (hash_page != NULL) { > + free_domheap_page(hash_page); > + } > + if (tlb_track != NULL) { > + xfree(tlb_track); > + } > + return -ENOMEM; > +} > + > +void > +tlb_track_destroy(struct domain* d) > +{ > + struct tlb_track* tlb_track = d->arch.tlb_track; > + struct page_info* page; > + struct page_info* next; > + > + spin_lock(&tlb_track->free_list_lock); > + BUG_ON(tlb_track->num_free != tlb_track->num_entries); > + > + list_for_each_entry_safe(page, next, &tlb_track->page_list, list) { > + list_del(&page->list); > + free_domheap_page(page); > + } > + > + free_domheap_page(virt_to_page(tlb_track->hash)); > + xfree(tlb_track); > + //d->tlb_track = NULL; > +} > + > +static struct tlb_track_entry* > +tlb_track_get_entry(struct tlb_track* tlb_track) > +{ > + struct tlb_track_entry* entry = NULL; > + spin_lock(&tlb_track->free_list_lock); > + if (tlb_track->num_free == 0) { > + (void)tlb_track_allocate_entries(tlb_track); > + } > + if (tlb_track->num_free > 0) { > + BUG_ON(list_empty(&tlb_track->free_list)); > + entry = list_entry(tlb_track->free_list.next, > + struct tlb_track_entry, list); > + tlb_track->num_free--; > + list_del(&entry->list); > + } > + spin_unlock(&tlb_track->free_list_lock); > + return entry; > +} > + > +void > +tlb_track_free_entry(struct tlb_track* tlb_track, > + struct tlb_track_entry* entry) > +{ > + spin_lock(&tlb_track->free_list_lock); > + list_add(&entry->list, &tlb_track->free_list); > + tlb_track->num_free++; > + spin_unlock(&tlb_track->free_list_lock); > +} > + > + > +#include <linux/hash.h> > +// XXX hash function. > +static struct list_head* > +tlb_track_hash_head(struct tlb_track* tlb_track, volatile pte_t* ptep) > +{ > + unsigned long hash = hash_long((unsigned long)ptep, > tlb_track->hash_shift); > + BUG_ON(hash >= tlb_track->hash_size); > + BUG_ON((hash & tlb_track->hash_mask) != hash); > + return &tlb_track->hash[hash]; > +} > + > +static int > +tlb_track_pte_zapped(pte_t old_pte, pte_t ret_pte) > +{ > + if (pte_pfn(old_pte) != pte_pfn(ret_pte) || > + (pte_val(old_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK)) != > + (pte_val(ret_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK))) { > + // Other thread zapped the p2m entry. > + return 1; > + } > + return 0; > +} > + > +static TLB_TRACK_RET_T > +tlb_track_insert_or_dirty(struct tlb_track* tlb_track, struct mm_struct* mm, > + volatile pte_t* ptep, pte_t old_pte, > + unsigned long vaddr, unsigned long rid) > +{ > + unsigned long mfn = pte_pfn(old_pte); > + struct list_head* head = tlb_track_hash_head(tlb_track, ptep); > + struct tlb_track_entry* entry; > + struct tlb_track_entry* new_entry = NULL; > + unsigned long bit_to_be_set = _PAGE_TLB_INSERTED; > + pte_t new_pte; > + pte_t ret_pte; > + > + struct vcpu* v = current; > + TLB_TRACK_RET_T ret = TLB_TRACK_NOT_FOUND; > + > + tlb_track->stat.iod++; > + if (!pte_tlb_tracking(old_pte)) { > + tlb_track->stat.iod_not_tracked++; > + return TLB_TRACK_NOT_TRACKED; > + } > + if (pte_tlb_inserted_many(old_pte)) { > + tlb_track->stat.iod_tracked_many++; > + return TLB_TRACK_MANY; > + } > + > + // vaddr must be normalized so that it is in rr0 and page aligned. 
> + BUG_ON((vaddr >> IA64_RR_SHIFT) != 0); > + BUG_ON((vaddr & ~PAGE_MASK) != 0); > +#if 0 > + tlb_track_printd("\n" > + "\tmfn 0x%016lx\n" > + "\told_pte 0x%016lx ptep 0x%p\n" > + "\tptep_val 0x%016lx vaddr 0x%016lx rid %ld\n" > + "\ttlb_track 0x%p head 0x%p\n", > + mfn, > + pte_val(old_pte), ptep, pte_val(*ptep), > + vaddr, rid, > + tlb_track, head); > +#endif > + > + again: > + // zapping side may zap the p2m entry and then remove tlb track entry > + // non-atomically. We may see the stale tlb track entry here. > + // p2m_entry_retry() handles such a case. > + // Or other thread may zap the p2m entry and remove tlb track entry > + // and inserted new tlb track entry. > + spin_lock(&tlb_track->hash_lock); > + list_for_each_entry(entry, head, list) { > + if (entry->ptep != ptep) { > + continue; > + } > + > + if (pte_pfn(entry->pte_val) == mfn) { > + //tlb_track_entry_printf(entry); > + if (entry->vaddr == vaddr && entry->rid == rid) { > + //tlb_track_printd("TLB_TRACK_FOUND\n"); > + ret = TLB_TRACK_FOUND; > + tlb_track->stat.iod_found++; > +#ifdef CONFIG_TLB_TRACK_CNT > + entry->cnt++; > + if (entry->cnt > TLB_TRACK_CNT_FORCE_MANY) { > + // heuristics: > + // If a page is used to transfer data by dev channel, > + // it would be unmapped with small amount access > + // (once or twice tlb insert) after real device > + // I/O completion. It would be short period. > + // However this page seems to be accessed many times. > + // We guess that this page is used I/O ring > + // so that tracking this entry might be useless. > + //tlb_track_entry_printf(entry); > + //tlb_track_printd("cnt = %ld\n", entry->cnt); > + tlb_track->stat.iod_force_many++; > + goto force_many; > + } > +#endif > + goto found; > + } else { > +#ifdef CONFIG_TLB_TRACK_CNT > + force_many: > +#endif > + if (!pte_tlb_inserted(old_pte)) { > + printk("%s:%d racy update\n", __func__, __LINE__); > + old_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED); > + } > + new_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED_MANY); > + ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, > new_pte); > + if (pte_val(ret_pte) != pte_val(old_pte)) { > + //tlb_track_printd("TLB_TRACK_AGAIN\n"); > + ret = TLB_TRACK_AGAIN; > + tlb_track->stat.iod_again++; > + } else { > + //tlb_track_printd("TLB_TRACK_MANY del entry 0x%p\n", > entry); > + ret = TLB_TRACK_MANY; > + list_del(&entry->list); > + //tlb_track_entry_printf(entry); > + tlb_track->stat.iod_tracked_many_del++; > + } > + goto out; > + } > + } > + > + // Other thread changed the p2m entry and removed and inserted new > + // tlb tracn entry after we get old_pte, but before we get > + // spinlock. > + //tlb_track_printd("TLB_TRACK_AGAIN\n"); > + ret = TLB_TRACK_AGAIN; > + tlb_track->stat.iod_again++; > + goto out; > + } > + > + entry = NULL; // prevent freeing entry. > + if (pte_tlb_inserted(old_pte)) { > + // Other thread else removed the tlb_track_entry after we got old_pte > + // before we got spin lock. > + ret = TLB_TRACK_AGAIN; > + tlb_track->stat.iod_again++; > + goto out; > + } > + if (new_entry == NULL && bit_to_be_set == _PAGE_TLB_INSERTED) { > + spin_unlock(&tlb_track->hash_lock); > + new_entry = tlb_track_get_entry(tlb_track); > + if (new_entry == NULL) { > + tlb_track_printd("get_entry failed\n"); > + // entry can't be allocated. > + // fall down into full flush mode. 
> + bit_to_be_set |= _PAGE_TLB_INSERTED_MANY; > + tlb_track->stat.iod_new_failed++; > + } > + //tlb_track_printd("new_entry 0x%p\n", new_entry); > + tlb_track->stat.iod_new_entry++; > + goto again; > + } > + > + BUG_ON(pte_tlb_inserted_many(old_pte)); > + new_pte = __pte(pte_val(old_pte) | bit_to_be_set); > + ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte); > + if (pte_val(old_pte) != pte_val(ret_pte)) { > + if (tlb_track_pte_zapped(old_pte, ret_pte)) { > + //tlb_track_printd("zapped TLB_TRACK_AGAIN\n"); > + ret = TLB_TRACK_AGAIN; > + tlb_track->stat.iod_again++; > + goto out; > + } > + > + // Other thread set _PAGE_TLB_INSERTED and/or _PAGE_TLB_INSERTED_MANY > + if (pte_tlb_inserted_many(ret_pte)) { > + // Other thread already set _PAGE_TLB_INSERTED_MANY and > + // removed the entry. > + //tlb_track_printd("iserted TLB_TRACK_MANY\n"); > + BUG_ON(!pte_tlb_inserted(ret_pte)); > + ret = TLB_TRACK_MANY; > + tlb_track->stat.iod_new_many++; > + goto out; > + } > + BUG_ON(pte_tlb_inserted(ret_pte)); > + BUG(); > + } > + if (new_entry) { > + //tlb_track_printd("iserting new_entry 0x%p\n", new_entry); > + entry = new_entry; > + new_entry = NULL; > + > + entry->ptep = ptep; > + entry->pte_val = old_pte; > + entry->vaddr = vaddr; > + entry->rid = rid; > + cpus_clear(entry->pcpu_dirty_mask); > + vcpus_clear(entry->vcpu_dirty_mask); > + list_add(&entry->list, head); > + > +#ifdef CONFIG_TLB_TRACK_CNT > + entry->cnt = 0; > +#endif > + tlb_track->stat.iod_insert++; > + //tlb_track_entry_printf(entry); > + } else { > + goto out; > + } > + > + found: > + BUG_ON(v->processor >= NR_CPUS); > + cpu_set(v->processor, entry->pcpu_dirty_mask); > + BUG_ON(v->vcpu_id >= NR_CPUS); > + vcpu_set(v->vcpu_id, entry->vcpu_dirty_mask); > + tlb_track->stat.iod_dirtied++; > + > + out: > + spin_unlock(&tlb_track->hash_lock); > + if (ret == TLB_TRACK_MANY && entry != NULL) { > + tlb_track_free_entry(tlb_track, entry); > + } > + if (new_entry != NULL) { > + tlb_track_free_entry(tlb_track, new_entry); > + } > + return ret; > +} > + > +void > +vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr, > + struct p2m_entry* entry) > +{ > + unsigned long vrn = vaddr >> IA64_RR_SHIFT; > + unsigned long rid = PSCB(vcpu, rrs[vrn]); > + TLB_TRACK_RET_T ret; > + > + vaddr = (vaddr << 3) >> 3;// mask rid bit > + vaddr &= PAGE_MASK; > + ret = tlb_track_insert_or_dirty(vcpu->domain->arch.tlb_track, > + &vcpu->domain->arch.mm, > + entry->ptep, entry->used, > + vaddr, rid); > + if (ret == TLB_TRACK_AGAIN) { > + p2m_entry_set_retry(entry); > + } > +} > + > +TLB_TRACK_RET_T > +tlb_track_search_and_remove(struct tlb_track* tlb_track, > + volatile pte_t* ptep, pte_t old_pte, > + struct tlb_track_entry** entryp) > +{ > + unsigned long mfn = pte_pfn(old_pte); > + struct list_head* head = tlb_track_hash_head(tlb_track, ptep); > + struct tlb_track_entry* entry; > + > + tlb_track->stat.sar++; > + if (!pte_tlb_tracking(old_pte)) { > + tlb_track->stat.sar_not_tracked++; > + return TLB_TRACK_NOT_TRACKED; > + } > + if (!pte_tlb_inserted(old_pte)) { > + BUG_ON(pte_tlb_inserted_many(old_pte)); > + tlb_track->stat.sar_not_found++; > + return TLB_TRACK_NOT_FOUND; > + } > + if (pte_tlb_inserted_many(old_pte)) { > + BUG_ON(!pte_tlb_inserted(old_pte)); > + tlb_track->stat.sar_many++; > + return TLB_TRACK_MANY; > + } > + > + spin_lock(&tlb_track->hash_lock); > + list_for_each_entry(entry, head, list) { > + if (entry->ptep != ptep) { > + continue; > + } > + if (pte_pfn(entry->pte_val) == mfn) { > + list_del(&entry->list); > + 
tlb_track->stat.sar_found++; > + spin_unlock(&tlb_track->hash_lock); > + *entryp = entry; > + //tlb_track_entry_printf(entry); > +#ifdef CONFIG_TLB_TRACK_CNT > + //tlb_track_printd("cnt = %ld\n", entry->cnt); > +#endif > + return TLB_TRACK_FOUND; > + } > + BUG(); > + } > + BUG(); > + spin_unlock(&tlb_track->hash_lock); > + return TLB_TRACK_NOT_TRACKED; > +} > + > +void > +tlb_track_stat_printf(const struct tlb_track_stat* stat) > +{ > + printk("iod %ld\n" > + "iod_again %ld\n" > + "iod_not_tracked %ld\n" > + "iod_force_many %ld\n" > + "iod_tracked_many %ld\n" > + "iod_tracked_many_del %ld\n" > + "iod_found %ld\n" > + "iod_new_entry %ld\n" > + "iod_new_failed %ld\n" > + "iod_new_many %ld\n" > + "iod_insert %ld\n" > + "iod_dirtied %ld\n" > + "sar %ld\n" > + "sar_not_tracked %ld\n" > + "sar_not_found %ld\n" > + "sar_found %ld\n" > + "sar_many %ld\n", > + stat->iod, > + stat->iod_again, > + stat->iod_not_tracked, > + stat->iod_force_many, > + stat->iod_tracked_many, > + stat->iod_tracked_many_del, > + stat->iod_found, > + stat->iod_new_entry, > + stat->iod_new_failed, > + stat->iod_new_many, > + stat->iod_insert, > + stat->iod_dirtied, > + stat->sar, > + stat->sar_not_tracked, > + stat->sar_not_found, > + stat->sar_found, > + stat->sar_many); > +} > + > +// for debug > +void > +__tlb_track_entry_printf(const char* func, int line, > + const struct tlb_track_entry* entry) > +{ > + char pcpumask_buf[NR_CPUS + 1]; > + char vcpumask_buf[MAX_VIRT_CPUS + 1]; > + cpumask_scnprintf(pcpumask_buf, sizeof(pcpumask_buf), > + entry->pcpu_dirty_mask); > + vcpumask_scnprintf(vcpumask_buf, sizeof(vcpumask_buf), > + entry->vcpu_dirty_mask); > + printk("%s:%d\n" > + "\tmfn 0x%016lx\n" > + "\told_pte 0x%016lx ptep 0x%p\n" > + "\tpte_val 0x%016lx vaddr 0x%016lx rid %ld\n" > + "\tpcpu_dirty_mask %s vcpu_dirty_mask %s\n" > + "\tentry 0x%p\n", > + func, line, > + pte_pfn(entry->pte_val), > + pte_val(entry->pte_val), entry->ptep, pte_val(*entry->ptep), > + entry->vaddr, entry->rid, > + pcpumask_buf, vcpumask_buf, > + entry); > +} > + > +/* > + * Local variables: > + * mode: C > + * c-set-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > diff -r c654d462c448 -r cb0aa2b2e180 xen/include/asm-ia64/p2m_entry.h > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/xen/include/asm-ia64/p2m_entry.h Mon Jul 24 21:35:16 2006 +0900 > @@ -0,0 +1,76 @@ > +/****************************************************************************** > + * p2m_entry.h > + * > + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> > + * VA Linux Systems Japan K.K. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. 
> + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + * > + */ > + > +#ifndef __ASM_P2M_ENTRY_H__ > +#define __ASM_P2M_ENTRY_H__ > + > +#include <asm/tlb_track.h> > + > +struct p2m_entry { > +#define P2M_PTE_ALWAYS_RETRY ((volatile pte_t*) -1) > + volatile pte_t* ptep; > + pte_t used; > +}; > + > +static inline void > +p2m_entry_set(struct p2m_entry* entry, volatile pte_t* ptep, pte_t used) > +{ > + entry->ptep = ptep; > + entry->used = used; > +} > + > +static inline void > +p2m_entry_set_retry(struct p2m_entry* entry) > +{ > + entry->ptep = P2M_PTE_ALWAYS_RETRY; > +} > + > +static inline int > +p2m_entry_retry(struct p2m_entry* entry) > +{ > + //XXX see lookup_domain_pte(). > + // NULL is set for invalid gpaddr for the time being. > + if (entry->ptep == NULL) > + return 0; > + > + if (entry->ptep == P2M_PTE_ALWAYS_RETRY) > + return 1; > + > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > + return ((pte_val(*entry->ptep) & ~_PAGE_TLB_TRACK_MASK) != > + (pte_val(entry->used) & ~_PAGE_TLB_TRACK_MASK)); > +#else > + return (pte_val(*entry->ptep) != pte_val(entry->used)); > +#endif > +} > + > +#endif // __ASM_P2M_ENTRY_H__ > + > +/* > + * Local variables: > + * mode: C > + * c-set-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > diff -r c654d462c448 -r cb0aa2b2e180 xen/include/asm-ia64/tlb_track.h > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/xen/include/asm-ia64/tlb_track.h Mon Jul 24 21:35:16 2006 +0900 > @@ -0,0 +1,201 @@ > +/****************************************************************************** > + * tlb_track.c > + * > + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> > + * VA Linux Systems Japan K.K. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. 
> + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + * > + */ > + > +#ifndef __TLB_TRACK_H__ > +#define __TLB_TRACK_H__ > + > +#ifdef CONFIG_XEN_IA64_TLB_TRACK > + > +#include <asm/domain.h> > +#include <xen/list.h> > + > +#define _PAGE_TLB_TRACKING_BIT 53 > +#define _PAGE_TLB_INSERTED_BIT 54 > +#define _PAGE_TLB_INSERTED_MANY_BIT 55 > + > +#define _PAGE_TLB_TRACKING (1UL << _PAGE_TLB_TRACKING_BIT) > +#define _PAGE_TLB_INSERTED (1UL << _PAGE_TLB_INSERTED_BIT) > +#define _PAGE_TLB_INSERTED_MANY (1UL << _PAGE_TLB_INSERTED_MANY_BIT) > +#define _PAGE_TLB_TRACK_MASK (_PAGE_TLB_TRACKING | > _PAGE_TLB_INSERTED | _PAGE_TLB_INSERTED_MANY) > + > +#define pte_tlb_tracking(pte) \ > + ((pte_val(pte) & _PAGE_TLB_TRACKING) != 0) > +#define pte_tlb_inserted(pte) \ > + ((pte_val(pte) & _PAGE_TLB_INSERTED) != 0) > +#define pte_tlb_inserted_many(pte) \ > + ((pte_val(pte) & _PAGE_TLB_INSERTED_MANY) != 0) > + > + > +// vcpu mask > +// stolen from cpumask.h > +typedef struct { DECLARE_BITMAP(bits, MAX_VIRT_CPUS); } vcpumask_t; > + > +#define vcpu_set(vcpu, dst) __vcpu_set((vcpu), &(dst)) > +static inline void __vcpu_set(int vcpu, volatile vcpumask_t *dstp) > +{ > + set_bit(vcpu, dstp->bits); > +} > +#define vcpus_clear(dst) __vcpus_clear(&(dst), MAX_VIRT_CPUS) > +static inline void __vcpus_clear(vcpumask_t *dstp, int nbits) > +{ > + bitmap_zero(dstp->bits, nbits); > +} > +/* No static inline type checking - see Subtlety (1) above. */ > +#define vcpu_isset(vcpu, vcpumask) test_bit((vcpu), (vcpumask).bits) > + > +#define vcpumask_scnprintf(buf, len, src) \ > + __vcpumask_scnprintf((buf), (len), &(src), > MAX_VIRT_CPUS) > +static inline int __vcpumask_scnprintf(char *buf, int len, > + const vcpumask_t *srcp, int nbits) > +{ > + return bitmap_scnprintf(buf, len, srcp->bits, nbits); > +} > + > + > +// TODO: compact this structure. > +struct tlb_track_entry { > + struct list_head list; > + > + > + volatile pte_t* ptep; // corresponding p2m entry > + > + //XXX should we use TR_ENTRY? > + pte_t pte_val; // mfn and other flags > + // pte_val.p = 1: > + // tlb entry is inserted. > + // pte_val.p = 0: > + // once tlb entry is inserted, so > + // this entry is created. But tlb > + // purge is isseued, so this > + // virtual address need not to be > + // purged. > + unsigned long vaddr; // virtual address > + unsigned long rid; // rid > + > + cpumask_t pcpu_dirty_mask; > + vcpumask_t vcpu_dirty_mask; > + // tlbflush_timestamp; > + > +#define CONFIG_TLB_TRACK_CNT > +#ifdef CONFIG_TLB_TRACK_CNT > +#define TLB_TRACK_CNT_FORCE_MANY 256 //XXX how many? 
> + unsigned long cnt; > +#endif > +}; > + > +struct tlb_track_stat { > + // insert or dirty > + unsigned long iod; > + unsigned long iod_again; > + unsigned long iod_not_tracked; > + unsigned long iod_force_many; > + unsigned long iod_tracked_many; > + unsigned long iod_tracked_many_del; > + unsigned long iod_found; > + unsigned long iod_new_entry; > + unsigned long iod_new_failed; > + unsigned long iod_new_many; > + unsigned long iod_insert; > + unsigned long iod_dirtied; > + > + // search and remove > + unsigned long sar; > + unsigned long sar_not_tracked; > + unsigned long sar_not_found; > + unsigned long sar_found; > + unsigned long sar_many; > +}; > +void tlb_track_stat_printf(const struct tlb_track_stat* stat); > + > +struct tlb_track { > + > +// see __gnttab_map_grant_ref() > +// A domain can map granted-page up to MAPTRACK_MAX_ENTRIES pages. > +#define TLB_TRACK_LIMIT_ENTRIES \ > + (MAPTRACK_MAX_ENTRIES * (PAGE_SIZE / sizeof(struct tlb_track))) > + > + spinlock_t free_list_lock; > + struct list_head free_list; > + unsigned int limit; > + unsigned int num_entries; > + unsigned int num_free; > + struct list_head page_list; > + > + // XXX hash table size > + spinlock_t hash_lock; > + unsigned int hash_size; > + unsigned int hash_shift; > + unsigned int hash_mask; > + struct list_head* hash; > + > + struct tlb_track_stat stat; > +}; > + > +int tlb_track_create(struct domain* d); > +void tlb_track_destroy(struct domain* d); > + > +void tlb_track_free_entry(struct tlb_track* tlb_track, > + struct tlb_track_entry* entry); > + > +struct p2m_entry; > +void > +vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr, > + struct p2m_entry* entry); > + > +// return value > +// NULL if this entry is used > +// entry if this entry isn't used > +enum TLB_TRACK_RET { > + TLB_TRACK_NOT_TRACKED, > + TLB_TRACK_NOT_FOUND, > + TLB_TRACK_FOUND, > + TLB_TRACK_MANY, > + TLB_TRACK_AGAIN, > +}; > +typedef enum TLB_TRACK_RET TLB_TRACK_RET_T; > + > +TLB_TRACK_RET_T > +tlb_track_search_and_remove(struct tlb_track* tlb_track, > + volatile pte_t* ptep, pte_t old_pte, > + struct tlb_track_entry** entryp); > + > +void > +__tlb_track_entry_printf(const char* func, int line, > + const struct tlb_track_entry* entry); > +#define tlb_track_entry_printf(entry) \ > + __tlb_track_entry_printf(__func__, __LINE__, (entry)) > +#else > +//define nop > + > +#endif // CONFIG_XEN_IA64_TLB_TRACK > + > +#endif // __TLB_TRACK_H__ > + > +/* > + * Local variables: > + * mode: C > + * c-set-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > # HG changeset patch > # User yamahata@xxxxxxxxxxxxx > # Node ID a56d48066373c9fe317e986580c08394fe89fc7e > # Parent cb0aa2b2e180d76d09592ed32338f9cb4ac5b7a0 > implement per vcpu vhpt option. allocate VHPT per vcpu. > added compile time option, xen_ia64_pervcpu_vhpt=y, to enable it. > added xen boot time option, pervcpu_vhpt=0, to disable it. > This patch depends on tlb tracking patch. 
> PATCHNAME: pervcpu_vhpt > > Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx> > > diff -r cb0aa2b2e180 -r a56d48066373 xen/arch/ia64/Rules.mk > --- a/xen/arch/ia64/Rules.mk Mon Jul 24 21:35:16 2006 +0900 > +++ b/xen/arch/ia64/Rules.mk Mon Jul 24 21:37:15 2006 +0900 > @@ -42,6 +42,9 @@ ifeq ($(xen_ia64_tlb_track),y) > ifeq ($(xen_ia64_tlb_track),y) > CFLAGS += -DCONFIG_XEN_IA64_TLB_TRACK > endif > +ifeq ($(xen_ia64_pervcpu_vhpt),y) > +CFLAGS += -DCONFIG_XEN_IA64_PERVCPU_VHPT > +endif > ifeq ($(no_warns),y) > CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized > endif > diff -r cb0aa2b2e180 -r a56d48066373 xen/arch/ia64/xen/domain.c > --- a/xen/arch/ia64/xen/domain.c Mon Jul 24 21:35:16 2006 +0900 > +++ b/xen/arch/ia64/xen/domain.c Mon Jul 24 21:37:15 2006 +0900 > @@ -117,8 +117,12 @@ static void flush_vtlb_for_context_switc > if (VMX_DOMAIN(vcpu)) { > // currently vTLB for vt-i domian is per vcpu. > // so any flushing isn't needed. > +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT > + } else if (HAS_PERVCPU_VHPT(v->domain)) { > + // nothing to do > +#endif > } else { > - vhpt_flush(); > + local_vhpt_flush(); > } > local_flush_tlb_all(); > } > @@ -133,9 +137,13 @@ void schedule_tail(struct vcpu *prev) > vmx_do_launch(current); > } else { > ia64_set_iva(&ia64_ivt); > - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | > - VHPT_ENABLED); > + // disable VHPT. ia64_new_rr7() might cause VHPT > + // fault without this because it flushes dtr[IA64_TR_VHPT] > + // (VHPT_SIZE_LOG2 << 2) is just for avoid > + // Reserved Register/Field fault. > + ia64_set_pta(VHPT_SIZE_LOG2 << 2); > load_region_regs(current); > + ia64_set_pta(vcpu_pta(current)); > vcpu_load_kernel_regs(current); > __ia64_per_cpu_var(current_psr_i_addr) = ¤t->domain-> > shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask; > @@ -186,9 +194,13 @@ if (!i--) { i = 1000000; printk("+"); } > > nd = current->domain; > if (!is_idle_domain(nd)) { > - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | > - VHPT_ENABLED); > + // disable VHPT. ia64_new_rr7() might cause VHPT > + // fault without this because it changes dtr[IA64_TR_VHPT] > + // (VHPT_SIZE_LOG2 << 2) is just for avoid > + // Reserved Register/Field fault. 
> + ia64_set_pta(VHPT_SIZE_LOG2 << 2);
> load_region_regs(current);
> + ia64_set_pta(vcpu_pta(current));
> vcpu_load_kernel_regs(current);
> vcpu_set_next_timer(current);
> if (vcpu_timer_expired(current))
> @@ -305,6 +317,17 @@ struct vcpu *alloc_vcpu_struct(struct do
> v->arch.ending_rid = d->arch.ending_rid;
> v->arch.breakimm = d->arch.breakimm;
> v->arch.last_processor = INVALID_PROCESSOR;
> +
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + if (HAS_PERVCPU_VHPT(d)) {
> + if (pervcpu_vhpt_alloc(v) < 0) {
> + free_xenheap_pages(v->arch.privregs,
> + get_order(sizeof(mapped_regs_t)));
> + free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
> + return NULL;
> + }
> + }
> +#endif
> }
>
> return v;
> @@ -315,6 +338,10 @@ void free_vcpu_struct(struct vcpu *v)
> if (VMX_DOMAIN(v))
> vmx_relinquish_vcpu_resources(v);
> else {
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + if (HAS_PERVCPU_VHPT(v->domain))
> + pervcpu_vhpt_free(v);
> +#endif
> if (v->arch.privregs != NULL)
> free_xenheap_pages(v->arch.privregs,
> get_order_from_shift(XMAPPEDREGS_SHIFT));
> @@ -340,6 +367,11 @@ static void init_switch_stack(struct vcp
> memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
> }
>
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> +static int opt_pervcpu_vhpt = 1;
> +integer_param("pervcpu_vhpt", opt_pervcpu_vhpt);
> +#endif
> +
> int arch_domain_create(struct domain *d)
> {
> int i;
> @@ -354,6 +386,13 @@ int arch_domain_create(struct domain *d)
> if (is_idle_domain(d))
> return 0;
>
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + d->arch.has_pervcpu_vhpt = opt_pervcpu_vhpt;
> +#if 1
> + DPRINTK("%s:%d domain %d pervcpu_vhpt %d\n",
> + __func__, __LINE__, d->domain_id, d->arch.has_pervcpu_vhpt);
> +#endif
> +#endif
> #ifdef CONFIG_XEN_IA64_TLB_TRACK
> if (tlb_track_create(d) < 0)
> goto fail_nomem;
> diff -r cb0aa2b2e180 -r a56d48066373 xen/arch/ia64/xen/regionreg.c
> --- a/xen/arch/ia64/xen/regionreg.c Mon Jul 24 21:35:16 2006 +0900
> +++ b/xen/arch/ia64/xen/regionreg.c Mon Jul 24 21:37:15 2006 +0900
> @@ -260,7 +260,7 @@ int set_one_rr(unsigned long rr, unsigne
> } else if (rreg == 7) {
> ia64_new_rr7(vmMangleRID(newrrv.rrval),v->domain->shared_info,
> v->arch.privregs, v->domain->arch.shared_info_va,
> - __get_cpu_var(vhpt_paddr));
> + vcpu_vhpt_maddr(v));
> } else {
> set_rr(rr,newrrv.rrval);
> }
> diff -r cb0aa2b2e180 -r a56d48066373 xen/arch/ia64/xen/vhpt.c
> --- a/xen/arch/ia64/xen/vhpt.c Mon Jul 24 21:35:16 2006 +0900
> +++ b/xen/arch/ia64/xen/vhpt.c Mon Jul 24 21:37:15 2006 +0900
> @@ -23,18 +23,30 @@ DEFINE_PER_CPU (unsigned long, vhpt_padd
> DEFINE_PER_CPU (unsigned long, vhpt_paddr);
> DEFINE_PER_CPU (unsigned long, vhpt_pend);
>
> -void vhpt_flush(void)
> -{
> - struct vhpt_lf_entry *v = __va(__ia64_per_cpu_var(vhpt_paddr));
> +static void __vhpt_flush(unsigned long vhpt_maddr)
> +{
> + struct vhpt_lf_entry *v =(struct vhpt_lf_entry*)__va(vhpt_maddr);
> int i;
>
> for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
> v->ti_tag = INVALID_TI_TAG;
> }
>
> -static void vhpt_erase(void)
> -{
> - struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)VHPT_ADDR;
> +void local_vhpt_flush(void)
> +{
> + __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
> +}
> +
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> +static void vcpu_vhpt_flush(struct vcpu* v)
> +{
> + __vhpt_flush(vcpu_vhpt_maddr(v));
> +}
> +#endif
> +
> +static void vhpt_erase(unsigned long vhpt_maddr)
> +{
> + struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
> int i;
>
> for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
> @@ -44,17 +56,6 @@ static void vhpt_erase(void)
> v->ti_tag = INVALID_TI_TAG;
> }
> // initialize cache too???
> -}
> -
> -
> -static void vhpt_map(unsigned long pte)
> -{
> - unsigned long psr;
> -
> - psr = ia64_clear_ic();
> - ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, pte, VHPT_SIZE_LOG2);
> - ia64_set_psr(psr);
> - ia64_srlz_i();
> }
>
> void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps)
> @@ -101,7 +102,7 @@ void vhpt_multiple_insert(unsigned long
>
> void vhpt_init(void)
> {
> - unsigned long paddr, pte;
> + unsigned long paddr;
> struct page_info *page;
> #if !VHPT_ENABLED
> return;
> @@ -121,13 +122,54 @@ void vhpt_init(void)
> __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
> printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
> paddr, __get_cpu_var(vhpt_pend));
> - pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL));
> - vhpt_map(pte);
> - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
> - VHPT_ENABLED);
> - vhpt_erase();
> -}
> -
> + vhpt_erase(paddr);
> + // we don't enable VHPT here.
> + // context_switch() or schedule_tail() does it.
> +}
> +
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> +int
> +pervcpu_vhpt_alloc(struct vcpu *v)
> +{
> + unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
> + DPRINTK("%s:%d allocating d 0x%p %d v 0x%p %d\n",
> + __func__, __LINE__,
> + v->domain, v->domain->domain_id,
> + v, v->vcpu_id);
> +
> + v->arch.vhpt_entries =
> + (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
> + v->arch.vhpt_page =
> + alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0);
> + if (!v->arch.vhpt_page)
> + return -ENOMEM;
> +
> + v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page);
> + if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1))
> + panic("pervcpu_vhpt_init: bad VHPT alignment!\n");
> +
> + v->arch.pta.val = 0; // zero clear
> + v->arch.pta.ve = 1; // enable vhpt
> + v->arch.pta.size = VHPT_SIZE_LOG2;
> + v->arch.pta.vf = 1; // long format
> + v->arch.pta.base = v->arch.vhpt_maddr >> 15;
> +
> + vhpt_erase(v->arch.vhpt_maddr);
> + return 0;
> +}
> +
> +void
> +pervcpu_vhpt_free(struct vcpu *v)
> +{
> + unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
> + DPRINTK("%s:%d freeing d 0x%p %d v 0x%p %d\n",
> + __func__, __LINE__,
> + v->domain, v->domain->domain_id,
> + v, v->vcpu_id);
> +
> + free_domheap_pages(v->arch.vhpt_page, vhpt_size_log2 - PAGE_SHIFT);
> +}
> +#endif
>
> void vcpu_flush_vtlb_all(struct vcpu *v)
> {
> @@ -136,7 +178,15 @@ void vcpu_flush_vtlb_all(struct vcpu *v)
> vcpu_purge_tr_entry(&PSCBX(v,itlb));
>
> /* Then VHPT. */
> - vhpt_flush ();
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + if (HAS_PERVCPU_VHPT(v->domain.arch)) {
> + vcpu_vhpt_flush(v);
> + } else {
> + local_vhpt_flush();
> + }
> +#else
> + local_vhpt_flush();
> +#endif
>
> /* Then mTLB. */
> local_flush_tlb_all ();
> @@ -169,9 +219,10 @@ void domain_flush_vtlb_all (void)
> }
> }
>
> -static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range)
> -{
> - void *vhpt_base = __va(per_cpu(vhpt_paddr, cpu));
> +static void __flush_vhpt_range(unsigned long vhpt_maddr,
> + u64 vadr, u64 addr_range)
> +{
> + void *vhpt_base = __va(vhpt_maddr);
>
> while ((long)addr_range > 0) {
> /* Get the VHPT entry. */
> @@ -184,9 +235,30 @@ static void cpu_flush_vhpt_range (int cp
> }
> }
>
> +static void cpu_vhpt_flush_range(int cpu, u64 vadr, u64 addr_range)
> +{
> + __flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range);
> +}
> +
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> +static void vcpu_vhpt_flush_range(struct vcpu* v, u64 vadr, u64 addr_range)
> +{
> + __flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range);
> +}
> +#endif
> +
> void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
> {
> - cpu_flush_vhpt_range (current->processor, vadr, 1UL << log_range);
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + if (HAS_PERVCPU_VHPT(current->domain.arch)) {
> + vcpu_vhpt_flush_range(current, vadr, 1UL << log_range);
> + } else {
> + cpu_vhpt_flush_range(current->processor,
> + vadr, 1UL << log_range);
> + }
> +#else
> + cpu_vhpt_flush_range(current->processor, vadr, 1UL << log_range);
> +#endif
> ia64_ptcl(vadr, log_range << 2);
> ia64_srlz_i();
> }
> @@ -218,8 +290,17 @@ void domain_flush_vtlb_range (struct dom
> if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
> continue;
>
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + if (HAS_PERVCPU_VHPT(d->arch)) {
> + vcpu_vhpt_flush_range(v, vadr, addr_range);
> + } else {
> + /* Invalidate VHPT entries. */
> + cpu_vhpt_flush_range(v->processor, vadr, addr_range);
> + }
> +#else
> /* Invalidate VHPT entries. */
> - cpu_flush_vhpt_range (v->processor, vadr, addr_range);
> + cpu_vhpt_flush_range(v->processor, vadr, addr_range);
> +#endif
> }
> // ptc.ga has release semantics.
>
> @@ -254,11 +335,30 @@ domain_flush_vltb_track_entry(struct dom
> }
> smp_mb();
>
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + if (HAS_PERVCPU_VHPT(d->arch)) {
> + for_each_vcpu(d, v) {
> + if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
> + continue;
> + if (!vcpu_isset(v->vcpu_id, entry->vcpu_dirty_mask))
> + continue;
> +
> + /* Invalidate VHPT entries. */
> + vcpu_vhpt_flush_range(v, entry->vaddr, PAGE_SIZE);
> + }
> + } else {
> + for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
> + /* Invalidate VHPT entries. */
> + cpu_vhpt_flush_range(cpu, entry->vaddr, PAGE_SIZE);
> + }
> + }
> +#else
> for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
> //printk("%s:%d cpu %d\n", __func__, __LINE__, cpu);
> /* Invalidate VHPT entries. */
> - cpu_flush_vhpt_range(cpu, entry->vaddr, PAGE_SIZE);
> - }
> + cpu_vhpt_flush_range(cpu, entry->vaddr, PAGE_SIZE);
> + }
> +#endif
> // ptc.ga has release semantics.
>
> /* ptc.ga */
> @@ -272,7 +372,7 @@ static void flush_tlb_vhpt_all (struct d
> static void flush_tlb_vhpt_all (struct domain *d)
> {
> /* First VHPT. */
> - vhpt_flush ();
> + local_vhpt_flush ();
>
> /* Then mTLB. */
> local_flush_tlb_all ();
> @@ -281,7 +381,14 @@ void domain_flush_destroy (struct domain
> void domain_flush_destroy (struct domain *d)
> {
> /* Very heavy... */
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + if (HAS_PERVCPU_VHPT(d->arch))
> + on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
> + else
> + on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
> +#else
> on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
> +#endif
> cpus_clear (d->domain_dirty_cpumask);
> }
>
> diff -r cb0aa2b2e180 -r a56d48066373 xen/include/asm-ia64/domain.h
> --- a/xen/include/asm-ia64/domain.h Mon Jul 24 21:35:16 2006 +0900
> +++ b/xen/include/asm-ia64/domain.h Mon Jul 24 21:37:15 2006 +0900
> @@ -63,6 +63,9 @@ struct arch_domain {
> unsigned long flags;
> struct {
> unsigned int is_vti : 1;
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + unsigned int has_pervcpu_vhpt : 1;
> +#endif
> };
> };
>
> @@ -108,6 +111,13 @@ struct arch_domain {
> #define INT_ENABLE_OFFSET(v) \
> (sizeof(vcpu_info_t) * (v)->vcpu_id + \
> offsetof(vcpu_info_t, evtchn_upcall_mask))
> +
> +#ifdef CONFIG_XEN_IA64_PER_VCPU_VHPT
> +#define HAS_PERVCPU_VHPT(d) ((d)->has_pervcpu_vhpt)
> +#else
> +#define HAS_PERVCPU_VHPT(d) (0)
> +#endif
> +
>
> struct arch_vcpu {
> /* Save the state of vcpu.
> @@ -158,6 +168,13 @@ struct arch_vcpu {
> fpswa_ret_t fpswa_ret; /* save return values of FPSWA emulation */
> struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */
>
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + PTA pta;
> + unsigned long vhpt_maddr;
> + struct page_info* vhpt_page;
> + unsigned long vhpt_entries;
> +#endif
> +
> #define INVALID_PROCESSOR INT_MAX
> int last_processor;
> };
> diff -r cb0aa2b2e180 -r a56d48066373 xen/include/asm-ia64/vhpt.h
> --- a/xen/include/asm-ia64/vhpt.h Mon Jul 24 21:35:16 2006 +0900
> +++ b/xen/include/asm-ia64/vhpt.h Mon Jul 24 21:37:15 2006 +0900
> @@ -42,11 +42,47 @@ extern void vhpt_multiple_insert(unsigne
> unsigned long logps);
> extern void vhpt_insert (unsigned long vadr, unsigned long pte,
> unsigned long logps);
> -void vhpt_flush(void);
> +void local_vhpt_flush(void);
>
> /* Currently the VHPT is allocated per CPU. */
> DECLARE_PER_CPU (unsigned long, vhpt_paddr);
> DECLARE_PER_CPU (unsigned long, vhpt_pend);
>
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> +#if !VHPT_ENABLED
> +#error "VHPT_ENABLED must be set for CONFIG_XEN_IA64_PERVCPU_VHPT"
> +#endif
> +#include <xen/sched.h>
> +int pervcpu_vhpt_alloc(struct vcpu *v);
> +void pervcpu_vhpt_free(struct vcpu *v);
> +static inline unsigned long
> +vcpu_vhpt_maddr(struct vcpu* v)
> +{
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + if (HAS_PERVCPU_VHPT(v->domain)) {
> + return v->arch.vhpt_maddr;
> + }
> +#endif
> +
> +#if 0
> + // referencecing v->processor is racy.
> + return per_cpu(vhpt_paddr, v->processor);
> +#endif
> + BUG_ON(v != current);
> + return __get_cpu_var(vhpt_paddr);
> +}
> +
> +static inline unsigned long
> +vcpu_pta(struct vcpu* v)
> +{
> +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
> + if (HAS_PERVCPU_VHPT(v->domain)) {
> + return v->arch.pta.val;
> + }
> +#endif
> + return VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED;
> +}
> +#endif
> +
> #endif /* !__ASSEMBLY */
> #endif
> _______________________________________________
> Xen-ia64-devel mailing list
> Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-ia64-devel

--
yamahata
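As a quick sanity check on the arithmetic in pervcpu_vhpt_alloc() above, the sketch below works through the entry count, the order passed to alloc_domheap_pages(), and the pta.base placement. The constants (VHPT_SIZE_LOG2, PAGE_SHIFT, the 32-byte long-format entry size) and the sample machine address are illustrative assumptions for this sketch, not values taken from the tree.

/* Sketch of the sizing and PTA arithmetic done by pervcpu_vhpt_alloc().
 * VHPT_SIZE_LOG2, PAGE_SHIFT and the 32-byte entry size are assumed,
 * illustrative values, not taken from the Xen/ia64 sources. */
#include <stdio.h>

#define VHPT_SIZE_LOG2  16UL    /* assumed: 64KB per-vcpu VHPT */
#define PAGE_SHIFT      14UL    /* assumed: 16KB pages */
#define VHPT_ENTRY_SIZE 32UL    /* assumed: long-format VHPT entry size */

int main(void)
{
    unsigned long size    = 1UL << VHPT_SIZE_LOG2;
    unsigned long entries = size / VHPT_ENTRY_SIZE;       /* cf. v->arch.vhpt_entries */
    unsigned long order   = VHPT_SIZE_LOG2 - PAGE_SHIFT;  /* cf. order for alloc_domheap_pages() */
    unsigned long maddr   = 0x4000000UL;                   /* pretend machine address */

    /* pta.base takes bits 15 and up of the table address, which is why
     * the patch insists that maddr be aligned to the VHPT size. */
    unsigned long pta_base = maddr >> 15;

    printf("entries=%lu order=%lu aligned=%d pta.base=0x%lx\n",
           entries, order, (maddr & (size - 1)) == 0, pta_base);
    return 0;
}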
Attachment: 10701:3cee9325a6c6_import_linux_hash.h.patch
Attachment: 10702:b90fff753ca1_tlb_track.patch
Attachment: 10703:f9b91b850f7b_pervcpu_vhpt.patch

_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
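For completeness, the flush paths in the patch all gate on HAS_PERVCPU_VHPT() in the same way: walk the vcpu's own table when a per-vcpu VHPT is in use, otherwise fall back to the per-physical-CPU table. A stand-alone toy model of that selection follows; the types, names and addresses are stubs for illustration only, not the real Xen/ia64 definitions.

/* Toy model of the HAS_PERVCPU_VHPT() selection used in the flush paths.
 * Only the control flow mirrors the patch; everything else is stubbed. */
#include <stdio.h>

struct toy_vcpu {
    int has_pervcpu_vhpt;        /* stands in for d->arch.has_pervcpu_vhpt */
    unsigned long vhpt_maddr;    /* stands in for v->arch.vhpt_maddr */
    int processor;               /* physical CPU the vcpu last ran on */
};

static unsigned long percpu_vhpt_paddr[2] = { 0x1000000UL, 0x2000000UL };

/* Pick the VHPT that a range flush for this vcpu must walk. */
static unsigned long toy_vhpt_for_flush(const struct toy_vcpu *v)
{
    if (v->has_pervcpu_vhpt)
        return v->vhpt_maddr;                   /* the vcpu's private table */
    return percpu_vhpt_paddr[v->processor];     /* the physical CPU's table */
}

int main(void)
{
    struct toy_vcpu with    = { 1, 0x4000000UL, 0 };
    struct toy_vcpu without = { 0, 0, 1 };

    printf("per-vcpu VHPT: flush table at 0x%lx\n", toy_vhpt_for_flush(&with));
    printf("per-pcpu VHPT: flush table at 0x%lx\n", toy_vhpt_for_flush(&without));
    return 0;
}

Routing every caller through one helper keeps code such as domain_flush_vtlb_range() unaware of which kind of table sits behind the flush.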