[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-ia64-devel] PATCH: performance enhancements for BS2000 domU
Hi, this is a patch to enhance the performance of a BS2000 domU. I've changed the following: - enabled some fast_reflects for big-endian guests - even in the case of a 4k VHPT, use page sizes up to PAGE_SIZE in entries if the guest inserts larger TC entries - added KEY_PERMISSION_VECTOR support (reflect to guest) BS2000 boot was about 10% faster with these changes. Juergen -- Juergen Gross Principal Developer IP SW OS6 Telephone: +49 (0) 89 636 47950 Fujitsu Siemens Computers e-mail: juergen.gross@xxxxxxxxxxxxxxxxxxx Otto-Hahn-Ring 6 Internet: www.fujitsu-siemens.com D-81739 Muenchen Company details: www.fujitsu-siemens.com/imprint.html # HG changeset patch # User juergen.gross@xxxxxxxxxxxxxxxxxx # Node ID 15d4ba6c29c4949c0fa88f24b68ab88bd352cb52 # Parent 1668a62e1c1fff8dc8d552c4fb459ecb19663157 performance enhancement for big-endian, 4k-pages, protection keys Signed-off-by: Juergen Gross juergen.gross@xxxxxxxxxxxxxxxxxxx diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/faults.c --- a/xen/arch/ia64/xen/faults.c Tue Sep 11 10:29:49 2007 +0200 +++ b/xen/arch/ia64/xen/faults.c Fri Sep 14 13:49:10 2007 +0200 @@ -594,6 +594,9 @@ ia64_handle_reflection(unsigned long ifa check_lazy_cover = 1; vector = IA64_PAGE_NOT_PRESENT_VECTOR; break; + case 21: + vector = IA64_KEY_PERMISSION_VECTOR; + break; case 22: vector = IA64_INST_ACCESS_RIGHTS_VECTOR; break; diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/hyperprivop.S --- a/xen/arch/ia64/xen/hyperprivop.S Tue Sep 11 10:29:49 2007 +0200 +++ b/xen/arch/ia64/xen/hyperprivop.S Fri Sep 14 13:49:10 2007 +0200 @@ -223,9 +223,6 @@ ENTRY(hyper_ssm_i) // give up for now if: ipsr.be==1, ipsr.pp==1 mov r30=cr.ipsr mov r29=cr.iip;; - extr.u r21=r30,IA64_PSR_BE_BIT,1 ;; - cmp.ne p7,p0=r21,r0 -(p7) br.sptk.many dispatch_break_fault ;; extr.u r21=r30,IA64_PSR_PP_BIT,1 ;; cmp.ne p7,p0=r21,r0 (p7) br.sptk.many dispatch_break_fault ;; @@ -268,7 +265,7 @@ ENTRY(hyper_ssm_i) // FOR SSM_I ONLY, also turn on psr.i and psr.ic movl 
r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC) // movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);; - movl r27=~(IA64_PSR_BE|IA64_PSR_BN);; + movl r27=~IA64_PSR_BN;; or r30=r30,r28;; and r30=r30,r27;; mov r20=1 @@ -361,10 +358,7 @@ GLOBAL_ENTRY(fast_tick_reflect) cmp.ltu p6,p0=r26,r27 (p6) br.cond.spnt.few rp;; mov r17=cr.ipsr;; - // slow path if: ipsr.be==1, ipsr.pp==1 - extr.u r21=r17,IA64_PSR_BE_BIT,1 ;; - cmp.ne p6,p0=r21,r0 -(p6) br.cond.spnt.few rp;; + // slow path if: ipsr.pp==1 extr.u r21=r17,IA64_PSR_PP_BIT,1 ;; cmp.ne p6,p0=r21,r0 (p6) br.cond.spnt.few rp;; @@ -453,7 +447,7 @@ GLOBAL_ENTRY(fast_tick_reflect) cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;; (p7) dep r17=0,r17,IA64_PSR_CPL0_BIT,2 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT) - movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);; + movl r27=~(IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);; or r17=r17,r28;; and r17=r17,r27 ld4 r16=[r18];; @@ -556,9 +550,6 @@ GLOBAL_ENTRY(fast_break_reflect) #endif mov r30=cr.ipsr mov r29=cr.iip;; - extr.u r21=r30,IA64_PSR_BE_BIT,1 ;; - cmp.ne p7,p0=r21,r0 -(p7) br.spnt.few dispatch_break_fault ;; extr.u r21=r30,IA64_PSR_PP_BIT,1 ;; cmp.ne p7,p0=r21,r0 (p7) br.spnt.few dispatch_break_fault ;; @@ -633,7 +624,7 @@ ENTRY(fast_reflect) cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;; (p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT) - movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);; + movl r27=~(IA64_PSR_PP|IA64_PSR_BN);; or r30=r30,r28;; and r30=r30,r27 // also set shared_mem ipsr.i and ipsr.ic appropriately @@ -744,9 +735,6 @@ GLOBAL_ENTRY(fast_access_reflect) #endif mov r30=cr.ipsr mov r29=cr.iip;; - extr.u r21=r30,IA64_PSR_BE_BIT,1 ;; - cmp.ne p7,p0=r21,r0 -(p7) br.spnt.few dispatch_reflection ;; extr.u r21=r30,IA64_PSR_PP_BIT,1 ;; cmp.ne p7,p0=r21,r0 (p7) br.spnt.few dispatch_reflection ;; @@ -794,9 +782,6 @@ GLOBAL_ENTRY(fast_tlb_miss_reflect) cmp.eq p7,p0=r21,r0 (p7) br.spnt.few page_fault ;; // slow path 
if strange ipsr or isr bits set - extr.u r21=r30,IA64_PSR_BE_BIT,1 ;; - cmp.ne p7,p0=r21,r0 -(p7) br.spnt.few page_fault ;; extr.u r21=r30,IA64_PSR_PP_BIT,1 ;; cmp.ne p7,p0=r21,r0 (p7) br.spnt.few page_fault ;; @@ -1068,10 +1053,6 @@ 1: 1: adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; ld8 r21=[r20];; // r21 = vcr.ipsr - extr.u r22=r21,IA64_PSR_BE_BIT,1 ;; - // if turning on psr.be, give up for now and do it the slow way - cmp.ne p7,p0=r22,r0 -(p7) br.spnt.few slow_vcpu_rfi ;; // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);; and r22=r20,r21 diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/ivt.S --- a/xen/arch/ia64/xen/ivt.S Tue Sep 11 10:29:49 2007 +0200 +++ b/xen/arch/ia64/xen/ivt.S Fri Sep 14 13:49:10 2007 +0200 @@ -313,7 +313,6 @@ GLOBAL_ENTRY(dispatch_reflection) adds out1=16,sp mov out2=cr.isr mov out3=cr.iim -// mov out3=cr.itir // TODO: why commented out? ssm psr.ic | PSR_DEFAULT_BITS ;; diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Tue Sep 11 10:29:49 2007 +0200 +++ b/xen/arch/ia64/xen/mm.c Fri Sep 14 13:49:10 2007 +0200 @@ -509,25 +509,22 @@ u64 translate_domain_pte(u64 pteval, u64 u64 arflags; u64 arflags2; u64 maflags2; - u64 ps; pteval &= ((1UL << 53) - 1);// ignore [63:53] bits // FIXME address had better be pre-validated on insert mask = ~itir_mask(_itir.itir); mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask); - ps = current->arch.vhpt_pg_shift ? current->arch.vhpt_pg_shift : - PAGE_SHIFT; - - if (_itir.ps > ps) - _itir.ps = ps; + + if (_itir.ps > PAGE_SHIFT) + _itir.ps = PAGE_SHIFT; ((ia64_itir_t*)itir)->itir = _itir.itir;/* Copy the whole register. */ ((ia64_itir_t*)itir)->ps = _itir.ps; /* Overwrite ps part! 
*/ pteval2 = lookup_domain_mpa(d, mpaddr, entry); - if (ps < PAGE_SHIFT) - pteval2 |= mpaddr & (PAGE_SIZE - 1) & ~((1L << ps) - 1); + if (_itir.ps < PAGE_SHIFT) + pteval2 |= mpaddr & (PAGE_SIZE - 1) & ~((1L << _itir.ps) - 1); /* Check access rights. */ arflags = pteval & _PAGE_AR_MASK; diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/vcpu.c --- a/xen/arch/ia64/xen/vcpu.c Tue Sep 11 10:29:49 2007 +0200 +++ b/xen/arch/ia64/xen/vcpu.c Fri Sep 14 13:49:10 2007 +0200 @@ -1817,7 +1817,7 @@ IA64FAULT vcpu_tpa(VCPU * vcpu, u64 vadr IA64FAULT vcpu_tak(VCPU * vcpu, u64 vadr, u64 * key) { - u64 pteval, itir, mask, iha; + u64 pteval, itir, iha; IA64FAULT fault; fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha); @@ -2319,8 +2319,6 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, { ia64_itir_t _itir = {.itir = itir}; unsigned long psr; - unsigned long ps = (vcpu->domain == dom0) ? _itir.ps : - vcpu->arch.vhpt_pg_shift; check_xen_space_overlap("itc", vaddr, 1UL << _itir.ps); @@ -2329,12 +2327,12 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, panic_domain(NULL, "vcpu_itc_no_srlz: domain trying to use " "smaller page size!\n"); - BUG_ON(_itir.ps > vcpu->arch.vhpt_pg_shift); + BUG_ON(_itir.ps > PAGE_SHIFT); vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry); psr = ia64_clear_ic(); pte &= ~(_PAGE_RV2 | _PAGE_RV1); // Mask out the reserved bits. 
// FIXME: look for bigger mappings - ia64_itc(IorD, vaddr, pte, IA64_ITIR_PS_KEY(ps, _itir.key)); + ia64_itc(IorD, vaddr, pte, _itir.itir); ia64_set_psr(psr); // ia64_srlz_i(); // no srls req'd, will rfi later if (vcpu->domain == dom0 && ((vaddr >> 61) == 7)) { @@ -2350,7 +2348,6 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, // even if domain pagesize is larger than PAGE_SIZE, just put // PAGE_SIZE mapping in the vhpt for now, else purging is complicated else { - _itir.ps = vcpu->arch.vhpt_pg_shift; vhpt_insert(vaddr, pte, _itir.itir); } } diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/vhpt.c --- a/xen/arch/ia64/xen/vhpt.c Tue Sep 11 10:29:49 2007 +0200 +++ b/xen/arch/ia64/xen/vhpt.c Fri Sep 14 13:49:10 2007 +0200 @@ -293,15 +293,18 @@ __flush_vhpt_range(unsigned long vhpt_ma { void *vhpt_base = __va(vhpt_maddr); u64 pgsz = 1L << current->arch.vhpt_pg_shift; - + u64 purge_addr = vadr & ~PAGE_SIZE; + + addr_range += vadr - purge_addr; + addr_range = (addr_range + PAGE_SIZE - 1) & ~PAGE_SIZE; while ((long)addr_range > 0) { /* Get the VHPT entry. */ - unsigned int off = ia64_thash(vadr) - + unsigned int off = ia64_thash(purge_addr) - __va_ul(vcpu_vhpt_maddr(current)); struct vhpt_lf_entry *v = vhpt_base + off; v->ti_tag = INVALID_TI_TAG; addr_range -= pgsz; - vadr += pgsz; + purge_addr += pgsz; } } _______________________________________________ Xen-ia64-devel mailing list Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ia64-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |