[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-ia64-devel] PATCH: performance enhancements for BS2000 domU


  • To: xen-ia64-devel <xen-ia64-devel@xxxxxxxxxxxxxxxxxxx>
  • From: Juergen Gross <juergen.gross@xxxxxxxxxxxxxxxxxxx>
  • Date: Fri, 14 Sep 2007 14:00:08 +0200
  • Delivery-date: Fri, 14 Sep 2007 05:00:29 -0700
  • Domainkey-signature: s=s768; d=fujitsu-siemens.com; c=nofws; q=dns; h=Received:X-SBRSScore:X-IronPort-AV:Received:Received: Message-ID:Date:From:Organization:User-Agent: MIME-Version:To:Subject:X-Enigmail-Version: Content-Type; b=s3iJs0o4daTvOCbYZRrk/+JJp2AM6NCF1O/CBw0nAB5FNAsQ+dFm2 WH0v/04jBVw4B6a69pvfzjeyUyHJEuh03467If2j+Mxp9c+lOzFlh ff0kxGOqpV3iM6ZkyRXkh+;
  • List-id: Discussion of the ia64 port of Xen <xen-ia64-devel.lists.xensource.com>

Hi,

this is a patch to enhance performance of BS2000 domu. I've changed the
following:
- enable some fast_reflects for big-endian guests
- even in case of 4k-vhpt use page-size up to PAGE_SIZE in entries, if guest
  inserts larger TC-entries
- added KEY_PERMISSION_VECTOR support (reflect to guest)

BS2000 boot was about 10% faster with these changes.

Juergen

-- 
Juergen Gross                             Principal Developer
IP SW OS6                      Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers         e-mail: juergen.gross@xxxxxxxxxxxxxxxxxxx
Otto-Hahn-Ring 6                Internet: www.fujitsu-siemens.com
D-81739 Muenchen         Company details: www.fujitsu-siemens.com/imprint.html
# HG changeset patch
# User juergen.gross@xxxxxxxxxxxxxxxxxx
# Node ID 15d4ba6c29c4949c0fa88f24b68ab88bd352cb52
# Parent  1668a62e1c1fff8dc8d552c4fb459ecb19663157

performance enhancement for big-endian, 4k-pages, protection keys

Signed-off-by: Juergen Gross juergen.gross@xxxxxxxxxxxxxxxxxxx

diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/faults.c
--- a/xen/arch/ia64/xen/faults.c        Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/faults.c        Fri Sep 14 13:49:10 2007 +0200
@@ -594,6 +594,9 @@ ia64_handle_reflection(unsigned long ifa
                check_lazy_cover = 1;
                vector = IA64_PAGE_NOT_PRESENT_VECTOR;
                break;
+       case 21:
+               vector = IA64_KEY_PERMISSION_VECTOR;
+               break;
        case 22:
                vector = IA64_INST_ACCESS_RIGHTS_VECTOR;
                break;
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/hyperprivop.S   Fri Sep 14 13:49:10 2007 +0200
@@ -223,9 +223,6 @@ ENTRY(hyper_ssm_i)
        // give up for now if: ipsr.be==1, ipsr.pp==1
        mov r30=cr.ipsr
        mov r29=cr.iip;;
-       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
-(p7)   br.sptk.many dispatch_break_fault ;;
        extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p7,p0=r21,r0
 (p7)   br.sptk.many dispatch_break_fault ;;
@@ -268,7 +265,7 @@ ENTRY(hyper_ssm_i)
        // FOR SSM_I ONLY, also turn on psr.i and psr.ic
        movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC)
 //     movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
-       movl r27=~(IA64_PSR_BE|IA64_PSR_BN);;
+       movl r27=~IA64_PSR_BN;;
        or r30=r30,r28;;
        and r30=r30,r27;;
        mov r20=1
@@ -361,10 +358,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
        cmp.ltu p6,p0=r26,r27
 (p6)   br.cond.spnt.few rp;;
        mov r17=cr.ipsr;;
-       // slow path if: ipsr.be==1, ipsr.pp==1
-       extr.u r21=r17,IA64_PSR_BE_BIT,1 ;;
-       cmp.ne p6,p0=r21,r0
-(p6)   br.cond.spnt.few rp;;
+       // slow path if: ipsr.pp==1
        extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p6,p0=r21,r0
 (p6)   br.cond.spnt.few rp;;
@@ -453,7 +447,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
        cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
 (p7)   dep r17=0,r17,IA64_PSR_CPL0_BIT,2
        movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
-       movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
+       movl r27=~(IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
        or r17=r17,r28;;
        and r17=r17,r27
        ld4 r16=[r18];;
@@ -556,9 +550,6 @@ GLOBAL_ENTRY(fast_break_reflect)
 #endif
        mov r30=cr.ipsr
        mov r29=cr.iip;;
-       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
-(p7)   br.spnt.few dispatch_break_fault ;;
        extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p7,p0=r21,r0
 (p7)   br.spnt.few dispatch_break_fault ;;
@@ -633,7 +624,7 @@ ENTRY(fast_reflect)
        cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
 (p7)   dep r30=0,r30,IA64_PSR_CPL0_BIT,2
        movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
-       movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
+       movl r27=~(IA64_PSR_PP|IA64_PSR_BN);;
        or r30=r30,r28;;
        and r30=r30,r27
        // also set shared_mem ipsr.i and ipsr.ic appropriately
@@ -744,9 +735,6 @@ GLOBAL_ENTRY(fast_access_reflect)
 #endif
        mov r30=cr.ipsr
        mov r29=cr.iip;;
-       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
-(p7)   br.spnt.few dispatch_reflection ;;
        extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p7,p0=r21,r0
 (p7)   br.spnt.few dispatch_reflection ;;
@@ -794,9 +782,6 @@ GLOBAL_ENTRY(fast_tlb_miss_reflect)
        cmp.eq p7,p0=r21,r0
 (p7)   br.spnt.few page_fault ;;
        // slow path if strange ipsr or isr bits set
-       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
-(p7)   br.spnt.few page_fault ;;
        extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p7,p0=r21,r0
 (p7)   br.spnt.few page_fault ;;
@@ -1068,10 +1053,6 @@ 1:
 1:
        adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r21=[r20];;         // r21 = vcr.ipsr
-       extr.u r22=r21,IA64_PSR_BE_BIT,1 ;;
-       // if turning on psr.be, give up for now and do it the slow way
-       cmp.ne p7,p0=r22,r0
-(p7)   br.spnt.few slow_vcpu_rfi ;;
        // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
        movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
        and r22=r20,r21
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S   Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/ivt.S   Fri Sep 14 13:49:10 2007 +0200
@@ -313,7 +313,6 @@ GLOBAL_ENTRY(dispatch_reflection)
        adds out1=16,sp
        mov out2=cr.isr
        mov out3=cr.iim
-//     mov out3=cr.itir                // TODO: why commented out?
 
        ssm psr.ic | PSR_DEFAULT_BITS
        ;;
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/mm.c    Fri Sep 14 13:49:10 2007 +0200
@@ -509,25 +509,22 @@ u64 translate_domain_pte(u64 pteval, u64
        u64 arflags;
        u64 arflags2;
        u64 maflags2;
-       u64 ps;
 
        pteval &= ((1UL << 53) - 1);// ignore [63:53] bits
 
        // FIXME address had better be pre-validated on insert
        mask = ~itir_mask(_itir.itir);
        mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask);
-       ps = current->arch.vhpt_pg_shift ? current->arch.vhpt_pg_shift :
-                                          PAGE_SHIFT;
-
-       if (_itir.ps > ps)
-               _itir.ps = ps;
+
+       if (_itir.ps > PAGE_SHIFT)
+               _itir.ps = PAGE_SHIFT;
 
        ((ia64_itir_t*)itir)->itir = _itir.itir;/* Copy the whole register. */
        ((ia64_itir_t*)itir)->ps = _itir.ps;    /* Overwrite ps part! */
 
        pteval2 = lookup_domain_mpa(d, mpaddr, entry);
-       if (ps < PAGE_SHIFT)
-               pteval2 |= mpaddr & (PAGE_SIZE - 1) & ~((1L << ps) - 1);
+       if (_itir.ps < PAGE_SHIFT)
+               pteval2 |= mpaddr & (PAGE_SIZE - 1) & ~((1L << _itir.ps) - 1);
 
        /* Check access rights.  */
        arflags  = pteval  & _PAGE_AR_MASK;
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c  Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/vcpu.c  Fri Sep 14 13:49:10 2007 +0200
@@ -1817,7 +1817,7 @@ IA64FAULT vcpu_tpa(VCPU * vcpu, u64 vadr
 
 IA64FAULT vcpu_tak(VCPU * vcpu, u64 vadr, u64 * key)
 {
-       u64 pteval, itir, mask, iha;
+       u64 pteval, itir, iha;
        IA64FAULT fault;
 
        fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha);
@@ -2319,8 +2319,6 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
 {
        ia64_itir_t _itir = {.itir = itir};
        unsigned long psr;
-       unsigned long ps = (vcpu->domain == dom0) ? _itir.ps :
-                                                   vcpu->arch.vhpt_pg_shift;
 
        check_xen_space_overlap("itc", vaddr, 1UL << _itir.ps);
 
@@ -2329,12 +2327,12 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
                panic_domain(NULL, "vcpu_itc_no_srlz: domain trying to use "
                             "smaller page size!\n");
 
-       BUG_ON(_itir.ps > vcpu->arch.vhpt_pg_shift);
+       BUG_ON(_itir.ps > PAGE_SHIFT);
        vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
        psr = ia64_clear_ic();
        pte &= ~(_PAGE_RV2 | _PAGE_RV1);        // Mask out the reserved bits.
                                        // FIXME: look for bigger mappings
-       ia64_itc(IorD, vaddr, pte, IA64_ITIR_PS_KEY(ps, _itir.key));
+       ia64_itc(IorD, vaddr, pte, _itir.itir);
        ia64_set_psr(psr);
        // ia64_srlz_i(); // no srls req'd, will rfi later
        if (vcpu->domain == dom0 && ((vaddr >> 61) == 7)) {
@@ -2350,7 +2348,6 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
        // even if domain pagesize is larger than PAGE_SIZE, just put
        // PAGE_SIZE mapping in the vhpt for now, else purging is complicated
        else {
-               _itir.ps = vcpu->arch.vhpt_pg_shift;
                vhpt_insert(vaddr, pte, _itir.itir);
        }
 }
diff -r 1668a62e1c1f -r 15d4ba6c29c4 xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c  Tue Sep 11 10:29:49 2007 +0200
+++ b/xen/arch/ia64/xen/vhpt.c  Fri Sep 14 13:49:10 2007 +0200
@@ -293,15 +293,18 @@ __flush_vhpt_range(unsigned long vhpt_ma
 {
        void *vhpt_base = __va(vhpt_maddr);
        u64 pgsz = 1L << current->arch.vhpt_pg_shift;
-
+       u64 purge_addr = vadr & ~PAGE_SIZE;
+
+       addr_range += vadr - purge_addr;
+       addr_range = (addr_range + PAGE_SIZE - 1) & ~PAGE_SIZE;
        while ((long)addr_range > 0) {
                /* Get the VHPT entry.  */
-               unsigned int off = ia64_thash(vadr) -
+               unsigned int off = ia64_thash(purge_addr) -
                        __va_ul(vcpu_vhpt_maddr(current));
                struct vhpt_lf_entry *v = vhpt_base + off;
                v->ti_tag = INVALID_TI_TAG;
                addr_range -= pgsz;
-               vadr += pgsz;
+               purge_addr += pgsz;
        }
 }
 
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.