[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-ia64-devel] [PATCH] support of 4k page size for individual guests


  • To: xen-ia64-devel <xen-ia64-devel@xxxxxxxxxxxxxxxxxxx>
  • From: Juergen Gross <juergen.gross@xxxxxxxxxxxxxxxxxxx>
  • Date: Thu, 16 Aug 2007 12:47:06 +0200
  • Delivery-date: Thu, 16 Aug 2007 03:47:19 -0700
  • Domainkey-signature: s=s768; d=fujitsu-siemens.com; c=nofws; q=dns; b=AtlgPNFxskBzcbXgpcxXm4wbRL/x+5rgaQir4GOizYp+LuISyZ25kgMOKRETZEvGyHOBBIkQsp9HsYDGJ0AN+SgFoWUyYYznAUL9PZYxuonyE301QdxruUrFWB+5O0Co;
  • List-id: Discussion of the ia64 port of Xen <xen-ia64-devel.lists.xensource.com>

Hi,

This is the patch needed to support 4k (and 8k) pages for individual guests
(currently PV only).
"Normal" domUs should not be affected, as the per-vcpu VHPT is reconfigured
only if a domU uses a page size smaller than PAGE_SIZE.
I haven't touched grant pages yet; I think they should still work on a
PAGE_SIZE basis as before, but I haven't verified this.

Tested by compiling and by booting dom0, a Linux domU, and a BS2000 domU
(BS2000 is our mainframe OS, which uses 4k pages).

Juergen

-- 
Juergen Gross                             Principal Developer
IP SW OS6                      Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers         e-mail: juergen.gross@xxxxxxxxxxxxxxxxxxx
Otto-Hahn-Ring 6                Internet: www.fujitsu-siemens.com
D-81739 Muenchen         Company details: www.fujitsu-siemens.com/imprint.html
# HG changeset patch
# User juergen.gross@xxxxxxxxxxxxxxxxxx
# Node ID 2f58face717cefcaaa9791994ab978f975b14573
# Parent  6b0c965e95a668bf65d475f519b254107cce21a3

4k pagesize support per vcpu

Signed-off-by: Juergen Gross <juergen.gross@xxxxxxxxxxxxxxxxxxx>

diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/asm-offsets.c       Thu Aug 16 11:33:27 2007 +0200
@@ -72,6 +72,7 @@ void foo(void)
        DEFINE(IA64_VCPU_DOMAIN_ITM_LAST_OFFSET, offsetof (struct vcpu, 
arch.domain_itm_last));
        DEFINE(IA64_VCPU_ITLB_OFFSET, offsetof (struct vcpu, arch.itlb));
        DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb));
+       DEFINE(IA64_VCPU_VHPT_PG_SHIFT_OFFSET, offsetof (struct vcpu, 
arch.vhpt_pg_shift));
 
        BLANK();
 
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/domain.c        Thu Aug 16 11:33:27 2007 +0200
@@ -445,6 +445,7 @@ int vcpu_initialise(struct vcpu *v)
            v->arch.ending_rid = d->arch.ending_rid;
            v->arch.breakimm = d->arch.breakimm;
            v->arch.last_processor = INVALID_PROCESSOR;
+           v->arch.vhpt_pg_shift = PAGE_SHIFT;
        }
 
        if (!VMX_DOMAIN(v))
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/faults.c
--- a/xen/arch/ia64/xen/faults.c        Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/faults.c        Thu Aug 16 11:33:27 2007 +0200
@@ -239,6 +239,8 @@ void ia64_do_page_fault(unsigned long ad
                    (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
                regs->cr_ipsr = vcpu_pl_adjust(regs->cr_ipsr,
                                               IA64_PSR_CPL0_BIT);
+               if (PSCB(current, dcr) & IA64_DCR_BE)
+                       regs->cr_ipsr |= IA64_PSR_BE;
 
                if (PSCB(current, hpsr_dfh))
                        regs->cr_ipsr |= IA64_PSR_DFH;  
@@ -741,7 +743,8 @@ ia64_shadow_fault(unsigned long ifa, uns
        pte = vlfe->page_flags;
        if (vlfe->ti_tag == ia64_ttag(ifa)) {
                /* The VHPT entry is valid.  */
-               gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+               gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >>
+                                        v->arch.vhpt_pg_shift);
                BUG_ON(gpfn == INVALID_M2P_ENTRY);
        } else {
                unsigned long itir, iha;
@@ -757,10 +760,10 @@ ia64_shadow_fault(unsigned long ifa, uns
                /* Try again!  */
                if (fault != IA64_NO_FAULT) {
                        /* This will trigger a dtlb miss.  */
-                       ia64_ptcl(ifa, PAGE_SHIFT << 2);
-                       return;
-               }
-               gpfn = ((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+                       ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
+                       return;
+               }
+               gpfn = ((pte & _PAGE_PPN_MASK) >> v->arch.vhpt_pg_shift);
                if (pte & _PAGE_D)
                        pte |= _PAGE_VIRT_D;
        }
@@ -788,7 +791,7 @@ ia64_shadow_fault(unsigned long ifa, uns
                        /* Purge the TC locally.
                           It will be reloaded from the VHPT iff the
                           VHPT entry is still valid.  */
-                       ia64_ptcl(ifa, PAGE_SHIFT << 2);
+                       ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
 
                        atomic64_inc(&d->arch.shadow_fault_count);
                } else {
@@ -800,6 +803,6 @@ ia64_shadow_fault(unsigned long ifa, uns
                /* We don't know wether or not the fault must be
                   reflected.  The VHPT entry is not valid.  */
                /* FIXME: in metaphysical mode, we could do an ITC now.  */
-               ia64_ptcl(ifa, PAGE_SHIFT << 2);
-       }
-}
+               ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
+       }
+}
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/hyperprivop.S   Thu Aug 16 11:33:27 2007 +0200
@@ -1604,26 +1604,27 @@ ENTRY(hyper_set_rr)
        extr.u r26=r9,IA64_RR_RID,IA64_RR_RID_LEN       // r26 = r9.rid
        movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        ld8 r20=[r20];;
-       adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;;
-       ld4 r22=[r21];;
-       adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;;
-       ld4 r23=[r21];;
-       adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;;
+       adds r22=IA64_VCPU_STARTING_RID_OFFSET,r20
+       adds r23=IA64_VCPU_ENDING_RID_OFFSET,r20
+       adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20
+       adds r21=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r20;;
+       ld4 r22=[r22]
+       ld4 r23=[r23]
+       ld1 r21=[r21];;
        add r22=r26,r22;;
        cmp.geu p6,p0=r22,r23   // if r9.rid + starting_rid >= ending_rid
 (p6)   br.cond.spnt.few 1f;    // this is an error, but just ignore/return
-       // r21=starting_rid
        adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
        shl r25=r25,3;;
        add r20=r20,r25;;
        st8 [r20]=r9;;          // store away exactly what was passed
        // but adjust value actually placed in rr[r8]
        // r22 contains adjusted rid, "mangle" it (see regionreg.c)
-       // and set ps to PAGE_SHIFT and ve to 1
+       // and set ps to v->arch.vhpt_pg_shift and ve to 1
        extr.u r27=r22,0,8
        extr.u r28=r22,8,8
-       extr.u r29=r22,16,8;;
-       dep.z r23=PAGE_SHIFT,IA64_RR_PS,IA64_RR_PS_LEN;;
+       extr.u r29=r22,16,8
+       dep.z r23=r21,IA64_RR_PS,IA64_RR_PS_LEN;;
        dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
        dep r23=r27,r23,24,8;;
        dep r23=r28,r23,16,8;;
@@ -1673,34 +1674,38 @@ ENTRY(hyper_set_rr0_to_rr4)
        ld8 r17=[r17];;
 
        adds r21=IA64_VCPU_STARTING_RID_OFFSET,r17
-       adds r25=IA64_VCPU_ENDING_RID_OFFSET,r17
+       adds r22=IA64_VCPU_ENDING_RID_OFFSET,r17
+       adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r17
        ;; 
-       ld4 r22=[r21] // r22 = current->starting_rid
+       ld4 r21=[r21] // r21 = current->starting_rid
        extr.u r26=r8,IA64_RR_RID,IA64_RR_RID_LEN       // r26 = r8.rid
        extr.u r27=r9,IA64_RR_RID,IA64_RR_RID_LEN       // r27 = r9.rid
-       ld4 r23=[r25] // r23 = current->ending_rid
+       ld4 r22=[r22] // r22 = current->ending_rid
        extr.u r28=r10,IA64_RR_RID,IA64_RR_RID_LEN      // r28 = r10.rid
        extr.u r29=r11,IA64_RR_RID,IA64_RR_RID_LEN      // r29 = r11.rid
        adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r17
        extr.u r30=r14,IA64_RR_RID,IA64_RR_RID_LEN      // r30 = r14.rid
+       ld1 r23=[r23] // r23 = current->vhpt_pg_shift
        ;; 
-       add r16=r26,r22
-       add r17=r27,r22
-       add r19=r28,r22
-       add r20=r29,r22
-       add r21=r30,r22 
+       add r16=r26,r21
+       add r17=r27,r21
+       add r19=r28,r21
+       add r20=r29,r21
+       add r21=r30,r21 
+       dep.z r23=r23,IA64_RR_PS,IA64_RR_PS_LEN         // r23 = rr.ps
        ;; 
-       cmp.geu p6,p0=r16,r23   // if r8.rid + starting_rid >= ending_rid
-       cmp.geu p7,p0=r17,r23   // if r9.rid + starting_rid >= ending_rid
-       cmp.geu p8,p0=r19,r23   // if r10.rid + starting_rid >= ending_rid
+       cmp.geu p6,p0=r16,r22   // if r8.rid + starting_rid >= ending_rid
+       cmp.geu p7,p0=r17,r22   // if r9.rid + starting_rid >= ending_rid
+       cmp.geu p8,p0=r19,r22   // if r10.rid + starting_rid >= ending_rid
 (p6)   br.cond.spnt.few 1f     // this is an error, but just ignore/return
 (p7)   br.cond.spnt.few 1f     // this is an error, but just ignore/return
-       cmp.geu p9,p0=r20,r23   // if r11.rid + starting_rid >= ending_rid
+       cmp.geu p9,p0=r20,r22   // if r11.rid + starting_rid >= ending_rid
 (p8)   br.cond.spnt.few 1f     // this is an error, but just ignore/return
 (p9)   br.cond.spnt.few 1f     // this is an error, but just ignore/return
-       cmp.geu p10,p0=r21,r23  // if r14.rid + starting_rid >= ending_rid
+       cmp.geu p10,p0=r21,r22  // if r14.rid + starting_rid >= ending_rid
 (p10)  br.cond.spnt.few 1f     // this is an error, but just ignore/return
-       
+       dep r23=-1,r23,0,1      // add rr.ve
+       ;;
        mov r25=1
        adds r22=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
        ;;
@@ -1715,13 +1720,11 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r27=r16,0,8
        extr.u r28=r16,8,8
        extr.u r29=r16,16,8;;
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       st8 [r24]=r23           // save for metaphysical
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       st8 [r24]=r25           // save for metaphysical
+       mov rr[r26]=r25
        dv_serialize_data
 
        // rr1
@@ -1730,12 +1733,10 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r27=r17,0,8
        extr.u r28=r17,8,8
        extr.u r29=r17,16,8;;
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
 
        // rr2
@@ -1744,12 +1745,10 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r27=r19,0,8
        extr.u r28=r19,8,8
        extr.u r29=r19,16,8;;
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
 
        // rr3
@@ -1759,12 +1758,10 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r27=r20,0,8
        extr.u r28=r20,8,8
        extr.u r29=r20,16,8;;
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
        
        // rr4
@@ -1774,49 +1771,43 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r27=r21,0,8
        extr.u r28=r21,8,8
        extr.u r29=r21,16,8;;
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
 #else
        // shuffled version
        // rr0
        // uses r27, r28, r29 for mangling
-       //      r23           for mangled value
+       //      r25           for mangled value
        st8 [r22]=r8, 8 // current->rrs[0] = r8
        mov r26=0       // r26=0x0000000000000000
        extr.u r27=r16,0,8
        extr.u r28=r16,8,8
-       extr.u r29=r16,16,8
-       dep.z r23=PAGE_SHIFT,2,6;;
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       extr.u r25=r17,0,8
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       st8 [r24]=r23           // save for metaphysical
-       mov rr[r26]=r23
+       extr.u r29=r16,16,8;;
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       st8 [r24]=r25           // save for metaphysical
+       mov rr[r26]=r25
        dv_serialize_data
 
        // r16, r24, r25 is usable.
        // rr1
        // uses r25, r28, r29 for mangling
-       //      r23           for mangled value
+       //      r25           for mangled value
+       extr.u r25=r17,0,8
        extr.u r28=r17,8,8
        st8 [r22]=r9, 8 // current->rrs[1] = r9
        extr.u r29=r17,16,8 ;; 
-       dep.z r23=PAGE_SHIFT,2,6;;
        add r26=r26,r30 // r26 = 0x2000000000000000
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
        extr.u r24=r19,8,8
        extr.u r16=r19,0,8
-       dep r23=r25,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r25,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
 
        // r16, r17, r24, r25 is usable
@@ -1826,10 +1817,8 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r29=r19,16,8
        extr.u r27=r20,0,8
        st8 [r22]=r10, 8 // current->rrs[2] = r10
-       dep.z r17=PAGE_SHIFT,2,6;;
        add r26=r26,r30 // r26 = 0x4000000000000000     
-       dep r17=-1,r17,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r17=r16,r17,24,8;;
+       dep r17=r16,r23,24,8;;  // mangling is swapping bytes 1 & 3
        dep r17=r24,r17,16,8;;
        dep r17=r29,r17,8,8;; 
        mov rr[r26]=r17
@@ -1838,18 +1827,16 @@ ENTRY(hyper_set_rr0_to_rr4)
        // r16, r17, r19, r24, r25 is usable
        // rr3
        // uses r27, r28, r29 for mangling
-       //      r23           for mangled value
+       //      r25           for mangled value
        extr.u r28=r20,8,8
        extr.u r29=r20,16,8
        st8 [r22]=r11, 8 // current->rrs[3] = r11
        extr.u r16=r21,0,8
-       dep.z r23=PAGE_SHIFT,2,6;;
        add r26=r26,r30 // r26 = 0x6000000000000000
-       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r23=r27,r23,24,8;;
-       dep r23=r28,r23,16,8;;
-       dep r23=r29,r23,8,8;; 
-       mov rr[r26]=r23
+       dep r25=r27,r23,24,8;;  // mangling is swapping bytes 1 & 3
+       dep r25=r28,r25,16,8;;
+       dep r25=r29,r25,8,8;; 
+       mov rr[r26]=r25
        dv_serialize_data
        
        // r16, r17, r19, r20, r24, r25
@@ -1859,10 +1846,8 @@ ENTRY(hyper_set_rr0_to_rr4)
        extr.u r17=r21,8,8
        extr.u r24=r21,16,8
        st8 [r22]=r14 // current->rrs[4] = r14
-       dep.z r25=PAGE_SHIFT,2,6;;
        add r26=r26,r30 // r26 = 0x8000000000000000
-       dep r25=-1,r25,0,1;;    // mangling is swapping bytes 1 & 3
-       dep r25=r16,r25,24,8;;
+       dep r25=r16,r23,24,8;;  // mangling is swapping bytes 1 & 3
        dep r25=r17,r25,16,8;;
        dep r25=r24,r25,8,8;; 
        mov rr[r26]=r25
@@ -2024,26 +2009,30 @@ ENTRY(hyper_ptc_ga)
        adds r21=1,r21;;
        st4 [r20]=r21;;
 #endif
+       movl r21=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r21=[r21];;
+       adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r21
        mov r28=r8
        extr.u r19=r9,2,6               // addr_range=1<<((r9&0xfc)>>2)
        mov r20=1
        shr.u r24=r8,61
-       addl r27=56,r0                  // PAGE_SHIFT<<2 (for ptc.ga)
        movl r26=0x8000000000000000     // INVALID_TI_TAG
        mov r30=ar.lc
        ;;
+       ld1 r22=[r22]                   // current->arch.vhpt_pg_shift
        shl r19=r20,r19
        cmp.eq p7,p0=7,r24
 (p7)   br.spnt.many dispatch_break_fault ;;    // slow way for rr7
        ;;
+       shl r27=r22,2                   // vhpt_pg_shift<<2 (for ptc.ga)
+       shr.u r23=r19,r22               // repeat loop for n pages
        cmp.le p7,p0=r19,r0             // skip flush if size<=0
 (p7)   br.cond.dpnt 2f ;;
-       extr.u r24=r19,0,PAGE_SHIFT
-       shr.u r23=r19,PAGE_SHIFT ;;     // repeat loop for n pages
-       cmp.ne p7,p0=r24,r0 ;;
+       shl r24=r23,r22;;
+       cmp.ne p7,p0=r24,r23 ;;
 (p7)   adds r23=1,r23 ;;               // n_pages<size<n_pages+1? extra iter
        mov ar.lc=r23
-       movl r29=PAGE_SIZE;;
+       shl r29=r20,r22;;               // page_size
 1:
        thash r25=r28 ;;
        adds r25=16,r25 ;;
@@ -2060,10 +2049,8 @@ 2:
        mov ar.lc=r30 ;;
        mov r29=cr.ipsr
        mov r30=cr.iip;;
-       movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
-       ld8 r27=[r27];;
-       adds r25=IA64_VCPU_DTLB_OFFSET,r27
-       adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
+       adds r25=IA64_VCPU_DTLB_OFFSET,r21
+       adds r26=IA64_VCPU_ITLB_OFFSET,r21;;
        ld8 r24=[r25]
        ld8 r27=[r26] ;;
        and r24=-2,r24
@@ -2110,10 +2097,14 @@ hyper_itc_d:
        br.sptk.many dispatch_break_fault ;;
 #else
        // ensure itir.ps >= xen's pagesize
+       movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r27=[r27];;
+       adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
        adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld1 r22=[r22]
        ld8 r23=[r23];;
        extr.u r24=r23,IA64_ITIR_PS,IA64_ITIR_PS_LEN;;          // r24==logps
-       cmp.gt p7,p0=PAGE_SHIFT,r24
+       cmp.gt p7,p0=r22,r24
 (p7)   br.spnt.many dispatch_break_fault ;;
        adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r21=[r21];;
@@ -2121,8 +2112,6 @@ hyper_itc_d:
        extr.u r21=r21,61,3;;
        cmp.eq p7,p0=r21,r0
 (p7)   br.spnt.many dispatch_break_fault ;;
-       movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
-       ld8 r27=[r27];;
        adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;;
        ld8 r27=[r27]
 // FIXME: is the global var dom0 always pinned? assume so for now
@@ -2160,18 +2149,24 @@ END(hyper_itc)
 //     r31 == pr
 ENTRY(fast_insert)
        // translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
-       mov r19=1;;
-       shl r20=r19,r24;;
+       mov r19=1
+       movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       shl r20=r19,r24
+       ld8 r27=[r27];;
+       adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
        adds r20=-1,r20         // r20 == mask
        movl r19=_PAGE_PPN_MASK;;
+       ld1 r23=[r23]
+       mov r25=-1
        and r22=r16,r19;;       // r22 == pteval & _PAGE_PPN_MASK
        andcm r19=r22,r20
+       shl r25=r25,r23         // -1 << current->arch.vhpt_pg_shift
        adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r21=[r21];;
        and r20=r21,r20;;
        or r19=r19,r20;;        // r19 == mpaddr
 // FIXME: for now, just do domain0 and skip mpaddr range checks
-       dep r20=r0,r19,0,PAGE_SHIFT
+       and r20=r25,r19
        movl r21=PAGE_PHYS ;;
        or r20=r20,r21 ;;       // r20==return value from lookup_domain_mpa
        // r16=pteval,r20=pteval2
@@ -2208,8 +2203,6 @@ ENTRY(fast_insert)
        // vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa)
        // TR_ENTRY = {page_flags,itir,addr,rid}
        tbit.z p6,p7=r17,0
-       movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
-       ld8 r27=[r27];;
        adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27
 (p6)   adds r27=IA64_VCPU_DTLB_OFFSET,r27
 (p7)   adds r27=IA64_VCPU_ITLB_OFFSET,r27;;
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/mm.c    Thu Aug 16 11:33:27 2007 +0200
@@ -447,7 +447,7 @@ gmfn_to_mfn_foreign(struct domain *d, un
 // given a domain virtual address, pte and pagesize, extract the metaphysical
 // address, convert the pte for a physical address for (possibly different)
 // Xen PAGE_SIZE and return modified pte.  (NOTE: TLB insert should use
-// PAGE_SIZE!)
+// current->arch.vhpt_pg_shift!)
 u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* itir,
                          struct p2m_entry* entry)
 {
@@ -457,20 +457,25 @@ u64 translate_domain_pte(u64 pteval, u64
        u64 arflags;
        u64 arflags2;
        u64 maflags2;
+       u64 ps;
 
        pteval &= ((1UL << 53) - 1);// ignore [63:53] bits
 
        // FIXME address had better be pre-validated on insert
        mask = ~itir_mask(_itir.itir);
        mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask);
-
-       if (_itir.ps > PAGE_SHIFT)
-               _itir.ps = PAGE_SHIFT;
+       ps = current->arch.vhpt_pg_shift ? current->arch.vhpt_pg_shift :
+                                          PAGE_SHIFT;
+
+       if (_itir.ps > ps)
+               _itir.ps = ps;
 
        ((ia64_itir_t*)itir)->itir = _itir.itir;/* Copy the whole register. */
        ((ia64_itir_t*)itir)->ps = _itir.ps;    /* Overwrite ps part! */
 
        pteval2 = lookup_domain_mpa(d, mpaddr, entry);
+       if (ps < PAGE_SHIFT)
+               pteval2 |= address & (PAGE_SIZE - 1) & ~((1L << ps) - 1);
 
        /* Check access rights.  */
        arflags  = pteval  & _PAGE_AR_MASK;
@@ -544,10 +549,11 @@ u64 translate_domain_pte(u64 pteval, u64
                        pteval &= ~_PAGE_D;
        }
     
-       /* Ignore non-addr bits of pteval2 and force PL0->2
+       /* Ignore non-addr bits of pteval2 and force PL0->1
           (PL3 is unaffected) */
-       return (pteval & ~_PAGE_PPN_MASK) |
-              (pteval2 & _PAGE_PPN_MASK) | _PAGE_PL_PRIV;
+       return (pteval & ~(_PAGE_PPN_MASK | _PAGE_PL_MASK)) |
+              (pteval2 & _PAGE_PPN_MASK) |
+              (vcpu_pl_adjust(pteval, 7) & _PAGE_PL_MASK);
 }
 
 // given a current domain metaphysical address, return the physical address
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/regionreg.c
--- a/xen/arch/ia64/xen/regionreg.c     Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/regionreg.c     Thu Aug 16 11:33:27 2007 +0200
@@ -72,7 +72,7 @@ static unsigned long allocate_metaphysic
 
        rrv.rrval = 0;  // Or else may see reserved bit fault
        rrv.rid = d->arch.starting_mp_rid + n;
-       rrv.ps = PAGE_SHIFT;
+       rrv.ps = PAGE_SHIFT;    // only used at domain creation
        rrv.ve = 0;
        /* Mangle metaphysical rid */
        rrv.rrval = vmMangleRID(rrv.rrval);
@@ -254,7 +254,7 @@ int set_one_rr(unsigned long rr, unsigne
        memrrv.rrval = rrv.rrval;
        newrrv.rid = newrid;
        newrrv.ve = 1;  // VHPT now enabled for region 7!!
-       newrrv.ps = PAGE_SHIFT;
+       newrrv.ps = v->arch.vhpt_pg_shift;
 
        if (rreg == 0) {
                v->arch.metaphysical_saved_rr0 = vmMangleRID(newrrv.rrval);
@@ -288,7 +288,7 @@ void init_all_rr(struct vcpu *v)
 
        rrv.rrval = 0;
        //rrv.rrval = v->domain->arch.metaphysical_rr0;
-       rrv.ps = PAGE_SHIFT;
+       rrv.ps = v->arch.vhpt_pg_shift;
        rrv.ve = 1;
 if (!v->vcpu_info) { panic("Stopping in init_all_rr\n"); }
        VCPU(v,rrs[0]) = -1;
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c  Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/vcpu.c  Thu Aug 16 11:33:27 2007 +0200
@@ -1697,7 +1697,7 @@ IA64FAULT vcpu_translate(VCPU * vcpu, u6
                } else {
                        *pteval = (address & _PAGE_PPN_MASK) |
                                __DIRTY_BITS | _PAGE_PL_PRIV | _PAGE_AR_RWX;
-                       *itir = PAGE_SHIFT << 2;
+                       *itir = vcpu->arch.vhpt_pg_shift << 2;
                        perfc_incr(phys_translate);
                        return IA64_NO_FAULT;
                }
@@ -2290,13 +2290,29 @@ IA64FAULT vcpu_set_dtr(VCPU * vcpu, u64 
  VCPU translation cache access routines
 **************************************************************************/
 
+static void
+vcpu_rebuild_vhpt(VCPU * vcpu, u64 ps)
+{
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+       printk("vhpt rebuild: using page_shift %d\n", (int)ps);
+       vcpu->arch.vhpt_pg_shift = ps;
+       vcpu_purge_tr_entry(&PSCBX(vcpu, dtlb));
+       vcpu_purge_tr_entry(&PSCBX(vcpu, itlb));
+       local_vhpt_flush();
+       load_region_regs(vcpu);
+#else
+       panic_domain(NULL, "domain trying to use smaller page size!\n");
+#endif
+}
+
 void
 vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, u64 vaddr, u64 pte,
                  u64 mp_pte, u64 itir, struct p2m_entry *entry)
 {
        ia64_itir_t _itir = {.itir = itir};
        unsigned long psr;
-       unsigned long ps = (vcpu->domain == dom0) ? _itir.ps : PAGE_SHIFT;
+       unsigned long ps = (vcpu->domain == dom0) ? _itir.ps :
+                                                   vcpu->arch.vhpt_pg_shift;
 
        check_xen_space_overlap("itc", vaddr, 1UL << _itir.ps);
 
@@ -2305,7 +2321,7 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
                panic_domain(NULL, "vcpu_itc_no_srlz: domain trying to use "
                             "smaller page size!\n");
 
-       BUG_ON(_itir.ps > PAGE_SHIFT);
+       BUG_ON(_itir.ps > vcpu->arch.vhpt_pg_shift);
        vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
        psr = ia64_clear_ic();
        pte &= ~(_PAGE_RV2 | _PAGE_RV1);        // Mask out the reserved bits.
@@ -2318,7 +2334,7 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
                // addresses never get flushed.  More work needed if this
                // ever happens.
 //printk("vhpt_insert(%p,%p,%p)\n",vaddr,pte,1L<<logps);
-               if (_itir.ps > PAGE_SHIFT)
+               if (_itir.ps > vcpu->arch.vhpt_pg_shift)
                        vhpt_multiple_insert(vaddr, pte, _itir.itir);
                else
                        vhpt_insert(vaddr, pte, _itir.itir);
@@ -2326,7 +2342,7 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, 
        // even if domain pagesize is larger than PAGE_SIZE, just put
        // PAGE_SIZE mapping in the vhpt for now, else purging is complicated
        else {
-               _itir.ps = PAGE_SHIFT;
+               _itir.ps = vcpu->arch.vhpt_pg_shift;
                vhpt_insert(vaddr, pte, _itir.itir);
        }
 }
@@ -2338,12 +2354,11 @@ IA64FAULT vcpu_itc_d(VCPU * vcpu, u64 pt
        struct p2m_entry entry;
        ia64_itir_t _itir = {.itir = itir};
 
-       if (_itir.ps < PAGE_SHIFT)
-               panic_domain(NULL, "vcpu_itc_d: domain trying to use "
-                            "smaller page size!\n");
+       if (_itir.ps < vcpu->arch.vhpt_pg_shift)
+               vcpu_rebuild_vhpt(vcpu, _itir.ps);
 
  again:
-       //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
+       //itir = (itir & ~0xfc) | (vcpu->arch.vhpt_pg_shift<<2); // ign dom pgsz
        pteval = translate_domain_pte(pte, ifa, itir, &(_itir.itir), &entry);
        if (!pteval)
                return IA64_ILLOP_FAULT;
@@ -2367,11 +2382,11 @@ IA64FAULT vcpu_itc_i(VCPU * vcpu, u64 pt
        struct p2m_entry entry;
        ia64_itir_t _itir = {.itir = itir};
 
-       if (_itir.ps < PAGE_SHIFT)
-               panic_domain(NULL, "vcpu_itc_i: domain trying to use "
-                            "smaller page size!\n");
+       if (_itir.ps < vcpu->arch.vhpt_pg_shift)
+               vcpu_rebuild_vhpt(vcpu, _itir.ps);
+
       again:
-       //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
+       //itir = (itir & ~0xfc) | (vcpu->arch.vhpt_pg_shift<<2); // ign dom pgsz
        pteval = translate_domain_pte(pte, ifa, itir, &(_itir.itir), &entry);
        if (!pteval)
                return IA64_ILLOP_FAULT;
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c  Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/vhpt.c  Thu Aug 16 11:33:27 2007 +0200
@@ -88,15 +88,16 @@ void vhpt_multiple_insert(unsigned long 
 void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte,
                           unsigned long itir)
 {
+       unsigned char ps = current->arch.vhpt_pg_shift;
        ia64_itir_t _itir = {.itir = itir};
        unsigned long mask = (1L << _itir.ps) - 1;
        int i;
 
-       if (_itir.ps-PAGE_SHIFT > 10 && !running_on_sim) {
+       if (_itir.ps-ps > 10 && !running_on_sim) {
                // if this happens, we may want to revisit this algorithm
                panic("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
        }
-       if (_itir.ps-PAGE_SHIFT > 2) {
+       if (_itir.ps-ps > 2) {
                // FIXME: Should add counter here to see how often this
                //  happens (e.g. for 16MB pages!) and determine if it
                //  is a performance problem.  On a quick look, it takes
@@ -111,9 +112,9 @@ void vhpt_multiple_insert(unsigned long 
        }
        vaddr &= ~mask;
        pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
-       for (i = 1L << (_itir.ps-PAGE_SHIFT); i > 0; i--) {
+       for (i = 1L << (_itir.ps-ps); i > 0; i--) {
                vhpt_insert(vaddr, pte, _itir.itir);
-               vaddr += PAGE_SIZE;
+               vaddr += (1L << ps);
        }
 }
 
@@ -291,6 +292,7 @@ __flush_vhpt_range(unsigned long vhpt_ma
 __flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range)
 {
        void *vhpt_base = __va(vhpt_maddr);
+       u64 pgsz = 1L << current->arch.vhpt_pg_shift;
 
        while ((long)addr_range > 0) {
                /* Get the VHPT entry.  */
@@ -298,8 +300,8 @@ __flush_vhpt_range(unsigned long vhpt_ma
                        __va_ul(vcpu_vhpt_maddr(current));
                struct vhpt_lf_entry *v = vhpt_base + off;
                v->ti_tag = INVALID_TI_TAG;
-               addr_range -= PAGE_SIZE;
-               vadr += PAGE_SIZE;
+               addr_range -= pgsz;
+               vadr += pgsz;
        }
 }
 
@@ -362,7 +364,8 @@ void domain_flush_vtlb_range (struct dom
        // ptc.ga has release semantics.
 
        /* ptc.ga  */
-       platform_global_tlb_purge(vadr, vadr + addr_range, PAGE_SHIFT);
+       platform_global_tlb_purge(vadr, vadr + addr_range,
+                                 current->arch.vhpt_pg_shift);
        perfc_incr(domain_flush_vtlb_range);
 }
 
@@ -381,6 +384,7 @@ __domain_flush_vtlb_track_entry(struct d
        int cpu;
        int vcpu;
        int local_purge = 1;
+       unsigned char ps = current->arch.vhpt_pg_shift;
        
        BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
        /*
@@ -413,7 +417,7 @@ __domain_flush_vtlb_track_entry(struct d
                                continue;
 
                        /* Invalidate VHPT entries.  */
-                       vcpu_flush_vhpt_range(v, vaddr, PAGE_SIZE);
+                       vcpu_flush_vhpt_range(v, vaddr, 1L << ps);
 
                        /*
                         * current->processor == v->processor
@@ -427,7 +431,7 @@ __domain_flush_vtlb_track_entry(struct d
        } else {
                for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
                        /* Invalidate VHPT entries.  */
-                       cpu_flush_vhpt_range(cpu, vaddr, PAGE_SIZE);
+                       cpu_flush_vhpt_range(cpu, vaddr, 1L << ps);
 
                        if (d->vcpu[cpu] != current)
                                local_purge = 0;
@@ -436,12 +440,11 @@ __domain_flush_vtlb_track_entry(struct d
 
        /* ptc.ga  */
        if (local_purge) {
-               ia64_ptcl(vaddr, PAGE_SHIFT << 2);
+               ia64_ptcl(vaddr, ps << 2);
                perfc_incr(domain_flush_vtlb_local);
        } else {
                /* ptc.ga has release semantics. */
-               platform_global_tlb_purge(vaddr, vaddr + PAGE_SIZE,
-                                         PAGE_SHIFT);
+               platform_global_tlb_purge(vaddr, vaddr + (1L << ps), ps);
                perfc_incr(domain_flush_vtlb_global);
        }
 
diff -r 6b0c965e95a6 -r 2f58face717c xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Thu Aug  9 08:48:00 2007 +0200
+++ b/xen/include/asm-ia64/domain.h     Thu Aug 16 11:33:27 2007 +0200
@@ -246,6 +246,7 @@ struct arch_vcpu {
 #define XEN_IA64_PKR_IN_USE    0x1             /* If psr.pk = 1 was set. */
     unsigned char pkr_flags;
 
+    unsigned char       vhpt_pg_shift;         /* PAGE_SHIFT or less */
 #ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
     PTA                 pta;
     unsigned long       vhpt_maddr;
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.