[Xen-ia64-devel] [PATCH] support of 4k page size for individual guests
Hi,

this is the patch needed to support 4k (and 8k) pages for individual
guests (currently PV only). "Normal" domUs should not be affected, as the
per-vcpu VHPT is reconfigured only if a domU uses a page size smaller
than PAGE_SIZE.

I haven't touched grant pages yet; I think they should work on a
PAGE_SIZE basis as before, but I didn't check it.

Tested by compile, boot dom0, boot domU (Linux) and boot domU (BS2000 -
our mainframe OS using 4k pages).

Juergen

--
Juergen Gross                  Principal Developer
IP SW OS6                      Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers      e-mail: juergen.gross@xxxxxxxxxxxxxxxxxxx
Otto-Hahn-Ring 6               Internet: www.fujitsu-siemens.com
D-81739 Muenchen               Company details: www.fujitsu-siemens.com/imprint.html

# HG changeset patch
# User juergen.gross@xxxxxxxxxxxxxxxxxx
# Node ID 2f58face717cefcaaa9791994ab978f975b14573
# Parent  6b0c965e95a668bf65d475f519b254107cce21a3
4k pagesize support per vcpu

Signed-off-by: Juergen Gross juergen.gross@xxxxxxxxxxxxxxxxxxx

diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Thu Aug 9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/asm-offsets.c       Thu Aug 16 11:33:27 2007 +0200
@@ -72,6 +72,7 @@ void foo(void)
 	DEFINE(IA64_VCPU_DOMAIN_ITM_LAST_OFFSET, offsetof (struct vcpu, arch.domain_itm_last));
 	DEFINE(IA64_VCPU_ITLB_OFFSET, offsetof (struct vcpu, arch.itlb));
 	DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb));
+	DEFINE(IA64_VCPU_VHPT_PG_SHIFT_OFFSET, offsetof (struct vcpu, arch.vhpt_pg_shift));

 	BLANK();
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Thu Aug 9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/domain.c        Thu Aug 16 11:33:27 2007 +0200
@@ -445,6 +445,7 @@ int vcpu_initialise(struct vcpu *v)
 		v->arch.ending_rid = d->arch.ending_rid;
 		v->arch.breakimm = d->arch.breakimm;
 		v->arch.last_processor = INVALID_PROCESSOR;
+		v->arch.vhpt_pg_shift = PAGE_SHIFT;
 	}

 	if (!VMX_DOMAIN(v))
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/faults.c
--- a/xen/arch/ia64/xen/faults.c        Thu Aug 9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/faults.c        Thu Aug 16 11:33:27 2007 +0200
@@ -239,6 +239,8 @@ void ia64_do_page_fault(unsigned long ad
 		    (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
 		regs->cr_ipsr = vcpu_pl_adjust(regs->cr_ipsr, IA64_PSR_CPL0_BIT);
+		if (PSCB(current, dcr) & IA64_DCR_BE)
+			regs->cr_ipsr |= IA64_PSR_BE;

 		if (PSCB(current, hpsr_dfh))
 			regs->cr_ipsr |= IA64_PSR_DFH;
@@ -741,7 +743,8 @@ ia64_shadow_fault(unsigned long ifa, uns
 		pte = vlfe->page_flags;
 		if (vlfe->ti_tag == ia64_ttag(ifa)) {
 			/* The VHPT entry is valid.  */
-			gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+			gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >>
+			                         v->arch.vhpt_pg_shift);
 			BUG_ON(gpfn == INVALID_M2P_ENTRY);
 		} else {
 			unsigned long itir, iha;
@@ -757,10 +760,10 @@ ia64_shadow_fault(unsigned long ifa, uns
 			/* Try again!  */
 			if (fault != IA64_NO_FAULT) {
 				/* This will trigger a dtlb miss.  */
-				ia64_ptcl(ifa, PAGE_SHIFT << 2);
-				return;
-			}
-			gpfn = ((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+				ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
+				return;
+			}
+			gpfn = ((pte & _PAGE_PPN_MASK) >> v->arch.vhpt_pg_shift);
 			if (pte & _PAGE_D)
 				pte |= _PAGE_VIRT_D;
 		}
@@ -788,7 +791,7 @@ ia64_shadow_fault(unsigned long ifa, uns
 		/* Purge the TC locally.
 		   It will be reloaded from the VHPT iff the VHPT entry is still valid.
 		*/
-		ia64_ptcl(ifa, PAGE_SHIFT << 2);
+		ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
 		atomic64_inc(&d->arch.shadow_fault_count);
 	} else {
@@ -800,6 +803,6 @@ ia64_shadow_fault(unsigned long ifa, uns
 		/* We don't know wether or not the fault must be
 		   reflected.  The VHPT entry is not valid.  */
 		/* FIXME: in metaphysical mode, we could do an ITC now.  */
-		ia64_ptcl(ifa, PAGE_SHIFT << 2);
-	}
-}
+		ia64_ptcl(ifa, v->arch.vhpt_pg_shift << 2);
+	}
+}
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Thu Aug 9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/hyperprivop.S   Thu Aug 16 11:33:27 2007 +0200
@@ -1604,26 +1604,27 @@ ENTRY(hyper_set_rr)
 	extr.u r26=r9,IA64_RR_RID,IA64_RR_RID_LEN	// r26 = r9.rid
 	movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
 	ld8 r20=[r20];;
-	adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;;
-	ld4 r22=[r21];;
-	adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;;
-	ld4 r23=[r21];;
-	adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;;
+	adds r22=IA64_VCPU_STARTING_RID_OFFSET,r20
+	adds r23=IA64_VCPU_ENDING_RID_OFFSET,r20
+	adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20
+	adds r21=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r20;;
+	ld4 r22=[r22]
+	ld4 r23=[r23]
+	ld1 r21=[r21];;
 	add r22=r26,r22;;
 	cmp.geu p6,p0=r22,r23	// if r9.rid + starting_rid >= ending_rid
 (p6)	br.cond.spnt.few 1f;	// this is an error, but just ignore/return
-	// r21=starting_rid
 	adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
 	shl r25=r25,3;;
 	add r20=r20,r25;;
 	st8 [r20]=r9;;		// store away exactly what was passed
 	// but adjust value actually placed in rr[r8]
 	// r22 contains adjusted rid, "mangle" it (see regionreg.c)
-	// and set ps to PAGE_SHIFT and ve to 1
+	// and set ps to v->arch.vhpt_pg_shift and ve to 1
 	extr.u r27=r22,0,8
 	extr.u r28=r22,8,8
-	extr.u r29=r22,16,8;;
-	dep.z r23=PAGE_SHIFT,IA64_RR_PS,IA64_RR_PS_LEN;;
+	extr.u r29=r22,16,8
+	dep.z r23=r21,IA64_RR_PS,IA64_RR_PS_LEN;;
 	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
 	dep r23=r27,r23,24,8;;
 	dep r23=r28,r23,16,8;;
@@ -1673,34 +1674,38 @@ ENTRY(hyper_set_rr0_to_rr4)
 	ld8 r17=[r17];;
 	adds r21=IA64_VCPU_STARTING_RID_OFFSET,r17
-	adds r25=IA64_VCPU_ENDING_RID_OFFSET,r17
+	adds r22=IA64_VCPU_ENDING_RID_OFFSET,r17
+	adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r17
 	;;
-	ld4 r22=[r21]				// r22 = current->starting_rid
+	ld4 r21=[r21]				// r21 = current->starting_rid
 	extr.u r26=r8,IA64_RR_RID,IA64_RR_RID_LEN	// r26 = r8.rid
 	extr.u r27=r9,IA64_RR_RID,IA64_RR_RID_LEN	// r27 = r9.rid
-	ld4 r23=[r25]				// r23 = current->ending_rid
+	ld4 r22=[r22]				// r22 = current->ending_rid
 	extr.u r28=r10,IA64_RR_RID,IA64_RR_RID_LEN	// r28 = r10.rid
 	extr.u r29=r11,IA64_RR_RID,IA64_RR_RID_LEN	// r29 = r11.rid
 	adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r17
 	extr.u r30=r14,IA64_RR_RID,IA64_RR_RID_LEN	// r30 = r14.rid
+	ld1 r23=[r23]				// r23 = current->vhpt_pg_shift
 	;;
-	add r16=r26,r22
-	add r17=r27,r22
-	add r19=r28,r22
-	add r20=r29,r22
-	add r21=r30,r22
+	add r16=r26,r21
+	add r17=r27,r21
+	add r19=r28,r21
+	add r20=r29,r21
+	add r21=r30,r21
+	dep.z r23=r23,IA64_RR_PS,IA64_RR_PS_LEN	// r23 = rr.ps
 	;;
-	cmp.geu p6,p0=r16,r23	// if r8.rid + starting_rid >= ending_rid
-	cmp.geu p7,p0=r17,r23	// if r9.rid + starting_rid >= ending_rid
-	cmp.geu p8,p0=r19,r23	// if r10.rid + starting_rid >= ending_rid
+	cmp.geu p6,p0=r16,r22	// if r8.rid + starting_rid >= ending_rid
+	cmp.geu p7,p0=r17,r22	// if r9.rid + starting_rid >= ending_rid
+	cmp.geu p8,p0=r19,r22	// if r10.rid + starting_rid >= ending_rid
 (p6)	br.cond.spnt.few 1f	// this is an error, but just ignore/return
 (p7)	br.cond.spnt.few 1f	// this is an error, but just ignore/return
-	cmp.geu p9,p0=r20,r23	// if r11.rid + starting_rid >= ending_rid
+	cmp.geu p9,p0=r20,r22	// if r11.rid + starting_rid >= ending_rid
 (p8)	br.cond.spnt.few 1f	// this is an error, but just ignore/return
 (p9)	br.cond.spnt.few 1f	// this is an error, but just ignore/return
-	cmp.geu p10,p0=r21,r23	// if r14.rid + starting_rid >= ending_rid
+	cmp.geu p10,p0=r21,r22	// if r14.rid + starting_rid >= ending_rid
 (p10)	br.cond.spnt.few 1f	// this is an error, but just ignore/return
-
+	dep r23=-1,r23,0,1	// add rr.ve
+	;;
 	mov r25=1
 	adds r22=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
 	;;
@@ -1715,13 +1720,11 @@ ENTRY(hyper_set_rr0_to_rr4)
 	extr.u r27=r16,0,8
 	extr.u r28=r16,8,8
 	extr.u r29=r16,16,8;;
-	dep.z r23=PAGE_SHIFT,2,6;;
-	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
-	dep r23=r27,r23,24,8;;
-	dep r23=r28,r23,16,8;;
-	dep r23=r29,r23,8,8;;
-	st8 [r24]=r23		// save for metaphysical
-	mov rr[r26]=r23
+	dep r25=r27,r23,24,8;;	// mangling is swapping bytes 1 & 3
+	dep r25=r28,r25,16,8;;
+	dep r25=r29,r25,8,8;;
+	st8 [r24]=r25		// save for metaphysical
+	mov rr[r26]=r25
 	dv_serialize_data

 	// rr1
@@ -1730,12 +1733,10 @@ ENTRY(hyper_set_rr0_to_rr4)
 	extr.u r27=r17,0,8
 	extr.u r28=r17,8,8
 	extr.u r29=r17,16,8;;
-	dep.z r23=PAGE_SHIFT,2,6;;
-	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
-	dep r23=r27,r23,24,8;;
-	dep r23=r28,r23,16,8;;
-	dep r23=r29,r23,8,8;;
-	mov rr[r26]=r23
+	dep r25=r27,r23,24,8;;	// mangling is swapping bytes 1 & 3
+	dep r25=r28,r25,16,8;;
+	dep r25=r29,r25,8,8;;
+	mov rr[r26]=r25
 	dv_serialize_data

 	// rr2
@@ -1744,12 +1745,10 @@ ENTRY(hyper_set_rr0_to_rr4)
 	extr.u r27=r19,0,8
 	extr.u r28=r19,8,8
 	extr.u r29=r19,16,8;;
-	dep.z r23=PAGE_SHIFT,2,6;;
-	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
-	dep r23=r27,r23,24,8;;
-	dep r23=r28,r23,16,8;;
-	dep r23=r29,r23,8,8;;
-	mov rr[r26]=r23
+	dep r25=r27,r23,24,8;;	// mangling is swapping bytes 1 & 3
+	dep r25=r28,r25,16,8;;
+	dep r25=r29,r25,8,8;;
+	mov rr[r26]=r25
 	dv_serialize_data

 	// rr3
@@ -1759,12 +1758,10 @@ ENTRY(hyper_set_rr0_to_rr4)
 	extr.u r27=r20,0,8
 	extr.u r28=r20,8,8
 	extr.u r29=r20,16,8;;
-	dep.z r23=PAGE_SHIFT,2,6;;
-	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
-	dep r23=r27,r23,24,8;;
-	dep r23=r28,r23,16,8;;
-	dep r23=r29,r23,8,8;;
-	mov rr[r26]=r23
+	dep r25=r27,r23,24,8;;	// mangling is swapping bytes 1 & 3
+	dep r25=r28,r25,16,8;;
+	dep r25=r29,r25,8,8;;
+	mov rr[r26]=r25
 	dv_serialize_data

 	// rr4
@@ -1774,49 +1771,43 @@ ENTRY(hyper_set_rr0_to_rr4)
 	extr.u r27=r21,0,8
 	extr.u r28=r21,8,8
 	extr.u r29=r21,16,8;;
-	dep.z r23=PAGE_SHIFT,2,6;;
-	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
-	dep r23=r27,r23,24,8;;
-	dep r23=r28,r23,16,8;;
-	dep r23=r29,r23,8,8;;
-	mov rr[r26]=r23
+	dep r25=r27,r23,24,8;;	// mangling is swapping bytes 1 & 3
+	dep r25=r28,r25,16,8;;
+	dep r25=r29,r25,8,8;;
+	mov rr[r26]=r25
 	dv_serialize_data
 #else
 	// shuffled version
 	// rr0
 	// uses r27, r28, r29 for mangling
-	//  r23 for mangled value
+	//  r25 for mangled value
 	st8 [r22]=r8, 8		// current->rrs[0] = r8
 	mov r26=0		// r26=0x0000000000000000
 	extr.u r27=r16,0,8
 	extr.u r28=r16,8,8
-	extr.u r29=r16,16,8
-	dep.z r23=PAGE_SHIFT,2,6;;
-	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
-	extr.u r25=r17,0,8
-	dep r23=r27,r23,24,8;;
-	dep r23=r28,r23,16,8;;
-	dep r23=r29,r23,8,8;;
-	st8 [r24]=r23		// save for metaphysical
-	mov rr[r26]=r23
+	extr.u r29=r16,16,8;;
+	dep r25=r27,r23,24,8;;	// mangling is swapping bytes 1 & 3
+	dep r25=r28,r25,16,8;;
+	dep r25=r29,r25,8,8;;
+	st8 [r24]=r25		// save for metaphysical
+	mov rr[r26]=r25
 	dv_serialize_data

 	// r16, r24, r25 is usable.
 	// rr1
 	// uses r25, r28, r29 for mangling
-	//  r23 for mangled value
+	//  r25 for mangled value
+	extr.u r25=r17,0,8
 	extr.u r28=r17,8,8
 	st8 [r22]=r9, 8		// current->rrs[1] = r9
 	extr.u r29=r17,16,8
 	;;
-	dep.z r23=PAGE_SHIFT,2,6;;
 	add r26=r26,r30		// r26 = 0x2000000000000000
-	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
 	extr.u r24=r19,8,8
 	extr.u r16=r19,0,8
-	dep r23=r25,r23,24,8;;
-	dep r23=r28,r23,16,8;;
-	dep r23=r29,r23,8,8;;
-	mov rr[r26]=r23
+	dep r25=r25,r23,24,8;;	// mangling is swapping bytes 1 & 3
+	dep r25=r28,r25,16,8;;
+	dep r25=r29,r25,8,8;;
+	mov rr[r26]=r25
 	dv_serialize_data

 	// r16, r17, r24, r25 is usable
@@ -1826,10 +1817,8 @@ ENTRY(hyper_set_rr0_to_rr4)
 	extr.u r29=r19,16,8
 	extr.u r27=r20,0,8
 	st8 [r22]=r10, 8	// current->rrs[2] = r10
-	dep.z r17=PAGE_SHIFT,2,6;;
 	add r26=r26,r30		// r26 = 0x4000000000000000
-	dep r17=-1,r17,0,1;;	// mangling is swapping bytes 1 & 3
-	dep r17=r16,r17,24,8;;
+	dep r17=r16,r23,24,8;;	// mangling is swapping bytes 1 & 3
 	dep r17=r24,r17,16,8;;
 	dep r17=r29,r17,8,8;;
 	mov rr[r26]=r17
@@ -1838,18 +1827,16 @@ ENTRY(hyper_set_rr0_to_rr4)
 	// r16, r17, r19, r24, r25 is usable
 	// rr3
 	// uses r27, r28, r29 for mangling
-	//  r23 for mangled value
+	//  r25 for mangled value
 	extr.u r28=r20,8,8
 	extr.u r29=r20,16,8
 	st8 [r22]=r11, 8	// current->rrs[3] = r11
 	extr.u r16=r21,0,8
-	dep.z r23=PAGE_SHIFT,2,6;;
 	add r26=r26,r30		// r26 = 0x6000000000000000
-	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
-	dep r23=r27,r23,24,8;;
-	dep r23=r28,r23,16,8;;
-	dep r23=r29,r23,8,8;;
-	mov rr[r26]=r23
+	dep r25=r27,r23,24,8;;	// mangling is swapping bytes 1 & 3
+	dep r25=r28,r25,16,8;;
+	dep r25=r29,r25,8,8;;
+	mov rr[r26]=r25
 	dv_serialize_data

 	// r16, r17, r19, r20, r24, r25
@@ -1859,10 +1846,8 @@ ENTRY(hyper_set_rr0_to_rr4)
 	extr.u r17=r21,8,8
 	extr.u r24=r21,16,8
 	st8 [r22]=r14		// current->rrs[4] = r14
-	dep.z r25=PAGE_SHIFT,2,6;;
 	add r26=r26,r30		// r26 = 0x8000000000000000
-	dep r25=-1,r25,0,1;;	// mangling is swapping bytes 1 & 3
-	dep r25=r16,r25,24,8;;
+	dep r25=r16,r23,24,8;;	// mangling is swapping bytes 1 & 3
 	dep r25=r17,r25,16,8;;
 	dep r25=r24,r25,8,8;;
 	mov rr[r26]=r25
@@ -2024,26 +2009,30 @@ ENTRY(hyper_ptc_ga)
 	adds r21=1,r21;;
 	st4 [r20]=r21;;
 #endif
+	movl r21=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+	ld8 r21=[r21];;
+	adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r21
 	mov r28=r8
 	extr.u r19=r9,2,6		// addr_range=1<<((r9&0xfc)>>2)
 	mov r20=1
 	shr.u r24=r8,61
-	addl r27=56,r0			// PAGE_SHIFT<<2 (for ptc.ga)
 	movl r26=0x8000000000000000	// INVALID_TI_TAG
 	mov r30=ar.lc
 	;;
+	ld1 r22=[r22]			// current->arch.vhpt_pg_shift
 	shl r19=r20,r19
 	cmp.eq p7,p0=7,r24
 (p7)	br.spnt.many dispatch_break_fault ;;	// slow way for rr7
 	;;
+	shl r27=r22,2		// vhpt_pg_shift<<2 (for ptc.ga)
+	shr.u r23=r19,r22	// repeat loop for n pages
 	cmp.le p7,p0=r19,r0	// skip flush if size<=0
 (p7)	br.cond.dpnt 2f ;;
-	extr.u r24=r19,0,PAGE_SHIFT
-	shr.u r23=r19,PAGE_SHIFT ;;	// repeat loop for n pages
-	cmp.ne p7,p0=r24,r0 ;;
+	shl r24=r23,r22;;
+	cmp.ne p7,p0=r24,r23 ;;
 (p7)	adds r23=1,r23 ;;	// n_pages<size<n_pages+1? extra iter
 	mov ar.lc=r23
-	movl r29=PAGE_SIZE;;
+	shl r29=r20,r22;;	// page_size
 1:	thash r25=r28 ;;
 	adds r25=16,r25 ;;
@@ -2060,10 +2049,8 @@ 2:
 	mov ar.lc=r30 ;;
 	mov r29=cr.ipsr
 	mov r30=cr.iip;;
-	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
-	ld8 r27=[r27];;
-	adds r25=IA64_VCPU_DTLB_OFFSET,r27
-	adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
+	adds r25=IA64_VCPU_DTLB_OFFSET,r21
+	adds r26=IA64_VCPU_ITLB_OFFSET,r21;;
 	ld8 r24=[r25]
 	ld8 r27=[r26] ;;
 	and r24=-2,r24
@@ -2110,10 +2097,14 @@ hyper_itc_d:
 	br.sptk.many dispatch_break_fault ;;
 #else
 	// ensure itir.ps >= xen's pagesize
+	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+	ld8 r27=[r27];;
+	adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
 	adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
+	ld1 r22=[r22]
 	ld8 r23=[r23];;
 	extr.u r24=r23,IA64_ITIR_PS,IA64_ITIR_PS_LEN;;	// r24==logps
-	cmp.gt p7,p0=PAGE_SHIFT,r24
+	cmp.gt p7,p0=r22,r24
 (p7)	br.spnt.many dispatch_break_fault ;;
 	adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
 	ld8 r21=[r21];;
@@ -2121,8 +2112,6 @@ hyper_itc_d:
 	extr.u r21=r21,61,3;;
 	cmp.eq p7,p0=r21,r0
 (p7)	br.spnt.many dispatch_break_fault ;;
-	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
-	ld8 r27=[r27];;
 	adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;;
 	ld8 r27=[r27]
 	// FIXME: is the global var dom0 always pinned? assume so for now
@@ -2160,18 +2149,24 @@ END(hyper_itc)
 // r31 == pr
 ENTRY(fast_insert)
 	// translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
-	mov r19=1;;
-	shl r20=r19,r24;;
+	mov r19=1
+	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+	shl r20=r19,r24
+	ld8 r27=[r27];;
+	adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
 	adds r20=-1,r20		// r20 == mask
 	movl r19=_PAGE_PPN_MASK;;
+	ld1 r23=[r23]
+	mov r25=-1
 	and r22=r16,r19;;	// r22 == pteval & _PAGE_PPN_MASK
 	andcm r19=r22,r20
+	shl r25=r25,r23		// -1 << current->arch.vhpt_pg_shift
 	adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
 	ld8 r21=[r21];;
 	and r20=r21,r20;;
 	or r19=r19,r20;;	// r19 == mpaddr
 	// FIXME: for now, just do domain0 and skip mpaddr range checks
-	dep r20=r0,r19,0,PAGE_SHIFT
+	and r20=r25,r19
 	movl r21=PAGE_PHYS ;;
 	or r20=r20,r21 ;;	// r20==return value from lookup_domain_mpa
 	// r16=pteval,r20=pteval2
@@ -2208,8 +2203,6 @@ ENTRY(fast_insert)
 	// vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa)
 	// TR_ENTRY = {page_flags,itir,addr,rid}
 	tbit.z p6,p7=r17,0
-	movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
-	ld8 r27=[r27];;
 	adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27
 (p6)	adds r27=IA64_VCPU_DTLB_OFFSET,r27
 (p7)	adds r27=IA64_VCPU_ITLB_OFFSET,r27;;
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Thu Aug 9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/mm.c    Thu Aug 16 11:33:27 2007 +0200
@@ -447,7 +447,7 @@ gmfn_to_mfn_foreign(struct domain *d, un
 // given a domain virtual address, pte and pagesize, extract the metaphysical
 // address, convert the pte for a physical address for (possibly different)
 // Xen PAGE_SIZE and return modified pte.  (NOTE: TLB insert should use
-// PAGE_SIZE!)
+// current->arch.vhpt_pg_shift!)
 u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* itir,
                          struct p2m_entry* entry)
 {
@@ -457,20 +457,25 @@ u64 translate_domain_pte(u64 pteval, u64
 	u64 arflags;
 	u64 arflags2;
 	u64 maflags2;
+	u64 ps;

 	pteval &= ((1UL << 53) - 1);// ignore [63:53] bits

 	// FIXME address had better be pre-validated on insert
 	mask = ~itir_mask(_itir.itir);
 	mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask);
-
-	if (_itir.ps > PAGE_SHIFT)
-		_itir.ps = PAGE_SHIFT;
+	ps = current->arch.vhpt_pg_shift ? current->arch.vhpt_pg_shift :
+	     PAGE_SHIFT;
+
+	if (_itir.ps > ps)
+		_itir.ps = ps;
 	((ia64_itir_t*)itir)->itir = _itir.itir;/* Copy the whole register. */
 	((ia64_itir_t*)itir)->ps = _itir.ps;	/* Overwrite ps part! */

 	pteval2 = lookup_domain_mpa(d, mpaddr, entry);
+	if (ps < PAGE_SHIFT)
+		pteval2 |= address & (PAGE_SIZE - 1) & ~((1L << ps) - 1);

 	/* Check access rights. */
 	arflags = pteval & _PAGE_AR_MASK;
@@ -544,10 +549,11 @@ u64 translate_domain_pte(u64 pteval, u64
 		pteval &= ~_PAGE_D;
 	}

-	/* Ignore non-addr bits of pteval2 and force PL0->2
+	/* Ignore non-addr bits of pteval2 and force PL0->1
 	   (PL3 is unaffected) */
-	return (pteval & ~_PAGE_PPN_MASK) |
-	       (pteval2 & _PAGE_PPN_MASK) | _PAGE_PL_PRIV;
+	return (pteval & ~(_PAGE_PPN_MASK | _PAGE_PL_MASK)) |
+	       (pteval2 & _PAGE_PPN_MASK) |
+	       (vcpu_pl_adjust(pteval, 7) & _PAGE_PL_MASK);
 }

 // given a current domain metaphysical address, return the physical address
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/regionreg.c
--- a/xen/arch/ia64/xen/regionreg.c     Thu Aug 9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/regionreg.c     Thu Aug 16 11:33:27 2007 +0200
@@ -72,7 +72,7 @@ static unsigned long allocate_metaphysic
 	rrv.rrval = 0;		// Or else may see reserved bit fault
 	rrv.rid = d->arch.starting_mp_rid + n;
-	rrv.ps = PAGE_SHIFT;
+	rrv.ps = PAGE_SHIFT;	// only used at domain creation
 	rrv.ve = 0;
 	/* Mangle metaphysical rid */
 	rrv.rrval = vmMangleRID(rrv.rrval);
@@ -254,7 +254,7 @@ int set_one_rr(unsigned long rr, unsigne
 	memrrv.rrval = rrv.rrval;
 	newrrv.rid = newrid;
 	newrrv.ve = 1;		// VHPT now enabled for region 7!!
-	newrrv.ps = PAGE_SHIFT;
+	newrrv.ps = v->arch.vhpt_pg_shift;

 	if (rreg == 0) {
 		v->arch.metaphysical_saved_rr0 = vmMangleRID(newrrv.rrval);
@@ -288,7 +288,7 @@ void init_all_rr(struct vcpu *v)
 	rrv.rrval = 0;
 	//rrv.rrval = v->domain->arch.metaphysical_rr0;
-	rrv.ps = PAGE_SHIFT;
+	rrv.ps = v->arch.vhpt_pg_shift;
 	rrv.ve = 1;
 	if (!v->vcpu_info) { panic("Stopping in init_all_rr\n"); }
 	VCPU(v,rrs[0]) = -1;
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c  Thu Aug 9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/vcpu.c  Thu Aug 16 11:33:27 2007 +0200
@@ -1697,7 +1697,7 @@ IA64FAULT vcpu_translate(VCPU * vcpu, u6
 	} else {
 		*pteval = (address & _PAGE_PPN_MASK) |
 			__DIRTY_BITS | _PAGE_PL_PRIV | _PAGE_AR_RWX;
-		*itir = PAGE_SHIFT << 2;
+		*itir = vcpu->arch.vhpt_pg_shift << 2;
 		perfc_incr(phys_translate);
 		return IA64_NO_FAULT;
 	}
@@ -2290,13 +2290,29 @@ IA64FAULT vcpu_set_dtr(VCPU * vcpu, u64
  VCPU translation cache access routines
**************************************************************************/

+static void
+vcpu_rebuild_vhpt(VCPU * vcpu, u64 ps)
+{
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+	printk("vhpt rebuild: using page_shift %d\n", (int)ps);
+	vcpu->arch.vhpt_pg_shift = ps;
+	vcpu_purge_tr_entry(&PSCBX(vcpu, dtlb));
+	vcpu_purge_tr_entry(&PSCBX(vcpu, itlb));
+	local_vhpt_flush();
+	load_region_regs(vcpu);
+#else
+	panic_domain(NULL, "domain trying to use smaller page size!\n");
+#endif
+}
+
 void
 vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, u64 vaddr, u64 pte,
                  u64 mp_pte, u64 itir, struct p2m_entry *entry)
 {
 	ia64_itir_t _itir = {.itir = itir};
 	unsigned long psr;
-	unsigned long ps = (vcpu->domain == dom0) ? _itir.ps : PAGE_SHIFT;
+	unsigned long ps = (vcpu->domain == dom0) ? _itir.ps :
+			   vcpu->arch.vhpt_pg_shift;

 	check_xen_space_overlap("itc", vaddr, 1UL << _itir.ps);

@@ -2305,7 +2321,7 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD,
 		panic_domain(NULL, "vcpu_itc_no_srlz: domain trying to use "
 		             "smaller page size!\n");

-	BUG_ON(_itir.ps > PAGE_SHIFT);
+	BUG_ON(_itir.ps > vcpu->arch.vhpt_pg_shift);
 	vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
 	psr = ia64_clear_ic();
 	pte &= ~(_PAGE_RV2 | _PAGE_RV1);	// Mask out the reserved bits.
@@ -2318,7 +2334,7 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD,
 		// addresses never get flushed.  More work needed if this
 		// ever happens.
 		//printk("vhpt_insert(%p,%p,%p)\n",vaddr,pte,1L<<logps);
-		if (_itir.ps > PAGE_SHIFT)
+		if (_itir.ps > vcpu->arch.vhpt_pg_shift)
 			vhpt_multiple_insert(vaddr, pte, _itir.itir);
 		else
 			vhpt_insert(vaddr, pte, _itir.itir);
@@ -2326,7 +2342,7 @@ vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD,
 	// even if domain pagesize is larger than PAGE_SIZE, just put
 	// PAGE_SIZE mapping in the vhpt for now, else purging is complicated
 	else {
-		_itir.ps = PAGE_SHIFT;
+		_itir.ps = vcpu->arch.vhpt_pg_shift;
 		vhpt_insert(vaddr, pte, _itir.itir);
 	}
 }
@@ -2338,12 +2354,11 @@ IA64FAULT vcpu_itc_d(VCPU * vcpu, u64 pt
 	struct p2m_entry entry;
 	ia64_itir_t _itir = {.itir = itir};

-	if (_itir.ps < PAGE_SHIFT)
-		panic_domain(NULL, "vcpu_itc_d: domain trying to use "
-		             "smaller page size!\n");
+	if (_itir.ps < vcpu->arch.vhpt_pg_shift)
+		vcpu_rebuild_vhpt(vcpu, _itir.ps);

  again:
-	//itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
+	//itir = (itir & ~0xfc) | (vcpu->arch.vhpt_pg_shift<<2); // ign dom pgsz
 	pteval = translate_domain_pte(pte, ifa, itir, &(_itir.itir), &entry);
 	if (!pteval)
 		return IA64_ILLOP_FAULT;
@@ -2367,11 +2382,11 @@ IA64FAULT vcpu_itc_i(VCPU * vcpu, u64 pt
 	struct p2m_entry entry;
 	ia64_itir_t _itir = {.itir = itir};

-	if (_itir.ps < PAGE_SHIFT)
-		panic_domain(NULL, "vcpu_itc_i: domain trying to use "
-		             "smaller page size!\n");
+	if (_itir.ps < vcpu->arch.vhpt_pg_shift)
+		vcpu_rebuild_vhpt(vcpu, _itir.ps);
+
  again:
-	//itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
+	//itir = (itir & ~0xfc) | (vcpu->arch.vhpt_pg_shift<<2); // ign dom pgsz
 	pteval = translate_domain_pte(pte, ifa, itir, &(_itir.itir), &entry);
 	if (!pteval)
 		return IA64_ILLOP_FAULT;
diff -r 6b0c965e95a6 -r 2f58face717c xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c  Thu Aug 9 08:48:00 2007 +0200
+++ b/xen/arch/ia64/xen/vhpt.c  Thu Aug 16 11:33:27 2007 +0200
@@ -88,15 +88,16 @@ void vhpt_multiple_insert(unsigned long
 void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte,
                           unsigned long itir)
 {
+	unsigned char ps = current->arch.vhpt_pg_shift;
 	ia64_itir_t _itir = {.itir = itir};
 	unsigned long mask = (1L << _itir.ps) - 1;
 	int i;

-	if (_itir.ps-PAGE_SHIFT > 10 && !running_on_sim) {
+	if (_itir.ps-ps > 10 && !running_on_sim) {
 		// if this happens, we may want to revisit this algorithm
 		panic("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
 	}
-	if (_itir.ps-PAGE_SHIFT > 2) {
+	if (_itir.ps-ps > 2) {
 		// FIXME: Should add counter here to see how often this
 		// happens (e.g. for 16MB pages!) and determine if it
 		// is a performance problem.  On a quick look, it takes
@@ -111,9 +112,9 @@ void vhpt_multiple_insert(unsigned long
 	}
 	vaddr &= ~mask;
 	pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
-	for (i = 1L << (_itir.ps-PAGE_SHIFT); i > 0; i--) {
+	for (i = 1L << (_itir.ps-ps); i > 0; i--) {
 		vhpt_insert(vaddr, pte, _itir.itir);
-		vaddr += PAGE_SIZE;
+		vaddr += (1L << ps);
 	}
 }
@@ -291,6 +292,7 @@ __flush_vhpt_range(unsigned long vhpt_ma
 __flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range)
 {
 	void *vhpt_base = __va(vhpt_maddr);
+	u64 pgsz = 1L << current->arch.vhpt_pg_shift;

 	while ((long)addr_range > 0) {
 		/* Get the VHPT entry.  */
@@ -298,8 +300,8 @@ __flush_vhpt_range(unsigned long vhpt_ma
 			__va_ul(vcpu_vhpt_maddr(current));
 		struct vhpt_lf_entry *v = vhpt_base + off;
 		v->ti_tag = INVALID_TI_TAG;
-		addr_range -= PAGE_SIZE;
-		vadr += PAGE_SIZE;
+		addr_range -= pgsz;
+		vadr += pgsz;
 	}
 }
@@ -362,7 +364,8 @@ void domain_flush_vtlb_range (struct dom
 	// ptc.ga has release semantics.

 	/* ptc.ga */
-	platform_global_tlb_purge(vadr, vadr + addr_range, PAGE_SHIFT);
+	platform_global_tlb_purge(vadr, vadr + addr_range,
+				  current->arch.vhpt_pg_shift);

 	perfc_incr(domain_flush_vtlb_range);
 }
@@ -381,6 +384,7 @@ __domain_flush_vtlb_track_entry(struct d
 	int cpu;
 	int vcpu;
 	int local_purge = 1;
+	unsigned char ps = current->arch.vhpt_pg_shift;

 	BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
 	/*
@@ -413,7 +417,7 @@ __domain_flush_vtlb_track_entry(struct d
 				continue;

 			/* Invalidate VHPT entries.  */
-			vcpu_flush_vhpt_range(v, vaddr, PAGE_SIZE);
+			vcpu_flush_vhpt_range(v, vaddr, 1L << ps);

 			/*
 			 * current->processor == v->processor
@@ -427,7 +431,7 @@ __domain_flush_vtlb_track_entry(struct d
 	} else {
 		for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
 			/* Invalidate VHPT entries.  */
-			cpu_flush_vhpt_range(cpu, vaddr, PAGE_SIZE);
+			cpu_flush_vhpt_range(cpu, vaddr, 1L << ps);

 			if (d->vcpu[cpu] != current)
 				local_purge = 0;
@@ -436,12 +440,11 @@
 	/* ptc.ga */
 	if (local_purge) {
-		ia64_ptcl(vaddr, PAGE_SHIFT << 2);
+		ia64_ptcl(vaddr, ps << 2);
 		perfc_incr(domain_flush_vtlb_local);
 	} else {
 		/* ptc.ga has release semantics. */
-		platform_global_tlb_purge(vaddr, vaddr + PAGE_SIZE,
-					  PAGE_SHIFT);
+		platform_global_tlb_purge(vaddr, vaddr + (1L << ps), ps);
 		perfc_incr(domain_flush_vtlb_global);
 	}
diff -r 6b0c965e95a6 -r 2f58face717c xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Thu Aug 9 08:48:00 2007 +0200
+++ b/xen/include/asm-ia64/domain.h     Thu Aug 16 11:33:27 2007 +0200
@@ -246,6 +246,7 @@ struct arch_vcpu {
 #define XEN_IA64_PKR_IN_USE	0x1	/* If psr.pk = 1 was set. */
 	unsigned char pkr_flags;

+	unsigned char vhpt_pg_shift;	/* PAGE_SHIFT or less */
 #ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
 	PTA pta;
 	unsigned long vhpt_maddr;

_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
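A note on the translate_domain_pte() hunk above: when the guest page size
(ps) is smaller than Xen's PAGE_SIZE, the pte obtained from the p2m only
carries the machine page number of the larger Xen page, so the ps-aligned
sub-page offset of the guest address has to be folded back into it. Below
is a minimal standalone sketch of that arithmetic, not part of the patch;
PAGE_SHIFT = 14 (16k Xen pages) and the address/pte values are made-up
example numbers, and _PAGE_PPN_MASK is quoted here only for the printout.

/* sketch: offset fold-in for a 4k guest page inside a 16k Xen page */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT      14UL                  /* assumed Xen page size: 16k */
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define _PAGE_PPN_MASK  0x0003fffffffff000UL  /* physical page number bits */

int main(void)
{
    uint64_t address = 0x2000000000005123UL;  /* hypothetical guest vaddr */
    uint64_t pteval2 = 0x0000000004004000UL;  /* hypothetical 16k-aligned mfn bits */
    uint64_t ps = 12;                         /* guest inserts a 4k mapping */

    /* Same expression as in the patch: keep the part of the address that is
     * below PAGE_SIZE but above the guest page size, so the 4k guest page
     * points at the right quarter of the 16k machine page. */
    if (ps < PAGE_SHIFT)
        pteval2 |= address & (PAGE_SIZE - 1) & ~((1UL << ps) - 1);

    printf("resulting ppn bits: 0x%016llx\n",
           (unsigned long long)(pteval2 & _PAGE_PPN_MASK));
    return 0;
}

With these example values the folded-in offset is 0x1000, so the ppn bits
become 0x4005000 instead of 0x4004000, i.e. the second 4k slice of the 16k
machine page.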