[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 08/10] x86/hvm: pkeys, add pkeys support for do_page_fault
On 16/11/15 10:31, Huaitong Han wrote: > This patch adds pkeys support for do_page_fault. > > the protection keys architecture defines a new status bit in the PFEC. PFEC.PK > (bit 5) is set to 1 if and only if protection keys block the access. > > Protection keys block an access and induce a page fault if and only if > 1.Protection keys are enabled (CR4.PKE=1 and EFER.LMA=1), and > 2.The page has a valid translation (page is present with no reserved bit > violations), and > 3.The access is not an instruction fetch, and > 4.The access is to a user page, and > 5.At least one of the following restrictions applies: > --The access is a data read or data write and AD=1 > --The access is a data write and WD=1 and either CR0.WP=1 or (CR0.WP=0 and > it is a user access) > > Signed-off-by: Huaitong Han <huaitong.han@xxxxxxxxx> > > diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c > index 9f5a6c6..73abb3b 100644 > --- a/xen/arch/x86/traps.c > +++ b/xen/arch/x86/traps.c > @@ -1287,7 +1287,7 @@ enum pf_type { > spurious_fault > }; > > -static enum pf_type __page_fault_type( > +static enum pf_type __page_fault_type(struct vcpu *vcpu, > unsigned long addr, const struct cpu_user_regs *regs) > { > unsigned long mfn, cr3 = read_cr3(); > @@ -1295,7 +1295,7 @@ static enum pf_type __page_fault_type( > l3_pgentry_t l3e, *l3t; > l2_pgentry_t l2e, *l2t; > l1_pgentry_t l1e, *l1t; > - unsigned int required_flags, disallowed_flags, page_user; > + unsigned int required_flags, disallowed_flags, page_user, pte_pkeys; > unsigned int error_code = regs->error_code; > > /*hvm_wp_enabled > > @@ -1340,6 +1340,7 @@ static enum pf_type __page_fault_type( > if ( ((l3e_get_flags(l3e) & required_flags) != required_flags) || > (l3e_get_flags(l3e) & disallowed_flags) ) > return real_fault; > + pte_pkeys = l3e_get_pkeys(l3e); > page_user &= l3e_get_flags(l3e); > if ( l3e_get_flags(l3e) & _PAGE_PSE ) > goto leaf; > @@ -1351,6 +1352,7 @@ static enum pf_type __page_fault_type( > if ( ((l2e_get_flags(l2e) & 
required_flags) != required_flags) || > (l2e_get_flags(l2e) & disallowed_flags) ) > return real_fault; > + pte_pkeys = l2e_get_pkeys(l2e); > page_user &= l2e_get_flags(l2e); > if ( l2e_get_flags(l2e) & _PAGE_PSE ) > goto leaf; > @@ -1362,12 +1364,22 @@ static enum pf_type __page_fault_type( > if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) || > (l1e_get_flags(l1e) & disallowed_flags) ) > return real_fault; > + pte_pkeys = l1e_get_pkeys(l1e); > page_user &= l1e_get_flags(l1e); > > leaf: > if ( page_user ) > { > unsigned long cr4 = read_cr4(); > + unsigned int ff, wf, uf, rsvdf, pkuf; > + unsigned int pkru_ad, pkru_wd; > + > + uf = error_code & PFEC_user_mode; > + wf = error_code & PFEC_write_access; > + rsvdf = error_code & PFEC_reserved_bit; > + ff = error_code & PFEC_insn_fetch; > + pkuf = error_code & PFEC_protection_key; These should be bool_t's rather than unsigned int, but I don't really see the point of breaking them all out. > + > /* > * Supervisor Mode Execution Prevention (SMEP): > * Disallow supervisor execution from user-accessible mappings > @@ -1386,15 +1398,35 @@ leaf: > * - CPL=3 or X86_EFLAGS_AC is clear > * - Page fault in kernel mode > */ > - if ( (cr4 & X86_CR4_SMAP) && !(error_code & PFEC_user_mode) && > + if ( (cr4 & X86_CR4_SMAP) && !uf && > (((regs->cs & 3) == 3) || !(regs->eflags & X86_EFLAGS_AC)) ) > return smap_fault; > + /* > + * PKU: additional mechanism by which the paging controls > + * access to user-mode addresses based on the value in the > + * PKRU register. A fault is considered as a PKU violation if all > + * of the following conditions are true: > + * 1.CR4_PKE=1. > + * 2.EFER_LMA=1. > + * 3.page is present with no reserved bit violations. > + * 4.the access is not an instruction fetch. > + * 5.the access is to a user page. > + * 6.PKRU.AD=1 > + * or The access is a data write and PKRU.WD=1 > + * and either CR0.WP=1 or it is a user access. 
> + */ > + pkru_ad = READ_PKRU_AD(pte_pkeys); > + pkru_wd = READ_PKRU_AD(pte_pkeys); > + if ( pkuf && (cr4 & X86_CR4_PKE) && hvm_long_mode_enabled(vcpu) && > + !rsvdf && !ff && (pkru_ad || > + (pkru_wd && wf && (hvm_wp_enabled(vcpu) || uf)))) > + return real_fault; This has clearly never been tested on a non-Skylake processor. The rdpkru instruction must be after the CR4_PKE check, or it will kill Xen with a #UD fault. Also note that pkru_wd is assigned from READ_PKRU_AD() rather than READ_PKRU_WD(), which looks like a copy-and-paste error. The hvm_long_mode_enabled() and hvm_wp_enabled() are not valid without a has_hvm_container() check. However, you are on a PV-only codepath so can guarantee that the vcpu is not an hvm one (All HVM pagefaults are handled via paging_fault()). You can also guarantee that CR0.WP is 1, and don't actually need to pass a vcpu pointer into here at all. 
> */ > local_irq_save(flags); > - pf_type = __page_fault_type(addr, regs); > + pf_type = __page_fault_type(vcpu, addr, regs); > local_irq_restore(flags); > > return pf_type; > @@ -1479,6 +1511,7 @@ static int fixup_page_fault(unsigned long addr, struct > cpu_user_regs *regs) > * Bit 2: User mode (=1) ; Supervisor mode (=0) > * Bit 3: Reserved bit violation > * Bit 4: Instruction fetch > + * Bit 5: Protection-key violations > */ > void do_page_fault(struct cpu_user_regs *regs) > { > @@ -1500,7 +1533,7 @@ void do_page_fault(struct cpu_user_regs *regs) > > if ( unlikely(!guest_mode(regs)) ) > { > - pf_type = spurious_page_fault(addr, regs); > + pf_type = spurious_page_fault(current, addr, regs); > if ( (pf_type == smep_fault) || (pf_type == smap_fault) ) > { > console_start_sync(); > @@ -1533,7 +1566,7 @@ void do_page_fault(struct cpu_user_regs *regs) > > if ( unlikely(current->domain->arch.suppress_spurious_page_faults) ) > { > - pf_type = spurious_page_fault(addr, regs); > + pf_type = spurious_page_fault(current, addr, regs); > if ( (pf_type == smep_fault) || (pf_type == smap_fault)) > { > printk(XENLOG_G_ERR "%pv fatal SM%cP violation\n", > diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h > index 427eb84..e8090fb 100644 > --- a/xen/include/asm-x86/processor.h > +++ b/xen/include/asm-x86/processor.h > @@ -135,13 +135,14 @@ > #define TF_kernel_mode (1<<_TF_kernel_mode) > > /* #PF error code values. 
*/ > -#define PFEC_page_present (1U<<0) > -#define PFEC_write_access (1U<<1) > -#define PFEC_user_mode (1U<<2) > -#define PFEC_reserved_bit (1U<<3) > -#define PFEC_insn_fetch (1U<<4) > -#define PFEC_page_paged (1U<<5) > -#define PFEC_page_shared (1U<<6) > +#define PFEC_page_present (1U<<0) > +#define PFEC_write_access (1U<<1) > +#define PFEC_user_mode (1U<<2) > +#define PFEC_reserved_bit (1U<<3) > +#define PFEC_insn_fetch (1U<<4) > +#define PFEC_protection_key (1U<<5) > +#define PFEC_page_paged (1U<<6) > +#define PFEC_page_shared (1U<<7) Again, this needs a rebase, and you will find that PFEC_prot_key is already included. ~Andrew _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |