Re: [PATCH v2] xen/arm: Convert runstate address during hypcall
On Tue, 28 Jul 2020, Bertrand Marquis wrote:
> At the moment on Arm, a Linux guest running with KPTI enabled will
> cause the following error when a context switch happens in user mode:
> (XEN) p2m.c:1890: d1v0: Failed to walk page-table va 0xffffff837ebe0cd0
>
> The error is caused by the virtual address for the runstate area
> registered by the guest only being accessible when the guest is running
> in kernel space when KPTI is enabled.
>
> To solve this issue, this patch is doing the translation from virtual
> address to physical address during the hypercall and mapping the
> required pages using vmap. This is removing the conversion from virtual
> to physical address during the context switch which is solving the
> problem with KPTI.
>
> This is done only on arm architecture, the behaviour on x86 is not
> modified by this patch and the address conversion is done as before
> during each context switch.
>
> This is introducing several limitations in comparison to the previous
> behaviour (on arm only):
> - if the guest is remapping the area at a different physical address Xen
>   will continue to update the area at the previous physical address. As
>   the area is in kernel space and usually defined as a global variable this
>   is something which is believed not to happen. If this is required by a
>   guest, it will have to call the hypercall with the new area (even if it
>   is at the same virtual address).
> - the area needs to be mapped during the hypercall. For the same reasons
>   as for the previous case, even if the area is registered for a different
>   vcpu. It is believed that registering an area using a virtual address
>   unmapped is not something done.
>
> inline functions in headers could not be used as the architecture
> domain.h is included before the global domain.h making it impossible
> to use the struct vcpu inside the architecture header.
> This should not have any performance impact as the hypercall is only
> called once per vcpu usually.
>
> Signed-off-by: Bertrand Marquis <bertrand.marquis@xxxxxxx>
> ---
> Changes in v2
>   - use vmap to map the pages during the hypercall.
>   - reintroduce initial copy during hypercall.
>
> ---
>  xen/arch/arm/domain.c        | 160 +++++++++++++++++++++++++++++++----
>  xen/arch/x86/domain.c        |  30 ++++++-
>  xen/arch/x86/x86_64/domain.c |   4 +-
>  xen/common/domain.c          |  19 ++---
>  xen/include/asm-arm/domain.h |   9 ++
>  xen/include/asm-x86/domain.h |  16 ++++
>  xen/include/xen/domain.h     |   5 ++
>  xen/include/xen/sched.h      |  16 +---
>  8 files changed, 207 insertions(+), 52 deletions(-)
>
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 31169326b2..c595438bd9 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -19,6 +19,7 @@
>  #include <xen/sched.h>
>  #include <xen/softirq.h>
>  #include <xen/wait.h>
> +#include <xen/vmap.h>
>
>  #include <asm/alternative.h>
>  #include <asm/cpuerrata.h>
> @@ -275,36 +276,157 @@ static void ctxt_switch_to(struct vcpu *n)
>      virt_timer_restore(n);
>  }
>
> -/* Update per-VCPU guest runstate shared memory area (if registered). */
> -static void update_runstate_area(struct vcpu *v)
> +static void cleanup_runstate_vcpu_locked(struct vcpu *v)
> +{
> +    if ( v->arch.runstate_guest )
> +    {
> +        vunmap((void *)((unsigned long)v->arch.runstate_guest & PAGE_MASK));
> +
> +        put_page(v->arch.runstate_guest_page[0]);
> +
> +        if ( v->arch.runstate_guest_page[1] )
> +        {
> +            put_page(v->arch.runstate_guest_page[1]);
> +        }
> +        v->arch.runstate_guest = NULL;
> +    }
> +}
> +
> +void arch_vcpu_cleanup_runstate(struct vcpu *v)
>  {
> -    void __user *guest_handle = NULL;
> +    spin_lock(&v->arch.runstate_guest_lock);
> +
> +    cleanup_runstate_vcpu_locked(v);
> +
> +    spin_unlock(&v->arch.runstate_guest_lock);
> +}
> +
> +static int setup_runstate_vcpu_locked(struct vcpu *v, vaddr_t vaddr)
> +{
> +    unsigned int offset;
> +    mfn_t mfn[2];
> +    struct page_info *page;
> +    unsigned int numpages;
>      struct vcpu_runstate_info runstate;
> +    void *p;
>
> -    if ( guest_handle_is_null(runstate_guest(v)) )
> -        return;
> +    /* user can pass a NULL address to unregister a previous area */
> +    if ( vaddr == 0 )
> +        return 0;
>
> -    memcpy(&runstate, &v->runstate, sizeof(runstate));
> +    offset = vaddr & ~PAGE_MASK;
>
> -    if ( VM_ASSIST(v->domain, runstate_update_flag) )
> +    /* provided address must be aligned to a 64bit */
> +    if ( offset % alignof(struct vcpu_runstate_info) )
>      {
> -        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
> -        guest_handle--;
> -        runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
> -        __raw_copy_to_guest(guest_handle,
> -                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
> -        smp_wmb();
> +        gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
> +                "Invalid offset\n", vaddr);
> +        return -EINVAL;
> +    }
> +
> +    page = get_page_from_gva(v, vaddr, GV2M_WRITE);
> +    if ( !page )
> +    {
> +        gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
> +                "Page is not mapped\n", vaddr);
> +        return -EINVAL;
> +    }
> +    mfn[0] = page_to_mfn(page);
> +    v->arch.runstate_guest_page[0] = page;
> +
> +    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
> +    {
> +        /* guest area is crossing pages */
> +        page = get_page_from_gva(v, vaddr + PAGE_SIZE, GV2M_WRITE);
> +        if ( !page )
> +        {
> +            put_page(v->arch.runstate_guest_page[0]);
> +            gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
> +                    "2nd Page is not mapped\n", vaddr);
> +            return -EINVAL;
> +        }
> +        mfn[1] = page_to_mfn(page);
> +        v->arch.runstate_guest_page[1] = page;
> +        numpages = 2;
> +    }
> +    else
> +    {
> +        v->arch.runstate_guest_page[1] = NULL;
> +        numpages = 1;
> +    }
> +
> +    p = vmap(mfn, numpages);
> +    if ( !p )
> +    {
> +        put_page(v->arch.runstate_guest_page[0]);
> +        if ( numpages == 2 )
> +            put_page(v->arch.runstate_guest_page[1]);
> +
> +        gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
> +                "vmap error\n", vaddr);
> +        return -EINVAL;
>      }
>
> -    __copy_to_guest(runstate_guest(v), &runstate, 1);
> +    v->arch.runstate_guest = p + offset;
>
> -    if ( guest_handle )
> +    if (v == current)
>      {
> -        runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
> +        memcpy(v->arch.runstate_guest, &v->runstate, sizeof(v->runstate));
> +    }
> +    else
> +    {
> +        vcpu_runstate_get(v, &runstate);
> +        memcpy(v->arch.runstate_guest, &runstate, sizeof(v->runstate));
> +    }
> +
> +    return 0;
> +}

The arm32 build breaks with:

domain.c: In function 'setup_runstate_vcpu_locked':
domain.c:322:9: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'vaddr_t' [-Werror=format=]
         gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
         ^
domain.c:330:9: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'vaddr_t' [-Werror=format=]
         gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
         ^
domain.c:344:13: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'vaddr_t' [-Werror=format=]
             gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
             ^
domain.c:365:9: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'vaddr_t' [-Werror=format=]
         gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
         ^
cc1: all warnings being treated as errors
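One way to address these format errors (a sketch of the idea, not code
from this thread) is Xen's PRIvaddr format macro, defined in
xen/include/asm-arm/types.h to match the width of vaddr_t on both arm32
and arm64. Applied to the first warning:

    /* Sketch: assumes the PRIvaddr macro from asm-arm/types.h, so the
     * format width follows vaddr_t (32-bit on arm32, 64-bit on arm64). */
    gprintk(XENLOG_WARNING, "Cannot map runstate pointer at %#"PRIvaddr": "
            "Invalid offset\n", vaddr);

The same substitution would apply to the other three gprintk calls.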
> +int arch_vcpu_setup_runstate(struct vcpu *v,
> +                             struct vcpu_register_runstate_memory_area area)
> +{
> +    int rc;
> +
> +    spin_lock(&v->arch.runstate_guest_lock);
> +
> +    /* cleanup if we are recalled */
> +    cleanup_runstate_vcpu_locked(v);
> +
> +    rc = setup_runstate_vcpu_locked(v, (vaddr_t)area.addr.v);
> +
> +    spin_unlock(&v->arch.runstate_guest_lock);
> +
> +    return rc;
> +}
> +
> +
> +/* Update per-VCPU guest runstate shared memory area (if registered). */
> +static void update_runstate_area(struct vcpu *v)
> +{
> +    spin_lock(&v->arch.runstate_guest_lock);
> +
> +    if ( v->arch.runstate_guest )
> +    {
> +        if ( VM_ASSIST(v->domain, runstate_update_flag) )
> +        {
> +            v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
> +            v->arch.runstate_guest->state_entry_time |= XEN_RUNSTATE_UPDATE;

Please use write_atomic (as suggested by Julien here:
https://marc.info/?l=xen-devel&m=159225391619240)

> +            smp_wmb();
> +        }
> +
> +        memcpy(v->arch.runstate_guest, &v->runstate, sizeof(v->runstate));
> +
> +        /* copy must be done before switching the bit */
>          smp_wmb();
> -        __raw_copy_to_guest(guest_handle,
> -                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
> +
> +        if ( VM_ASSIST(v->domain, runstate_update_flag) )
> +        {
> +            v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
> +            v->arch.runstate_guest->state_entry_time &= ~XEN_RUNSTATE_UPDATE;

and here too (a sketch of the suggested change is appended at the end of
this message).

The rest looks OK to me.

> +        }
>      }
> +
> +    spin_unlock(&v->arch.runstate_guest_lock);
>  }
>
>  static void schedule_tail(struct vcpu *prev)
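For reference, a minimal sketch of the write_atomic suggestion made in the
two comments above, assuming Xen's generic write_atomic() macro; this is
an illustration of the idea, not the code that was eventually merged:

    if ( VM_ASSIST(v->domain, runstate_update_flag) )
    {
        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
        /* Publish the flag with a single atomic store instead of a plain
         * read-modify-write of the field shared with the guest. */
        write_atomic(&v->arch.runstate_guest->state_entry_time,
                     v->runstate.state_entry_time);
        smp_wmb();
    }

and, symmetrically, when clearing the flag after the copy:

    v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
    write_atomic(&v->arch.runstate_guest->state_entry_time,
                 v->runstate.state_entry_time);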