[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH 1/2] xen/arm: Convert runstate address during hypcall
> On 11 Jun 2020, at 19:16, Stefano Stabellini <sstabellini@xxxxxxxxxx> wrote: > > On Thu, 11 Jun 2020, Bertrand Marquis wrote: >> At the moment on Arm, a Linux guest running with KTPI enabled will >> cause the following error when a context switch happens in user mode: >> (XEN) p2m.c:1890: d1v0: Failed to walk page-table va 0xffffff837ebe0cd0 >> >> This patch is modifying the register runstate area handling on arm to >> convert the guest address during the hypercall. During runtime context >> switches the area is mapped to update the guest runstate copy. >> The patch is also removing the initial copy which was done during the >> hypercall handling as this is done on the current core when the context >> is restore to go back to guest execution on arm. >> >> As a consequence if the register runstate hypercall is called on one >> vcpu for an other vcpu, the area will not be updated until the vcpu >> will be executed (on arm only). >> >> On x86, the behaviour is not modified, the address conversion is done >> during the context switch and the area is updated fully during the >> hypercall. >> inline functions in headers could not be used as the architecture >> domain.h is included before the global domain.h making it impossible >> to use the struct vcpu inside the architecture header. >> This should not have any performance impact as the hypercall is only >> called once per vcpu usually. >> >> Signed-off-by: Bertrand Marquis <bertrand.marquis@xxxxxxx> >> --- >> xen/arch/arm/domain.c | 109 +++++++++++++++++++++++++++++------ >> xen/arch/x86/domain.c | 30 +++++++++- >> xen/arch/x86/x86_64/domain.c | 4 +- >> xen/common/domain.c | 19 ++---- >> xen/include/asm-arm/domain.h | 8 +++ >> xen/include/asm-x86/domain.h | 16 +++++ >> xen/include/xen/domain.h | 4 ++ >> xen/include/xen/sched.h | 16 +---- >> 8 files changed, 153 insertions(+), 53 deletions(-) >> >> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c >> index 31169326b2..739059234f 100644 >> --- a/xen/arch/arm/domain.c >> +++ b/xen/arch/arm/domain.c >> @@ -19,6 +19,7 @@ >> #include <xen/sched.h> >> #include <xen/softirq.h> >> #include <xen/wait.h> >> +#include <xen/domain_page.h> >> >> #include <asm/alternative.h> >> #include <asm/cpuerrata.h> >> @@ -275,36 +276,104 @@ static void ctxt_switch_to(struct vcpu *n) >> virt_timer_restore(n); >> } >> >> -/* Update per-VCPU guest runstate shared memory area (if registered). */ >> -static void update_runstate_area(struct vcpu *v) >> +void arch_cleanup_runstate_guest(struct vcpu *v) >> { >> - void __user *guest_handle = NULL; >> - struct vcpu_runstate_info runstate; >> + spin_lock(&v->arch.runstate_guest.lock); >> >> - if ( guest_handle_is_null(runstate_guest(v)) ) >> - return; >> + /* cleanup previous page if any */ >> + if ( v->arch.runstate_guest.page ) >> + { >> + put_page_and_type(v->arch.runstate_guest.page); >> + v->arch.runstate_guest.page = NULL; >> + v->arch.runstate_guest.offset = 0; >> + } >> >> - memcpy(&runstate, &v->runstate, sizeof(runstate)); >> + spin_unlock(&v->arch.runstate_guest.lock); >> +} >> >> - if ( VM_ASSIST(v->domain, runstate_update_flag) ) >> +int arch_setup_runstate_guest(struct vcpu *v, >> + struct vcpu_register_runstate_memory_area area) >> +{ >> + struct page_info *page; >> + unsigned offset; >> + >> + spin_lock(&v->arch.runstate_guest.lock); >> + >> + /* cleanup previous page if any */ >> + if ( v->arch.runstate_guest.page ) >> { >> - guest_handle = &v->runstate_guest.p->state_entry_time + 1; >> - guest_handle--; >> - runstate.state_entry_time |= XEN_RUNSTATE_UPDATE; >> - __raw_copy_to_guest(guest_handle, >> - (void *)(&runstate.state_entry_time + 1) - 1, >> 1); >> - smp_wmb(); >> + put_page_and_type(v->arch.runstate_guest.page); >> + v->arch.runstate_guest.page = NULL; >> + v->arch.runstate_guest.offset = 0; >> + } >> + >> + offset = ((vaddr_t)area.addr.v) & ~PAGE_MASK; >> + if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) ) >> + { >> + spin_unlock(&v->arch.runstate_guest.lock); >> + gprintk(XENLOG_DEBUG, "Runstate is crossing pages\n"); >> + return -EINVAL; >> + } >> + >> + /* provided address must be aligned to a 64bit */ >> + if ( offset % alignof(struct vcpu_runstate_info) ) >> + { >> + spin_unlock(&v->arch.runstate_guest.lock); >> + gprintk(XENLOG_DEBUG, "Runstate pointer is not aligned\n"); >> + return -EINVAL; >> + } >> + >> + page = get_page_from_gva(v, (vaddr_t)area.addr.v, GV2M_WRITE); >> + if ( !page ) >> + { >> + spin_unlock(&v->arch.runstate_guest.lock); >> + gprintk(XENLOG_DEBUG, "Runstate pointer is not mapped\n"); > > I would make all these XENLOG_WARNING or XENLOG_ERR, they are errors > after all. This last one I'd say "Could not map runstate point" and also > print the address. Ok I will fix it to Warning and change the message like this. > > >> + return -EINVAL; >> } >> >> - __copy_to_guest(runstate_guest(v), &runstate, 1); >> + v->arch.runstate_guest.page = page; >> + v->arch.runstate_guest.offset = offset; >> + >> + spin_unlock(&v->arch.runstate_guest.lock); >> + >> + return 0; >> +} >> + >> + >> +/* Update per-VCPU guest runstate shared memory area (if registered). */ >> +static void update_runstate_area(struct vcpu *v) >> +{ >> + struct vcpu_runstate_info *guest_runstate; >> + void *p; >> + >> + spin_lock(&v->arch.runstate_guest.lock); >> >> - if ( guest_handle ) >> + if ( v->arch.runstate_guest.page ) >> { >> - runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE; >> + p = __map_domain_page(v->arch.runstate_guest.page); >> + guest_runstate = p + v->arch.runstate_guest.offset; >> + >> + if ( VM_ASSIST(v->domain, runstate_update_flag) ) >> + { >> + v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE; >> + guest_runstate->state_entry_time |= XEN_RUNSTATE_UPDATE; > > I think that this write to guest_runstate should use write_atomic or > another atomic write operation. I will answer at the end of the discussion on the subject. > > >> + smp_wmb(); >> + } >> + >> + memcpy(guest_runstate, &v->runstate, sizeof(v->runstate)); >> smp_wmb(); >> - __raw_copy_to_guest(guest_handle, >> - (void *)(&runstate.state_entry_time + 1) - 1, >> 1); >> + >> + if ( VM_ASSIST(v->domain, runstate_update_flag) ) >> + { >> + v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE; >> + guest_runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE; > > same here > > >> + smp_wmb(); >> + } >> + >> + unmap_domain_page(p); >> } >> + >> + spin_unlock(&v->arch.runstate_guest.lock); >> } >> >> static void schedule_tail(struct vcpu *prev) >> @@ -560,6 +629,8 @@ int arch_vcpu_create(struct vcpu *v) >> v->arch.saved_context.sp = (register_t)v->arch.cpu_info; >> v->arch.saved_context.pc = (register_t)continue_new_vcpu; >> >> + spin_lock_init(&v->arch.runstate_guest.lock); >> + >> /* Idle VCPUs don't need the rest of this setup */ >> if ( is_idle_vcpu(v) ) >> return rc; >> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c >> index fee6c3931a..32bbed87d5 100644 >> --- a/xen/arch/x86/domain.c >> +++ b/xen/arch/x86/domain.c >> @@ -1642,6 +1642,30 @@ void paravirt_ctxt_switch_to(struct vcpu *v) >> wrmsr_tsc_aux(v->arch.msrs->tsc_aux); >> } >> >> +int arch_setup_runstate_guest(struct vcpu *v, >> + struct vcpu_register_runstate_memory_area area) >> +{ >> + struct vcpu_runstate_info runstate; >> + >> + runstate_guest(v) = area.addr.h; >> + >> + if ( v == current ) >> + { >> + __copy_to_guest(runstate_guest(v), &v->runstate, 1); >> + } >> + else >> + { >> + vcpu_runstate_get(v, &runstate); >> + __copy_to_guest(runstate_guest(v), &runstate, 1); >> + } >> + return 0; >> +} >> + >> +void arch_cleanup_runstate_guest(struct vcpu *v) >> +{ >> + set_xen_guest_handle(runstate_guest(v), NULL); >> +} >> + >> /* Update per-VCPU guest runstate shared memory area (if registered). */ >> bool update_runstate_area(struct vcpu *v) >> { >> @@ -1660,8 +1684,8 @@ bool update_runstate_area(struct vcpu *v) >> if ( VM_ASSIST(v->domain, runstate_update_flag) ) >> { >> guest_handle = has_32bit_shinfo(v->domain) >> - ? &v->runstate_guest.compat.p->state_entry_time + 1 >> - : &v->runstate_guest.native.p->state_entry_time + 1; >> + ? &v->arch.runstate_guest.compat.p->state_entry_time + 1 >> + : &v->arch.runstate_guest.native.p->state_entry_time + 1; >> guest_handle--; >> runstate.state_entry_time |= XEN_RUNSTATE_UPDATE; >> __raw_copy_to_guest(guest_handle, >> @@ -1674,7 +1698,7 @@ bool update_runstate_area(struct vcpu *v) >> struct compat_vcpu_runstate_info info; >> >> XLAT_vcpu_runstate_info(&info, &runstate); >> - __copy_to_guest(v->runstate_guest.compat, &info, 1); >> + __copy_to_guest(v->arch.runstate_guest.compat, &info, 1); >> rc = true; >> } >> else >> diff --git a/xen/arch/x86/x86_64/domain.c b/xen/arch/x86/x86_64/domain.c >> index c46dccc25a..b879e8dd2c 100644 >> --- a/xen/arch/x86/x86_64/domain.c >> +++ b/xen/arch/x86/x86_64/domain.c >> @@ -36,7 +36,7 @@ arch_compat_vcpu_op( >> break; >> >> rc = 0; >> - guest_from_compat_handle(v->runstate_guest.compat, area.addr.h); >> + guest_from_compat_handle(v->arch.runstate_guest.compat, >> area.addr.h); >> >> if ( v == current ) >> { >> @@ -49,7 +49,7 @@ arch_compat_vcpu_op( >> vcpu_runstate_get(v, &runstate); >> XLAT_vcpu_runstate_info(&info, &runstate); >> } >> - __copy_to_guest(v->runstate_guest.compat, &info, 1); >> + __copy_to_guest(v->arch.runstate_guest.compat, &info, 1); >> >> break; >> } >> diff --git a/xen/common/domain.c b/xen/common/domain.c >> index 7cc9526139..0ca6bf4dbc 100644 >> --- a/xen/common/domain.c >> +++ b/xen/common/domain.c >> @@ -727,7 +727,10 @@ int domain_kill(struct domain *d) >> if ( cpupool_move_domain(d, cpupool0) ) >> return -ERESTART; >> for_each_vcpu ( d, v ) >> + { >> + arch_cleanup_runstate_guest(v); >> unmap_vcpu_info(v); >> + } >> d->is_dying = DOMDYING_dead; >> /* Mem event cleanup has to go here because the rings >> * have to be put before we call put_domain. */ >> @@ -1167,7 +1170,7 @@ int domain_soft_reset(struct domain *d) >> >> for_each_vcpu ( d, v ) >> { >> - set_xen_guest_handle(runstate_guest(v), NULL); >> + arch_cleanup_runstate_guest(v); >> unmap_vcpu_info(v); >> } >> >> @@ -1484,7 +1487,6 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, >> XEN_GUEST_HANDLE_PARAM(void) arg) >> case VCPUOP_register_runstate_memory_area: >> { >> struct vcpu_register_runstate_memory_area area; >> - struct vcpu_runstate_info runstate; >> >> rc = -EFAULT; >> if ( copy_from_guest(&area, arg, 1) ) >> @@ -1493,18 +1495,7 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, >> XEN_GUEST_HANDLE_PARAM(void) arg) >> if ( !guest_handle_okay(area.addr.h, 1) ) >> break; >> >> - rc = 0; >> - runstate_guest(v) = area.addr.h; >> - >> - if ( v == current ) >> - { >> - __copy_to_guest(runstate_guest(v), &v->runstate, 1); >> - } >> - else >> - { >> - vcpu_runstate_get(v, &runstate); >> - __copy_to_guest(runstate_guest(v), &runstate, 1); >> - } >> + rc = arch_setup_runstate_guest(v, area); >> >> break; >> } >> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h >> index 4e2f582006..3a7f53e13d 100644 >> --- a/xen/include/asm-arm/domain.h >> +++ b/xen/include/asm-arm/domain.h >> @@ -11,6 +11,7 @@ >> #include <asm/vgic.h> >> #include <asm/vpl011.h> >> #include <public/hvm/params.h> >> +#include <public/vcpu.h> >> #include <xen/serial.h> >> #include <xen/rbtree.h> >> >> @@ -206,6 +207,13 @@ struct arch_vcpu >> */ >> bool need_flush_to_ram; >> >> + /* runstate guest info */ >> + struct { >> + struct page_info *page; /* guest page */ >> + unsigned offset; /* offset in page */ >> + spinlock_t lock; /* lock to access page */ >> + } runstate_guest; >> + >> } __cacheline_aligned; >> >> void vcpu_show_execution_state(struct vcpu *); >> diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h >> index e8cee3d5e5..f917b48cb8 100644 >> --- a/xen/include/asm-x86/domain.h >> +++ b/xen/include/asm-x86/domain.h >> @@ -11,6 +11,11 @@ >> #include <asm/x86_emulate.h> >> #include <public/vcpu.h> >> #include <public/hvm/hvm_info_table.h> >> +#ifdef CONFIG_COMPAT >> +#include <compat/vcpu.h> >> +DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t); >> +#endif >> + >> >> #define has_32bit_shinfo(d) ((d)->arch.has_32bit_shinfo) >> >> @@ -626,6 +631,17 @@ struct arch_vcpu >> struct { >> bool next_interrupt_enabled; >> } monitor; >> + >> +#ifndef CONFIG_COMPAT >> +# define runstate_guest(v) ((v)->arch.runstate_guest) >> + XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address >> */ >> +#else >> +# define runstate_guest(v) ((v)->arch.runstate_guest.native) >> + union { >> + XEN_GUEST_HANDLE(vcpu_runstate_info_t) native; >> + XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat; >> + } runstate_guest; >> +#endif >> }; >> >> struct guest_memory_policy >> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h >> index 7e51d361de..d5d73ce997 100644 >> --- a/xen/include/xen/domain.h >> +++ b/xen/include/xen/domain.h >> @@ -63,6 +63,10 @@ void arch_vcpu_destroy(struct vcpu *v); >> int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset); >> void unmap_vcpu_info(struct vcpu *v); >> >> +int arch_setup_runstate_guest(struct vcpu *v, >> + struct vcpu_register_runstate_memory_area area); > > NIT: code style, the indentation is off Ok > > >> +void arch_cleanup_runstate_guest(struct vcpu *v); >> + >> int arch_domain_create(struct domain *d, >> struct xen_domctl_createdomain *config); >> >> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h >> index ac53519d7f..fac030fb83 100644 >> --- a/xen/include/xen/sched.h >> +++ b/xen/include/xen/sched.h >> @@ -29,11 +29,6 @@ >> #include <public/vcpu.h> >> #include <public/event_channel.h> >> >> -#ifdef CONFIG_COMPAT >> -#include <compat/vcpu.h> >> -DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t); >> -#endif >> - >> /* >> * Stats >> * >> @@ -166,16 +161,7 @@ struct vcpu >> struct sched_unit *sched_unit; >> >> struct vcpu_runstate_info runstate; >> -#ifndef CONFIG_COMPAT >> -# define runstate_guest(v) ((v)->runstate_guest) >> - XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address >> */ >> -#else >> -# define runstate_guest(v) ((v)->runstate_guest.native) >> - union { >> - XEN_GUEST_HANDLE(vcpu_runstate_info_t) native; >> - XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat; >> - } runstate_guest; /* guest address */ >> -#endif >> + >> unsigned int new_state; >> >> /* Has the FPU been initialised? */ >> -- >> 2.17.1
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |