Re: [Xen-devel] [PATCH 1/2] VMX: fix VMCS race on context-switch paths
On Tue, 2017-02-14 at 03:28 -0700, Jan Beulich wrote:
> When __context_switch() is being bypassed during original context
> switch handling, the vCPU "owning" the VMCS partially loses control of
> it: It will appear non-running to remote CPUs, and hence their attempt
> to pause the owning vCPU will have no effect on it (as it already
> looks to be paused). At the same time the "owning" CPU will re-enable
> interrupts eventually (at the latest when entering the idle loop) and
> hence becomes subject to IPIs from other CPUs requesting access to the
> VMCS. As a result, when __context_switch() finally gets run, the CPU
> may no longer have the VMCS loaded, and hence any accesses to it would
> fail. Hence we may need to re-load the VMCS in vmx_ctxt_switch_from().
>
> Similarly, when __context_switch() is being bypassed also on the second
> (switch-in) path, VMCS ownership may have been lost and hence needs
> re-establishing. Since there's no existing hook to put this in, add a
> new one.
>
> Reported-by: Kevin Mayer <Kevin.Mayer@xxxxxxxx>
> Reported-by: Anshul Makkar <anshul.makkar@xxxxxxxxxx>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -2098,11 +2098,14 @@ void context_switch(struct vcpu *prev, s
>
>      set_current(next);
>
> -    if ( (per_cpu(curr_vcpu, cpu) == next) ||
> -         (is_idle_domain(nextd) && cpu_online(cpu)) )
> +    if ( (per_cpu(curr_vcpu, cpu) == next) )
>      {
> +        if ( next->arch.ctxt_switch_same )
> +            next->arch.ctxt_switch_same(next);
>          local_irq_enable();
>      }
> +    else if ( is_idle_domain(nextd) && cpu_online(cpu) )
> +        local_irq_enable();
>      else
>      {
>          __context_switch();
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -552,6 +552,27 @@ static void vmx_load_vmcs(struct vcpu *v
>      local_irq_restore(flags);
>  }
>
> +void vmx_vmcs_reload(struct vcpu *v)
> +{
> +    /*
> +     * As we're running with interrupts disabled, we can't acquire
> +     * v->arch.hvm_vmx.vmcs_lock here. However, with interrupts disabled
> +     * the VMCS can't be taken away from us anymore if we still own it.
> +     */
> +    ASSERT(!local_irq_is_enabled());
> +    if ( v->arch.hvm_vmx.vmcs_pa == this_cpu(current_vmcs) )
> +        return;
> +    ASSERT(!this_cpu(current_vmcs));
> +
> +    /*
> +     * Wait for the remote side to be done with the VMCS before loading
> +     * it here.
> +     */
> +    while ( v->arch.hvm_vmx.active_cpu != -1 )
> +        cpu_relax();
> +    vmx_load_vmcs(v);
> +}
> +
>  int vmx_cpu_up_prepare(unsigned int cpu)
>  {
>      /*
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -298,6 +298,7 @@ static int vmx_vcpu_initialise(struct vc
>      v->arch.schedule_tail    = vmx_do_resume;
>      v->arch.ctxt_switch_from = vmx_ctxt_switch_from;
>      v->arch.ctxt_switch_to   = vmx_ctxt_switch_to;
> +    v->arch.ctxt_switch_same = vmx_vmcs_reload;
>
>      if ( (rc = vmx_create_vmcs(v)) != 0 )
>      {
> @@ -936,6 +937,18 @@ static void vmx_ctxt_switch_from(struct
>      if ( unlikely(!this_cpu(vmxon)) )
>          return;
>
> +    if ( !v->is_running )
> +    {
> +        /*
> +         * When this vCPU isn't marked as running anymore, a remote pCPU's
> +         * attempt to pause us (from vmx_vmcs_enter()) won't have a reason
> +         * to spin in vcpu_sleep_sync(), and hence that pCPU might have taken
> +         * away the VMCS from us. As we're running with interrupts disabled,
> +         * we also can't call vmx_vmcs_enter().
> +         */
> +        vmx_vmcs_reload(v);
> +    }
> +
>      vmx_fpu_leave(v);
>      vmx_save_guest_msrs(v);
>      vmx_restore_host_msrs();
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -514,6 +514,7 @@ struct arch_vcpu
>
>      void (*ctxt_switch_from) (struct vcpu *);
>      void (*ctxt_switch_to) (struct vcpu *);
> +    void (*ctxt_switch_same) (struct vcpu *);
>
>      struct vpmu_struct vpmu;
>
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -174,6 +174,7 @@ void vmx_destroy_vmcs(struct vcpu *v);
>  void vmx_vmcs_enter(struct vcpu *v);
>  bool_t __must_check vmx_vmcs_try_enter(struct vcpu *v);
>  void vmx_vmcs_exit(struct vcpu *v);
> +void vmx_vmcs_reload(struct vcpu *v);
>
>  #define CPU_BASED_VIRTUAL_INTR_PENDING        0x00000004
>  #define CPU_BASED_USE_TSC_OFFSETING           0x00000008
>
>
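To restate the race being fixed in sequence form (my reading of the description above; the pCPU numbering is made up for illustration):

  pCPU0: context_switch() to idle; __context_switch() is bypassed, so the
         prev vCPU is no longer marked running, but its VMCS stays loaded
  pCPU0: local_irq_enable() (at the latest in the idle loop)
  pCPU1: vmx_vmcs_enter() for that vCPU: vcpu_pause() has nothing to wait
         for (the vCPU already looks paused), and the IPI makes pCPU0
         give up the VMCS
  pCPU0: __context_switch() finally runs; vmx_ctxt_switch_from() touches
         a VMCS that is no longer loaded, and __vmwrite() fails

Hence the vmx_vmcs_reload() call on the switch-from path above.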
Using the above patch with the following change for Xen-4.6.1:
-    if ( v->arch.hvm_vmx.vmcs_pa == this_cpu(current_vmcs) )
+    if ( v->arch.hvm_vmx.vmcs == this_cpu(current_vmcs) )
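i.e. on 4.6.1 the helper ends up as below (same body as in the patch above; only the ownership check differs, presumably because 4.6 still tracks the current VMCS as a struct vmcs_struct pointer rather than by physical address):

void vmx_vmcs_reload(struct vcpu *v)
{
    /*
     * As we're running with interrupts disabled, we can't acquire
     * v->arch.hvm_vmx.vmcs_lock here. However, with interrupts disabled
     * the VMCS can't be taken away from us anymore if we still own it.
     */
    ASSERT(!local_irq_is_enabled());
    if ( v->arch.hvm_vmx.vmcs == this_cpu(current_vmcs) )
        return;
    ASSERT(!this_cpu(current_vmcs));

    /*
     * Wait for the remote side to be done with the VMCS before loading
     * it here.
     */
    while ( v->arch.hvm_vmx.active_cpu != -1 )
        cpu_relax();
    vmx_load_vmcs(v);
}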
This is what I'm getting during the original test case (32 VMs reboot):
(XEN) [ 1407.789329] Watchdog timer detects that CPU12 is stuck!
(XEN) [ 1407.795726] ----[ Xen-4.6.1-xs-local  x86_64  debug=n  Not tainted ]----
(XEN) [ 1407.803774] CPU:    12
(XEN) [ 1407.806975] RIP:    e008:[<ffff82d0801ea2a2>] vmx_vmcs_reload+0x32/0x50
(XEN) [ 1407.814926] RFLAGS: 0000000000000013   CONTEXT: hypervisor (d230v0)
(XEN) [ 1407.822486] rax: 0000000000000000   rbx: ffff830079ee7000   rcx: 0000000000000000
(XEN) [ 1407.831407] rdx: 0000006f8f72ce00   rsi: ffff8329b3efbfe8   rdi: ffff830079ee7000
(XEN) [ 1407.840326] rbp: ffff83007bab7000   rsp: ffff83400fab7dc8   r8:  000001468e9e3ccc
(XEN) [ 1407.849246] r9:  ffff83403ffe7000   r10: 00000146c91c1737   r11: ffff833a9558c310
(XEN) [ 1407.858166] r12: ffff833a9558c000   r13: 000000000000000c   r14: ffff83403ffe7000
(XEN) [ 1407.867085] r15: ffff82d080364640   cr0: 0000000080050033   cr4: 00000000003526e0
(XEN) [ 1407.876005] cr3: 000000294b074000   cr2: 000007fefd7ce150
(XEN) [ 1407.882599] ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
(XEN) [ 1407.890938] Xen code around <ffff82d0801ea2a2> (vmx_vmcs_reload+0x32/0x50):
(XEN) [ 1407.899277]  84 00 00 00 00 00 f3 90 <83> bf e8 05 00 00 ff 75 f5 e9 a0 fa ff ff f3 c3
(XEN) [ 1407.908679] Xen stack trace from rsp=ffff83400fab7dc8:
(XEN) [ 1407.914982]    ffff82d08016c58d 0000000000001000 0000000000000000 0000000000000000
(XEN) [ 1407.923998]    0000000000000206 0000000000000086 0000000000000286 000000000000000c
(XEN) [ 1407.933017]    ffff83007bab7058 ffff82d080364640 ffff83007bab7000 00000146a7f26495
(XEN) [ 1407.942032]    ffff830079ee7000 ffff833a9558cf84 ffff833a9558c000 ffff82d080364640
(XEN) [ 1407.951048]    ffff82d08012fb8e ffff83400fabda98 ffff83400faba148 ffff83403ffe7000
(XEN) [ 1407.960067]    ffff83400faba160 ffff83400fabda40 ffff82d080164305 000000000000000c
(XEN) [ 1407.969083]    ffff830079ee7000 0000000001c9c380 ffff82d080136400 000000440000011d
(XEN) [ 1407.978101]    00000000ffffffff ffffffffffffffff ffff83400fab0000 ffff82d080348d00
(XEN) [ 1407.987116]    ffff833a9558c000 ffff82d080364640 ffff82d08013311c ffff830079ee7000
(XEN) [ 1407.996134]    ffff83400fab0000 ffff830079ee7000 ffff83403ffe7000 00000000ffffffff
(XEN) [ 1408.005151]    ffff82d080167d35 ffff83007bab7000 0000000000000001 fffffa80077f9700
(XEN) [ 1408.014167]    fffffa80075bf900 fffffa80077f9820 0000000000000000 0000000000000000
(XEN) [ 1408.023184]    fffffa8008889c00 0000000002fa1e78 0000003b6ed18d78 0000000000000000
(XEN) [ 1408.032202]    00000000068e7780 fffffa80075ba790 fffffa80077f9848 fffff800027f9e80
(XEN) [ 1408.041220]    0000000000000001 000000fc00000000 fffff880042499c2 0000000000000000
(XEN) [ 1408.050235]    0000000000000246 fffff80000b9cb58 0000000000000000 80248e00e008e1f0
(XEN) [ 1408.059253]    00000000ffff82d0 80248e00e008e200 00000000ffff82d0 80248e000000000c
(XEN) [ 1408.068268]    ffff830079ee7000 0000006f8f72ce00 00000000ffff82d0
(XEN) [ 1408.075638] Xen call trace:
(XEN) [ 1408.079322]    [<ffff82d0801ea2a2>] vmx_vmcs_reload+0x32/0x50
(XEN) [ 1408.086303]    [<ffff82d08016c58d>] context_switch+0x85d/0xeb0
(XEN) [ 1408.093380]    [<ffff82d08012fb8e>] schedule.c#schedule+0x46e/0x7d0
(XEN) [ 1408.100942]    [<ffff82d080164305>] reprogram_timer+0x75/0xe0
(XEN) [ 1408.107925]    [<ffff82d080136400>] timer.c#timer_softirq_action+0x90/0x210
(XEN) [ 1408.116263]    [<ffff82d08013311c>] softirq.c#__do_softirq+0x5c/0x90
(XEN) [ 1408.123921]    [<ffff82d080167d35>] domain.c#idle_loop+0x25/0x60
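Decoding the code bytes around the RIP by hand (my own disassembly, so treat it as an assumption), CPU12 is sitting in the wait loop that the patch adds to vmx_vmcs_reload():

  vmx_vmcs_reload+0x30:  f3 90                   pause                 # cpu_relax()
  vmx_vmcs_reload+0x32:  83 bf e8 05 00 00 ff    cmpl $-1,0x5e8(%rdi)  # v->arch.hvm_vmx.active_cpu != -1
  vmx_vmcs_reload+0x39:  75 f5                   jne  vmx_vmcs_reload+0x30

i.e. active_cpu never becomes -1 for this vCPU, so the loop spins until the watchdog fires.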
Currently I'm testing the following patch that fixes at least one possible
scenario:
commit f76dc832c2de4950872fc27962c56c609cff1160
Author: Sergey Dyasli <sergey.dyasli@xxxxxxxxxx>
Date: Tue Feb 14 15:23:54 2017 +0000
x86/vmx: use curr_vcpu in vmx_vmcs_exit()
During context_switch() from an HVM vCPU to the idle vCPU, current is
updated but curr_vcpu retains the HVM vCPU's value. If after that, for
some reason, a vmx_vmcs_enter() + vmx_vmcs_exit() pair is executed, the
test for has_hvm_container_vcpu(current) will fail and the VMCS for
curr_vcpu will not be loaded.
This will lead to a BUG() during the next __context_switch(), when
vmx_ctxt_switch_from() is executed and __vmwrite() fails.
Fix this by testing has_hvm_container_vcpu() against curr_vcpu.
Signed-off-by: Sergey Dyasli <sergey.dyasli@xxxxxxxxxx>
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 88db7ee..f0d71ae 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -742,6 +742,8 @@ void vmx_vmcs_enter(struct vcpu *v)
 void vmx_vmcs_exit(struct vcpu *v)
 {
     struct foreign_vmcs *fv;
+    unsigned int cpu = smp_processor_id();
+    struct vcpu *p = per_cpu(curr_vcpu, cpu);

     if ( likely(v == current) )
         return;
@@ -754,8 +756,8 @@ void vmx_vmcs_exit(struct vcpu *v)
     {
         /* Don't confuse vmx_do_resume (for @v or @current!) */
         vmx_clear_vmcs(v);
-        if ( has_hvm_container_vcpu(current) )
-            vmx_load_vmcs(current);
+        if ( has_hvm_container_vcpu(p) )
+            vmx_load_vmcs(p);

         spin_unlock(&v->arch.hvm_vmx.vmcs_lock);

         vcpu_unpause(v);
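Spelled out, the window this closes (again my reading, as a sketch):

1. context_switch(HVM vCPU -> idle): set_current(idle) runs, the lazy
   path is taken, so per_cpu(curr_vcpu) still points at the HVM vCPU
   and its VMCS remains loaded;
2. something running on the idle vCPU executes a vmx_vmcs_enter(v) +
   vmx_vmcs_exit(v) pair for a foreign vCPU v;
3. vmx_vmcs_exit(v) does vmx_clear_vmcs(v) and then checks
   has_hvm_container_vcpu(current); current is the idle vCPU, so the
   check fails and curr_vcpu's VMCS is not loaded again;
4. the eventual __context_switch() runs vmx_ctxt_switch_from() for
   curr_vcpu with no VMCS loaded, and __vmwrite() hits the BUG().

With the patch, step 3 tests curr_vcpu instead and reloads its VMCS.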
So far no crashes have been observed, but testing continues.
--
Thanks,
Sergey
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel