
[PATCH 2/7] x86/wait: prevent duplicated assembly labels



When enabling UBSAN with clang, the following error is triggered during the
build:

common/wait.c:154:9: error: symbol '.L_wq_resume' is already defined
  154 |         "push %%rbx; push %%rbp; push %%r12;"
      |         ^
<inline asm>:1:121: note: instantiated into assembly here
    1 |         push %rbx; push %rbp; push %r12;push %r13; push %r14; push %r15;sub %esp,%ecx;cmp $4096, %ecx;ja .L_skip;mov %rsp,%rsi;.L_wq_resume: rep movsb;mov %rsp,%rsi;.L_skip:pop %r15; pop %r14; pop %r13;pop %r12; pop %rbp; pop %rbx
      |                                                                                                                                ^
common/wait.c:154:9: error: symbol '.L_skip' is already defined
  154 |         "push %%rbx; push %%rbp; push %%r12;"
      |         ^
<inline asm>:1:159: note: instantiated into assembly here
    1 |         push %rbx; push %rbp; push %r12;push %r13; push %r14; push %r15;sub %esp,%ecx;cmp $4096, %ecx;ja .L_skip;mov %rsp,%rsi;.L_wq_resume: rep movsb;mov %rsp,%rsi;.L_skip:pop %r15; pop %r14; pop %r13;pop %r12; pop %rbp; pop %rbx
      |                                                                                                                                                      ^
2 errors generated.

The inline assembly block in __prepare_to_wait() is duplicated by the
compiler, leading to multiple definitions of the labels inside the
assembly block, which are otherwise expected to be unique.  The GCC
extended-asm documentation notes the possibility of such duplication:

> Under certain circumstances, GCC may duplicate (or remove duplicates of)
> your assembly code when optimizing. This can lead to unexpected duplicate
> symbol errors during compilation if your asm code defines symbols or
> labels. Using ‘%=’ (see AssemblerTemplate) may help resolve this problem.
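
As an illustration (not part of this patch), a minimal sketch of the '%='
approach suggested above:

    /*
     * Illustrative only: %= expands to a number unique to each instance
     * of the asm statement, so every duplicated copy defines a distinct
     * label and the assembler no longer complains.
     */
    asm volatile ( "jmp .L_skip%=\n\t"
                   "nop\n\t"
                   ".L_skip%=:" ::: "memory" );

Note that '%=' is not usable for __prepare_to_wait(): .L_wq_resume is
defined in one asm block and referenced by a jmp from a separate asm block
in check_wakeup_from_wait(), so it must remain a single well-known label.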

Move the assembly blocks that deal with saving and restoring the current
CPU context into their own explicitly non-inlined functions.  This
prevents clang from duplicating the assembly blocks.  Using the noinline
attribute alone seems to be enough to prevent the duplication; in the
future the noclone attribute might also be required if asm block
duplication issues arise again.
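
For reference, a minimal sketch of the failure mode and of the fix, with
hypothetical function names:

    /*
     * Hypothetical reproducer: if the compiler inlines (or otherwise
     * duplicates) ctx_op() into both callers, the assembler sees two
     * definitions of .L_dup and fails.  Marking ctx_op() noinline keeps
     * a single copy of the asm block, and hence a single label.
     */
    static void /* noinline */ ctx_op(void)
    {
        asm volatile ( ".L_dup: nop" ::: "memory" );
    }

    void caller_a(void) { ctx_op(); }
    void caller_b(void) { ctx_op(); }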

Additionally, add a small self-test to ensure the consistency of the
context save and restore logic.

Link: https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html
Link: https://github.com/llvm/llvm-project/issues/92161
Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
---
 xen/common/wait.c | 111 +++++++++++++++++++++++++++++++---------------
 1 file changed, 76 insertions(+), 35 deletions(-)

diff --git a/xen/common/wait.c b/xen/common/wait.c
index cb6f5ff3c20a..2fcbbe8d0c71 100644
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -119,24 +119,16 @@ void wake_up_all(struct waitqueue_head *wq)
 
 #ifdef CONFIG_X86
 
-static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
+/*
+ * context_save() must strictly be noinline, so that its code is not inlined
+ * into multiple callers, which would duplicate the label and trigger an
+ * assembler error about duplicated labels.
+ */
+static void noinline context_save(struct waitqueue_vcpu *wqv)
 {
     struct cpu_info *cpu_info = get_cpu_info();
-    struct vcpu *curr = current;
     unsigned long dummy;
 
-    ASSERT(wqv->esp == NULL);
-
-    /* Save current VCPU affinity; force wakeup on *this* CPU only. */
-    if ( vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT) )
-    {
-        gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
-        domain_crash(curr->domain);
-
-        for ( ; ; )
-            do_softirq();
-    }
-
     /*
      * Hand-rolled setjmp().
      *
@@ -170,6 +162,54 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
         : "0" (0), "1" (cpu_info), "2" (wqv->stack),
           [sz] "i" (PAGE_SIZE)
         : "memory", "rax", "rdx", "r8", "r9", "r10", "r11" );
+}
+
+/*
+ * Since context_save() is noinline, context_restore() must also be noinline,
+ * to balance the RET vs CALL instructions.
+ */
+static void noinline noreturn context_restore(struct waitqueue_vcpu *wqv)
+{
+    /*
+     * Hand-rolled longjmp().
+     *
+     * check_wakeup_from_wait() is always called with a shallow stack,
+     * immediately after the vCPU has been rescheduled.
+     *
+     * Adjust %rsp to be the correct depth for the (deeper) stack we want to
+     * restore, then prepare %rsi, %rdi and %rcx such that when we rejoin the
+     * rep movs in __prepare_to_wait(), it copies from wqv->stack over the
+     * active stack.
+     *
+     * All other GPRs are available for use; They're restored from the stack,
+     * or explicitly clobbered.
+     */
+    asm volatile ( "mov %%rdi, %%rsp;"
+                   "jmp .L_wq_resume"
+                   :
+                   : "S" (wqv->stack), "D" (wqv->esp),
+                     "c" ((char *)get_cpu_info() - (char *)wqv->esp)
+                   : "memory" );
+    unreachable();
+}
+
+static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
+{
+    struct vcpu *curr = current;
+
+    ASSERT(wqv->esp == NULL);
+
+    /* Save current VCPU affinity; force wakeup on *this* CPU only. */
+    if ( vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT) )
+    {
+        gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
+        domain_crash(curr->domain);
+
+        for ( ; ; )
+            do_softirq();
+    }
+
+    context_save(wqv);
 
     if ( unlikely(wqv->esp == NULL) )
     {
@@ -229,30 +269,31 @@ void check_wakeup_from_wait(void)
      *
      * Therefore, no actions are necessary here to maintain RSB safety.
      */
-
-    /*
-     * Hand-rolled longjmp().
-     *
-     * check_wakeup_from_wait() is always called with a shallow stack,
-     * immediately after the vCPU has been rescheduled.
-     *
-     * Adjust %rsp to be the correct depth for the (deeper) stack we want to
-     * restore, then prepare %rsi, %rdi and %rcx such that when we rejoin the
-     * rep movs in __prepare_to_wait(), it copies from wqv->stack over the
-     * active stack.
-     *
-     * All other GPRs are available for use; They're restored from the stack,
-     * or explicitly clobbered.
-     */
-    asm volatile ( "mov %%rdi, %%rsp;"
-                   "jmp .L_wq_resume"
-                   :
-                   : "S" (wqv->stack), "D" (wqv->esp),
-                     "c" ((char *)get_cpu_info() - (char *)wqv->esp)
-                   : "memory" );
+    context_restore(wqv);
     unreachable();
 }
 
+#ifdef CONFIG_SELF_TESTS
+static void __init __constructor test_save_restore_ctx(void)
+{
+    static unsigned int __initdata count;
+    struct waitqueue_vcpu wqv = {};
+
+    wqv.stack = alloc_xenheap_page();
+    if ( !wqv.stack )
+        panic("unable to allocate memory for context selftest\n");
+
+    context_save(&wqv);
+    if ( !count++ )
+        context_restore(&wqv);
+
+    if ( count != 2 )
+        panic("context save and restore not working as expected\n");
+
+    free_xenheap_page(wqv.stack);
+}
+#endif
+
 #else /* !CONFIG_X86 */
 
 #define __prepare_to_wait(wqv) ((void)0)
-- 
2.48.1




 

