[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 15/21] x86/boot: choose AP stack based on APIC ID



From: Krystian Hebel <krystian.hebel@xxxxxxxxx>

This is the first step towards making parallel AP bring-up possible.
It should be sufficient for pre-C code.

Parallel AP bring-up is necessary because TXT by design releases all APs
at once. In addition, it reduces the number of IPIs (and, more
importantly, the delays between them) required to start all logical
processors. This results in a significant reduction of boot time, even
when DRTM is not used, with the performance gain growing with the number
of logical CPUs.

Signed-off-by: Krystian Hebel <krystian.hebel@xxxxxxxxx>
Signed-off-by: Sergii Dmytruk <sergii.dmytruk@xxxxxxxxx>
---
 xen/arch/x86/boot/head.S             |  1 +
 xen/arch/x86/boot/trampoline.S       | 21 +++++++++++++++++++++
 xen/arch/x86/boot/x86_64.S           | 28 +++++++++++++++++++++++++++-
 xen/arch/x86/include/asm/apicdef.h   |  4 ++++
 xen/arch/x86/include/asm/msr-index.h |  3 +++
 xen/arch/x86/setup.c                 |  7 +++++++
 6 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index 0b7903070a..419bf58d5c 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -8,6 +8,7 @@
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/msr-index.h>
+#include <asm/apicdef.h>
 #include <asm/cpufeature.h>
 #include <asm/trampoline.h>
 
diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S
index a92e399fbe..ed593acc46 100644
--- a/xen/arch/x86/boot/trampoline.S
+++ b/xen/arch/x86/boot/trampoline.S
@@ -71,6 +71,27 @@ trampoline_protmode_entry:
         mov     $X86_CR4_PAE,%ecx
         mov     %ecx,%cr4
 
+        /*
+         * Get APIC ID while we're in non-paged mode. Start by checking if
+         * x2APIC is enabled.
+         */
+        mov     $MSR_APIC_BASE, %ecx
+        rdmsr
+        test    $APIC_BASE_EXTD, %eax
+        jnz     .Lx2apic
+
+        /* Not x2APIC, read from MMIO */
+        and     $APIC_BASE_ADDR_MASK, %eax
+        mov     APIC_ID(%eax), %esp
+        shr     $24, %esp
+        jmp     1f
+
+.Lx2apic:
+        mov     $(MSR_X2APIC_FIRST + (APIC_ID >> MSR_X2APIC_SHIFT)), %ecx
+        rdmsr
+        mov     %eax, %esp
+1:
+
         /* Load pagetable base register. */
         mov     $sym_offs(idle_pg_table),%eax
         add     bootsym_rel(trampoline_xen_phys_start,4,%eax)
diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
index 08ae97e261..ac33576d8f 100644
--- a/xen/arch/x86/boot/x86_64.S
+++ b/xen/arch/x86/boot/x86_64.S
@@ -15,7 +15,33 @@ ENTRY(__high_start)
         mov     $XEN_MINIMAL_CR4,%rcx
         mov     %rcx,%cr4
 
-        mov     stack_start(%rip),%rsp
+        test    %ebx,%ebx
+        cmovz   stack_start(%rip), %rsp
+        jz      .L_stack_set
+
+        /* APs only: get stack base from APIC ID saved in %esp. */
+        mov     $-1, %rax
+        lea     x86_cpu_to_apicid(%rip), %rcx
+1:
+        add     $1, %rax
+        cmp     $NR_CPUS, %eax
+        jb      2f
+        hlt
+2:
+        cmp     %esp, (%rcx, %rax, 4)
+        jne     1b
+
+        /* %eax is now Xen CPU index. */
+        lea     stack_base(%rip), %rcx
+        mov     (%rcx, %rax, 8), %rsp
+
+        test    %rsp,%rsp
+        jnz     1f
+        hlt
+1:
+        add     $(STACK_SIZE - CPUINFO_sizeof), %rsp
+
+.L_stack_set:
 
         /* Reset EFLAGS (subsumes CLI and CLD). */
         pushq   $0
diff --git a/xen/arch/x86/include/asm/apicdef.h b/xen/arch/x86/include/asm/apicdef.h
index 63dab01dde..e093a2aa3c 100644
--- a/xen/arch/x86/include/asm/apicdef.h
+++ b/xen/arch/x86/include/asm/apicdef.h
@@ -121,6 +121,10 @@
 
 #define MAX_IO_APICS 128
 
+#ifndef __ASSEMBLY__
+
 extern bool x2apic_enabled;
 
+#endif /* !__ASSEMBLY__ */
+
 #endif
diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
index 22d9e76e55..794cf44abe 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -169,6 +169,9 @@
 #define MSR_X2APIC_FIRST                    0x00000800
 #define MSR_X2APIC_LAST                     0x000008ff
 
+/* MSR offset can be obtained by shifting MMIO offset this number of bits to the right. */
+#define MSR_X2APIC_SHIFT                    4
+
 #define MSR_X2APIC_TPR                      0x00000808
 #define MSR_X2APIC_PPR                      0x0000080a
 #define MSR_X2APIC_EOI                      0x0000080b
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 403d976449..c6ebdc3c6b 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -2068,6 +2068,7 @@ void asmlinkage __init noreturn __start_xen(void)
      */
     if ( !pv_shim )
     {
+        /* Separate loop to make parallel AP bringup possible. */
         for_each_present_cpu ( i )
         {
             /* Set up cpu_to_node[]. */
@@ -2075,6 +2076,12 @@ void asmlinkage __init noreturn __start_xen(void)
             /* Set up node_to_cpumask based on cpu_to_node[]. */
             numa_add_cpu(i);
 
+            if ( stack_base[i] == NULL )
+                stack_base[i] = cpu_alloc_stack(i);
+        }
+
+        for_each_present_cpu ( i )
+        {
             if ( (park_offline_cpus || num_online_cpus() < max_cpus) &&
                  !cpu_online(i) )
             {
-- 
2.49.0




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.