|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] x86/boot: copy/clear sections more efficiently
Both the trampoline copy and BSS initialisation can be performed more
efficiently by using 4-byte variants of the string operations.
On Intel systems with ERMSB (enhanced rep movsb/stosb), this makes no
practical difference. On all other systems, this is 4 times more efficient.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
---
xen/arch/x86/boot/head.S | 9 +++++----
xen/arch/x86/xen.lds.S | 6 ++++++
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index 0999997..6060ec2 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -128,7 +128,8 @@ __start:
mov $sym_phys(__bss_end),%ecx
sub %edi,%ecx
xor %eax,%eax
- rep stosb
+ shr $2,%ecx
+ rep stosl
/* Interrogate CPU extended features via CPUID. */
mov $0x80000000,%eax
@@ -192,8 +193,8 @@ __start:
/* Copy bootstrap trampoline to low memory, below 1MB. */
mov $sym_phys(trampoline_start),%esi
- mov $trampoline_end - trampoline_start,%ecx
- rep movsb
+ mov $((trampoline_end - trampoline_start) / 4),%ecx
+ rep movsl
/* Jump into the relocated trampoline. */
lret
@@ -205,6 +206,6 @@ reloc:
ENTRY(trampoline_start)
#include "trampoline.S"
-GLOBAL(trampoline_end)
+ENTRY(trampoline_end)
#include "x86_64.S"
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 301fd8c..8731b39 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -240,6 +240,7 @@ SECTIONS
*(.bss.percpu.read_mostly)
. = ALIGN(SMP_CACHE_BYTES);
__per_cpu_data_end = .;
+ . = ALIGN(4);
__bss_end = .;
} :text
_end = . ;
@@ -320,3 +321,8 @@ ASSERT(IS_ALIGNED(cpu0_stack, STACK_SIZE), "cpu0_stack misaligned")
ASSERT(IS_ALIGNED(__init_begin, PAGE_SIZE), "__init_begin misaligned")
ASSERT(IS_ALIGNED(__init_end, PAGE_SIZE), "__init_end misaligned")
+
+ASSERT(IS_ALIGNED(trampoline_start, 4), "trampoline_start misaligned")
+ASSERT(IS_ALIGNED(trampoline_end, 4), "trampoline_end misaligned")
+ASSERT(IS_ALIGNED(__bss_start, 4), "__bss_start misaligned")
+ASSERT(IS_ALIGNED(__bss_end, 4), "__bss_end misaligned")
--
2.1.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |