[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] linux-2.6.18/x86-64: provide a memset() that can deal with 4Gb or above at a time


  • To: "xen-devel" <xen-devel@xxxxxxxxxxxxx>
  • From: "Jan Beulich" <JBeulich@xxxxxxxx>
  • Date: Fri, 23 Mar 2012 11:19:34 +0000
  • Delivery-date: Fri, 23 Mar 2012 11:19:06 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>

Now that a corresponding change got accepted into Linux 3.4, let's fix
this in our code too. It is particularly required by the memset()
invoked from __alloc_bootmem_core(), which can be called with sizes
beyond 4Gb out of alloc_node_mem_map() when CONFIG_FLAT_NODE_MEM_MAP is
defined (starting at around 300Gb).

In order to not affect the native kernel, introduce a Xen-specific clone
of the original file. The native kernel is unlikely to be affected
anyway: as long as NUMA is defined it usually sets up separate maps for
each node, so the problem would only become visible with said amount of
memory per node and with SPARSEMEM not being used. Besides, in this tree
we're not really concerned about fixing native problems.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- /dev/null
+++ b/arch/x86_64/lib/memset-xen.S
@@ -0,0 +1,122 @@
+/* Copyright 2002 Andi Kleen, SuSE Labs */
+/*
+ * ISO C memset - set a memory block to a byte value.
+ *
+ * rdi   destination
+ * rsi   value (char)
+ * rdx   count (bytes)
+ * May modify rdi, rdx, rcx, r9, r10, flags; r8 too when a misaligned dst is fixed up.
+ * rax   original destination
+ */
+       .globl __memset
+       .globl memset
+       .p2align 4
+memset:
+__memset:
+       movq %rdi,%r10          /* keep the original dst for the return value */
+
+       /* expand byte value  */
+       movzbl %sil,%ecx        /* rcx = value byte, zero-extended */
+       movabs $0x0101010101010101,%rax
+       imulq  %rcx,%rax        /* rax = value byte replicated into all 8 bytes */
+
+       /* align dst */
+       movl  %edi,%r9d
+       andl  $7,%r9d           /* r9 = dst & 7 */
+       jnz  .Lbad_alignment    /* dst is not 8-byte aligned */
+.Lafter_bad_alignment:
+
+       movq  %rdx,%rcx
+       shrq  $6,%rcx           /* rcx = count / 64, taken from the full 64-bit count */
+       jz       .Lhandle_tail  /* fewer than 64 bytes to set */
+
+       .p2align 4
+.Lloop_64:
+       decq  %rcx              /* sets ZF for the jnz below; movq/leaq leave flags alone */
+       movq  %rax,(%rdi)
+       movq  %rax,8(%rdi)
+       movq  %rax,16(%rdi)
+       movq  %rax,24(%rdi)
+       movq  %rax,32(%rdi)
+       movq  %rax,40(%rdi)
+       movq  %rax,48(%rdi)
+       movq  %rax,56(%rdi)
+       leaq  64(%rdi),%rdi     /* advance dst by one 64-byte block, flags untouched */
+       jnz    .Lloop_64        /* loop while rcx != 0 (flags from decq) */
+
+       /* Handle tail in loops. The loops should be faster than hard
+          to predict jump tables. */
+       .p2align 4
+.Lhandle_tail:
+       movl    %edx,%ecx       /* only the low 6 bits of count matter from here on */
+       andl    $63&(~7),%ecx   /* ecx = count & 0x38: bytes left in whole qwords */
+       jz              .Lhandle_7
+       shrl    $3,%ecx         /* ecx = number of qwords left (1..7) */
+       .p2align 4
+.Lloop_8:
+       decl   %ecx             /* sets ZF for the jnz below */
+       movq  %rax,(%rdi)
+       leaq  8(%rdi),%rdi      /* advance dst by 8, flags untouched */
+       jnz    .Lloop_8
+
+.Lhandle_7:
+       andl    $7,%edx         /* edx = count & 7: trailing single bytes */
+       jz      .Lende
+       .p2align 4
+.Lloop_1:
+       decl    %edx            /* sets ZF for the jnz below */
+       movb    %al,(%rdi)
+       leaq    1(%rdi),%rdi    /* advance dst by 1, flags untouched */
+       jnz     .Lloop_1
+
+.Lende:
+       movq    %r10,%rax       /* return the original destination */
+       ret
+
+.Lbad_alignment:
+       cmpq $7,%rdx            /* 64-bit compare of the byte count */
+       jbe     .Lhandle_7      /* count <= 7 (unsigned): store single bytes only */
+       movq %rax,(%rdi)        /* unaligned store */
+       movq $8,%r8
+       subq %r9,%r8            /* r8 = 8 - (dst & 7): bytes up to the next 8-byte boundary */
+       addq %r8,%rdi           /* dst is now 8-byte aligned */
+       subq %r8,%rdx           /* those bytes were covered by the unaligned store */
+       jmp .Lafter_bad_alignment
+
+       /* Some CPUs run faster using the string instructions.
+          It is also a lot simpler. Use this when possible */
+
+#include <asm/cpufeature.h>
+
+       .section .altinstructions,"a"
+       .align 8
+       .quad  memset                   /* presumably: address of the code to be replaced - confirm against struct alt_instr */
+       .quad  memset_c                 /* presumably: address of the replacement code */
+       .byte  X86_FEATURE_REP_GOOD     /* presumably: CPU feature that enables the replacement */
+       .byte  memset_c_end-memset_c    /* size of memset_c in bytes; presumably the original-length field */
+       .byte  memset_c_end-memset_c    /* size of memset_c in bytes; presumably the replacement-length field */
+       .previous
+
+       .section .altinstr_replacement,"ax"
+ /* rdi        destination
+  * rsi value
+  * rdx count
+  */
+memset_c:
+       movq %rdi,%r9           /* keep the original dst for the return value */
+       movq %rdx,%rcx
+       andl $7,%edx            /* edx = count & 7: trailing single bytes */
+       shrq $3,%rcx            /* rcx = count / 8, taken from the full 64-bit count */
+       /* expand byte value  */
+       movzbl %sil,%esi        /* rsi = value byte, zero-extended */
+       movabs $0x0101010101010101,%rax
+       imulq %rsi,%rax         /* rax = value byte replicated into all 8 bytes */
+       rep                     /* repeat the following stosq rcx times */
+       stosq                   /* store rax at (rdi) and step rdi by 8 */
+       movl %edx,%ecx          /* rcx = number of trailing bytes */
+       rep                     /* repeat the following stosb rcx times */
+       stosb                   /* store al at (rdi) and step rdi by 1 */
+       movq %r9,%rax           /* return the original destination */
+       ret
+memset_c_end:                  /* end marker used to compute the size of memset_c */
+       .previous



Attachment: xen-x86_64-memset.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.