Following a change in Linux 2.6.33, make x86-32 always use
__builtin_mem{cpy,set}() on gcc 4.0+. In particular, this works around
certain intermediate gcc revisions generating out-of-range-array-index
warnings with the current inline implementation. It may be worthwhile
to consider making this the case for x86-64 too.

At the same time, eliminate the redundant inline assembly in the C file
and instead use the inline functions coming from the header.

Signed-off-by: Jan Beulich

--- 2010-05-04.orig/xen/arch/x86/string.c	2007-11-26 16:57:03.000000000 +0100
+++ 2010-05-04/xen/arch/x86/string.c	2010-05-11 09:45:27.000000000 +0200
@@ -11,44 +11,13 @@
 #undef memcpy
 void *memcpy(void *dest, const void *src, size_t n)
 {
-    long d0, d1, d2;
-
-    asm volatile (
-#ifdef __i386__
-        "   rep movsl    ; "
-#else
-        "   rep movsq    ; "
-        "   testb $4,%b4 ; "
-        "   je 0f        ; "
-        "   movsl        ; "
-        "0:              ; "
-#endif
-        "   testb $2,%b4 ; "
-        "   je 1f        ; "
-        "   movsw        ; "
-        "1: testb $1,%b4 ; "
-        "   je 2f        ; "
-        "   movsb        ; "
-        "2:                "
-        : "=&c" (d0), "=&D" (d1), "=&S" (d2)
-        : "0" (n/sizeof(long)), "q" (n), "1" (dest), "2" (src)
-        : "memory");
-
-    return dest;
+    return __variable_memcpy(dest, src, n);
 }
 
 #undef memset
 void *memset(void *s, int c, size_t n)
 {
-    long d0, d1;
-
-    asm volatile (
-        "rep stosb"
-        : "=&c" (d0), "=&D" (d1)
-        : "a" (c), "1" (s), "0" (n)
-        : "memory");
-
-    return s;
+    return __memset_generic(s, c, n);
 }
 
 #undef memmove
--- 2010-05-04.orig/xen/include/asm-x86/string.h	2009-10-07 13:31:36.000000000 +0200
+++ 2010-05-04/xen/include/asm-x86/string.h	2010-05-11 10:21:04.000000000 +0200
@@ -16,6 +16,11 @@ static inline void *__variable_memcpy(vo
     return to;
 }
+#define __HAVE_ARCH_MEMCPY
+#if defined(__i386__) && __GNUC__ >= 4
+#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
+#else
+
 /*
  * This looks horribly ugly, but the compiler can optimize it totally,
  * as the count is constant.
  */
@@ -95,7 +100,6 @@ static always_inline void * __constant_m
     return to;
 }
 
-#define __HAVE_ARCH_MEMCPY
 
 /* align source to a 64-bit boundary */
 static always_inline void *__var_memcpy(void *t, const void *f, size_t n)
@@ -121,11 +125,13 @@ void *__memcpy(void *t, const void *f, s
             __var_memcpy((t),(f),(n)));
 }
 
+#endif /* !__i386__ || __GNUC__ < 4 */
+
 /* Some version of gcc don't have this builtin. It's non-critical anyway. */
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *dest, const void *src, size_t n);
 
-static inline void *__memset_generic(void *s, char c, size_t count)
+static inline void *__memset_generic(void *s, int c, size_t count)
 {
     long d0, d1;
     __asm__ __volatile__ (
@@ -134,6 +140,11 @@ static inline void *__memset_generic(voi
     return s;
 }
 
+#define __HAVE_ARCH_MEMSET
+#if defined(__i386__) && __GNUC__ >= 4
+#define memset(s, c, n) __builtin_memset(s, c, n)
+#else
+
 /* we might want to write optimized versions of these later */
 #define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))
 
@@ -238,11 +249,12 @@ static always_inline void *__constant_c_
 #define MEMSET_PATTERN_MUL 0x01010101UL
 #endif
 
-#define __HAVE_ARCH_MEMSET
 #define memset(s, c, count) (__memset((s),(c),(count)))
 #define __memset(s, c, count) \
     (__builtin_constant_p(c) ? \
      __constant_c_x_memset((s),(MEMSET_PATTERN_MUL*(unsigned char)(c)),(count)) : \
      __var_x_memset((s),(c),(count)))
 
+#endif /* !__i386__ || __GNUC__ < 4 */
+
 #endif /* __X86_STRING_H__ */
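
(For illustration only, not part of the patch: below is a minimal
standalone sketch of the dispatch pattern being introduced. The
my_memcpy/my_memset names are hypothetical; in Xen the same #if guards
the real memcpy/memset macros, and the fallback path keeps the header's
existing inline-assembly helpers rather than the simple loops shown
here.)

/*
 * Sketch: on x86-32 with gcc 4.0+, map straight to the compiler
 * builtins, which gcc expands inline for constant sizes and lowers to
 * a library call or rep-string sequence otherwise. Any other
 * configuration falls back to an open-coded implementation.
 */
#include <stddef.h>

#if defined(__i386__) && defined(__GNUC__) && __GNUC__ >= 4
#define my_memcpy(t, f, n) __builtin_memcpy(t, f, n)
#define my_memset(s, c, n) __builtin_memset(s, c, n)
#else
/* Plain byte-wise fallbacks, standing in for the hand-written
 * helpers that remain on the non-builtin path. */
static inline void *my_memcpy(void *t, const void *f, size_t n)
{
    char *d = t;
    const char *s = f;
    while ( n-- )
        *d++ = *s++;
    return t;
}

static inline void *my_memset(void *s, int c, size_t n)
{
    char *p = s;
    while ( n-- )
        *p++ = (char)c;
    return s;
}
#endif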