arm64: fix fls()

It using CLZ on a 64-bit register while specifying the input operand as
only 32 bits wide is wrong: An operand intentionally shrunk down to 32
bits at the source level doesn't imply respective zero extension also
happens at the machine instruction level, and hence the wrong result
could get returned.

Add suitable inline assembly abstraction so that the function can
remain shared between arm32 and arm64. The need to include asm_defns.h
in bitops.h makes it necessary to adjust processor.h though - it is
generally wrong to include public headers without making sure that
integer types are properly defined. (I didn't innvestigate or try
whether the possible alternative of moving the public/arch-arm.h
inclusion down in the file would also work.)

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Note: Only compile tested due to lack of suitable ARM64 test environment.

--- a/xen/include/asm-arm/asm_defns.h
+++ b/xen/include/asm-arm/asm_defns.h
@@ -7,6 +7,15 @@
 #endif
 #include <asm/processor.h>
 
+/* For generic assembly code: use macros to define operand sizes. */
+#if defined(CONFIG_ARM_32)
+# define __OP32
+#elif defined(CONFIG_ARM_64)
+# define __OP32 "w"
+#else
+# error "unknown ARM variant"
+#endif
+
 #endif /* __ARM_ASM_DEFNS_H__ */
 /*
  * Local variables:
--- a/xen/include/asm-arm/bitops.h
+++ b/xen/include/asm-arm/bitops.h
@@ -9,6 +9,8 @@
 #ifndef _ARM_BITOPS_H
 #define _ARM_BITOPS_H
 
+#include <asm/asm_defns.h>
+
 /*
  * Non-atomic bit manipulation.
  *
@@ -111,9 +113,8 @@ static inline int fls(unsigned int x)
         if (__builtin_constant_p(x))
                return generic_fls(x);
 
-        asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
-        ret = BITS_PER_LONG - ret;
-        return ret;
+        asm("clz\t%"__OP32"0, %"__OP32"1" : "=r" (ret) : "r" (x));
+        return 32 - ret;
 }
 
 
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -3,6 +3,9 @@
 
 #include <asm/cpregs.h>
 #include <asm/sysregs.h>
+#ifndef __ASSEMBLY__
+#include <xen/types.h>
+#endif
 #include <public/arch-arm.h>
 
 /* MIDR Main ID Register */
@@ -220,8 +223,6 @@
 
 #ifndef __ASSEMBLY__
 
-#include <xen/types.h>
-
 struct cpuinfo_arm {
     union {
         uint32_t bits;