[Xen-devel] [PATCH] arm: fixup hard tabs
# HG changeset patch # User Ian Campbell <ian.campbell@xxxxxxxxxx> # Date 1329153968 0 # Node ID 63e88a26e1ef58c8e5a2b30a003ab7c3bc9c6b54 # Parent 7fd8f10cfd3eaf9f0982eb6fd49334a1e229ba98 arm: fixup hard tabs Unfortunately the tool I was using to apply patches mangles hard tabs. This patch corrects this in the affected files (which is fortunately only a subset of .S files or files imported from Linux). "git diff" and "git diff -b" vs. Stefano's v6 branch now contain the same output -- i.e. only the intervening development. Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx> diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/dtb.S --- a/xen/arch/arm/dtb.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/dtb.S Mon Feb 13 17:26:08 2012 +0000 @@ -1,2 +1,2 @@ - .section .dtb,#alloc - .incbin CONFIG_DTB_FILE + .section .dtb,#alloc + .incbin CONFIG_DTB_FILE diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/dummy.S --- a/xen/arch/arm/dummy.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/dummy.S Mon Feb 13 17:26:08 2012 +0000 @@ -1,13 +1,13 @@ /* Nothing is mapped at 1G, for the moment */ #define DUMMY(x) \ - .globl x; \ -x: .word 0xe7f000f0 -/* x: mov r0, #0x40000000 ; str r0, [r0]; b x */ + .globl x; \ +x: .word 0xe7f000f0 +/* x: mov r0, #0x40000000 ; str r0, [r0]; b x */ #define NOP(x) \ - .globl x; \ -x: mov pc, lr - + .globl x; \ +x: mov pc, lr + DUMMY(alloc_pirq_struct); DUMMY(alloc_vcpu_guest_context); DUMMY(arch_do_domctl); diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/entry.S --- a/xen/arch/arm/entry.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/entry.S Mon Feb 13 17:26:08 2012 +0000 @@ -1,69 +1,69 @@ #include <xen/config.h> #include <asm/asm_defns.h> -#define SAVE_ONE_BANKED(reg) mrs r11, reg; str r11, [sp, #UREGS_##reg] -#define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11 +#define SAVE_ONE_BANKED(reg) mrs r11, reg; str r11, [sp, #UREGS_##reg] +#define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11 #define SAVE_BANKED(mode) \ - SAVE_ONE_BANKED(SP_##mode) ; SAVE_ONE_BANKED(LR_##mode) ; SAVE_ONE_BANKED(SPSR_##mode) + SAVE_ONE_BANKED(SP_##mode) ; SAVE_ONE_BANKED(LR_##mode) ; SAVE_ONE_BANKED(SPSR_##mode) #define RESTORE_BANKED(mode) \ - RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode) + RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode) -#define SAVE_ALL \ - sub sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ \ - push {r0-r12}; /* Save R0-R12 */ \ - \ - mrs r11, ELR_hyp; /* ELR_hyp is return address. */ \ - str r11, [sp, #UREGS_pc]; \ - \ - str lr, [sp, #UREGS_lr]; \ - \ - add r11, sp, #UREGS_kernel_sizeof+4; \ - str r11, [sp, #UREGS_sp]; \ - \ - mrs r11, SPSR_hyp; \ - str r11, [sp, #UREGS_cpsr]; \ - and r11, #PSR_MODE_MASK; \ - cmp r11, #PSR_MODE_HYP; \ - blne save_guest_regs +#define SAVE_ALL \ + sub sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ \ + push {r0-r12}; /* Save R0-R12 */ \ + \ + mrs r11, ELR_hyp; /* ELR_hyp is return address. */ \ + str r11, [sp, #UREGS_pc]; \ + \ + str lr, [sp, #UREGS_lr]; \ + \ + add r11, sp, #UREGS_kernel_sizeof+4; \ + str r11, [sp, #UREGS_sp]; \ + \ + mrs r11, SPSR_hyp; \ + str r11, [sp, #UREGS_cpsr]; \ + and r11, #PSR_MODE_MASK; \ + cmp r11, #PSR_MODE_HYP; \ + blne save_guest_regs save_guest_regs: - ldr r11, [sp, #UREGS_lr] - str r11, [sp, #UREGS_LR_usr] - ldr r11, =0xffffffff /* Clobber SP which is only valid for hypervisor frames. 
*/ - str r11, [sp, #UREGS_sp] - SAVE_ONE_BANKED(SP_usr) - SAVE_BANKED(svc) - SAVE_BANKED(abt) - SAVE_BANKED(und) - SAVE_BANKED(irq) - SAVE_BANKED(fiq) - SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq) - SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq); - mov pc, lr + ldr r11, [sp, #UREGS_lr] + str r11, [sp, #UREGS_LR_usr] + ldr r11, =0xffffffff /* Clobber SP which is only valid for hypervisor frames. */ + str r11, [sp, #UREGS_sp] + SAVE_ONE_BANKED(SP_usr) + SAVE_BANKED(svc) + SAVE_BANKED(abt) + SAVE_BANKED(und) + SAVE_BANKED(irq) + SAVE_BANKED(fiq) + SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq) + SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq); + mov pc, lr -#define DEFINE_TRAP_ENTRY(trap) \ - ALIGN; \ -trap_##trap: \ - SAVE_ALL; \ - adr lr, return_from_trap; \ - mov r0, sp; \ - mov r11, sp; \ - bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ - b do_trap_##trap +#define DEFINE_TRAP_ENTRY(trap) \ + ALIGN; \ +trap_##trap: \ + SAVE_ALL; \ + adr lr, return_from_trap; \ + mov r0, sp; \ + mov r11, sp; \ + bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ + b do_trap_##trap .globl hyp_traps_vector - .align 5 + .align 5 hyp_traps_vector: - .word 0 /* 0x00 - Reset */ - b trap_undefined_instruction /* 0x04 - Undefined Instruction */ - b trap_supervisor_call /* 0x08 - Supervisor Call */ - b trap_prefetch_abort /* 0x0c - Prefetch Abort */ - b trap_data_abort /* 0x10 - Data Abort */ - b trap_hypervisor /* 0x14 - Hypervisor */ - b trap_irq /* 0x18 - IRQ */ - b trap_fiq /* 0x1c - FIQ */ + .word 0 /* 0x00 - Reset */ + b trap_undefined_instruction /* 0x04 - Undefined Instruction */ + b trap_supervisor_call /* 0x08 - Supervisor Call */ + b trap_prefetch_abort /* 0x0c - Prefetch Abort */ + b trap_data_abort /* 0x10 - Data Abort */ + b trap_hypervisor /* 0x14 - Hypervisor */ + b trap_irq /* 0x18 - IRQ */ + b trap_fiq /* 0x1c - FIQ */ DEFINE_TRAP_ENTRY(undefined_instruction) DEFINE_TRAP_ENTRY(supervisor_call) @@ -74,34 +74,34 @@ DEFINE_TRAP_ENTRY(irq) DEFINE_TRAP_ENTRY(fiq) ENTRY(return_from_trap) - ldr r11, [sp, #UREGS_cpsr] - and r11, #PSR_MODE_MASK - cmp r11, #PSR_MODE_HYP - beq return_to_hypervisor + ldr r11, [sp, #UREGS_cpsr] + and r11, #PSR_MODE_MASK + cmp r11, #PSR_MODE_HYP + beq return_to_hypervisor ENTRY(return_to_guest) - mov r11, sp - bic sp, #7 /* Align the stack pointer */ - bl leave_hypervisor_tail - ldr r11, [sp, #UREGS_pc] - msr ELR_hyp, r11 - ldr r11, [sp, #UREGS_cpsr] - msr SPSR_hyp, r11 - RESTORE_ONE_BANKED(SP_usr) - RESTORE_BANKED(svc) - RESTORE_BANKED(abt) - RESTORE_BANKED(und) - RESTORE_BANKED(irq) - RESTORE_BANKED(fiq) - RESTORE_ONE_BANKED(R8_fiq); RESTORE_ONE_BANKED(R9_fiq); RESTORE_ONE_BANKED(R10_fiq) - RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq); - ldr lr, [sp, #UREGS_LR_usr] - pop {r0-r12} - add sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ - eret + mov r11, sp + bic sp, #7 /* Align the stack pointer */ + bl leave_hypervisor_tail + ldr r11, [sp, #UREGS_pc] + msr ELR_hyp, r11 + ldr r11, [sp, #UREGS_cpsr] + msr SPSR_hyp, r11 + RESTORE_ONE_BANKED(SP_usr) + RESTORE_BANKED(svc) + RESTORE_BANKED(abt) + RESTORE_BANKED(und) + RESTORE_BANKED(irq) + RESTORE_BANKED(fiq) + RESTORE_ONE_BANKED(R8_fiq); RESTORE_ONE_BANKED(R9_fiq); RESTORE_ONE_BANKED(R10_fiq) + RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq); + ldr lr, [sp, #UREGS_LR_usr] + pop {r0-r12} + add sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ + eret ENTRY(return_to_hypervisor) - ldr lr, [sp, #UREGS_lr] - 
pop {r0-r12} - add sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ - eret + ldr lr, [sp, #UREGS_lr] + pop {r0-r12} + add sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ + eret diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/head.S --- a/xen/arch/arm/head.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/head.S Mon Feb 13 17:26:08 2012 +0000 @@ -26,281 +26,281 @@ * Clobbers r0-r3. */ #ifdef EARLY_UART_ADDRESS #define PRINT(_s) \ - adr r0, 98f ; \ - bl puts ; \ - b 99f ; \ -98: .asciz _s ; \ - .align 2 ; \ + adr r0, 98f ; \ + bl puts ; \ + b 99f ; \ +98: .asciz _s ; \ + .align 2 ; \ 99: #else #define PRINT(s) #endif - .arm + .arm - /* This must be the very first address in the loaded image. - * It should be linked at XEN_VIRT_START, and loaded at any - * 2MB-aligned address. All of text+data+bss must fit in 2MB, - * or the initial pagetable code below will need adjustment. */ - .global start + /* This must be the very first address in the loaded image. + * It should be linked at XEN_VIRT_START, and loaded at any + * 2MB-aligned address. All of text+data+bss must fit in 2MB, + * or the initial pagetable code below will need adjustment. */ + .global start start: - cpsid aif /* Disable all interrupts */ + cpsid aif /* Disable all interrupts */ - /* Save the bootloader arguments in less-clobberable registers */ - mov r7, r1 /* r7 := ARM-linux machine type */ - mov r8, r2 /* r8 := ATAG base address */ + /* Save the bootloader arguments in less-clobberable registers */ + mov r7, r1 /* r7 := ARM-linux machine type */ + mov r8, r2 /* r8 := ATAG base address */ - /* Find out where we are */ - ldr r0, =start - adr r9, start /* r9 := paddr (start) */ - sub r10, r9, r0 /* r10 := phys-offset */ + /* Find out where we are */ + ldr r0, =start + adr r9, start /* r9 := paddr (start) */ + sub r10, r9, r0 /* r10 := phys-offset */ - /* Using the DTB in the .dtb section? */ + /* Using the DTB in the .dtb section? */ #ifdef CONFIG_DTB_FILE - ldr r8, =_sdtb - add r8, r10 /* r8 := paddr(DTB) */ + ldr r8, =_sdtb + add r8, r10 /* r8 := paddr(DTB) */ #endif #ifdef EARLY_UART_ADDRESS - /* Say hello */ - ldr r11, =EARLY_UART_ADDRESS /* r11 := UART base address */ - bl init_uart + /* Say hello */ + ldr r11, =EARLY_UART_ADDRESS /* r11 := UART base address */ + bl init_uart #endif - /* Check that this CPU has Hyp mode */ - mrc CP32(r0, ID_PFR1) - and r0, r0, #0xf000 /* Bits 12-15 define virt extensions */ - teq r0, #0x1000 /* Must == 0x1 or may be incompatible */ - beq 1f - bl putn - PRINT("- CPU doesn't support the virtualization extensions -\r\n") - b fail + /* Check that this CPU has Hyp mode */ + mrc CP32(r0, ID_PFR1) + and r0, r0, #0xf000 /* Bits 12-15 define virt extensions */ + teq r0, #0x1000 /* Must == 0x1 or may be incompatible */ + beq 1f + bl putn + PRINT("- CPU doesn't support the virtualization extensions -\r\n") + b fail 1: - /* Check if we're already in it */ - mrs r0, cpsr - and r0, r0, #0x1f /* Mode is in the low 5 bits of CPSR */ - teq r0, #0x1a /* Hyp Mode? */ - bne 1f - PRINT("- Started in Hyp mode -\r\n") - b hyp + /* Check if we're already in it */ + mrs r0, cpsr + and r0, r0, #0x1f /* Mode is in the low 5 bits of CPSR */ + teq r0, #0x1a /* Hyp Mode? */ + bne 1f + PRINT("- Started in Hyp mode -\r\n") + b hyp 1: - /* Otherwise, it must have been Secure Supervisor mode */ - mrc CP32(r0, SCR) - tst r0, #0x1 /* Not-Secure bit set? 
*/ - beq 1f - PRINT("- CPU is not in Hyp mode or Secure state -\r\n") - b fail + /* Otherwise, it must have been Secure Supervisor mode */ + mrc CP32(r0, SCR) + tst r0, #0x1 /* Not-Secure bit set? */ + beq 1f + PRINT("- CPU is not in Hyp mode or Secure state -\r\n") + b fail 1: - /* OK, we're in Secure state. */ - PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n") + /* OK, we're in Secure state. */ + PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n") - /* Dance into Hyp mode */ - cpsid aif, #0x16 /* Enter Monitor mode */ - mrc CP32(r0, SCR) - orr r0, r0, #0x100 /* Set HCE */ - orr r0, r0, #0xb1 /* Set SCD, AW, FW and NS */ - bic r0, r0, #0xe /* Clear EA, FIQ and IRQ */ - mcr CP32(r0, SCR) - /* Ugly: the system timer's frequency register is only - * programmable in Secure state. Since we don't know where its - * memory-mapped control registers live, we can't find out the - * right frequency. Use the VE model's default frequency here. */ - ldr r0, =0x5f5e100 /* 100 MHz */ - mcr CP32(r0, CNTFRQ) - ldr r0, =0x40c00 /* SMP, c11, c10 in non-secure mode */ - mcr CP32(r0, NSACR) - /* Continuing ugliness: Set up the GIC so NS state owns interrupts */ - mov r0, #GIC_BASE_ADDRESS - add r0, r0, #GIC_DR_OFFSET - mov r1, #0 - str r1, [r0] /* Disable delivery in the distributor */ - add r0, r0, #0x80 /* GICD_IGROUP0 */ - mov r2, #0xffffffff /* All interrupts to group 1 */ - str r2, [r0] - str r2, [r0, #4] - str r2, [r0, #8] - /* Must drop priority mask below 0x80 before entering NS state */ - mov r0, #GIC_BASE_ADDRESS - add r0, r0, #GIC_CR_OFFSET - ldr r1, =0xff - str r1, [r0, #0x4] /* -> GICC_PMR */ - /* Reset a few config registers */ - mov r0, #0 - mcr CP32(r0, FCSEIDR) - mcr CP32(r0, CONTEXTIDR) - /* FIXME: ought to reset some other NS control regs here */ - adr r1, 1f - adr r0, hyp /* Store paddr (hyp entry point) */ - str r0, [r1] /* where we can use it for RFE */ - isb /* Ensure we see the stored target address */ - rfeia r1 /* Enter Hyp mode */ + /* Dance into Hyp mode */ + cpsid aif, #0x16 /* Enter Monitor mode */ + mrc CP32(r0, SCR) + orr r0, r0, #0x100 /* Set HCE */ + orr r0, r0, #0xb1 /* Set SCD, AW, FW and NS */ + bic r0, r0, #0xe /* Clear EA, FIQ and IRQ */ + mcr CP32(r0, SCR) + /* Ugly: the system timer's frequency register is only + * programmable in Secure state. Since we don't know where its + * memory-mapped control registers live, we can't find out the + * right frequency. Use the VE model's default frequency here. 
*/ + ldr r0, =0x5f5e100 /* 100 MHz */ + mcr CP32(r0, CNTFRQ) + ldr r0, =0x40c00 /* SMP, c11, c10 in non-secure mode */ + mcr CP32(r0, NSACR) + /* Continuing ugliness: Set up the GIC so NS state owns interrupts */ + mov r0, #GIC_BASE_ADDRESS + add r0, r0, #GIC_DR_OFFSET + mov r1, #0 + str r1, [r0] /* Disable delivery in the distributor */ + add r0, r0, #0x80 /* GICD_IGROUP0 */ + mov r2, #0xffffffff /* All interrupts to group 1 */ + str r2, [r0] + str r2, [r0, #4] + str r2, [r0, #8] + /* Must drop priority mask below 0x80 before entering NS state */ + mov r0, #GIC_BASE_ADDRESS + add r0, r0, #GIC_CR_OFFSET + ldr r1, =0xff + str r1, [r0, #0x4] /* -> GICC_PMR */ + /* Reset a few config registers */ + mov r0, #0 + mcr CP32(r0, FCSEIDR) + mcr CP32(r0, CONTEXTIDR) + /* FIXME: ought to reset some other NS control regs here */ + adr r1, 1f + adr r0, hyp /* Store paddr (hyp entry point) */ + str r0, [r1] /* where we can use it for RFE */ + isb /* Ensure we see the stored target address */ + rfeia r1 /* Enter Hyp mode */ -1: .word 0 /* PC to enter Hyp mode at */ - .word 0x000001da /* CPSR: LE, Abort/IRQ/FIQ off, Hyp */ +1: .word 0 /* PC to enter Hyp mode at */ + .word 0x000001da /* CPSR: LE, Abort/IRQ/FIQ off, Hyp */ hyp: - PRINT("- Setting up control registers -\r\n") + PRINT("- Setting up control registers -\r\n") - /* Set up memory attribute type tables */ - ldr r0, =MAIR0VAL - ldr r1, =MAIR1VAL - mcr CP32(r0, MAIR0) - mcr CP32(r1, MAIR1) - mcr CP32(r0, HMAIR0) - mcr CP32(r1, HMAIR1) + /* Set up memory attribute type tables */ + ldr r0, =MAIR0VAL + ldr r1, =MAIR1VAL + mcr CP32(r0, MAIR0) + mcr CP32(r1, MAIR1) + mcr CP32(r0, HMAIR0) + mcr CP32(r1, HMAIR1) - /* Set up the HTCR: - * PT walks use Outer-Shareable accesses, - * PT walks are write-back, no-write-allocate in both cache levels, - * Full 32-bit address space goes through this table. */ - ldr r0, =0x80002500 - mcr CP32(r0, HTCR) + /* Set up the HTCR: + * PT walks use Outer-Shareable accesses, + * PT walks are write-back, no-write-allocate in both cache levels, + * Full 32-bit address space goes through this table. */ + ldr r0, =0x80002500 + mcr CP32(r0, HTCR) - /* Set up the HSCTLR: - * Exceptions in LE ARM, - * Low-latency IRQs disabled, - * Write-implies-XN disabled (for now), - * I-cache and d-cache enabled, - * Alignment checking enabled, - * MMU translation disabled (for now). */ - ldr r0, =(HSCTLR_BASE|SCTLR_A|SCTLR_C) - mcr CP32(r0, HSCTLR) + /* Set up the HSCTLR: + * Exceptions in LE ARM, + * Low-latency IRQs disabled, + * Write-implies-XN disabled (for now), + * I-cache and d-cache enabled, + * Alignment checking enabled, + * MMU translation disabled (for now). 
*/ + ldr r0, =(HSCTLR_BASE|SCTLR_A|SCTLR_C) + mcr CP32(r0, HSCTLR) - /* Write Xen's PT's paddr into the HTTBR */ - ldr r4, =xen_pgtable - add r4, r4, r10 /* r4 := paddr (xen_pagetable) */ - mov r5, #0 /* r4:r5 is paddr (xen_pagetable) */ - mcrr CP64(r4, r5, HTTBR) + /* Write Xen's PT's paddr into the HTTBR */ + ldr r4, =xen_pgtable + add r4, r4, r10 /* r4 := paddr (xen_pagetable) */ + mov r5, #0 /* r4:r5 is paddr (xen_pagetable) */ + mcrr CP64(r4, r5, HTTBR) - /* Build the baseline idle pagetable's first-level entries */ - ldr r1, =xen_second - add r1, r1, r10 /* r1 := paddr (xen_second) */ - mov r3, #0x0 - orr r2, r1, #0xe00 /* r2:r3 := table map of xen_second */ - orr r2, r2, #0x07f /* (+ rights for linear PT) */ - strd r2, r3, [r4, #0] /* Map it in slot 0 */ - add r2, r2, #0x1000 - strd r2, r3, [r4, #8] /* Map 2nd page in slot 1 */ - add r2, r2, #0x1000 - strd r2, r3, [r4, #16] /* Map 3rd page in slot 2 */ - add r2, r2, #0x1000 - strd r2, r3, [r4, #24] /* Map 4th page in slot 3 */ + /* Build the baseline idle pagetable's first-level entries */ + ldr r1, =xen_second + add r1, r1, r10 /* r1 := paddr (xen_second) */ + mov r3, #0x0 + orr r2, r1, #0xe00 /* r2:r3 := table map of xen_second */ + orr r2, r2, #0x07f /* (+ rights for linear PT) */ + strd r2, r3, [r4, #0] /* Map it in slot 0 */ + add r2, r2, #0x1000 + strd r2, r3, [r4, #8] /* Map 2nd page in slot 1 */ + add r2, r2, #0x1000 + strd r2, r3, [r4, #16] /* Map 3rd page in slot 2 */ + add r2, r2, #0x1000 + strd r2, r3, [r4, #24] /* Map 4th page in slot 3 */ - /* Now set up the second-level entries */ - orr r2, r9, #0xe00 - orr r2, r2, #0x07d /* r2:r3 := 2MB normal map of Xen */ - mov r4, r9, lsr #18 /* Slot for paddr(start) */ - strd r2, r3, [r1, r4] /* Map Xen there */ - ldr r4, =start - lsr r4, #18 /* Slot for vaddr(start) */ - strd r2, r3, [r1, r4] /* Map Xen there too */ + /* Now set up the second-level entries */ + orr r2, r9, #0xe00 + orr r2, r2, #0x07d /* r2:r3 := 2MB normal map of Xen */ + mov r4, r9, lsr #18 /* Slot for paddr(start) */ + strd r2, r3, [r1, r4] /* Map Xen there */ + ldr r4, =start + lsr r4, #18 /* Slot for vaddr(start) */ + strd r2, r3, [r1, r4] /* Map Xen there too */ #ifdef EARLY_UART_ADDRESS - ldr r3, =(1<<(54-32)) /* NS for device mapping */ - lsr r2, r11, #21 - lsl r2, r2, #21 /* 2MB-aligned paddr of UART */ - orr r2, r2, #0xe00 - orr r2, r2, #0x071 /* r2:r3 := 2MB dev map including UART */ - add r4, r4, #8 - strd r2, r3, [r1, r4] /* Map it in the fixmap's slot */ + ldr r3, =(1<<(54-32)) /* NS for device mapping */ + lsr r2, r11, #21 + lsl r2, r2, #21 /* 2MB-aligned paddr of UART */ + orr r2, r2, #0xe00 + orr r2, r2, #0x071 /* r2:r3 := 2MB dev map including UART */ + add r4, r4, #8 + strd r2, r3, [r1, r4] /* Map it in the fixmap's slot */ #else - add r4, r4, #8 /* Skip over unused fixmap slot */ + add r4, r4, #8 /* Skip over unused fixmap slot */ #endif - mov r3, #0x0 - lsr r2, r8, #21 - lsl r2, r2, #21 /* 2MB-aligned paddr of DTB */ - orr r2, r2, #0xf00 - orr r2, r2, #0x07d /* r2:r3 := 2MB RAM incl. DTB */ - add r4, r4, #8 - strd r2, r3, [r1, r4] /* Map it in the early boot slot */ + mov r3, #0x0 + lsr r2, r8, #21 + lsl r2, r2, #21 /* 2MB-aligned paddr of DTB */ + orr r2, r2, #0xf00 + orr r2, r2, #0x07d /* r2:r3 := 2MB RAM incl. 
DTB */ + add r4, r4, #8 + strd r2, r3, [r1, r4] /* Map it in the early boot slot */ - PRINT("- Turning on paging -\r\n") + PRINT("- Turning on paging -\r\n") - ldr r1, =paging /* Explicit vaddr, not RIP-relative */ - mrc CP32(r0, HSCTLR) - orr r0, r0, #0x1 /* Add in the MMU enable bit */ - dsb /* Flush PTE writes and finish reads */ - mcr CP32(r0, HSCTLR) /* now paging is enabled */ - isb /* Now, flush the icache */ - mov pc, r1 /* Get a proper vaddr into PC */ + ldr r1, =paging /* Explicit vaddr, not RIP-relative */ + mrc CP32(r0, HSCTLR) + orr r0, r0, #0x1 /* Add in the MMU enable bit */ + dsb /* Flush PTE writes and finish reads */ + mcr CP32(r0, HSCTLR) /* now paging is enabled */ + isb /* Now, flush the icache */ + mov pc, r1 /* Get a proper vaddr into PC */ paging: #ifdef EARLY_UART_ADDRESS - /* Recover the UART address in the new address space */ - lsl r11, #11 - lsr r11, #11 /* UART base's offset from 2MB base */ - adr r0, start - add r0, r0, #0x200000 /* vaddr of the fixmap's 2MB slot */ - add r11, r11, r0 /* r11 := vaddr (UART base address) */ + /* Recover the UART address in the new address space */ + lsl r11, #11 + lsr r11, #11 /* UART base's offset from 2MB base */ + adr r0, start + add r0, r0, #0x200000 /* vaddr of the fixmap's 2MB slot */ + add r11, r11, r0 /* r11 := vaddr (UART base address) */ #endif - PRINT("- Entering C -\r\n") + PRINT("- Entering C -\r\n") - ldr sp, =init_stack /* Supply a stack */ - add sp, #STACK_SIZE /* (which grows down from the top). */ - sub sp, #CPUINFO_sizeof /* Make room for CPU save record */ - mov r0, r10 /* Marshal args: - phys_offset */ - mov r1, r7 /* - machine type */ - mov r2, r8 /* - ATAG address */ - b start_xen /* and disappear into the land of C */ + ldr sp, =init_stack /* Supply a stack */ + add sp, #STACK_SIZE /* (which grows down from the top). */ + sub sp, #CPUINFO_sizeof /* Make room for CPU save record */ + mov r0, r10 /* Marshal args: - phys_offset */ + mov r1, r7 /* - machine type */ + mov r2, r8 /* - ATAG address */ + b start_xen /* and disappear into the land of C */ /* Fail-stop * r0: string explaining why */ -fail: PRINT("- Boot failed -\r\n") -1: wfe - b 1b +fail: PRINT("- Boot failed -\r\n") +1: wfe + b 1b #ifdef EARLY_UART_ADDRESS /* Bring up the UART. Specific to the PL011 UART. * Clobbers r0-r2 */ init_uart: - mov r1, #0x0 - str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor fraction) */ - mov r1, #0x4 /* 7.3728MHz / 0x4 == 16 * 115200 */ - str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor integer) */ - mov r1, #0x60 /* 8n1 */ - str r1, [r11, #0x24] /* -> UARTLCR_H (Line control) */ - ldr r1, =0x00000301 /* RXE | TXE | UARTEN */ - str r1, [r11, #0x30] /* -> UARTCR (Control Register) */ - adr r0, 1f - b puts -1: .asciz "- UART enabled -\r\n" - .align 4 + mov r1, #0x0 + str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor fraction) */ + mov r1, #0x4 /* 7.3728MHz / 0x4 == 16 * 115200 */ + str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor integer) */ + mov r1, #0x60 /* 8n1 */ + str r1, [r11, #0x24] /* -> UARTLCR_H (Line control) */ + ldr r1, =0x00000301 /* RXE | TXE | UARTEN */ + str r1, [r11, #0x30] /* -> UARTCR (Control Register) */ + adr r0, 1f + b puts +1: .asciz "- UART enabled -\r\n" + .align 4 /* Print early debug messages. Specific to the PL011 UART. * r0: Nul-terminated string to print. 
* Clobbers r0-r2 */ puts: - ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ - tst r2, #0x8 /* Check BUSY bit */ - bne puts /* Wait for the UART to be ready */ - ldrb r2, [r0], #1 /* Load next char */ - teq r2, #0 /* Exit on nul*/ - moveq pc, lr - str r2, [r11] /* -> UARTDR (Data Register) */ - b puts + ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ + tst r2, #0x8 /* Check BUSY bit */ + bne puts /* Wait for the UART to be ready */ + ldrb r2, [r0], #1 /* Load next char */ + teq r2, #0 /* Exit on nul*/ + moveq pc, lr + str r2, [r11] /* -> UARTDR (Data Register) */ + b puts /* Print a 32-bit number in hex. Specific to the PL011 UART. * r0: Number to print. * clobbers r0-r3 */ putn: - adr r1, hex - mov r3, #8 -1: ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ - tst r2, #0x8 /* Check BUSY bit */ - bne 1b /* Wait for the UART to be ready */ - and r2, r0, #0xf0000000 /* Mask off the top nybble */ - ldrb r2, [r1, r2, lsr #28] /* Convert to a char */ - str r2, [r11] /* -> UARTDR (Data Register) */ - lsl r0, #4 /* Roll it through one nybble at a time */ - subs r3, r3, #1 - bne 1b - adr r0, crlf /* Finish with a newline */ - b puts + adr r1, hex + mov r3, #8 +1: ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ + tst r2, #0x8 /* Check BUSY bit */ + bne 1b /* Wait for the UART to be ready */ + and r2, r0, #0xf0000000 /* Mask off the top nybble */ + ldrb r2, [r1, r2, lsr #28] /* Convert to a char */ + str r2, [r11] /* -> UARTDR (Data Register) */ + lsl r0, #4 /* Roll it through one nybble at a time */ + subs r3, r3, #1 + bne 1b + adr r0, crlf /* Finish with a newline */ + b puts -crlf: .asciz "\r\n" -hex: .ascii "0123456789abcdef" - .align 2 +crlf: .asciz "\r\n" +hex: .ascii "0123456789abcdef" + .align 2 #else /* EARLY_UART_ADDRESS */ @@ -308,6 +308,6 @@ init_uart: .global early_puts early_puts: puts: -putn: mov pc, lr +putn: mov pc, lr #endif /* EARLY_UART_ADDRESS */ diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/bitops.h --- a/xen/arch/arm/lib/bitops.h Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/bitops.h Mon Feb 13 17:26:08 2012 +0000 @@ -1,61 +1,61 @@ #include <xen/config.h> #if __LINUX_ARM_ARCH__ >= 6 - .macro bitop, instr - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - mov r2, #1 - and r3, r0, #31 @ Get bit offset - mov r0, r0, lsr #5 - add r1, r1, r0, lsl #2 @ Get word offset - mov r3, r2, lsl r3 -1: ldrex r2, [r1] - \instr r2, r2, r3 - strex r0, r2, [r1] - cmp r0, #0 - bne 1b - bx lr - .endm + .macro bitop, instr + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset + mov r3, r2, lsl r3 +1: ldrex r2, [r1] + \instr r2, r2, r3 + strex r0, r2, [r1] + cmp r0, #0 + bne 1b + bx lr + .endm - .macro testop, instr, store - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - mov r2, #1 - and r3, r0, #31 @ Get bit offset - mov r0, r0, lsr #5 - add r1, r1, r0, lsl #2 @ Get word offset - mov r3, r2, lsl r3 @ create mask - smp_dmb -1: ldrex r2, [r1] - ands r0, r2, r3 @ save old value of bit - \instr r2, r2, r3 @ toggle bit - strex ip, r2, [r1] - cmp ip, #0 - bne 1b - smp_dmb - cmp r0, #0 - movne r0, #1 -2: bx lr - .endm + .macro testop, instr, store + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset + mov r3, r2, lsl r3 @ create mask + smp_dmb +1: ldrex r2, [r1] + ands r0, r2, r3 @ save old value of bit + \instr r2, r2, r3 @ 
toggle bit + strex ip, r2, [r1] + cmp ip, #0 + bne 1b + smp_dmb + cmp r0, #0 + movne r0, #1 +2: bx lr + .endm #else - .macro bitop, name, instr -ENTRY( \name ) -UNWIND( .fnstart ) - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - and r2, r0, #31 - mov r0, r0, lsr #5 - mov r3, #1 - mov r3, r3, lsl r2 - save_and_disable_irqs ip - ldr r2, [r1, r0, lsl #2] - \instr r2, r2, r3 - str r2, [r1, r0, lsl #2] - restore_irqs ip - mov pc, lr -UNWIND( .fnend ) -ENDPROC(\name ) - .endm + .macro bitop, name, instr +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r2, r0, #31 + mov r0, r0, lsr #5 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2] + \instr r2, r2, r3 + str r2, [r1, r0, lsl #2] + restore_irqs ip + mov pc, lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm /** * testop - implement a test_and_xxx_bit operation. @@ -65,23 +65,23 @@ ENDPROC(\name ) * Note: we can trivially conditionalise the store instruction * to avoid dirtying the data cache. */ - .macro testop, name, instr, store -ENTRY( \name ) -UNWIND( .fnstart ) - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - and r3, r0, #31 - mov r0, r0, lsr #5 - save_and_disable_irqs ip - ldr r2, [r1, r0, lsl #2]! - mov r0, #1 - tst r2, r0, lsl r3 - \instr r2, r2, r0, lsl r3 - \store r2, [r1] - moveq r0, #0 - restore_irqs ip - mov pc, lr -UNWIND( .fnend ) -ENDPROC(\name ) - .endm + .macro testop, name, instr, store +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r3, r0, #31 + mov r0, r0, lsr #5 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2]! + mov r0, #1 + tst r2, r0, lsl r3 + \instr r2, r2, r0, lsl r3 + \store r2, [r1] + moveq r0, #0 + restore_irqs ip + mov pc, lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm #endif diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/changebit.S --- a/xen/arch/arm/lib/changebit.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/changebit.S Mon Feb 13 17:26:08 2012 +0000 @@ -14,5 +14,5 @@ .text ENTRY(_change_bit) - bitop eor + bitop eor ENDPROC(_change_bit) diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/clearbit.S --- a/xen/arch/arm/lib/clearbit.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/clearbit.S Mon Feb 13 17:26:08 2012 +0000 @@ -15,5 +15,5 @@ .text ENTRY(_clear_bit) - bitop bic + bitop bic ENDPROC(_clear_bit) diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/copy_template.S --- a/xen/arch/arm/lib/copy_template.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/copy_template.S Mon Feb 13 17:26:08 2012 +0000 @@ -3,9 +3,9 @@ * * Code template for optimized memory copy functions * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -24,227 +24,227 @@ * * ldr1w ptr reg abort * - * This loads one word from 'ptr', stores it in 'reg' and increments - * 'ptr' to the next word. The 'abort' argument is used for fixup tables. + * This loads one word from 'ptr', stores it in 'reg' and increments + * 'ptr' to the next word. The 'abort' argument is used for fixup tables. 
* * ldr4w ptr reg1 reg2 reg3 reg4 abort * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort * - * This loads four or eight words starting from 'ptr', stores them - * in provided registers and increments 'ptr' past those words. - * The'abort' argument is used for fixup tables. + * This loads four or eight words starting from 'ptr', stores them + * in provided registers and increments 'ptr' past those words. + * The'abort' argument is used for fixup tables. * * ldr1b ptr reg cond abort * - * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. - * It also must apply the condition code if provided, otherwise the - * "al" condition is assumed by default. + * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. + * It also must apply the condition code if provided, otherwise the + * "al" condition is assumed by default. * * str1w ptr reg abort * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort * str1b ptr reg cond abort * - * Same as their ldr* counterparts, but data is stored to 'ptr' location - * rather than being loaded. + * Same as their ldr* counterparts, but data is stored to 'ptr' location + * rather than being loaded. * * enter reg1 reg2 * - * Preserve the provided registers on the stack plus any additional - * data as needed by the implementation including this code. Called - * upon code entry. + * Preserve the provided registers on the stack plus any additional + * data as needed by the implementation including this code. Called + * upon code entry. * * exit reg1 reg2 * - * Restore registers with the values previously saved with the - * 'preserv' macro. Called upon code termination. + * Restore registers with the values previously saved with the + * 'preserv' macro. Called upon code termination. * * LDR1W_SHIFT * STR1W_SHIFT * - * Correction to be applied to the "ip" register when branching into - * the ldr1w or str1w instructions (some of these macros may expand to - * than one 32bit instruction in Thumb-2) + * Correction to be applied to the "ip" register when branching into + * the ldr1w or str1w instructions (some of these macros may expand to + * than one 32bit instruction in Thumb-2) */ - enter r4, lr + enter r4, lr - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #0] ) - bne 9f - ands ip, r1, #3 - bne 10f + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #0] ) + bne 9f + ands ip, r1, #3 + bne 10f -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - blt 5f +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f - CALGN( ands ip, r0, #31 ) - CALGN( rsb r3, ip, #32 ) - CALGN( sbcnes r4, r3, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, r3 ) @ C gets set - CALGN( add pc, r4, ip ) + CALGN( ands ip, r0, #31 ) + CALGN( rsb r3, ip, #32 ) + CALGN( sbcnes r4, r3, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, r3 ) @ C gets set + CALGN( add pc, r4, ip ) - PLD( pld [r1, #0] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 4f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) + PLD( pld [r1, #0] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 4f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) -3: PLD( pld [r1, #124] ) -4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - subs r2, r2, #32 - str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) +3: PLD( pld [r1, #124] ) +4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f + subs r2, r2, #32 + str8w r0, r3, r4, r5, r6, 
r7, r8, ip, lr, abort=20f + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) -5: ands ip, r2, #28 - rsb ip, ip, #32 +5: ands ip, r2, #28 + rsb ip, ip, #32 #if LDR1W_SHIFT > 0 - lsl ip, ip, #LDR1W_SHIFT + lsl ip, ip, #LDR1W_SHIFT #endif - addne pc, pc, ip @ C is always clear here - b 7f + addne pc, pc, ip @ C is always clear here + b 7f 6: - .rept (1 << LDR1W_SHIFT) - W(nop) - .endr - ldr1w r1, r3, abort=20f - ldr1w r1, r4, abort=20f - ldr1w r1, r5, abort=20f - ldr1w r1, r6, abort=20f - ldr1w r1, r7, abort=20f - ldr1w r1, r8, abort=20f - ldr1w r1, lr, abort=20f + .rept (1 << LDR1W_SHIFT) + W(nop) + .endr + ldr1w r1, r3, abort=20f + ldr1w r1, r4, abort=20f + ldr1w r1, r5, abort=20f + ldr1w r1, r6, abort=20f + ldr1w r1, r7, abort=20f + ldr1w r1, r8, abort=20f + ldr1w r1, lr, abort=20f #if LDR1W_SHIFT < STR1W_SHIFT - lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT + lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT #elif LDR1W_SHIFT > STR1W_SHIFT - lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT + lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT #endif - add pc, pc, ip - nop - .rept (1 << STR1W_SHIFT) - W(nop) - .endr - str1w r0, r3, abort=20f - str1w r0, r4, abort=20f - str1w r0, r5, abort=20f - str1w r0, r6, abort=20f - str1w r0, r7, abort=20f - str1w r0, r8, abort=20f - str1w r0, lr, abort=20f + add pc, pc, ip + nop + .rept (1 << STR1W_SHIFT) + W(nop) + .endr + str1w r0, r3, abort=20f + str1w r0, r4, abort=20f + str1w r0, r5, abort=20f + str1w r0, r6, abort=20f + str1w r0, r7, abort=20f + str1w r0, r8, abort=20f + str1w r0, lr, abort=20f - CALGN( bcs 2b ) + CALGN( bcs 2b ) -7: ldmfd sp!, {r5 - r8} +7: ldmfd sp!, {r5 - r8} -8: movs r2, r2, lsl #31 - ldr1b r1, r3, ne, abort=21f - ldr1b r1, r4, cs, abort=21f - ldr1b r1, ip, cs, abort=21f - str1b r0, r3, ne, abort=21f - str1b r0, r4, cs, abort=21f - str1b r0, ip, cs, abort=21f +8: movs r2, r2, lsl #31 + ldr1b r1, r3, ne, abort=21f + ldr1b r1, r4, cs, abort=21f + ldr1b r1, ip, cs, abort=21f + str1b r0, r3, ne, abort=21f + str1b r0, r4, cs, abort=21f + str1b r0, ip, cs, abort=21f - exit r4, pc + exit r4, pc -9: rsb ip, ip, #4 - cmp ip, #2 - ldr1b r1, r3, gt, abort=21f - ldr1b r1, r4, ge, abort=21f - ldr1b r1, lr, abort=21f - str1b r0, r3, gt, abort=21f - str1b r0, r4, ge, abort=21f - subs r2, r2, ip - str1b r0, lr, abort=21f - blt 8b - ands ip, r1, #3 - beq 1b +9: rsb ip, ip, #4 + cmp ip, #2 + ldr1b r1, r3, gt, abort=21f + ldr1b r1, r4, ge, abort=21f + ldr1b r1, lr, abort=21f + str1b r0, r3, gt, abort=21f + str1b r0, r4, ge, abort=21f + subs r2, r2, ip + str1b r0, lr, abort=21f + blt 8b + ands ip, r1, #3 + beq 1b -10: bic r1, r1, #3 - cmp ip, #2 - ldr1w r1, lr, abort=21f - beq 17f - bgt 18f +10: bic r1, r1, #3 + cmp ip, #2 + ldr1w r1, lr, abort=21f + beq 17f + bgt 18f - .macro forward_copy_shift pull push + .macro forward_copy_shift pull push - subs r2, r2, #28 - blt 14f + subs r2, r2, #28 + blt 14f - CALGN( ands ip, r0, #31 ) - CALGN( rsb ip, ip, #32 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) + CALGN( ands ip, r0, #31 ) + CALGN( rsb ip, ip, #32 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) -11: stmfd sp!, {r5 - r9} +11: stmfd sp!, {r5 - r9} - PLD( pld [r1, #0] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 13f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) + PLD( pld [r1, #0] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 13f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) -12: PLD( pld [r1, #124] ) -13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov 
r3, lr, pull #\pull - subs r2, r2, #32 - ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push - str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) +12: PLD( pld [r1, #124] ) +13: ldr4w r1, r4, r5, r6, r7, abort=19f + mov r3, lr, pull #\pull + subs r2, r2, #32 + ldr4w r1, r8, r9, ip, lr, abort=19f + orr r3, r3, r4, push #\push + mov r4, r4, pull #\pull + orr r4, r4, r5, push #\push + mov r5, r5, pull #\pull + orr r5, r5, r6, push #\push + mov r6, r6, pull #\pull + orr r6, r6, r7, push #\push + mov r7, r7, pull #\pull + orr r7, r7, r8, push #\push + mov r8, r8, pull #\pull + orr r8, r8, r9, push #\push + mov r9, r9, pull #\pull + orr r9, r9, ip, push #\push + mov ip, ip, pull #\pull + orr ip, ip, lr, push #\push + str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) - ldmfd sp!, {r5 - r9} + ldmfd sp!, {r5 - r9} -14: ands ip, r2, #28 - beq 16f +14: ands ip, r2, #28 + beq 16f -15: mov r3, lr, pull #\pull - ldr1w r1, lr, abort=21f - subs ip, ip, #4 - orr r3, r3, lr, push #\push - str1w r0, r3, abort=21f - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) +15: mov r3, lr, pull #\pull + ldr1w r1, lr, abort=21f + subs ip, ip, #4 + orr r3, r3, lr, push #\push + str1w r0, r3, abort=21f + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) -16: sub r1, r1, #(\push / 8) - b 8b +16: sub r1, r1, #(\push / 8) + b 8b - .endm + .endm - forward_copy_shift pull=8 push=24 + forward_copy_shift pull=8 push=24 -17: forward_copy_shift pull=16 push=16 +17: forward_copy_shift pull=16 push=16 -18: forward_copy_shift pull=24 push=8 +18: forward_copy_shift pull=24 push=8 /* @@ -254,14 +254,14 @@ 18: forward_copy_shift p * the exit macro. */ - .macro copy_abort_preamble -19: ldmfd sp!, {r5 - r9} - b 21f -20: ldmfd sp!, {r5 - r8} + .macro copy_abort_preamble +19: ldmfd sp!, {r5 - r9} + b 21f +20: ldmfd sp!, {r5 - r8} 21: - .endm + .endm - .macro copy_abort_end - ldmfd sp!, {r4, pc} - .endm + .macro copy_abort_end + ldmfd sp!, {r4, pc} + .endm diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/div64.S --- a/xen/arch/arm/lib/div64.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/div64.S Mon Feb 13 17:26:08 2012 +0000 @@ -3,9 +3,9 @@ * * Optimized computation of 64-bit dividend / 32-bit divisor * - * Author: Nicolas Pitre - * Created: Oct 5, 2003 - * Copyright: Monta Vista Software, Inc. + * Author: Nicolas Pitre + * Created: Oct 5, 2003 + * Copyright: Monta Vista Software, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,7 +14,7 @@ #include <xen/config.h> #include "assembler.h" - + #ifdef __ARMEB__ #define xh r0 #define xl r1 @@ -34,12 +34,12 @@ * This is meant to be used by do_div() from include/asm/div64.h only. 
* * Input parameters: - * xh-xl = dividend (clobbered) - * r4 = divisor (preserved) + * xh-xl = dividend (clobbered) + * r4 = divisor (preserved) * * Output values: - * yh-yl = result - * xh = remainder + * yh-yl = result + * xh = remainder * * Clobbered regs: xl, ip */ @@ -47,165 +47,165 @@ ENTRY(__do_div64) UNWIND(.fnstart) - @ Test for easy paths first. - subs ip, r4, #1 - bls 9f @ divisor is 0 or 1 - tst ip, r4 - beq 8f @ divisor is power of 2 + @ Test for easy paths first. + subs ip, r4, #1 + bls 9f @ divisor is 0 or 1 + tst ip, r4 + beq 8f @ divisor is power of 2 - @ See if we need to handle upper 32-bit result. - cmp xh, r4 - mov yh, #0 - blo 3f + @ See if we need to handle upper 32-bit result. + cmp xh, r4 + mov yh, #0 + blo 3f - @ Align divisor with upper part of dividend. - @ The aligned divisor is stored in yl preserving the original. - @ The bit position is stored in ip. + @ Align divisor with upper part of dividend. + @ The aligned divisor is stored in yl preserving the original. + @ The bit position is stored in ip. #if __LINUX_ARM_ARCH__ >= 5 - clz yl, r4 - clz ip, xh - sub yl, yl, ip - mov ip, #1 - mov ip, ip, lsl yl - mov yl, r4, lsl yl + clz yl, r4 + clz ip, xh + sub yl, yl, ip + mov ip, #1 + mov ip, ip, lsl yl + mov yl, r4, lsl yl #else - mov yl, r4 - mov ip, #1 -1: cmp yl, #0x80000000 - cmpcc yl, xh - movcc yl, yl, lsl #1 - movcc ip, ip, lsl #1 - bcc 1b + mov yl, r4 + mov ip, #1 +1: cmp yl, #0x80000000 + cmpcc yl, xh + movcc yl, yl, lsl #1 + movcc ip, ip, lsl #1 + bcc 1b #endif - @ The division loop for needed upper bit positions. - @ Break out early if dividend reaches 0. -2: cmp xh, yl - orrcs yh, yh, ip - subcss xh, xh, yl - movnes ip, ip, lsr #1 - mov yl, yl, lsr #1 - bne 2b + @ The division loop for needed upper bit positions. + @ Break out early if dividend reaches 0. +2: cmp xh, yl + orrcs yh, yh, ip + subcss xh, xh, yl + movnes ip, ip, lsr #1 + mov yl, yl, lsr #1 + bne 2b - @ See if we need to handle lower 32-bit result. -3: cmp xh, #0 - mov yl, #0 - cmpeq xl, r4 - movlo xh, xl - movlo pc, lr + @ See if we need to handle lower 32-bit result. +3: cmp xh, #0 + mov yl, #0 + cmpeq xl, r4 + movlo xh, xl + movlo pc, lr - @ The division loop for lower bit positions. - @ Here we shift remainer bits leftwards rather than moving the - @ divisor for comparisons, considering the carry-out bit as well. - mov ip, #0x80000000 -4: movs xl, xl, lsl #1 - adcs xh, xh, xh - beq 6f - cmpcc xh, r4 -5: orrcs yl, yl, ip - subcs xh, xh, r4 - movs ip, ip, lsr #1 - bne 4b - mov pc, lr + @ The division loop for lower bit positions. + @ Here we shift remainer bits leftwards rather than moving the + @ divisor for comparisons, considering the carry-out bit as well. + mov ip, #0x80000000 +4: movs xl, xl, lsl #1 + adcs xh, xh, xh + beq 6f + cmpcc xh, r4 +5: orrcs yl, yl, ip + subcs xh, xh, r4 + movs ip, ip, lsr #1 + bne 4b + mov pc, lr - @ The top part of remainder became zero. If carry is set - @ (the 33th bit) this is a false positive so resume the loop. - @ Otherwise, if lower part is also null then we are done. -6: bcs 5b - cmp xl, #0 - moveq pc, lr + @ The top part of remainder became zero. If carry is set + @ (the 33th bit) this is a false positive so resume the loop. + @ Otherwise, if lower part is also null then we are done. +6: bcs 5b + cmp xl, #0 + moveq pc, lr - @ We still have remainer bits in the low part. Bring them up. + @ We still have remainer bits in the low part. Bring them up. #if __LINUX_ARM_ARCH__ >= 5 - clz xh, xl @ we know xh is zero here so... 
- add xh, xh, #1 - mov xl, xl, lsl xh - mov ip, ip, lsr xh + clz xh, xl @ we know xh is zero here so... + add xh, xh, #1 + mov xl, xl, lsl xh + mov ip, ip, lsr xh #else -7: movs xl, xl, lsl #1 - mov ip, ip, lsr #1 - bcc 7b +7: movs xl, xl, lsl #1 + mov ip, ip, lsr #1 + bcc 7b #endif - @ Current remainder is now 1. It is worthless to compare with - @ divisor at this point since divisor can not be smaller than 3 here. - @ If possible, branch for another shift in the division loop. - @ If no bit position left then we are done. - movs ip, ip, lsr #1 - mov xh, #1 - bne 4b - mov pc, lr + @ Current remainder is now 1. It is worthless to compare with + @ divisor at this point since divisor can not be smaller than 3 here. + @ If possible, branch for another shift in the division loop. + @ If no bit position left then we are done. + movs ip, ip, lsr #1 + mov xh, #1 + bne 4b + mov pc, lr -8: @ Division by a power of 2: determine what that divisor order is - @ then simply shift values around +8: @ Division by a power of 2: determine what that divisor order is + @ then simply shift values around #if __LINUX_ARM_ARCH__ >= 5 - clz ip, r4 - rsb ip, ip, #31 + clz ip, r4 + rsb ip, ip, #31 #else - mov yl, r4 - cmp r4, #(1 << 16) - mov ip, #0 - movhs yl, yl, lsr #16 - movhs ip, #16 + mov yl, r4 + cmp r4, #(1 << 16) + mov ip, #0 + movhs yl, yl, lsr #16 + movhs ip, #16 - cmp yl, #(1 << 8) - movhs yl, yl, lsr #8 - addhs ip, ip, #8 + cmp yl, #(1 << 8) + movhs yl, yl, lsr #8 + addhs ip, ip, #8 - cmp yl, #(1 << 4) - movhs yl, yl, lsr #4 - addhs ip, ip, #4 + cmp yl, #(1 << 4) + movhs yl, yl, lsr #4 + addhs ip, ip, #4 - cmp yl, #(1 << 2) - addhi ip, ip, #3 - addls ip, ip, yl, lsr #1 + cmp yl, #(1 << 2) + addhi ip, ip, #3 + addls ip, ip, yl, lsr #1 #endif - mov yh, xh, lsr ip - mov yl, xl, lsr ip - rsb ip, ip, #32 - ARM( orr yl, yl, xh, lsl ip ) - THUMB( lsl xh, xh, ip ) - THUMB( orr yl, yl, xh ) - mov xh, xl, lsl ip - mov xh, xh, lsr ip - mov pc, lr + mov yh, xh, lsr ip + mov yl, xl, lsr ip + rsb ip, ip, #32 + ARM( orr yl, yl, xh, lsl ip ) + THUMB( lsl xh, xh, ip ) + THUMB( orr yl, yl, xh ) + mov xh, xl, lsl ip + mov xh, xh, lsr ip + mov pc, lr - @ eq -> division by 1: obvious enough... -9: moveq yl, xl - moveq yh, xh - moveq xh, #0 - moveq pc, lr + @ eq -> division by 1: obvious enough... +9: moveq yl, xl + moveq yh, xh + moveq xh, #0 + moveq pc, lr UNWIND(.fnend) UNWIND(.fnstart) UNWIND(.pad #4) UNWIND(.save {lr}) Ldiv0_64: - @ Division by 0: - str lr, [sp, #-8]! - bl __div0 + @ Division by 0: + str lr, [sp, #-8]! + bl __div0 - @ as wrong as it could be... - mov yl, #0 - mov yh, #0 - mov xh, #0 - ldr pc, [sp], #8 + @ as wrong as it could be... + mov yl, #0 + mov yh, #0 + mov xh, #0 + ldr pc, [sp], #8 UNWIND(.fnend) ENDPROC(__do_div64) diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/findbit.S --- a/xen/arch/arm/lib/findbit.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/findbit.S Mon Feb 13 17:26:08 2012 +0000 @@ -24,20 +24,20 @@ * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); */ ENTRY(_find_first_zero_bit_le) - teq r1, #0 - beq 3f - mov r2, #0 + teq r1, #0 + beq 3f + mov r2, #0 1: - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eors r3, r3, #0xff @ invert bits - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? 
- blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr ENDPROC(_find_first_zero_bit_le) /* @@ -45,19 +45,19 @@ ENDPROC(_find_first_zero_bit_le) * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) */ ENTRY(_find_next_zero_bit_le) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eor r3, r3, #0xff @ now looking for a 1 bit - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit ENDPROC(_find_next_zero_bit_le) /* @@ -65,20 +65,20 @@ ENDPROC(_find_next_zero_bit_le) * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit); */ ENTRY(_find_first_bit_le) - teq r1, #0 - beq 3f - mov r2, #0 + teq r1, #0 + beq 3f + mov r2, #0 1: - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr ENDPROC(_find_first_bit_le) /* @@ -86,87 +86,87 @@ ENDPROC(_find_first_bit_le) * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) */ ENTRY(_find_next_bit_le) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit ENDPROC(_find_next_bit_le) #ifdef __ARMEB__ ENTRY(_find_first_zero_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eors r3, r3, #0xff @ invert bits - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? 
- blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr ENDPROC(_find_first_zero_bit_be) ENTRY(_find_next_zero_bit_be) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eor r3, r3, #0xff @ now looking for a 1 bit - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit ENDPROC(_find_next_zero_bit_be) ENTRY(_find_first_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? 
+ blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr ENDPROC(_find_first_bit_be) ENTRY(_find_next_bit_be) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit ENDPROC(_find_next_bit_be) #endif @@ -176,23 +176,23 @@ ENDPROC(_find_next_bit_be) */ .L_found: #if __LINUX_ARM_ARCH__ >= 5 - rsb r0, r3, #0 - and r3, r3, r0 - clz r3, r3 - rsb r3, r3, #31 - add r0, r2, r3 + rsb r0, r3, #0 + and r3, r3, r0 + clz r3, r3 + rsb r3, r3, #31 + add r0, r2, r3 #else - tst r3, #0x0f - addeq r2, r2, #4 - movne r3, r3, lsl #4 - tst r3, #0x30 - addeq r2, r2, #2 - movne r3, r3, lsl #2 - tst r3, #0x40 - addeq r2, r2, #1 - mov r0, r2 + tst r3, #0x0f + addeq r2, r2, #4 + movne r3, r3, lsl #4 + tst r3, #0x30 + addeq r2, r2, #2 + movne r3, r3, lsl #2 + tst r3, #0x40 + addeq r2, r2, #1 + mov r0, r2 #endif - cmp r1, r0 @ Clamp to maxbit - movlo r0, r1 - mov pc, lr + cmp r1, r0 @ Clamp to maxbit + movlo r0, r1 + mov pc, lr diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/lib1funcs.S --- a/xen/arch/arm/lib/lib1funcs.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/lib1funcs.S Mon Feb 13 17:26:08 2012 +0000 @@ -40,64 +40,64 @@ Boston, MA 02111-1307, USA. */ #if __LINUX_ARM_ARCH__ >= 5 - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + #else - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4 bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. 
+1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b - mov \result, #0 + mov \result, #0 #endif - @ Division loop -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b .endm @@ -106,27 +106,27 @@ 1: cmp \dividend, \divisor #if __LINUX_ARM_ARCH__ >= 5 - clz \order, \divisor - rsb \order, \order, #31 + clz \order, \divisor + rsb \order, \order, #31 #else - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 #endif @@ -137,69 +137,69 @@ 1: cmp \dividend, \divisor #if __LINUX_ARM_ARCH__ >= 5 - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order #else - mov \order, #0 + mov \order, #0 - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. 
-1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b #endif - @ Perform all needed substractions to keep only the reminder. - @ Do comparisons in batch of 4 first. - subs \order, \order, #3 @ yes, 3 is intended here - blt 2f + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 - bge 1b +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b - tst \order, #3 - teqne \dividend, #0 - beq 5f + tst \order, #3 + teqne \dividend, #0 + beq 5f - @ Either 1, 2 or 3 comparison/substractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor + @ Either 1, 2 or 3 comparison/substractions are left. 
+2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor 5: .endm @@ -208,27 +208,27 @@ ENTRY(__udivsi3) ENTRY(__aeabi_uidiv) UNWIND(.fnstart) - subs r2, r1, #1 - moveq pc, lr - bcc Ldiv0 - cmp r0, r1 - bls 11f - tst r1, r2 - beq 12f + subs r2, r1, #1 + moveq pc, lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f - ARM_DIV_BODY r0, r1, r2, r3 + ARM_DIV_BODY r0, r1, r2, r3 - mov r0, r2 - mov pc, lr + mov r0, r2 + mov pc, lr -11: moveq r0, #1 - movne r0, #0 - mov pc, lr +11: moveq r0, #1 + movne r0, #0 + mov pc, lr -12: ARM_DIV2_ORDER r1, r2 +12: ARM_DIV2_ORDER r1, r2 - mov r0, r0, lsr r2 - mov pc, lr + mov r0, r0, lsr r2 + mov pc, lr UNWIND(.fnend) ENDPROC(__udivsi3) @@ -237,17 +237,17 @@ ENDPROC(__aeabi_uidiv) ENTRY(__umodsi3) UNWIND(.fnstart) - subs r2, r1, #1 @ compare divisor with 1 - bcc Ldiv0 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - movls pc, lr + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + movls pc, lr - ARM_MOD_BODY r0, r1, r2, r3 + ARM_MOD_BODY r0, r1, r2, r3 - mov pc, lr + mov pc, lr UNWIND(.fnend) ENDPROC(__umodsi3) @@ -256,40 +256,40 @@ ENTRY(__divsi3) ENTRY(__aeabi_idiv) UNWIND(.fnstart) - cmp r1, #0 - eor ip, r0, r1 @ save the sign of the result. - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f - ARM_DIV_BODY r3, r1, r0, r2 + ARM_DIV_BODY r3, r1, r0, r2 - cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr + cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr -10: teq ip, r0 @ same sign ? - rsbmi r0, r0, #0 - mov pc, lr +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + mov pc, lr -11: movlo r0, #0 - moveq r0, ip, asr #31 - orreq r0, r0, #1 - mov pc, lr +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + mov pc, lr -12: ARM_DIV2_ORDER r1, r2 +12: ARM_DIV2_ORDER r1, r2 - cmp ip, #0 - mov r0, r3, lsr r2 - rsbmi r0, r0, #0 - mov pc, lr + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + mov pc, lr UNWIND(.fnend) ENDPROC(__divsi3) @@ -298,23 +298,23 @@ ENDPROC(__aeabi_idiv) ENTRY(__modsi3) UNWIND(.fnstart) - cmp r1, #0 - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. 
+ movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f - ARM_MOD_BODY r0, r1, r2, r3 + ARM_MOD_BODY r0, r1, r2, r3 -10: cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr +10: cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr UNWIND(.fnend) ENDPROC(__modsi3) @@ -323,56 +323,56 @@ ENDPROC(__modsi3) ENTRY(__aeabi_uidivmod) UNWIND(.fnstart) -UNWIND(.save {r0, r1, ip, lr} ) +UNWIND(.save {r0, r1, ip, lr} ) - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_uidiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr UNWIND(.fnend) ENDPROC(__aeabi_uidivmod) ENTRY(__aeabi_idivmod) UNWIND(.fnstart) -UNWIND(.save {r0, r1, ip, lr} ) - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_idiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr +UNWIND(.save {r0, r1, ip, lr} ) + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr UNWIND(.fnend) ENDPROC(__aeabi_idivmod) ENTRY(__aeabi_uldivmod) UNWIND(.fnstart) -UNWIND(.save {lr} ) - sub sp, sp, #8 - stmfd sp!, {sp, lr} - bl __qdivrem - ldr lr, [sp, #4] - add sp, sp, #8 - ldmfd sp!, {r2, r3} - mov pc, lr +UNWIND(.save {lr} ) + sub sp, sp, #8 + stmfd sp!, {sp, lr} + bl __qdivrem + ldr lr, [sp, #4] + add sp, sp, #8 + ldmfd sp!, {r2, r3} + mov pc, lr UNWIND(.fnend) ENDPROC(__aeabi_uldivmod) ENTRY(__aeabi_ldivmod) UNWIND(.fnstart) -UNWIND(.save {lr} ) - sub sp, sp, #16 - stmfd sp!, {sp, lr} - bl __ldivmod_helper - ldr lr, [sp, #4] - add sp, sp, #16 - ldmfd sp!, {r2, r3} - mov pc, lr - +UNWIND(.save {lr} ) + sub sp, sp, #16 + stmfd sp!, {sp, lr} + bl __ldivmod_helper + ldr lr, [sp, #4] + add sp, sp, #16 + ldmfd sp!, {r2, r3} + mov pc, lr + UNWIND(.fnend) ENDPROC(__aeabi_ldivmod) #endif @@ -381,9 +381,9 @@ Ldiv0: UNWIND(.fnstart) UNWIND(.pad #4) UNWIND(.save {lr}) - str lr, [sp, #-8]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #8 + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #8 UNWIND(.fnend) ENDPROC(Ldiv0) diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/memcpy.S --- a/xen/arch/arm/lib/memcpy.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/memcpy.S Mon Feb 13 17:26:08 2012 +0000 @@ -1,9 +1,9 @@ /* * linux/arch/arm/lib/memcpy.S * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,46 +13,46 @@ #include <xen/config.h> #include "assembler.h" -#define LDR1W_SHIFT 0 -#define STR1W_SHIFT 0 +#define LDR1W_SHIFT 0 +#define STR1W_SHIFT 0 - .macro ldr1w ptr reg abort - W(ldr) \reg, [\ptr], #4 - .endm + .macro ldr1w ptr reg abort + W(ldr) \reg, [\ptr], #4 + .endm - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} - .endm + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm - .macro ldr1b ptr reg cond=al abort - ldr\cond\()b \reg, [\ptr], #1 - .endm + .macro ldr1b ptr reg cond=al abort + ldr\cond\()b \reg, [\ptr], #1 + .endm - .macro str1w ptr reg abort - W(str) \reg, [\ptr], #4 - .endm + .macro str1w ptr reg abort + W(str) \reg, [\ptr], #4 + .endm - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm - .macro str1b ptr reg cond=al abort - str\cond\()b \reg, [\ptr], #1 - .endm + .macro str1b ptr reg cond=al abort + str\cond\()b \reg, [\ptr], #1 + .endm - .macro enter reg1 reg2 - stmdb sp!, {r0, \reg1, \reg2} - .endm + .macro enter reg1 reg2 + stmdb sp!, {r0, \reg1, \reg2} + .endm - .macro exit reg1 reg2 - ldmfd sp!, {r0, \reg1, \reg2} - .endm + .macro exit reg1 reg2 + ldmfd sp!, {r0, \reg1, \reg2} + .endm - .text + .text /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/memmove.S --- a/xen/arch/arm/lib/memmove.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/memmove.S Mon Feb 13 17:26:08 2012 +0000 @@ -1,9 +1,9 @@ /* * linux/arch/arm/lib/memmove.S * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: (C) MontaVista Software Inc. + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: (C) MontaVista Software Inc. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,7 +14,7 @@ #include "assembler.h" - .text + .text /* * Prototype: void *memmove(void *dest, const void *src, size_t n); @@ -29,172 +29,172 @@ ENTRY(memmove) - subs ip, r0, r1 - cmphi r2, ip - bls memcpy + subs ip, r0, r1 + cmphi r2, ip + bls memcpy - stmfd sp!, {r0, r4, lr} - add r1, r1, r2 - add r0, r0, r2 - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #-4] ) - bne 9f - ands ip, r1, #3 - bne 10f + stmfd sp!, {r0, r4, lr} + add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #-4] ) + bne 9f + ands ip, r1, #3 + bne 10f -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - blt 5f +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f - CALGN( ands ip, r0, #31 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, ip ) @ C is set here - CALGN( rsb ip, ip, #32 ) - CALGN( add pc, r4, ip ) + CALGN( ands ip, r0, #31 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, ip ) @ C is set here + CALGN( rsb ip, ip, #32 ) + CALGN( add pc, r4, ip ) - PLD( pld [r1, #-4] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 4f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) + PLD( pld [r1, #-4] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 4f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) -3: PLD( pld [r1, #-128] ) -4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} - subs r2, r2, #32 - stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) +3: PLD( pld [r1, #-128] ) +4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} + subs r2, r2, #32 + stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) -5: ands ip, r2, #28 - rsb ip, ip, #32 - addne pc, pc, ip @ C is always clear here - b 7f -6: W(nop) - W(ldr) r3, [r1, #-4]! - W(ldr) r4, [r1, #-4]! - W(ldr) r5, [r1, #-4]! - W(ldr) r6, [r1, #-4]! - W(ldr) r7, [r1, #-4]! - W(ldr) r8, [r1, #-4]! - W(ldr) lr, [r1, #-4]! +5: ands ip, r2, #28 + rsb ip, ip, #32 + addne pc, pc, ip @ C is always clear here + b 7f +6: W(nop) + W(ldr) r3, [r1, #-4]! + W(ldr) r4, [r1, #-4]! + W(ldr) r5, [r1, #-4]! + W(ldr) r6, [r1, #-4]! + W(ldr) r7, [r1, #-4]! + W(ldr) r8, [r1, #-4]! + W(ldr) lr, [r1, #-4]! - add pc, pc, ip - nop - W(nop) - W(str) r3, [r0, #-4]! - W(str) r4, [r0, #-4]! - W(str) r5, [r0, #-4]! - W(str) r6, [r0, #-4]! - W(str) r7, [r0, #-4]! - W(str) r8, [r0, #-4]! - W(str) lr, [r0, #-4]! + add pc, pc, ip + nop + W(nop) + W(str) r3, [r0, #-4]! + W(str) r4, [r0, #-4]! + W(str) r5, [r0, #-4]! + W(str) r6, [r0, #-4]! + W(str) r7, [r0, #-4]! + W(str) r8, [r0, #-4]! + W(str) lr, [r0, #-4]! - CALGN( bcs 2b ) + CALGN( bcs 2b ) -7: ldmfd sp!, {r5 - r8} +7: ldmfd sp!, {r5 - r8} -8: movs r2, r2, lsl #31 - ldrneb r3, [r1, #-1]! - ldrcsb r4, [r1, #-1]! - ldrcsb ip, [r1, #-1] - strneb r3, [r0, #-1]! - strcsb r4, [r0, #-1]! - strcsb ip, [r0, #-1] - ldmfd sp!, {r0, r4, pc} +8: movs r2, r2, lsl #31 + ldrneb r3, [r1, #-1]! + ldrcsb r4, [r1, #-1]! + ldrcsb ip, [r1, #-1] + strneb r3, [r0, #-1]! + strcsb r4, [r0, #-1]! + strcsb ip, [r0, #-1] + ldmfd sp!, {r0, r4, pc} -9: cmp ip, #2 - ldrgtb r3, [r1, #-1]! - ldrgeb r4, [r1, #-1]! - ldrb lr, [r1, #-1]! - strgtb r3, [r0, #-1]! - strgeb r4, [r0, #-1]! - subs r2, r2, ip - strb lr, [r0, #-1]! 
- blt 8b - ands ip, r1, #3 - beq 1b +9: cmp ip, #2 + ldrgtb r3, [r1, #-1]! + ldrgeb r4, [r1, #-1]! + ldrb lr, [r1, #-1]! + strgtb r3, [r0, #-1]! + strgeb r4, [r0, #-1]! + subs r2, r2, ip + strb lr, [r0, #-1]! + blt 8b + ands ip, r1, #3 + beq 1b -10: bic r1, r1, #3 - cmp ip, #2 - ldr r3, [r1, #0] - beq 17f - blt 18f +10: bic r1, r1, #3 + cmp ip, #2 + ldr r3, [r1, #0] + beq 17f + blt 18f - .macro backward_copy_shift push pull + .macro backward_copy_shift push pull - subs r2, r2, #28 - blt 14f + subs r2, r2, #28 + blt 14f - CALGN( ands ip, r0, #31 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) + CALGN( ands ip, r0, #31 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) -11: stmfd sp!, {r5 - r9} +11: stmfd sp!, {r5 - r9} - PLD( pld [r1, #-4] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 13f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) + PLD( pld [r1, #-4] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 13f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) -12: PLD( pld [r1, #-128] ) -13: ldmdb r1!, {r7, r8, r9, ip} - mov lr, r3, push #\push - subs r2, r2, #32 - ldmdb r1!, {r3, r4, r5, r6} - orr lr, lr, ip, pull #\pull - mov ip, ip, push #\push - orr ip, ip, r9, pull #\pull - mov r9, r9, push #\push - orr r9, r9, r8, pull #\pull - mov r8, r8, push #\push - orr r8, r8, r7, pull #\pull - mov r7, r7, push #\push - orr r7, r7, r6, pull #\pull - mov r6, r6, push #\push - orr r6, r6, r5, pull #\pull - mov r5, r5, push #\push - orr r5, r5, r4, pull #\pull - mov r4, r4, push #\push - orr r4, r4, r3, pull #\pull - stmdb r0!, {r4 - r9, ip, lr} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) +12: PLD( pld [r1, #-128] ) +13: ldmdb r1!, {r7, r8, r9, ip} + mov lr, r3, push #\push + subs r2, r2, #32 + ldmdb r1!, {r3, r4, r5, r6} + orr lr, lr, ip, pull #\pull + mov ip, ip, push #\push + orr ip, ip, r9, pull #\pull + mov r9, r9, push #\push + orr r9, r9, r8, pull #\pull + mov r8, r8, push #\push + orr r8, r8, r7, pull #\pull + mov r7, r7, push #\push + orr r7, r7, r6, pull #\pull + mov r6, r6, push #\push + orr r6, r6, r5, pull #\pull + mov r5, r5, push #\push + orr r5, r5, r4, pull #\pull + mov r4, r4, push #\push + orr r4, r4, r3, pull #\pull + stmdb r0!, {r4 - r9, ip, lr} + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) - ldmfd sp!, {r5 - r9} + ldmfd sp!, {r5 - r9} -14: ands ip, r2, #28 - beq 16f +14: ands ip, r2, #28 + beq 16f -15: mov lr, r3, push #\push - ldr r3, [r1, #-4]! - subs ip, ip, #4 - orr lr, lr, r3, pull #\pull - str lr, [r0, #-4]! - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) +15: mov lr, r3, push #\push + ldr r3, [r1, #-4]! + subs ip, ip, #4 + orr lr, lr, r3, pull #\pull + str lr, [r0, #-4]! + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) -16: add r1, r1, #(\pull / 8) - b 8b +16: add r1, r1, #(\pull / 8) + b 8b - .endm + .endm - backward_copy_shift push=8 pull=24 + backward_copy_shift push=8 pull=24 -17: backward_copy_shift push=16 pull=16 +17: backward_copy_shift push=16 pull=16 -18: backward_copy_shift push=24 pull=8 +18: backward_copy_shift push=24 pull=8 ENDPROC(memmove) diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/memset.S --- a/xen/arch/arm/lib/memset.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/memset.S Mon Feb 13 17:26:08 2012 +0000 @@ -14,33 +14,33 @@ #include "assembler.h" - .text - .align 5 - .word 0 + .text + .align 5 + .word 0 -1: subs r2, r2, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? 
- cmp r3, #2 @ 1 - strltb r1, [r0], #1 @ 1 - strleb r1, [r0], #1 @ 1 - strb r1, [r0], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) +1: subs r2, r2, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r1, [r0], #1 @ 1 + strleb r1, [r0], #1 @ 1 + strb r1, [r0], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) /* * The pointer is now aligned and the length is adjusted. Try doing the * memset again. */ ENTRY(memset) - ands r3, r0, #3 @ 1 unaligned? - bne 1b @ 1 + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 /* * we know that the pointer in r0 is aligned to a word boundary. */ - orr r1, r1, r1, lsl #8 - orr r1, r1, r1, lsl #16 - mov r3, r1 - cmp r2, #16 - blt 4f + orr r1, r1, r1, lsl #8 + orr r1, r1, r1, lsl #16 + mov r3, r1 + cmp r2, #16 + blt 4f #if ! CALGN(1)+0 @@ -48,26 +48,26 @@ ENTRY(memset) * We need an extra register for this loop - save the return address and * use the LR */ - str lr, [sp, #-4]! - mov ip, r1 - mov lr, r1 + str lr, [sp, #-4]! + mov ip, r1 + mov lr, r1 -2: subs r2, r2, #64 - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - bgt 2b - ldmeqfd sp!, {pc} @ Now <64 bytes to go. +2: subs r2, r2, #64 + stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + bgt 2b + ldmeqfd sp!, {pc} @ Now <64 bytes to go. /* * No need to correct the count; we're only testing bits from now on */ - tst r2, #32 - stmneia r0!, {r1, r3, ip, lr} - stmneia r0!, {r1, r3, ip, lr} - tst r2, #16 - stmneia r0!, {r1, r3, ip, lr} - ldr lr, [sp], #4 + tst r2, #32 + stmneia r0!, {r1, r3, ip, lr} + stmneia r0!, {r1, r3, ip, lr} + tst r2, #16 + stmneia r0!, {r1, r3, ip, lr} + ldr lr, [sp], #4 #else @@ -76,54 +76,54 @@ 2: subs r2, r2, #64 * whole cache lines at once. */ - stmfd sp!, {r4-r7, lr} - mov r4, r1 - mov r5, r1 - mov r6, r1 - mov r7, r1 - mov ip, r1 - mov lr, r1 + stmfd sp!, {r4-r7, lr} + mov r4, r1 + mov r5, r1 + mov r6, r1 + mov r7, r1 + mov ip, r1 + mov lr, r1 - cmp r2, #96 - tstgt r0, #31 - ble 3f + cmp r2, #96 + tstgt r0, #31 + ble 3f - and ip, r0, #31 - rsb ip, ip, #32 - sub r2, r2, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - tst ip, #(1 << 30) - mov ip, r1 - strne r1, [r0], #4 + and ip, r0, #31 + rsb ip, ip, #32 + sub r2, r2, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + tst ip, #(1 << 30) + mov ip, r1 + strne r1, [r0], #4 -3: subs r2, r2, #64 - stmgeia r0!, {r1, r3-r7, ip, lr} - stmgeia r0!, {r1, r3-r7, ip, lr} - bgt 3b - ldmeqfd sp!, {r4-r7, pc} +3: subs r2, r2, #64 + stmgeia r0!, {r1, r3-r7, ip, lr} + stmgeia r0!, {r1, r3-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} - tst r2, #32 - stmneia r0!, {r1, r3-r7, ip, lr} - tst r2, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} + tst r2, #32 + stmneia r0!, {r1, r3-r7, ip, lr} + tst r2, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} #endif -4: tst r2, #8 - stmneia r0!, {r1, r3} - tst r2, #4 - strne r1, [r0], #4 +4: tst r2, #8 + stmneia r0!, {r1, r3} + tst r2, #4 + strne r1, [r0], #4 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. 
*/ -5: tst r2, #2 - strneb r1, [r0], #1 - strneb r1, [r0], #1 - tst r2, #1 - strneb r1, [r0], #1 - mov pc, lr +5: tst r2, #2 + strneb r1, [r0], #1 + strneb r1, [r0], #1 + tst r2, #1 + strneb r1, [r0], #1 + mov pc, lr ENDPROC(memset) diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/memzero.S --- a/xen/arch/arm/lib/memzero.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/memzero.S Mon Feb 13 17:26:08 2012 +0000 @@ -12,35 +12,35 @@ #include "assembler.h" - .text - .align 5 - .word 0 + .text + .align 5 + .word 0 /* * Align the pointer in r0. r3 contains the number of bytes that we are * mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we * don't bother; we use byte stores instead. */ -1: subs r1, r1, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r2, [r0], #1 @ 1 - strleb r2, [r0], #1 @ 1 - strb r2, [r0], #1 @ 1 - add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) +1: subs r1, r1, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r2, [r0], #1 @ 1 + strleb r2, [r0], #1 @ 1 + strb r2, [r0], #1 @ 1 + add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) /* * The pointer is now aligned and the length is adjusted. Try doing the * memzero again. */ ENTRY(__memzero) - mov r2, #0 @ 1 - ands r3, r0, #3 @ 1 unaligned? - bne 1b @ 1 + mov r2, #0 @ 1 + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 /* * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary. */ - cmp r1, #16 @ 1 we can skip this chunk if we - blt 4f @ 1 have < 16 bytes + cmp r1, #16 @ 1 we can skip this chunk if we + blt 4f @ 1 have < 16 bytes #if ! CALGN(1)+0 @@ -48,26 +48,26 @@ ENTRY(__memzero) * We need an extra register for this loop - save the return address and * use the LR */ - str lr, [sp, #-4]! @ 1 - mov ip, r2 @ 1 - mov lr, r2 @ 1 + str lr, [sp, #-4]! @ 1 + mov ip, r2 @ 1 + mov lr, r2 @ 1 -3: subs r1, r1, #64 @ 1 write 32 bytes out per loop - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - bgt 3b @ 1 - ldmeqfd sp!, {pc} @ 1/2 quick exit +3: subs r1, r1, #64 @ 1 write 32 bytes out per loop + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + bgt 3b @ 1 + ldmeqfd sp!, {pc} @ 1/2 quick exit /* * No need to correct the count; we're only testing bits from now on */ - tst r1, #32 @ 1 - stmneia r0!, {r2, r3, ip, lr} @ 4 - stmneia r0!, {r2, r3, ip, lr} @ 4 - tst r1, #16 @ 1 16 bytes or more? - stmneia r0!, {r2, r3, ip, lr} @ 4 - ldr lr, [sp], #4 @ 1 + tst r1, #32 @ 1 + stmneia r0!, {r2, r3, ip, lr} @ 4 + stmneia r0!, {r2, r3, ip, lr} @ 4 + tst r1, #16 @ 1 16 bytes or more? + stmneia r0!, {r2, r3, ip, lr} @ 4 + ldr lr, [sp], #4 @ 1 #else @@ -76,52 +76,52 @@ 3: subs r1, r1, #64 @ * whole cache lines at once. 
*/ - stmfd sp!, {r4-r7, lr} - mov r4, r2 - mov r5, r2 - mov r6, r2 - mov r7, r2 - mov ip, r2 - mov lr, r2 + stmfd sp!, {r4-r7, lr} + mov r4, r2 + mov r5, r2 + mov r6, r2 + mov r7, r2 + mov ip, r2 + mov lr, r2 - cmp r1, #96 - andgts ip, r0, #31 - ble 3f + cmp r1, #96 + andgts ip, r0, #31 + ble 3f - rsb ip, ip, #32 - sub r1, r1, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - movs ip, ip, lsl #2 - strcs r2, [r0], #4 + rsb ip, ip, #32 + sub r1, r1, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + movs ip, ip, lsl #2 + strcs r2, [r0], #4 -3: subs r1, r1, #64 - stmgeia r0!, {r2-r7, ip, lr} - stmgeia r0!, {r2-r7, ip, lr} - bgt 3b - ldmeqfd sp!, {r4-r7, pc} +3: subs r1, r1, #64 + stmgeia r0!, {r2-r7, ip, lr} + stmgeia r0!, {r2-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} - tst r1, #32 - stmneia r0!, {r2-r7, ip, lr} - tst r1, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} + tst r1, #32 + stmneia r0!, {r2-r7, ip, lr} + tst r1, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} #endif -4: tst r1, #8 @ 1 8 bytes or more? - stmneia r0!, {r2, r3} @ 2 - tst r1, #4 @ 1 4 bytes or more? - strne r2, [r0], #4 @ 1 +4: tst r1, #8 @ 1 8 bytes or more? + stmneia r0!, {r2, r3} @ 2 + tst r1, #4 @ 1 4 bytes or more? + strne r2, [r0], #4 @ 1 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. */ -5: tst r1, #2 @ 1 2 bytes or more? - strneb r2, [r0], #1 @ 1 - strneb r2, [r0], #1 @ 1 - tst r1, #1 @ 1 a byte left over - strneb r2, [r0], #1 @ 1 - mov pc, lr @ 1 +5: tst r1, #2 @ 1 2 bytes or more? + strneb r2, [r0], #1 @ 1 + strneb r2, [r0], #1 @ 1 + tst r1, #1 @ 1 a byte left over + strneb r2, [r0], #1 @ 1 + mov pc, lr @ 1 ENDPROC(__memzero) diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/setbit.S --- a/xen/arch/arm/lib/setbit.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/setbit.S Mon Feb 13 17:26:08 2012 +0000 @@ -11,8 +11,8 @@ #include "assembler.h" #include "bitops.h" - .text + .text ENTRY(_set_bit) - bitop orr + bitop orr ENDPROC(_set_bit) diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/testchangebit.S --- a/xen/arch/arm/lib/testchangebit.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/testchangebit.S Mon Feb 13 17:26:08 2012 +0000 @@ -14,5 +14,5 @@ .text ENTRY(_test_and_change_bit) - testop eor, str + testop eor, str ENDPROC(_test_and_change_bit) diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/testclearbit.S --- a/xen/arch/arm/lib/testclearbit.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/testclearbit.S Mon Feb 13 17:26:08 2012 +0000 @@ -14,5 +14,5 @@ .text ENTRY(_test_and_clear_bit) - testop bicne, strne + testop bicne, strne ENDPROC(_test_and_clear_bit) diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/testsetbit.S --- a/xen/arch/arm/lib/testsetbit.S Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/arch/arm/lib/testsetbit.S Mon Feb 13 17:26:08 2012 +0000 @@ -14,5 +14,5 @@ .text ENTRY(_test_and_set_bit) - testop orreq, streq + testop orreq, streq ENDPROC(_test_and_set_bit) diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/include/asm-arm/bitops.h --- a/xen/include/asm-arm/bitops.h Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/include/asm-arm/bitops.h Mon Feb 13 17:26:08 2012 +0000 @@ -115,19 +115,19 @@ extern int _find_next_bit_be(const unsig /* * These are the little endian, atomic definitions. 
*/ -#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz) -#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off) -#define find_first_bit(p,sz) _find_first_bit_le(p,sz) -#define find_next_bit(p,sz,off) _find_next_bit_le(p,sz,off) +#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz) +#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off) +#define find_first_bit(p,sz) _find_first_bit_le(p,sz) +#define find_next_bit(p,sz,off) _find_next_bit_le(p,sz,off) #else /* * These are the big endian, atomic definitions. */ -#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz) -#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off) -#define find_first_bit(p,sz) _find_first_bit_be(p,sz) -#define find_next_bit(p,sz,off) _find_next_bit_be(p,sz,off) +#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz) +#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off) +#define find_first_bit(p,sz) _find_first_bit_be(p,sz) +#define find_next_bit(p,sz,off) _find_next_bit_be(p,sz,off) #endif diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/include/asm-arm/div64.h --- a/xen/include/asm-arm/div64.h Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/include/asm-arm/div64.h Mon Feb 13 17:26:08 2012 +0000 @@ -10,9 +10,9 @@ * * uint32_t do_div(uint64_t *n, uint32_t base) * { - * uint32_t remainder = *n % base; - * *n = *n / base; - * return remainder; + * uint32_t remainder = *n % base; + * *n = *n / base; + * return remainder; * } * * In other words, a 64-bit dividend with a 32-bit divisor producing @@ -29,22 +29,22 @@ #define __xh "r1" #endif -#define __do_div_asm(n, base) \ -({ \ - register unsigned int __base asm("r4") = base; \ - register unsigned long long __n asm("r0") = n; \ - register unsigned long long __res asm("r2"); \ - register unsigned int __rem asm(__xh); \ - asm( __asmeq("%0", __xh) \ - __asmeq("%1", "r2") \ - __asmeq("%2", "r0") \ - __asmeq("%3", "r4") \ - "bl __do_div64" \ - : "=r" (__rem), "=r" (__res) \ - : "r" (__n), "r" (__base) \ - : "ip", "lr", "cc"); \ - n = __res; \ - __rem; \ +#define __do_div_asm(n, base) \ +({ \ + register unsigned int __base asm("r4") = base; \ + register unsigned long long __n asm("r0") = n; \ + register unsigned long long __res asm("r2"); \ + register unsigned int __rem asm(__xh); \ + asm( __asmeq("%0", __xh) \ + __asmeq("%1", "r2") \ + __asmeq("%2", "r0") \ + __asmeq("%3", "r4") \ + "bl __do_div64" \ + : "=r" (__rem), "=r" (__res) \ + : "r" (__n), "r" (__base) \ + : "ip", "lr", "cc"); \ + n = __res; \ + __rem; \ }) #if __GNUC__ < 4 @@ -71,155 +71,155 @@ * sufficiently recent to perform proper long long constant propagation. * (It is unfortunate that gcc doesn't perform all this internally.) */ -#define do_div(n, base) \ -({ \ - unsigned int __r, __b = (base); \ - if (!__builtin_constant_p(__b) || __b == 0) { \ - /* non-constant divisor (or zero): slow path */ \ - __r = __do_div_asm(n, __b); \ - } else if ((__b & (__b - 1)) == 0) { \ - /* Trivial: __b is constant and a power of 2 */ \ - /* gcc does the right thing with this code. */ \ - __r = n; \ - __r &= (__b - 1); \ - n /= __b; \ - } else { \ - /* Multiply by inverse of __b: n/b = n*(p/b)/p */ \ - /* We rely on the fact that most of this code gets */ \ - /* optimized away at compile time due to constant */ \ - /* propagation and only a couple inline assembly */ \ - /* instructions should remain. Better avoid any */ \ - /* code construct that might prevent that. 
*/ \ - unsigned long long __res, __x, __t, __m, __n = n; \ - unsigned int __c, __p, __z = 0; \ - /* preserve low part of n for reminder computation */ \ - __r = __n; \ - /* determine number of bits to represent __b */ \ - __p = 1 << __div64_fls(__b); \ - /* compute __m = ((__p << 64) + __b - 1) / __b */ \ - __m = (~0ULL / __b) * __p; \ - __m += (((~0ULL % __b + 1) * __p) + __b - 1) / __b; \ - /* compute __res = __m*(~0ULL/__b*__b-1)/(__p << 64) */ \ - __x = ~0ULL / __b * __b - 1; \ - __res = (__m & 0xffffffff) * (__x & 0xffffffff); \ - __res >>= 32; \ - __res += (__m & 0xffffffff) * (__x >> 32); \ - __t = __res; \ - __res += (__x & 0xffffffff) * (__m >> 32); \ - __t = (__res < __t) ? (1ULL << 32) : 0; \ - __res = (__res >> 32) + __t; \ - __res += (__m >> 32) * (__x >> 32); \ - __res /= __p; \ - /* Now sanitize and optimize what we've got. */ \ - if (~0ULL % (__b / (__b & -__b)) == 0) { \ - /* those cases can be simplified with: */ \ - __n /= (__b & -__b); \ - __m = ~0ULL / (__b / (__b & -__b)); \ - __p = 1; \ - __c = 1; \ - } else if (__res != __x / __b) { \ - /* We can't get away without a correction */ \ - /* to compensate for bit truncation errors. */ \ - /* To avoid it we'd need an additional bit */ \ - /* to represent __m which would overflow it. */ \ - /* Instead we do m=p/b and n/b=(n*m+m)/p. */ \ - __c = 1; \ - /* Compute __m = (__p << 64) / __b */ \ - __m = (~0ULL / __b) * __p; \ - __m += ((~0ULL % __b + 1) * __p) / __b; \ - } else { \ - /* Reduce __m/__p, and try to clear bit 31 */ \ - /* of __m when possible otherwise that'll */ \ - /* need extra overflow handling later. */ \ - unsigned int __bits = -(__m & -__m); \ - __bits |= __m >> 32; \ - __bits = (~__bits) << 1; \ - /* If __bits == 0 then setting bit 31 is */ \ - /* unavoidable. Simply apply the maximum */ \ - /* possible reduction in that case. */ \ - /* Otherwise the MSB of __bits indicates the */ \ - /* best reduction we should apply. */ \ - if (!__bits) { \ - __p /= (__m & -__m); \ - __m /= (__m & -__m); \ - } else { \ - __p >>= __div64_fls(__bits); \ - __m >>= __div64_fls(__bits); \ - } \ - /* No correction needed. */ \ - __c = 0; \ - } \ - /* Now we have a combination of 2 conditions: */ \ - /* 1) whether or not we need a correction (__c), and */ \ - /* 2) whether or not there might be an overflow in */ \ - /* the cross product (__m & ((1<<63) | (1<<31))) */ \ - /* Select the best insn combination to perform the */ \ - /* actual __m * __n / (__p << 64) operation. 
*/ \ - if (!__c) { \ - asm ( "umull %Q0, %R0, %1, %Q2\n\t" \ - "mov %Q0, #0" \ - : "=&r" (__res) \ - : "r" (__m), "r" (__n) \ - : "cc" ); \ - } else if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \ - __res = __m; \ - asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t" \ - "mov %Q0, #0" \ - : "+&r" (__res) \ - : "r" (__m), "r" (__n) \ - : "cc" ); \ - } else { \ - asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" \ - "cmn %Q0, %Q1\n\t" \ - "adcs %R0, %R0, %R1\n\t" \ - "adc %Q0, %3, #0" \ - : "=&r" (__res) \ - : "r" (__m), "r" (__n), "r" (__z) \ - : "cc" ); \ - } \ - if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \ - asm ( "umlal %R0, %Q0, %R1, %Q2\n\t" \ - "umlal %R0, %Q0, %Q1, %R2\n\t" \ - "mov %R0, #0\n\t" \ - "umlal %Q0, %R0, %R1, %R2" \ - : "+&r" (__res) \ - : "r" (__m), "r" (__n) \ - : "cc" ); \ - } else { \ - asm ( "umlal %R0, %Q0, %R2, %Q3\n\t" \ - "umlal %R0, %1, %Q2, %R3\n\t" \ - "mov %R0, #0\n\t" \ - "adds %Q0, %1, %Q0\n\t" \ - "adc %R0, %R0, #0\n\t" \ - "umlal %Q0, %R0, %R2, %R3" \ - : "+&r" (__res), "+&r" (__z) \ - : "r" (__m), "r" (__n) \ - : "cc" ); \ - } \ - __res /= __p; \ - /* The reminder can be computed with 32-bit regs */ \ - /* only, and gcc is good at that. */ \ - { \ - unsigned int __res0 = __res; \ - unsigned int __b0 = __b; \ - __r -= __res0 * __b0; \ - } \ - /* BUG_ON(__r >= __b || __res * __b + __r != n); */ \ - n = __res; \ - } \ - __r; \ +#define do_div(n, base) \ +({ \ + unsigned int __r, __b = (base); \ + if (!__builtin_constant_p(__b) || __b == 0) { \ + /* non-constant divisor (or zero): slow path */ \ + __r = __do_div_asm(n, __b); \ + } else if ((__b & (__b - 1)) == 0) { \ + /* Trivial: __b is constant and a power of 2 */ \ + /* gcc does the right thing with this code. */ \ + __r = n; \ + __r &= (__b - 1); \ + n /= __b; \ + } else { \ + /* Multiply by inverse of __b: n/b = n*(p/b)/p */ \ + /* We rely on the fact that most of this code gets */ \ + /* optimized away at compile time due to constant */ \ + /* propagation and only a couple inline assembly */ \ + /* instructions should remain. Better avoid any */ \ + /* code construct that might prevent that. */ \ + unsigned long long __res, __x, __t, __m, __n = n; \ + unsigned int __c, __p, __z = 0; \ + /* preserve low part of n for reminder computation */ \ + __r = __n; \ + /* determine number of bits to represent __b */ \ + __p = 1 << __div64_fls(__b); \ + /* compute __m = ((__p << 64) + __b - 1) / __b */ \ + __m = (~0ULL / __b) * __p; \ + __m += (((~0ULL % __b + 1) * __p) + __b - 1) / __b; \ + /* compute __res = __m*(~0ULL/__b*__b-1)/(__p << 64) */ \ + __x = ~0ULL / __b * __b - 1; \ + __res = (__m & 0xffffffff) * (__x & 0xffffffff); \ + __res >>= 32; \ + __res += (__m & 0xffffffff) * (__x >> 32); \ + __t = __res; \ + __res += (__x & 0xffffffff) * (__m >> 32); \ + __t = (__res < __t) ? (1ULL << 32) : 0; \ + __res = (__res >> 32) + __t; \ + __res += (__m >> 32) * (__x >> 32); \ + __res /= __p; \ + /* Now sanitize and optimize what we've got. */ \ + if (~0ULL % (__b / (__b & -__b)) == 0) { \ + /* those cases can be simplified with: */ \ + __n /= (__b & -__b); \ + __m = ~0ULL / (__b / (__b & -__b)); \ + __p = 1; \ + __c = 1; \ + } else if (__res != __x / __b) { \ + /* We can't get away without a correction */ \ + /* to compensate for bit truncation errors. */ \ + /* To avoid it we'd need an additional bit */ \ + /* to represent __m which would overflow it. */ \ + /* Instead we do m=p/b and n/b=(n*m+m)/p. 
*/ \ + __c = 1; \ + /* Compute __m = (__p << 64) / __b */ \ + __m = (~0ULL / __b) * __p; \ + __m += ((~0ULL % __b + 1) * __p) / __b; \ + } else { \ + /* Reduce __m/__p, and try to clear bit 31 */ \ + /* of __m when possible otherwise that'll */ \ + /* need extra overflow handling later. */ \ + unsigned int __bits = -(__m & -__m); \ + __bits |= __m >> 32; \ + __bits = (~__bits) << 1; \ + /* If __bits == 0 then setting bit 31 is */ \ + /* unavoidable. Simply apply the maximum */ \ + /* possible reduction in that case. */ \ + /* Otherwise the MSB of __bits indicates the */ \ + /* best reduction we should apply. */ \ + if (!__bits) { \ + __p /= (__m & -__m); \ + __m /= (__m & -__m); \ + } else { \ + __p >>= __div64_fls(__bits); \ + __m >>= __div64_fls(__bits); \ + } \ + /* No correction needed. */ \ + __c = 0; \ + } \ + /* Now we have a combination of 2 conditions: */ \ + /* 1) whether or not we need a correction (__c), and */ \ + /* 2) whether or not there might be an overflow in */ \ + /* the cross product (__m & ((1<<63) | (1<<31))) */ \ + /* Select the best insn combination to perform the */ \ + /* actual __m * __n / (__p << 64) operation. */ \ + if (!__c) { \ + asm ( "umull %Q0, %R0, %1, %Q2\n\t" \ + "mov %Q0, #0" \ + : "=&r" (__res) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } else if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \ + __res = __m; \ + asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t" \ + "mov %Q0, #0" \ + : "+&r" (__res) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } else { \ + asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" \ + "cmn %Q0, %Q1\n\t" \ + "adcs %R0, %R0, %R1\n\t" \ + "adc %Q0, %3, #0" \ + : "=&r" (__res) \ + : "r" (__m), "r" (__n), "r" (__z) \ + : "cc" ); \ + } \ + if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \ + asm ( "umlal %R0, %Q0, %R1, %Q2\n\t" \ + "umlal %R0, %Q0, %Q1, %R2\n\t" \ + "mov %R0, #0\n\t" \ + "umlal %Q0, %R0, %R1, %R2" \ + : "+&r" (__res) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } else { \ + asm ( "umlal %R0, %Q0, %R2, %Q3\n\t" \ + "umlal %R0, %1, %Q2, %R3\n\t" \ + "mov %R0, #0\n\t" \ + "adds %Q0, %1, %Q0\n\t" \ + "adc %R0, %R0, #0\n\t" \ + "umlal %Q0, %R0, %R2, %R3" \ + : "+&r" (__res), "+&r" (__z) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } \ + __res /= __p; \ + /* The reminder can be computed with 32-bit regs */ \ + /* only, and gcc is good at that. */ \ + { \ + unsigned int __res0 = __res; \ + unsigned int __b0 = __b; \ + __r -= __res0 * __b0; \ + } \ + /* BUG_ON(__r >= __b || __res * __b + __r != n); */ \ + n = __res; \ + } \ + __r; \ }) /* our own fls implementation to make sure constant propagation is fine */ -#define __div64_fls(bits) \ -({ \ - unsigned int __left = (bits), __nr = 0; \ - if (__left & 0xffff0000) __nr += 16, __left >>= 16; \ - if (__left & 0x0000ff00) __nr += 8, __left >>= 8; \ - if (__left & 0x000000f0) __nr += 4, __left >>= 4; \ - if (__left & 0x0000000c) __nr += 2, __left >>= 2; \ - if (__left & 0x00000002) __nr += 1; \ - __nr; \ +#define __div64_fls(bits) \ +({ \ + unsigned int __left = (bits), __nr = 0; \ + if (__left & 0xffff0000) __nr += 16, __left >>= 16; \ + if (__left & 0x0000ff00) __nr += 8, __left >>= 8; \ + if (__left & 0x000000f0) __nr += 4, __left >>= 4; \ + if (__left & 0x0000000c) __nr += 2, __left >>= 2; \ + if (__left & 0x00000002) __nr += 1; \ + __nr; \ }) #endif diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/include/asm-arm/numa.h --- a/xen/include/asm-arm/numa.h Mon Feb 13 17:03:44 2012 +0000 +++ b/xen/include/asm-arm/numa.h Mon Feb 13 17:26:08 2012 +0000 @@ -3,7 +3,7 @@ /* Fake one node for now... 
*/ #define cpu_to_node(cpu) 0 -#define node_to_cpumask(node) (cpu_online_map) +#define node_to_cpumask(node) (cpu_online_map) static inline __attribute__((pure)) int phys_to_nid(paddr_t addr) { _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel