[Xen-devel] [PATCH] ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+



ARMv6 and greater are guaranteed to have the "bx" instruction, which can
be used to return from function calls.  Recent CPUs perform better when
the "bx lr" instruction is used rather than "mov pc, lr", and the ARM
architecture manual strongly recommends this sequence (section A.4.1.1).

We provide a new macro "ret", with variants for each condition code,
which resolves to the appropriate instruction.
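
For example, an illustrative sketch of how the macro expands (the macro
itself is in the assembler.h hunk below):

        ret     lr              @ "bx lr" on ARMv6+, "mov pc, lr" on earlier
        reteq   lr              @ conditional variants: "bxeq lr" / "moveq pc, lr"
        ret     r9              @ registers other than lr keep "mov pc, r9"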

Rather than doing this piecemeal, and missing some instances, change all
the "mov pc" instances to use the new macro, with the exception of the
"movs" instruction and the kprobes code.  This allows us to detect the
"mov pc, lr" case and fix it up - and also gives us the possibility of
deploying this for other registers depending on the CPU selection.

Signed-off-by: Russell King <rmk+kernel@xxxxxxxxxxxxxxxx>
---
(Resent without the long Cc list)
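
A note on the "ret.w" variant (illustrative; see the assembler.h and
entry-header.S hunks below): it replaces the old "movw_pc" macro used in
the call_fpe jump table, where each entry must occupy exactly one 32-bit
word.  Under a Thumb-2 kernel "bx lr" assembles to a 16-bit encoding, so
the macro pads with a nop to keep the table stride constant:

        ret.w   lr              @ Thumb-2: 16-bit "bx lr" + 16-bit "nop" = one word
                                @ ARM: a single 32-bit "bx lr", no padding needed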

 arch/arm/crypto/aes-armv4.S              |  3 ++-
 arch/arm/include/asm/assembler.h         | 21 +++++++++++++++
 arch/arm/include/asm/entry-macro-multi.S |  2 +-
 arch/arm/kernel/debug.S                  | 10 ++++----
 arch/arm/kernel/entry-armv.S             | 42 +++++++++++++++---------------
 arch/arm/kernel/entry-common.S           | 13 +++++-----
 arch/arm/kernel/entry-header.S           | 14 ----------
 arch/arm/kernel/fiqasm.S                 |  4 +--
 arch/arm/kernel/head-common.S            |  7 ++---
 arch/arm/kernel/head-nommu.S             |  8 +++---
 arch/arm/kernel/head.S                   | 18 ++++++-------
 arch/arm/kernel/hyp-stub.S               |  6 ++---
 arch/arm/kernel/iwmmxt.S                 | 10 ++++----
 arch/arm/kernel/relocate_kernel.S        |  3 ++-
 arch/arm/kernel/sleep.S                  |  2 +-
 arch/arm/kvm/init.S                      |  3 ++-
 arch/arm/lib/ashldi3.S                   |  3 ++-
 arch/arm/lib/ashrdi3.S                   |  3 ++-
 arch/arm/lib/backtrace.S                 |  2 +-
 arch/arm/lib/bitops.h                    |  5 ++--
 arch/arm/lib/bswapsdi2.S                 |  5 ++--
 arch/arm/lib/call_with_stack.S           |  4 +--
 arch/arm/lib/csumpartial.S               |  2 +-
 arch/arm/lib/csumpartialcopygeneric.S    |  5 ++--
 arch/arm/lib/delay-loop.S                | 18 ++++++-------
 arch/arm/lib/div64.S                     | 13 +++++-----
 arch/arm/lib/findbit.S                   | 10 ++++----
 arch/arm/lib/getuser.S                   |  8 +++---
 arch/arm/lib/io-readsb.S                 |  2 +-
 arch/arm/lib/io-readsl.S                 |  6 ++---
 arch/arm/lib/io-readsw-armv3.S           |  4 +--
 arch/arm/lib/io-readsw-armv4.S           |  2 +-
 arch/arm/lib/io-writesb.S                |  2 +-
 arch/arm/lib/io-writesl.S                | 10 ++++----
 arch/arm/lib/io-writesw-armv3.S          |  4 +--
 arch/arm/lib/io-writesw-armv4.S          |  4 +--
 arch/arm/lib/lib1funcs.S                 | 26 +++++++++----------
 arch/arm/lib/lshrdi3.S                   |  3 ++-
 arch/arm/lib/memchr.S                    |  2 +-
 arch/arm/lib/memset.S                    |  2 +-
 arch/arm/lib/memzero.S                   |  2 +-
 arch/arm/lib/muldi3.S                    |  3 ++-
 arch/arm/lib/putuser.S                   | 10 ++++----
 arch/arm/lib/strchr.S                    |  2 +-
 arch/arm/lib/strrchr.S                   |  2 +-
 arch/arm/lib/ucmpdi2.S                   |  5 ++--
 arch/arm/mach-davinci/sleep.S            |  2 +-
 arch/arm/mach-ep93xx/crunch-bits.S       |  6 ++---
 arch/arm/mach-imx/suspend-imx6.S         |  5 ++--
 arch/arm/mach-mvebu/coherency_ll.S       | 10 ++++----
 arch/arm/mach-mvebu/headsmp-a9.S         |  3 ++-
 arch/arm/mach-omap2/sleep44xx.S          |  3 ++-
 arch/arm/mach-omap2/sram242x.S           |  6 ++---
 arch/arm/mach-omap2/sram243x.S           |  6 ++---
 arch/arm/mach-pxa/mioa701_bootresume.S   |  2 +-
 arch/arm/mach-pxa/standby.S              |  4 +--
 arch/arm/mach-s3c24xx/sleep-s3c2410.S    |  2 +-
 arch/arm/mach-s3c24xx/sleep-s3c2412.S    |  2 +-
 arch/arm/mach-shmobile/headsmp.S         |  3 ++-
 arch/arm/mach-tegra/sleep-tegra20.S      | 24 ++++++++---------
 arch/arm/mach-tegra/sleep-tegra30.S      | 14 +++++-----
 arch/arm/mach-tegra/sleep.S              |  8 +++---
 arch/arm/mm/cache-fa.S                   | 19 +++++++-------
 arch/arm/mm/cache-nop.S                  |  5 ++--
 arch/arm/mm/cache-v4.S                   | 13 +++++-----
 arch/arm/mm/cache-v4wb.S                 | 15 ++++++-----
 arch/arm/mm/cache-v4wt.S                 | 13 +++++-----
 arch/arm/mm/cache-v6.S                   | 20 +++++++--------
 arch/arm/mm/cache-v7.S                   | 30 +++++++++++-----------
 arch/arm/mm/l2c-l2x0-resume.S            |  7 ++---
 arch/arm/mm/proc-arm1020.S               | 34 ++++++++++++------------
 arch/arm/mm/proc-arm1020e.S              | 34 ++++++++++++------------
 arch/arm/mm/proc-arm1022.S               | 34 ++++++++++++------------
 arch/arm/mm/proc-arm1026.S               | 34 ++++++++++++------------
 arch/arm/mm/proc-arm720.S                | 16 ++++++------
 arch/arm/mm/proc-arm740.S                |  8 +++---
 arch/arm/mm/proc-arm7tdmi.S              |  8 +++---
 arch/arm/mm/proc-arm920.S                | 34 ++++++++++++------------
 arch/arm/mm/proc-arm922.S                | 34 ++++++++++++------------
 arch/arm/mm/proc-arm925.S                | 34 ++++++++++++------------
 arch/arm/mm/proc-arm926.S                | 34 ++++++++++++------------
 arch/arm/mm/proc-arm940.S                | 24 ++++++++---------
 arch/arm/mm/proc-arm946.S                | 30 +++++++++++-----------
 arch/arm/mm/proc-arm9tdmi.S              |  8 +++---
 arch/arm/mm/proc-fa526.S                 | 16 ++++++------
 arch/arm/mm/proc-feroceon.S              | 44 ++++++++++++++++----------------
 arch/arm/mm/proc-mohawk.S                | 34 ++++++++++++------------
 arch/arm/mm/proc-sa110.S                 | 16 ++++++------
 arch/arm/mm/proc-sa1100.S                | 16 ++++++------
 arch/arm/mm/proc-v6.S                    | 16 ++++++------
 arch/arm/mm/proc-v7-2level.S             |  4 +--
 arch/arm/mm/proc-v7-3level.S             |  5 ++--
 arch/arm/mm/proc-v7.S                    | 14 +++++-----
 arch/arm/mm/proc-v7m.S                   | 18 ++++++-------
 arch/arm/mm/proc-xsc3.S                  | 32 +++++++++++------------
 arch/arm/mm/proc-xscale.S                | 34 ++++++++++++------------
 arch/arm/mm/tlb-fa.S                     |  7 ++---
 arch/arm/mm/tlb-v4.S                     |  5 ++--
 arch/arm/mm/tlb-v4wb.S                   |  7 ++---
 arch/arm/mm/tlb-v4wbi.S                  |  7 ++---
 arch/arm/mm/tlb-v6.S                     |  5 ++--
 arch/arm/mm/tlb-v7.S                     |  4 +--
 arch/arm/nwfpe/entry.S                   |  8 +++---
 arch/arm/vfp/entry.S                     |  4 +--
 arch/arm/vfp/vfphw.S                     | 26 +++++++++----------
 arch/arm/xen/hypercall.S                 |  6 ++---
 106 files changed, 644 insertions(+), 607 deletions(-)

diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
index 3a14ea8fe97e..ebb9761fb572 100644
--- a/arch/arm/crypto/aes-armv4.S
+++ b/arch/arm/crypto/aes-armv4.S
@@ -35,6 +35,7 @@
 @ that is being targetted.
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 .text
 
@@ -648,7 +649,7 @@ _armv4_AES_set_encrypt_key:
 
 .Ldone:        mov     r0,#0
        ldmia   sp!,{r4-r12,lr}
-.Labrt:        mov     pc,lr
+.Labrt:        ret     lr
 ENDPROC(private_AES_set_encrypt_key)
 
 .align 5
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 906703a5b564..f67fd3afebdf 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -427,4 +427,25 @@ THUMB(     orr     \reg , \reg , #PSR_T_BIT        )
 #endif
        .endm
 
+       .irp    c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+       .macro  ret\c, reg
+#if __LINUX_ARM_ARCH__ < 6
+       mov\c   pc, \reg
+#else
+       .ifeqs  "\reg", "lr"
+       bx\c    \reg
+       .else
+       mov\c   pc, \reg
+       .endif
+#endif
+       .endm
+       .endr
+
+       .macro  ret.w, reg
+       ret     \reg
+#ifdef CONFIG_THUMB2_KERNEL
+       nop
+#endif
+       .endm
+
 #endif /* __ASM_ASSEMBLER_H__ */
diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S
index 88d61815f0c0..469a2b30fa27 100644
--- a/arch/arm/include/asm/entry-macro-multi.S
+++ b/arch/arm/include/asm/entry-macro-multi.S
@@ -35,5 +35,5 @@
 \symbol_name:
        mov     r8, lr
        arch_irq_handler_default
-       mov     pc, r8
+       ret     r8
        .endm
diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S
index 14f7c3b14632..78c91b5f97d4 100644
--- a/arch/arm/kernel/debug.S
+++ b/arch/arm/kernel/debug.S
@@ -90,7 +90,7 @@ ENTRY(printascii)
                ldrneb  r1, [r0], #1
                teqne   r1, #0
                bne     1b
-               mov     pc, lr
+               ret     lr
 ENDPROC(printascii)
 
 ENTRY(printch)
@@ -105,7 +105,7 @@ ENTRY(debug_ll_addr)
                addruart r2, r3, ip
                str     r2, [r0]
                str     r3, [r1]
-               mov     pc, lr
+               ret     lr
 ENDPROC(debug_ll_addr)
 #endif
 
@@ -116,7 +116,7 @@ ENTRY(printascii)
                mov     r0, #0x04               @ SYS_WRITE0
        ARM(    svc     #0x123456       )
        THUMB(  svc     #0xab           )
-               mov     pc, lr
+               ret     lr
 ENDPROC(printascii)
 
 ENTRY(printch)
@@ -125,14 +125,14 @@ ENTRY(printch)
                mov     r0, #0x03               @ SYS_WRITEC
        ARM(    svc     #0x123456       )
        THUMB(  svc     #0xab           )
-               mov     pc, lr
+               ret     lr
 ENDPROC(printch)
 
 ENTRY(debug_ll_addr)
                mov     r2, #0
                str     r2, [r0]
                str     r2, [r1]
-               mov     pc, lr
+               ret     lr
 ENDPROC(debug_ll_addr)
 
 #endif
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 52a949a8077d..36276cdccfbc 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -224,7 +224,7 @@ svc_preempt:
 1:     bl      preempt_schedule_irq            @ irq en/disable is done inside
        ldr     r0, [tsk, #TI_FLAGS]            @ get new tasks TI_FLAGS
        tst     r0, #_TIF_NEED_RESCHED
-       moveq   pc, r8                          @ go again
+       reteq   r8                              @ go again
        b       1b
 #endif
 
@@ -490,7 +490,7 @@ ENDPROC(__und_usr)
        .pushsection .fixup, "ax"
        .align  2
 4:     str     r4, [sp, #S_PC]                 @ retry current instruction
-       mov     pc, r9
+       ret     r9
        .popsection
        .pushsection __ex_table,"a"
        .long   1b, 4b
@@ -552,7 +552,7 @@ call_fpe:
 #endif
        tst     r0, #0x08000000                 @ only CDP/CPRT/LDC/STC have bit 27
        tstne   r0, #0x04000000                 @ bit 26 set on both ARM and Thumb-2
-       moveq   pc, lr
+       reteq   lr
        and     r8, r0, #0x00000f00             @ mask out CP number
  THUMB(        lsr     r8, r8, #8              )
        mov     r7, #1
@@ -571,33 +571,33 @@ call_fpe:
  THUMB(        add     pc, r8                  )
        nop
 
-       movw_pc lr                              @ CP#0
+       ret.w   lr                              @ CP#0
        W(b)    do_fpe                          @ CP#1 (FPE)
        W(b)    do_fpe                          @ CP#2 (FPE)
-       movw_pc lr                              @ CP#3
+       ret.w   lr                              @ CP#3
 #ifdef CONFIG_CRUNCH
        b       crunch_task_enable              @ CP#4 (MaverickCrunch)
        b       crunch_task_enable              @ CP#5 (MaverickCrunch)
        b       crunch_task_enable              @ CP#6 (MaverickCrunch)
 #else
-       movw_pc lr                              @ CP#4
-       movw_pc lr                              @ CP#5
-       movw_pc lr                              @ CP#6
+       ret.w   lr                              @ CP#4
+       ret.w   lr                              @ CP#5
+       ret.w   lr                              @ CP#6
 #endif
-       movw_pc lr                              @ CP#7
-       movw_pc lr                              @ CP#8
-       movw_pc lr                              @ CP#9
+       ret.w   lr                              @ CP#7
+       ret.w   lr                              @ CP#8
+       ret.w   lr                              @ CP#9
 #ifdef CONFIG_VFP
        W(b)    do_vfp                          @ CP#10 (VFP)
        W(b)    do_vfp                          @ CP#11 (VFP)
 #else
-       movw_pc lr                              @ CP#10 (VFP)
-       movw_pc lr                              @ CP#11 (VFP)
+       ret.w   lr                              @ CP#10 (VFP)
+       ret.w   lr                              @ CP#11 (VFP)
 #endif
-       movw_pc lr                              @ CP#12
-       movw_pc lr                              @ CP#13
-       movw_pc lr                              @ CP#14 (Debug)
-       movw_pc lr                              @ CP#15 (Control)
+       ret.w   lr                              @ CP#12
+       ret.w   lr                              @ CP#13
+       ret.w   lr                              @ CP#14 (Debug)
+       ret.w   lr                              @ CP#15 (Control)
 
 #ifdef NEED_CPU_ARCHITECTURE
        .align  2
@@ -649,7 +649,7 @@ ENTRY(fp_enter)
        .popsection
 
 ENTRY(no_fp)
-       mov     pc, lr
+       ret     lr
 ENDPROC(no_fp)
 
 __und_usr_fault_32:
@@ -745,7 +745,7 @@ ENDPROC(__switch_to)
 #ifdef CONFIG_ARM_THUMB
        bx      \reg
 #else
-       mov     pc, \reg
+       ret     \reg
 #endif
        .endm
 
@@ -837,7 +837,7 @@ kuser_cmpxchg64_fixup:
 #if __LINUX_ARM_ARCH__ < 6
        bcc     kuser_cmpxchg32_fixup
 #endif
-       mov     pc, lr
+       ret     lr
        .previous
 
 #else
@@ -905,7 +905,7 @@ kuser_cmpxchg32_fixup:
        subs    r8, r4, r7
        rsbcss  r8, r8, #(2b - 1b)
        strcs   r7, [sp, #S_PC]
-       mov     pc, lr
+       ret     lr
        .previous
 
 #else
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 7139d4a7dea7..e52fe5a2d843 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <asm/assembler.h>
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
 #include <asm/unwind.h>
@@ -88,7 +89,7 @@ ENTRY(ret_from_fork)
        cmp     r5, #0
        movne   r0, r4
        adrne   lr, BSYM(1f)
-       movne   pc, r5
+       retne   r5
 1:     get_thread_info tsk
        b       ret_slow_syscall
 ENDPROC(ret_from_fork)
@@ -290,7 +291,7 @@ ENDPROC(ftrace_graph_caller_old)
 
 .macro mcount_exit
        ldmia   sp!, {r0-r3, ip, lr}
-       mov     pc, ip
+       ret     ip
 .endm
 
 ENTRY(__gnu_mcount_nc)
@@ -298,7 +299,7 @@ UNWIND(.fnstart)
 #ifdef CONFIG_DYNAMIC_FTRACE
        mov     ip, lr
        ldmia   sp!, {lr}
-       mov     pc, ip
+       ret     ip
 #else
        __mcount
 #endif
@@ -333,12 +334,12 @@ return_to_handler:
        bl      ftrace_return_to_handler
        mov     lr, r0                  @ r0 has real ret addr
        ldmia   sp!, {r0-r3}
-       mov     pc, lr
+       ret     lr
 #endif
 
 ENTRY(ftrace_stub)
 .Lftrace_stub:
-       mov     pc, lr
+       ret     lr
 ENDPROC(ftrace_stub)
 
 #endif /* CONFIG_FUNCTION_TRACER */
@@ -561,7 +562,7 @@ sys_mmap2:
                streq   r5, [sp, #4]
                beq     sys_mmap_pgoff
                mov     r0, #-EINVAL
-               mov     pc, lr
+               ret     lr
 #else
                str     r5, [sp, #4]
                b       sys_mmap_pgoff
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 5d702f8900b1..8db307d0954b 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -240,12 +240,6 @@
        movs    pc, lr                          @ return & move spsr_svc into cpsr
        .endm
 
-       @
-       @ 32-bit wide "mov pc, reg"
-       @
-       .macro  movw_pc, reg
-       mov     pc, \reg
-       .endm
 #else  /* CONFIG_THUMB2_KERNEL */
        .macro  svc_exit, rpsr, irq = 0
        .if     \irq != 0
@@ -304,14 +298,6 @@
        movs    pc, lr                          @ return & move spsr_svc into cpsr
        .endm
 #endif /* ifdef CONFIG_CPU_V7M / else */
-
-       @
-       @ 32-bit wide "mov pc, reg"
-       @
-       .macro  movw_pc, reg
-       mov     pc, \reg
-       nop
-       .endm
 #endif /* !CONFIG_THUMB2_KERNEL */
 
 /*
diff --git a/arch/arm/kernel/fiqasm.S b/arch/arm/kernel/fiqasm.S
index 207f9d652010..8dd26e1a9bd6 100644
--- a/arch/arm/kernel/fiqasm.S
+++ b/arch/arm/kernel/fiqasm.S
@@ -32,7 +32,7 @@ ENTRY(__set_fiq_regs)
        ldr     lr, [r0]
        msr     cpsr_c, r1      @ return to SVC mode
        mov     r0, r0          @ avoid hazard prior to ARMv4
-       mov     pc, lr
+       ret     lr
 ENDPROC(__set_fiq_regs)
 
 ENTRY(__get_fiq_regs)
@@ -45,5 +45,5 @@ ENTRY(__get_fiq_regs)
        str     lr, [r0]
        msr     cpsr_c, r1      @ return to SVC mode
        mov     r0, r0          @ avoid hazard prior to ARMv4
-       mov     pc, lr
+       ret     lr
 ENDPROC(__get_fiq_regs)
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 572a38335c96..8733012d231f 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -10,6 +10,7 @@
  * published by the Free Software Foundation.
  *
  */
+#include <asm/assembler.h>
 
 #define ATAG_CORE 0x54410001
 #define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2)
@@ -61,10 +62,10 @@ __vet_atags:
        cmp     r5, r6
        bne     1f
 
-2:     mov     pc, lr                          @ atag/dtb pointer is ok
+2:     ret     lr                              @ atag/dtb pointer is ok
 
 1:     mov     r2, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(__vet_atags)
 
 /*
@@ -162,7 +163,7 @@ __lookup_processor_type:
        cmp     r5, r6
        blo     1b
        mov     r5, #0                          @ unknown processor
-2:     mov     pc, lr
+2:     ret     lr
 ENDPROC(__lookup_processor_type)
 
 /*
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 716249cc2ee1..cc176b67c134 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -82,7 +82,7 @@ ENTRY(stext)
        adr     lr, BSYM(1f)                    @ return (PIC) address
  ARM(  add     pc, r10, #PROCINFO_INITFUNC     )
  THUMB(        add     r12, r10, #PROCINFO_INITFUNC    )
- THUMB(        mov     pc, r12                         )
+ THUMB(        ret     r12                             )
  1:    b       __after_proc_init
 ENDPROC(stext)
 
@@ -119,7 +119,7 @@ ENTRY(secondary_startup)
        mov     r13, r12                        @ __secondary_switched address
  ARM(  add     pc, r10, #PROCINFO_INITFUNC     )
  THUMB(        add     r12, r10, #PROCINFO_INITFUNC    )
- THUMB(        mov     pc, r12                         )
+ THUMB(        ret     r12                             )
 ENDPROC(secondary_startup)
 
 ENTRY(__secondary_switched)
@@ -164,7 +164,7 @@ __after_proc_init:
 #endif
        mcr     p15, 0, r0, c1, c0, 0           @ write control reg
 #endif /* CONFIG_CPU_CP15 */
-       mov     pc, r13
+       ret     r13
 ENDPROC(__after_proc_init)
        .ltorg
 
@@ -254,7 +254,7 @@ ENTRY(__setup_mpu)
        orr     r0, r0, #CR_M                   @ Set SCTRL.M (MPU on)
        mcr     p15, 0, r0, c1, c0, 0           @ Enable MPU
        isb
-       mov pc,lr
+       ret     lr
 ENDPROC(__setup_mpu)
 #endif
 #include "head-common.S"
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 2c35f0ff2fdc..664eee8c4a26 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -140,7 +140,7 @@ ENTRY(stext)
        mov     r8, r4                          @ set TTBR1 to swapper_pg_dir
  ARM(  add     pc, r10, #PROCINFO_INITFUNC     )
  THUMB(        add     r12, r10, #PROCINFO_INITFUNC    )
- THUMB(        mov     pc, r12                         )
+ THUMB(        ret     r12                             )
 1:     b       __enable_mmu
 ENDPROC(stext)
        .ltorg
@@ -335,7 +335,7 @@ __create_page_tables:
        sub     r4, r4, #0x1000         @ point to the PGD table
        mov     r4, r4, lsr #ARCH_PGD_SHIFT
 #endif
-       mov     pc, lr
+       ret     lr
 ENDPROC(__create_page_tables)
        .ltorg
        .align
@@ -383,7 +383,7 @@ ENTRY(secondary_startup)
  ARM(  add     pc, r10, #PROCINFO_INITFUNC     ) @ initialise processor
                                                  @ (return control reg)
  THUMB(        add     r12, r10, #PROCINFO_INITFUNC    )
- THUMB(        mov     pc, r12                         )
+ THUMB(        ret     r12                             )
 ENDPROC(secondary_startup)
 
        /*
@@ -468,7 +468,7 @@ ENTRY(__turn_mmu_on)
        instr_sync
        mov     r3, r3
        mov     r3, r13
-       mov     pc, r3
+       ret     r3
 __turn_mmu_on_end:
 ENDPROC(__turn_mmu_on)
        .popsection
@@ -487,7 +487,7 @@ __fixup_smp:
        orr     r4, r4, #0x0000b000
        orr     r4, r4, #0x00000020     @ val 0x4100b020
        teq     r3, r4                  @ ARM 11MPCore?
-       moveq   pc, lr                  @ yes, assume SMP
+       reteq   lr                      @ yes, assume SMP
 
        mrc     p15, 0, r0, c0, c0, 5   @ read MPIDR
        and     r0, r0, #0xc0000000     @ multiprocessing extensions and
@@ -500,7 +500,7 @@ __fixup_smp:
        orr     r4, r4, #0x0000c000
        orr     r4, r4, #0x00000090
        teq     r3, r4                  @ Check for ARM Cortex-A9
-       movne   pc, lr                  @ Not ARM Cortex-A9,
+       retne   lr                      @ Not ARM Cortex-A9,
 
        @ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the
        @ below address check will need to be #ifdef'd or equivalent
@@ -512,7 +512,7 @@ __fixup_smp:
 ARM_BE8(rev    r0, r0)                 @ byteswap if big endian
        and     r0, r0, #0x3            @ number of CPUs
        teq     r0, #0x0                @ is 1?
-       movne   pc, lr
+       retne   lr
 
 __fixup_smp_on_up:
        adr     r0, 1f
@@ -539,7 +539,7 @@ smp_on_up:
        .text
 __do_fixup_smp_on_up:
        cmp     r4, r5
-       movhs   pc, lr
+       reths   lr
        ldmia   r4!, {r0, r6}
  ARM(  str     r6, [r0, r3]    )
  THUMB(        add     r0, r0, r3      )
@@ -672,7 +672,7 @@ ARM_BE8(rev16       ip, ip)
 2:     cmp     r4, r5
        ldrcc   r7, [r4], #4    @ use branch for delay slot
        bcc     1b
-       mov     pc, lr
+       ret     lr
 #endif
 ENDPROC(__fixup_a_pv_table)
 
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 797b1a6a4906..56ce6290c831 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -99,7 +99,7 @@ ENTRY(__hyp_stub_install_secondary)
         * immediately.
         */
        compare_cpu_mode_with_primary   r4, r5, r6, r7
-       movne   pc, lr
+       retne   lr
 
        /*
         * Once we have given up on one CPU, we do not try to install the
@@ -111,7 +111,7 @@ ENTRY(__hyp_stub_install_secondary)
         */
 
        cmp     r4, #HYP_MODE
-       movne   pc, lr                  @ give up if the CPU is not in HYP mode
+       retne   lr                      @ give up if the CPU is not in HYP mode
 
 /*
  * Configure HSCTLR to set correct exception endianness/instruction set
@@ -201,7 +201,7 @@ ENDPROC(__hyp_get_vectors)
        @ fall through
 ENTRY(__hyp_set_vectors)
        __HVC(0)
-       mov     pc, lr
+       ret     lr
 ENDPROC(__hyp_set_vectors)
 
 #ifndef ZIMAGE
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index a5599cfc43cb..0960be7953f0 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -179,7 +179,7 @@ concan_load:
        get_thread_info r10
 #endif
 4:     dec_preempt_count r10, r3
-       mov     pc, lr
+       ret     lr
 
 /*
  * Back up Concan regs to save area and disable access to them
@@ -265,7 +265,7 @@ ENTRY(iwmmxt_task_copy)
        mov     r3, lr                          @ preserve return address
        bl      concan_dump
        msr     cpsr_c, ip                      @ restore interrupt mode
-       mov     pc, r3
+       ret     r3
 
 /*
  * Restore Concan state from given memory address
@@ -301,7 +301,7 @@ ENTRY(iwmmxt_task_restore)
        mov     r3, lr                          @ preserve return address
        bl      concan_load
        msr     cpsr_c, ip                      @ restore interrupt mode
-       mov     pc, r3
+       ret     r3
 
 /*
  * Concan handling on task switch
@@ -323,7 +323,7 @@ ENTRY(iwmmxt_task_switch)
        add     r3, r0, #TI_IWMMXT_STATE        @ get next task Concan save area
        ldr     r2, [r2]                        @ get current Concan owner
        teq     r2, r3                          @ next task owns it?
-       movne   pc, lr                          @ no: leave Concan disabled
+       retne   lr                              @ no: leave Concan disabled
 
 1:     @ flip Concan access
        XSC(eor r1, r1, #0x3)
@@ -350,7 +350,7 @@ ENTRY(iwmmxt_task_release)
        eors    r0, r0, r1                      @ if equal...
        streq   r0, [r3]                        @ then clear ownership
        msr     cpsr_c, r2                      @ restore interrupts
-       mov     pc, lr
+       ret     lr
 
        .data
 concan_owner:
diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S
index 95858966d84e..35e72585ec1d 100644
--- a/arch/arm/kernel/relocate_kernel.S
+++ b/arch/arm/kernel/relocate_kernel.S
@@ -3,6 +3,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/kexec.h>
 
        .align  3       /* not needed for this code, but keeps fncpy() happy */
@@ -59,7 +60,7 @@ ENTRY(relocate_new_kernel)
        mov r0,#0
        ldr r1,kexec_mach_type
        ldr r2,kexec_boot_atags
- ARM(  mov pc, lr      )
+ ARM(  ret lr  )
  THUMB(        bx lr           )
 
        .align
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 1b880db2a033..e1e60e5a7a27 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -107,7 +107,7 @@ ENTRY(cpu_resume_mmu)
        instr_sync
        mov     r0, r0
        mov     r0, r0
-       mov     pc, r3                  @ jump to virtual address
+       ret     r3                      @ jump to virtual address
 ENDPROC(cpu_resume_mmu)
        .popsection
 cpu_resume_after_mmu:
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 1b9844d369cc..b2d229f09c07 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -17,6 +17,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/unified.h>
 #include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
@@ -134,7 +135,7 @@ phase2:
        ldr     r0, =TRAMPOLINE_VA
        adr     r1, target
        bfi     r0, r1, #0, #PAGE_SHIFT
-       mov     pc, r0
+       ret     r0
 
 target:        @ We're now in the trampoline code, switch page tables
        mcrr    p15, 4, r2, r3, c2
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
index 638deb13da1c..b05e95840651 100644
--- a/arch/arm/lib/ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */
 
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsl)
  THUMB(        lsrmi   r3, al, ip              )
  THUMB(        orrmi   ah, ah, r3              )
        mov     al, al, lsl r2
-       mov     pc, lr
+       ret     lr
 
 ENDPROC(__ashldi3)
 ENDPROC(__aeabi_llsl)
diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
index 015e8aa5a1d1..275d7d2341a4 100644
--- a/arch/arm/lib/ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */
 
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_lasr)
  THUMB(        lslmi   r3, ah, ip              )
  THUMB(        orrmi   al, al, r3              )
        mov     ah, ah, asr r2
-       mov     pc, lr
+       ret     lr
 
 ENDPROC(__ashrdi3)
 ENDPROC(__aeabi_lasr)
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index 4102be617fce..fab5a50503ae 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -25,7 +25,7 @@
 ENTRY(c_backtrace)
 
 #if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
-               mov     pc, lr
+               ret     lr
 ENDPROC(c_backtrace)
 #else
                stmfd   sp!, {r4 - r8, lr}      @ Save an extra register so we have a location...
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 9f12ed1eea86..7d807cfd8ef5 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,3 +1,4 @@
+#include <asm/assembler.h>
 #include <asm/unwind.h>
 
 #if __LINUX_ARM_ARCH__ >= 6
@@ -70,7 +71,7 @@ UNWIND(       .fnstart        )
        \instr  r2, r2, r3
        str     r2, [r1, r0, lsl #2]
        restore_irqs ip
-       mov     pc, lr
+       ret     lr
 UNWIND(        .fnend          )
 ENDPROC(\name          )
        .endm
@@ -98,7 +99,7 @@ UNWIND(       .fnstart        )
        \store  r2, [r1]
        moveq   r0, #0
        restore_irqs ip
-       mov     pc, lr
+       ret     lr
 UNWIND(        .fnend          )
 ENDPROC(\name          )
        .endm
diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S
index 9fcdd154eff9..07cda737bb11 100644
--- a/arch/arm/lib/bswapsdi2.S
+++ b/arch/arm/lib/bswapsdi2.S
@@ -1,4 +1,5 @@
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #if __LINUX_ARM_ARCH__ >= 6
 ENTRY(__bswapsi2)
@@ -18,7 +19,7 @@ ENTRY(__bswapsi2)
        mov r3, r3, lsr #8
        bic r3, r3, #0xff00
        eor r0, r3, r0, ror #8
-       mov pc, lr
+       ret lr
 ENDPROC(__bswapsi2)
 
 ENTRY(__bswapdi2)
@@ -31,6 +32,6 @@ ENTRY(__bswapdi2)
        bic r1, r1, #0xff00
        eor r1, r1, r0, ror #8
        eor r0, r3, ip, ror #8
-       mov pc, lr
+       ret lr
 ENDPROC(__bswapdi2)
 #endif
diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S
index 916c80f13ae7..ed1a421813cb 100644
--- a/arch/arm/lib/call_with_stack.S
+++ b/arch/arm/lib/call_with_stack.S
@@ -36,9 +36,9 @@ ENTRY(call_with_stack)
        mov     r0, r1
 
        adr     lr, BSYM(1f)
-       mov     pc, r2
+       ret     r2
 
 1:     ldr     lr, [sp]
        ldr     sp, [sp, #4]
-       mov     pc, lr
+       ret     lr
 ENDPROC(call_with_stack)
diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
index 31d3cb34740d..984e0f29d548 100644
--- a/arch/arm/lib/csumpartial.S
+++ b/arch/arm/lib/csumpartial.S
@@ -97,7 +97,7 @@ td3   .req    lr
 #endif
 #endif
                adcnes  sum, sum, td0           @ update checksum
-               mov     pc, lr
+               ret     lr
 
 ENTRY(csum_partial)
                stmfd   sp!, {buf, lr}
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d6e742d24007..10b45909610c 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -7,6 +7,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <asm/assembler.h>
 
 /*
  * unsigned int
@@ -40,7 +41,7 @@ sum   .req    r3
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     dst, #2
-               moveq   pc, lr                  @ dst is now 32bit aligned
+               reteq   lr                      @ dst is now 32bit aligned
 
 .Ldst_16bit:   load2b  r8, ip
                sub     len, len, #2
@@ -48,7 +49,7 @@ sum   .req    r3
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
-               mov     pc, lr                  @ dst is now 32bit aligned
+               ret     lr                      @ dst is now 32bit aligned
 
                /*
                 * Handle 0 to 7 bytes, with any alignment of source and
diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S
index bc1033b897b4..518bf6e93f78 100644
--- a/arch/arm/lib/delay-loop.S
+++ b/arch/arm/lib/delay-loop.S
@@ -35,7 +35,7 @@ ENTRY(__loop_const_udelay)                    @ 0 <= r0 <= 0x7fffff06
                mul     r0, r2, r0              @ max = 2^32-1
                add     r0, r0, r1, lsr #32-6
                movs    r0, r0, lsr #6
-               moveq   pc, lr
+               reteq   lr
 
 /*
  * loops = r0 * HZ * loops_per_jiffy / 1000000
@@ -46,23 +46,23 @@ ENTRY(__loop_const_udelay)                  @ 0 <= r0 <= 0x7fffff06
 ENTRY(__loop_delay)
                subs    r0, r0, #1
 #if 0
-               movls   pc, lr
+               retls   lr
                subs    r0, r0, #1
-               movls   pc, lr
+               retls   lr
                subs    r0, r0, #1
-               movls   pc, lr
+               retls   lr
                subs    r0, r0, #1
-               movls   pc, lr
+               retls   lr
                subs    r0, r0, #1
-               movls   pc, lr
+               retls   lr
                subs    r0, r0, #1
-               movls   pc, lr
+               retls   lr
                subs    r0, r0, #1
-               movls   pc, lr
+               retls   lr
                subs    r0, r0, #1
 #endif
                bhi     __loop_delay
-               mov     pc, lr
+               ret     lr
 ENDPROC(__loop_udelay)
 ENDPROC(__loop_const_udelay)
 ENDPROC(__loop_delay)
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index e55c4842c290..a9eafe4981eb 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -13,6 +13,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/unwind.h>
 
 #ifdef __ARMEB__
@@ -97,7 +98,7 @@ UNWIND(.fnstart)
        mov     yl, #0
        cmpeq   xl, r4
        movlo   xh, xl
-       movlo   pc, lr
+       retlo   lr
 
        @ The division loop for lower bit positions.
        @ Here we shift remainer bits leftwards rather than moving the
@@ -111,14 +112,14 @@ UNWIND(.fnstart)
        subcs   xh, xh, r4
        movs    ip, ip, lsr #1
        bne     4b
-       mov     pc, lr
+       ret     lr
 
        @ The top part of remainder became zero.  If carry is set
        @ (the 33th bit) this is a false positive so resume the loop.
        @ Otherwise, if lower part is also null then we are done.
 6:     bcs     5b
        cmp     xl, #0
-       moveq   pc, lr
+       reteq   lr
 
        @ We still have remainer bits in the low part.  Bring them up.
 
@@ -144,7 +145,7 @@ UNWIND(.fnstart)
        movs    ip, ip, lsr #1
        mov     xh, #1
        bne     4b
-       mov     pc, lr
+       ret     lr
 
 8:     @ Division by a power of 2: determine what that divisor order is
        @ then simply shift values around
@@ -184,13 +185,13 @@ UNWIND(.fnstart)
  THUMB(        orr     yl, yl, xh              )
        mov     xh, xl, lsl ip
        mov     xh, xh, lsr ip
-       mov     pc, lr
+       ret     lr
 
        @ eq -> division by 1: obvious enough...
 9:     moveq   yl, xl
        moveq   yh, xh
        moveq   xh, #0
-       moveq   pc, lr
+       reteq   lr
 UNWIND(.fnend)
 
 UNWIND(.fnstart)
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 64f6bc1a9132..7848780e8834 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -35,7 +35,7 @@ ENTRY(_find_first_zero_bit_le)
 2:             cmp     r2, r1                  @ any more?
                blo     1b
 3:             mov     r0, r1                  @ no free bits
-               mov     pc, lr
+               ret     lr
 ENDPROC(_find_first_zero_bit_le)
 
 /*
@@ -76,7 +76,7 @@ ENTRY(_find_first_bit_le)
 2:             cmp     r2, r1                  @ any more?
                blo     1b
 3:             mov     r0, r1                  @ no free bits
-               mov     pc, lr
+               ret     lr
 ENDPROC(_find_first_bit_le)
 
 /*
@@ -114,7 +114,7 @@ ENTRY(_find_first_zero_bit_be)
 2:             cmp     r2, r1                  @ any more?
                blo     1b
 3:             mov     r0, r1                  @ no free bits
-               mov     pc, lr
+               ret     lr
 ENDPROC(_find_first_zero_bit_be)
 
 ENTRY(_find_next_zero_bit_be)
@@ -148,7 +148,7 @@ ENTRY(_find_first_bit_be)
 2:             cmp     r2, r1                  @ any more?
                blo     1b
 3:             mov     r0, r1                  @ no free bits
-               mov     pc, lr
+               ret     lr
 ENDPROC(_find_first_bit_be)
 
 ENTRY(_find_next_bit_be)
@@ -192,5 +192,5 @@ ENDPROC(_find_next_bit_be)
 #endif
                cmp     r1, r0                  @ Clamp to maxbit
                movlo   r0, r1
-               mov     pc, lr
+               ret     lr
 
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 9b06bb41fca6..0f958e3d8180 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -36,7 +36,7 @@ ENTRY(__get_user_1)
        check_uaccess r0, 1, r1, r2, __get_user_bad
 1: TUSER(ldrb) r2, [r0]
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(__get_user_1)
 
 ENTRY(__get_user_2)
@@ -56,20 +56,20 @@ rb  .req    r0
        orr     r2, rb, r2, lsl #8
 #endif
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(__get_user_2)
 
 ENTRY(__get_user_4)
        check_uaccess r0, 4, r1, r2, __get_user_bad
 4: TUSER(ldr)  r2, [r0]
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(__get_user_4)
 
 __get_user_bad:
        mov     r2, #0
        mov     r0, #-EFAULT
-       mov     pc, lr
+       ret     lr
 ENDPROC(__get_user_bad)
 
 .pushsection __ex_table, "a"
diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S
index 9f4238987fe9..c31b2f3153f1 100644
--- a/arch/arm/lib/io-readsb.S
+++ b/arch/arm/lib/io-readsb.S
@@ -25,7 +25,7 @@
 
 ENTRY(__raw_readsb)
                teq     r2, #0          @ do we have to check for the zero len?
-               moveq   pc, lr
+               reteq   lr
                ands    ip, r1, #3
                bne     .Linsb_align
 
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 7a7430950c79..2ed86fa5465f 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -12,7 +12,7 @@
 
 ENTRY(__raw_readsl)
                teq     r2, #0          @ do we have to check for the zero len?
-               moveq   pc, lr
+               reteq   lr
                ands    ip, r1, #3
                bne     3f
 
@@ -33,7 +33,7 @@ ENTRY(__raw_readsl)
                stmcsia r1!, {r3, ip}
                ldrne   r3, [r0, #0]
                strne   r3, [r1, #0]
-               mov     pc, lr
+               ret     lr
 
 3:             ldr     r3, [r0]
                cmp     ip, #2
@@ -75,5 +75,5 @@ ENTRY(__raw_readsl)
                strb    r3, [r1, #1]
 8:             mov     r3, ip, get_byte_0
                strb    r3, [r1, #0]
-               mov     pc, lr
+               ret     lr
 ENDPROC(__raw_readsl)
diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S
index 88487c8c4f23..413da9914529 100644
--- a/arch/arm/lib/io-readsw-armv3.S
+++ b/arch/arm/lib/io-readsw-armv3.S
@@ -27,11 +27,11 @@
                strb    r3, [r1], #1
 
                subs    r2, r2, #1
-               moveq   pc, lr
+               reteq   lr
 
 ENTRY(__raw_readsw)
                teq     r2, #0          @ do we have to check for the zero len?
-               moveq   pc, lr
+               reteq   lr
                tst     r1, #3
                bne     .Linsw_align
 
diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S
index 1f393d42593d..d9a45e9692ae 100644
--- a/arch/arm/lib/io-readsw-armv4.S
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -26,7 +26,7 @@
 
 ENTRY(__raw_readsw)
                teq     r2, #0
-               moveq   pc, lr
+               reteq   lr
                tst     r1, #3
                bne     .Linsw_align
 
diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S
index 68b92f4acaeb..a46bbc9b168b 100644
--- a/arch/arm/lib/io-writesb.S
+++ b/arch/arm/lib/io-writesb.S
@@ -45,7 +45,7 @@
 
 ENTRY(__raw_writesb)
                teq     r2, #0          @ do we have to check for the zero len?
-               moveq   pc, lr
+               reteq   lr
                ands    ip, r1, #3
                bne     .Loutsb_align
 
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index d0d104a0dd11..4ea2435988c1 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -12,7 +12,7 @@
 
 ENTRY(__raw_writesl)
                teq     r2, #0          @ do we have to check for the zero len?
-               moveq   pc, lr
+               reteq   lr
                ands    ip, r1, #3
                bne     3f
 
@@ -33,7 +33,7 @@ ENTRY(__raw_writesl)
                ldrne   r3, [r1, #0]
                strcs   ip, [r0, #0]
                strne   r3, [r0, #0]
-               mov     pc, lr
+               ret     lr
 
 3:             bic     r1, r1, #3
                ldr     r3, [r1], #4
@@ -47,7 +47,7 @@ ENTRY(__raw_writesl)
                orr     ip, ip, r3, lspush #16
                str     ip, [r0]
                bne     4b
-               mov     pc, lr
+               ret     lr
 
 5:             mov     ip, r3, lspull #8
                ldr     r3, [r1], #4
@@ -55,7 +55,7 @@ ENTRY(__raw_writesl)
                orr     ip, ip, r3, lspush #24
                str     ip, [r0]
                bne     5b
-               mov     pc, lr
+               ret     lr
 
 6:             mov     ip, r3, lspull #24
                ldr     r3, [r1], #4
@@ -63,5 +63,5 @@ ENTRY(__raw_writesl)
                orr     ip, ip, r3, lspush #8
                str     ip, [r0]
                bne     6b
-               mov     pc, lr
+               ret     lr
 ENDPROC(__raw_writesl)
diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S
index 49b800419e32..121789eb6802 100644
--- a/arch/arm/lib/io-writesw-armv3.S
+++ b/arch/arm/lib/io-writesw-armv3.S
@@ -28,11 +28,11 @@
                orr     r3, r3, r3, lsl #16
                str     r3, [r0]
                subs    r2, r2, #1
-               moveq   pc, lr
+               reteq   lr
 
 ENTRY(__raw_writesw)
                teq     r2, #0          @ do we have to check for the zero len?
-               moveq   pc, lr
+               reteq   lr
                tst     r1, #3
                bne     .Loutsw_align
 
diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
index ff4f71b579ee..269f90c51ad2 100644
--- a/arch/arm/lib/io-writesw-armv4.S
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -31,7 +31,7 @@
 
 ENTRY(__raw_writesw)
                teq     r2, #0
-               moveq   pc, lr
+               reteq   lr
                ands    r3, r1, #3
                bne     .Loutsw_align
 
@@ -96,5 +96,5 @@ ENTRY(__raw_writesw)
                tst     r2, #1
 3:             movne   ip, r3, lsr #8
                strneh  ip, [r0]
-               mov     pc, lr
+               ret     lr
 ENDPROC(__raw_writesw)
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index c562f649734c..947567ff67f9 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -210,7 +210,7 @@ ENTRY(__aeabi_uidiv)
 UNWIND(.fnstart)
 
        subs    r2, r1, #1
-       moveq   pc, lr
+       reteq   lr
        bcc     Ldiv0
        cmp     r0, r1
        bls     11f
@@ -220,16 +220,16 @@ UNWIND(.fnstart)
        ARM_DIV_BODY r0, r1, r2, r3
 
        mov     r0, r2
-       mov     pc, lr
+       ret     lr
 
 11:    moveq   r0, #1
        movne   r0, #0
-       mov     pc, lr
+       ret     lr
 
 12:    ARM_DIV2_ORDER r1, r2
 
        mov     r0, r0, lsr r2
-       mov     pc, lr
+       ret     lr
 
 UNWIND(.fnend)
 ENDPROC(__udivsi3)
@@ -244,11 +244,11 @@ UNWIND(.fnstart)
        moveq   r0, #0
        tsthi   r1, r2                          @ see if divisor is power of 2
        andeq   r0, r0, r2
-       movls   pc, lr
+       retls   lr
 
        ARM_MOD_BODY r0, r1, r2, r3
 
-       mov     pc, lr
+       ret     lr
 
 UNWIND(.fnend)
 ENDPROC(__umodsi3)
@@ -274,23 +274,23 @@ UNWIND(.fnstart)
 
        cmp     ip, #0
        rsbmi   r0, r0, #0
-       mov     pc, lr
+       ret     lr
 
 10:    teq     ip, r0                          @ same sign ?
        rsbmi   r0, r0, #0
-       mov     pc, lr
+       ret     lr
 
 11:    movlo   r0, #0
        moveq   r0, ip, asr #31
        orreq   r0, r0, #1
-       mov     pc, lr
+       ret     lr
 
 12:    ARM_DIV2_ORDER r1, r2
 
        cmp     ip, #0
        mov     r0, r3, lsr r2
        rsbmi   r0, r0, #0
-       mov     pc, lr
+       ret     lr
 
 UNWIND(.fnend)
 ENDPROC(__divsi3)
@@ -315,7 +315,7 @@ UNWIND(.fnstart)
 
 10:    cmp     ip, #0
        rsbmi   r0, r0, #0
-       mov     pc, lr
+       ret     lr
 
 UNWIND(.fnend)
 ENDPROC(__modsi3)
@@ -331,7 +331,7 @@ UNWIND(.save {r0, r1, ip, lr}       )
        ldmfd   sp!, {r1, r2, ip, lr}
        mul     r3, r0, r2
        sub     r1, r1, r3
-       mov     pc, lr
+       ret     lr
 
 UNWIND(.fnend)
 ENDPROC(__aeabi_uidivmod)
@@ -344,7 +344,7 @@ UNWIND(.save {r0, r1, ip, lr}       )
        ldmfd   sp!, {r1, r2, ip, lr}
        mul     r3, r0, r2
        sub     r1, r1, r3
-       mov     pc, lr
+       ret     lr
 
 UNWIND(.fnend)
 ENDPROC(__aeabi_idivmod)
diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
index f83d449141f7..922dcd88b02b 100644
--- a/arch/arm/lib/lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */
 
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsr)
  THUMB(        lslmi   r3, ah, ip              )
  THUMB(        orrmi   al, al, r3              )
        mov     ah, ah, lsr r2
-       mov     pc, lr
+       ret     lr
 
 ENDPROC(__lshrdi3)
 ENDPROC(__aeabi_llsr)
diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S
index 1da86991d700..74a5bed6d999 100644
--- a/arch/arm/lib/memchr.S
+++ b/arch/arm/lib/memchr.S
@@ -22,5 +22,5 @@ ENTRY(memchr)
        bne     1b
        sub     r0, r0, #1
 2:     movne   r0, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(memchr)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 94b0650ea98f..671455c854fa 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -110,7 +110,7 @@ ENTRY(memset)
        strneb  r1, [ip], #1
        tst     r2, #1
        strneb  r1, [ip], #1
-       mov     pc, lr
+       ret     lr
 
 6:     subs    r2, r2, #4              @ 1 do we have enough
        blt     5b                      @ 1 bytes to align with?
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index 3fbdef5f802a..385ccb306fa2 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -121,5 +121,5 @@ ENTRY(__memzero)
        strneb  r2, [r0], #1            @ 1
        tst     r1, #1                  @ 1 a byte left over
        strneb  r2, [r0], #1            @ 1
-       mov     pc, lr                  @ 1
+       ret     lr                      @ 1
 ENDPROC(__memzero)
diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S
index 36c91b4957e2..204305956925 100644
--- a/arch/arm/lib/muldi3.S
+++ b/arch/arm/lib/muldi3.S
@@ -11,6 +11,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define xh r0
@@ -41,7 +42,7 @@ ENTRY(__aeabi_lmul)
        adc     xh, xh, yh, lsr #16
        adds    xl, xl, ip, lsl #16
        adc     xh, xh, ip, lsr #16
-       mov     pc, lr
+       ret     lr
 
 ENDPROC(__muldi3)
 ENDPROC(__aeabi_lmul)
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 3d73dcb959b0..38d660d3705f 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -36,7 +36,7 @@ ENTRY(__put_user_1)
        check_uaccess r0, 1, r1, ip, __put_user_bad
 1: TUSER(strb) r2, [r0]
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(__put_user_1)
 
 ENTRY(__put_user_2)
@@ -60,14 +60,14 @@ ENTRY(__put_user_2)
 #endif
 #endif /* CONFIG_THUMB2_KERNEL */
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(__put_user_2)
 
 ENTRY(__put_user_4)
        check_uaccess r0, 4, r1, ip, __put_user_bad
 4: TUSER(str)  r2, [r0]
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
@@ -80,12 +80,12 @@ ENTRY(__put_user_8)
 6: TUSER(str)  r3, [r0]
 #endif
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(__put_user_8)
 
 __put_user_bad:
        mov     r0, #-EFAULT
-       mov     pc, lr
+       ret     lr
 ENDPROC(__put_user_bad)
 
 .pushsection __ex_table, "a"
diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S
index d8f2a1c1aea4..013d64c71e8d 100644
--- a/arch/arm/lib/strchr.S
+++ b/arch/arm/lib/strchr.S
@@ -23,5 +23,5 @@ ENTRY(strchr)
                teq     r2, r1
                movne   r0, #0
                subeq   r0, r0, #1
-               mov     pc, lr
+               ret     lr
 ENDPROC(strchr)
diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S
index 302f20cd2423..3cec1c7482c4 100644
--- a/arch/arm/lib/strrchr.S
+++ b/arch/arm/lib/strrchr.S
@@ -22,5 +22,5 @@ ENTRY(strrchr)
                teq     r2, #0
                bne     1b
                mov     r0, r3
-               mov     pc, lr
+               ret     lr
 ENDPROC(strrchr)
diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
index f0df6a91db04..ad4a6309141a 100644
--- a/arch/arm/lib/ucmpdi2.S
+++ b/arch/arm/lib/ucmpdi2.S
@@ -11,6 +11,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define xh r0
@@ -31,7 +32,7 @@ ENTRY(__ucmpdi2)
        movlo   r0, #0
        moveq   r0, #1
        movhi   r0, #2
-       mov     pc, lr
+       ret     lr
 
 ENDPROC(__ucmpdi2)
 
@@ -44,7 +45,7 @@ ENTRY(__aeabi_ulcmp)
        movlo   r0, #-1
        moveq   r0, #0
        movhi   r0, #1
-       mov     pc, lr
+       ret     lr
 
 ENDPROC(__aeabi_ulcmp)
 
diff --git a/arch/arm/mach-davinci/sleep.S b/arch/arm/mach-davinci/sleep.S
index d4e9316ecacb..a5336a5e2739 100644
--- a/arch/arm/mach-davinci/sleep.S
+++ b/arch/arm/mach-davinci/sleep.S
@@ -213,7 +213,7 @@ ddr2clk_stop_done:
        cmp     ip, r0
        bne     ddr2clk_stop_done
 
-       mov     pc, lr
+       ret     lr
 ENDPROC(davinci_ddr_psc_config)
 
 CACHE_FLUSH:
diff --git a/arch/arm/mach-ep93xx/crunch-bits.S b/arch/arm/mach-ep93xx/crunch-bits.S
index e96923a3017b..ee0be2af5c61 100644
--- a/arch/arm/mach-ep93xx/crunch-bits.S
+++ b/arch/arm/mach-ep93xx/crunch-bits.S
@@ -198,7 +198,7 @@ crunch_load:
        get_thread_info r10
 #endif
 2:     dec_preempt_count r10, r3
-       mov     pc, lr
+       ret     lr
 
 /*
  * Back up crunch regs to save area and disable access to them
@@ -277,7 +277,7 @@ ENTRY(crunch_task_copy)
        mov     r3, lr                          @ preserve return address
        bl      crunch_save
        msr     cpsr_c, ip                      @ restore interrupt mode
-       mov     pc, r3
+       ret     r3
 
 /*
  * Restore crunch state from given memory address
@@ -310,4 +310,4 @@ ENTRY(crunch_task_restore)
        mov     r3, lr                          @ preserve return address
        bl      crunch_load
        msr     cpsr_c, ip                      @ restore interrupt mode
-       mov     pc, r3
+       ret     r3
diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S
index fe123b079c05..74b50f1982db 100644
--- a/arch/arm/mach-imx/suspend-imx6.S
+++ b/arch/arm/mach-imx/suspend-imx6.S
@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/hardware/cache-l2x0.h>
 #include "hardware.h"
@@ -301,7 +302,7 @@ rbc_loop:
        resume_mmdc
 
        /* return to suspend finish */
-       mov     pc, lr
+       ret     lr
 
 resume:
        /* invalidate L1 I-cache first */
@@ -325,7 +326,7 @@ resume:
        mov     r5, #0x1
        resume_mmdc
 
-       mov     pc, lr
+       ret     lr
 ENDPROC(imx6_suspend)
 
 /*
diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S
index 510c29e079ca..f5d881b5d0f7 100644
--- a/arch/arm/mach-mvebu/coherency_ll.S
+++ b/arch/arm/mach-mvebu/coherency_ll.S
@@ -46,7 +46,7 @@ ENTRY(ll_get_coherency_base)
        ldr     r1, =coherency_base
        ldr     r1, [r1]
 2:
-       mov     pc, lr
+       ret     lr
 ENDPROC(ll_get_coherency_base)
 
 /*
@@ -63,7 +63,7 @@ ENTRY(ll_get_coherency_cpumask)
        mov     r2, #(1 << 24)
        lsl     r3, r2, r3
 ARM_BE8(rev    r3, r3)
-       mov     pc, lr
+       ret     lr
 ENDPROC(ll_get_coherency_cpumask)
 
 /*
@@ -94,7 +94,7 @@ ENTRY(ll_add_cpu_to_smp_group)
        strex   r1, r2, [r0]
        cmp     r1, #0
        bne     1b
-       mov     pc, lr
+       ret     lr
 ENDPROC(ll_add_cpu_to_smp_group)
 
 ENTRY(ll_enable_coherency)
@@ -118,7 +118,7 @@ ENTRY(ll_enable_coherency)
        bne     1b
        dsb
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(ll_enable_coherency)
 
 ENTRY(ll_disable_coherency)
@@ -141,7 +141,7 @@ ENTRY(ll_disable_coherency)
        cmp     r1, #0
        bne     1b
        dsb
-       mov     pc, lr
+       ret     lr
 ENDPROC(ll_disable_coherency)
 
        .align 2
diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S
index 5925366bc03c..7c91ddb6f1f7 100644
--- a/arch/arm/mach-mvebu/headsmp-a9.S
+++ b/arch/arm/mach-mvebu/headsmp-a9.S
@@ -14,6 +14,7 @@
 
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 
        __CPUINIT
 #define CPU_RESUME_ADDR_REG 0xf10182d4
@@ -24,7 +25,7 @@
 armada_375_smp_cpu1_enable_code_start:
        ldr     r0, [pc, #4]
        ldr     r1, [r0]
-       mov     pc, r1
+       ret     r1
        .word   CPU_RESUME_ADDR_REG
 armada_375_smp_cpu1_enable_code_end:
 
diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S
index 9086ce03ae12..b84a0122d823 100644
--- a/arch/arm/mach-omap2/sleep44xx.S
+++ b/arch/arm/mach-omap2/sleep44xx.S
@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/smp_scu.h>
 #include <asm/memory.h>
 #include <asm/hardware/cache-l2x0.h>
@@ -334,7 +335,7 @@ ENDPROC(omap4_cpu_resume)
 
 #ifndef CONFIG_OMAP4_ERRATA_I688
 ENTRY(omap_bus_sync)
-       mov     pc, lr
+       ret     lr
 ENDPROC(omap_bus_sync)
 #endif
 
diff --git a/arch/arm/mach-omap2/sram242x.S b/arch/arm/mach-omap2/sram242x.S
index 680a7c56cc3e..2c88ff2d0236 100644
--- a/arch/arm/mach-omap2/sram242x.S
+++ b/arch/arm/mach-omap2/sram242x.S
@@ -101,7 +101,7 @@ i_dll_wait:
 i_dll_delay:
        subs    r4, r4, #0x1
        bne     i_dll_delay
-       mov     pc, lr
+       ret     lr
 
        /*
         * shift up or down voltage, use R9 as input to tell level.
@@ -125,7 +125,7 @@ volt_delay:
        ldr     r7, [r3]                @ get timer value
        cmp     r5, r7                  @ time up?
        bhi     volt_delay              @ not yet->branch
-       mov     pc, lr                  @ back to caller.
+       ret     lr                      @ back to caller.
 
 omap242x_sdi_cm_clksel2_pll:
        .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
@@ -220,7 +220,7 @@ volt_delay_c:
        ldr     r7, [r10]               @ get timer value
        cmp     r8, r7                  @ time up?
        bhi     volt_delay_c            @ not yet->branch
-       mov     pc, lr                  @ back to caller
+       ret     lr                      @ back to caller
 
 omap242x_srs_cm_clksel2_pll:
        .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
diff --git a/arch/arm/mach-omap2/sram243x.S b/arch/arm/mach-omap2/sram243x.S
index a1e9edd673f4..d5deb9761fc7 100644
--- a/arch/arm/mach-omap2/sram243x.S
+++ b/arch/arm/mach-omap2/sram243x.S
@@ -101,7 +101,7 @@ i_dll_wait:
 i_dll_delay:
        subs    r4, r4, #0x1
        bne     i_dll_delay
-       mov     pc, lr
+       ret     lr
 
        /*
         * shift up or down voltage, use R9 as input to tell level.
@@ -125,7 +125,7 @@ volt_delay:
        ldr     r7, [r3]                @ get timer value
        cmp     r5, r7                  @ time up?
        bhi     volt_delay              @ not yet->branch
-       mov     pc, lr                  @ back to caller.
+       ret     lr                      @ back to caller.
 
 omap243x_sdi_cm_clksel2_pll:
        .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
@@ -220,7 +220,7 @@ volt_delay_c:
        ldr     r7, [r10]               @ get timer value
        cmp     r8, r7                  @ time up?
        bhi     volt_delay_c            @ not yet->branch
-       mov     pc, lr                  @ back to caller
+       ret     lr                      @ back to caller
 
 omap243x_srs_cm_clksel2_pll:
        .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
diff --git a/arch/arm/mach-pxa/mioa701_bootresume.S b/arch/arm/mach-pxa/mioa701_bootresume.S
index 324d25a48c85..81591491ab94 100644
--- a/arch/arm/mach-pxa/mioa701_bootresume.S
+++ b/arch/arm/mach-pxa/mioa701_bootresume.S
@@ -29,7 +29,7 @@ ENTRY(mioa701_jumpaddr)
        str     r1, [r0]                @ Early disable resume for next boot
        ldr     r0, mioa701_jumpaddr    @ (Murphy's Law)
        ldr     r0, [r0]
-       mov     pc, r0
+       ret     r0
 2:
 
 ENTRY(mioa701_bootstrap_lg)
diff --git a/arch/arm/mach-pxa/standby.S b/arch/arm/mach-pxa/standby.S
index 29f5f5c180b7..eab1645bb4ad 100644
--- a/arch/arm/mach-pxa/standby.S
+++ b/arch/arm/mach-pxa/standby.S
@@ -29,7 +29,7 @@ ENTRY(pxa_cpu_standby)
        .align  5
 1:     mcr     p14, 0, r2, c7, c0, 0   @ put the system into Standby
        str     r1, [r0]                @ make sure PSSR_PH/STS are clear
-       mov     pc, lr
+       ret     lr
 
 #endif
 
@@ -108,7 +108,7 @@ ENTRY(pm_enter_standby_start)
        bic     r0, r0, #0x20000000
        str     r0, [r1, #PXA3_DMCIER]
 
-       mov     pc, lr
+       ret     lr
 ENTRY(pm_enter_standby_end)
 
 #endif
diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2410.S b/arch/arm/mach-s3c24xx/sleep-s3c2410.S
index c9b91223697c..875ba8911127 100644
--- a/arch/arm/mach-s3c24xx/sleep-s3c2410.S
+++ b/arch/arm/mach-s3c24xx/sleep-s3c2410.S
@@ -66,4 +66,4 @@ s3c2410_do_sleep:
        streq   r8, [r5]                        @ SDRAM power-down config
        streq   r9, [r6]                        @ CPU sleep
 1:     beq     1b
-       mov     pc, r14
+       ret     lr
diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2412.S b/arch/arm/mach-s3c24xx/sleep-s3c2412.S
index 5adaceb7da13..6bf5b4d8743c 100644
--- a/arch/arm/mach-s3c24xx/sleep-s3c2412.S
+++ b/arch/arm/mach-s3c24xx/sleep-s3c2412.S
@@ -65,4 +65,4 @@ s3c2412_sleep_enter1:
        strne   r9, [r3]
        bne     s3c2412_sleep_enter1
 
-       mov     pc, r14
+       ret     lr
diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S
index e5be5c88644b..293007579b8e 100644
--- a/arch/arm/mach-shmobile/headsmp.S
+++ b/arch/arm/mach-shmobile/headsmp.S
@@ -12,6 +12,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/memory.h>
 
 ENTRY(shmobile_invalidate_start)
@@ -75,7 +76,7 @@ shmobile_smp_boot_next:
 
 shmobile_smp_boot_found:
        ldr     r0, [r7, r1, lsl #2]
-       mov     pc, r9
+       ret     r9
 ENDPROC(shmobile_smp_boot)
 
 ENTRY(shmobile_smp_sleep)
diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S
index aaaf3abd2688..be4bc5f853f5 100644
--- a/arch/arm/mach-tegra/sleep-tegra20.S
+++ b/arch/arm/mach-tegra/sleep-tegra20.S
@@ -78,7 +78,7 @@ ENTRY(tegra20_hotplug_shutdown)
        /* Put this CPU down */
        cpu_id  r0
        bl      tegra20_cpu_shutdown
-       mov     pc, lr                  @ should never get here
+       ret     lr                      @ should never get here
 ENDPROC(tegra20_hotplug_shutdown)
 
 /*
@@ -96,7 +96,7 @@ ENDPROC(tegra20_hotplug_shutdown)
  */
 ENTRY(tegra20_cpu_shutdown)
        cmp     r0, #0
-       moveq   pc, lr                  @ must not be called for CPU 0
+       reteq   lr                      @ must not be called for CPU 0
        mov32   r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
        mov     r12, #CPU_RESETTABLE
        str     r12, [r1]
@@ -117,7 +117,7 @@ ENTRY(tegra20_cpu_shutdown)
        cpu_id  r3
        cmp     r3, r0
        beq     .
-       mov     pc, lr
+       ret     lr
 ENDPROC(tegra20_cpu_shutdown)
 #endif
 
@@ -164,7 +164,7 @@ ENTRY(tegra_pen_lock)
        cmpeq   r12, r0                 @ !turn == cpu?
        beq     1b                      @ while !turn == cpu && flag[!cpu] == 1
 
-       mov     pc, lr                  @ locked
+       ret     lr                      @ locked
 ENDPROC(tegra_pen_lock)
 
 ENTRY(tegra_pen_unlock)
@@ -176,7 +176,7 @@ ENTRY(tegra_pen_unlock)
        addne   r2, r3, #PMC_SCRATCH39
        mov     r12, #0
        str     r12, [r2]
-       mov     pc, lr
+       ret     lr
 ENDPROC(tegra_pen_unlock)
 
 /*
@@ -189,7 +189,7 @@ ENTRY(tegra20_cpu_clear_resettable)
        mov32   r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
        mov     r12, #CPU_NOT_RESETTABLE
        str     r12, [r1]
-       mov     pc, lr
+       ret     lr
 ENDPROC(tegra20_cpu_clear_resettable)
 
 /*
@@ -202,7 +202,7 @@ ENTRY(tegra20_cpu_set_resettable_soon)
        mov32   r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
        mov     r12, #CPU_RESETTABLE_SOON
        str     r12, [r1]
-       mov     pc, lr
+       ret     lr
 ENDPROC(tegra20_cpu_set_resettable_soon)
 
 /*
@@ -217,7 +217,7 @@ ENTRY(tegra20_cpu_is_resettable_soon)
        cmp     r12, #CPU_RESETTABLE_SOON
        moveq   r0, #1
        movne   r0, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(tegra20_cpu_is_resettable_soon)
 
 /*
@@ -239,7 +239,7 @@ ENTRY(tegra20_sleep_core_finish)
        mov32   r1, TEGRA_IRAM_LPx_RESUME_AREA
        add     r0, r0, r1
 
-       mov     pc, r3
+       ret     r3
 ENDPROC(tegra20_sleep_core_finish)
 
 /*
@@ -402,7 +402,7 @@ exit_selfrefresh_loop:
 
        mov32   r0, TEGRA_PMC_BASE
        ldr     r0, [r0, #PMC_SCRATCH41]
-       mov     pc, r0                  @ jump to tegra_resume
+       ret     r0                      @ jump to tegra_resume
 ENDPROC(tegra20_lp1_reset)
 
 /*
@@ -455,7 +455,7 @@ tegra20_switch_cpu_to_clk32k:
        mov     r0, #0  /* burst policy = 32KHz */
        str     r0, [r5, #CLK_RESET_SCLK_BURST]
 
-       mov     pc, lr
+       ret     lr
 
 /*
  * tegra20_enter_sleep
@@ -535,7 +535,7 @@ padsave_done:
        adr     r2, tegra20_sclk_save
        str     r0, [r2]
        dsb
-       mov     pc, lr
+       ret     lr
 
 tegra20_sdram_pad_address:
        .word   TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL
diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S
index b16d4a57fa59..09cad9b071de 100644
--- a/arch/arm/mach-tegra/sleep-tegra30.S
+++ b/arch/arm/mach-tegra/sleep-tegra30.S
@@ -142,7 +142,7 @@ ENTRY(tegra30_hotplug_shutdown)
        /* Powergate this CPU */
        mov     r0, #TEGRA30_POWER_HOTPLUG_SHUTDOWN
        bl      tegra30_cpu_shutdown
-       mov     pc, lr                  @ should never get here
+       ret     lr                      @ should never get here
 ENDPROC(tegra30_hotplug_shutdown)
 
 /*
@@ -161,7 +161,7 @@ ENTRY(tegra30_cpu_shutdown)
        bne     _no_cpu0_chk    @ It's not Tegra30
 
        cmp     r3, #0
-       moveq   pc, lr          @ Must never be called for CPU 0
+       reteq   lr              @ Must never be called for CPU 0
 _no_cpu0_chk:
 
        ldr     r12, =TEGRA_FLOW_CTRL_VIRT
@@ -266,7 +266,7 @@ ENTRY(tegra30_sleep_core_finish)
        mov32   r1, TEGRA_IRAM_LPx_RESUME_AREA
        add     r0, r0, r1
 
-       mov     pc, r3
+       ret     r3
 ENDPROC(tegra30_sleep_core_finish)
 
 /*
@@ -285,7 +285,7 @@ ENTRY(tegra30_sleep_cpu_secondary_finish)
        mov     r0, #0                          @ power mode flags (!hotplug)
        bl      tegra30_cpu_shutdown
        mov     r0, #1                          @ never return here
-       mov     pc, r7
+       ret     r7
 ENDPROC(tegra30_sleep_cpu_secondary_finish)
 
 /*
@@ -529,7 +529,7 @@ __no_dual_emc_chanl:
 
        mov32   r0, TEGRA_PMC_BASE
        ldr     r0, [r0, #PMC_SCRATCH41]
-       mov     pc, r0                  @ jump to tegra_resume
+       ret     r0                      @ jump to tegra_resume
 ENDPROC(tegra30_lp1_reset)
 
        .align  L1_CACHE_SHIFT
@@ -659,7 +659,7 @@ _no_pll_in_iddq:
        mov     r0, #0  /* burst policy = 32KHz */
        str     r0, [r5, #CLK_RESET_SCLK_BURST]
 
-       mov     pc, lr
+       ret     lr
 
 /*
  * tegra30_enter_sleep
@@ -819,7 +819,7 @@ pmc_io_dpd_skip:
 
        dsb
 
-       mov     pc, lr
+       ret     lr
 
        .ltorg
 /* dummy symbol for end of IRAM */
diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S
index 8d06213fbc47..f024a5109e8e 100644
--- a/arch/arm/mach-tegra/sleep.S
+++ b/arch/arm/mach-tegra/sleep.S
@@ -87,7 +87,7 @@ ENTRY(tegra_init_l2_for_a15)
        mcrne   p15, 0x1, r0, c9, c0, 2
 _exit_init_l2_a15:
 
-       mov     pc, lr
+       ret     lr
 ENDPROC(tegra_init_l2_for_a15)
 
 /*
@@ -111,7 +111,7 @@ ENTRY(tegra_sleep_cpu_finish)
        add     r3, r3, r0
        mov     r0, r1
 
-       mov     pc, r3
+       ret     r3
 ENDPROC(tegra_sleep_cpu_finish)
 
 /*
@@ -139,7 +139,7 @@ ENTRY(tegra_shut_off_mmu)
        moveq   r3, #0
        streq   r3, [r2, #L2X0_CTRL]
 #endif
-       mov     pc, r0
+       ret     r0
 ENDPROC(tegra_shut_off_mmu)
        .popsection
 
@@ -156,6 +156,6 @@ ENTRY(tegra_switch_cpu_to_pllp)
        str     r0, [r5, #CLK_RESET_CCLK_BURST]
        mov     r0, #0
        str     r0, [r5, #CLK_RESET_CCLK_DIVIDER]
-       mov     pc, lr
+       ret     lr
 ENDPROC(tegra_switch_cpu_to_pllp)
 #endif
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
index e505befe51b5..2f0c58836ae7 100644
--- a/arch/arm/mm/cache-fa.S
+++ b/arch/arm/mm/cache-fa.S
@@ -15,6 +15,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/memory.h>
 #include <asm/page.h>
 
@@ -45,7 +46,7 @@
 ENTRY(fa_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(fa_flush_icache_all)
 
 /*
@@ -71,7 +72,7 @@ __flush_whole_cache:
        mcrne   p15, 0, ip, c7, c5, 6           @ invalidate BTB
        mcrne   p15, 0, ip, c7, c10, 4          @ drain write buffer
        mcrne   p15, 0, ip, c7, c5, 4           @ prefetch flush
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -99,7 +100,7 @@ ENTRY(fa_flush_user_cache_range)
        mcrne   p15, 0, ip, c7, c5, 6           @ invalidate BTB
        mcrne   p15, 0, ip, c7, c10, 4          @ data write barrier
        mcrne   p15, 0, ip, c7, c5, 4           @ prefetch flush
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -135,7 +136,7 @@ ENTRY(fa_coherent_user_range)
        mcr     p15, 0, r0, c7, c5, 6           @ invalidate BTB
        mcr     p15, 0, r0, c7, c10, 4          @ drain write buffer
        mcr     p15, 0, r0, c7, c5, 4           @ prefetch flush
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -155,7 +156,7 @@ ENTRY(fa_flush_kern_dcache_area)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, r0, c7, c10, 4          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -181,7 +182,7 @@ fa_dma_inv_range:
        blo     1b
        mov     r0, #0
        mcr     p15, 0, r0, c7, c10, 4          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -199,7 +200,7 @@ fa_dma_clean_range:
        blo     1b
        mov     r0, #0  
        mcr     p15, 0, r0, c7, c10, 4          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start,end)
@@ -214,7 +215,7 @@ ENTRY(fa_dma_flush_range)
        blo     1b
        mov     r0, #0  
        mcr     p15, 0, r0, c7, c10, 4          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -237,7 +238,7 @@ ENDPROC(fa_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(fa_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(fa_dma_unmap_area)
 
        .globl  fa_flush_kern_cache_louis
diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S
index 8e12ddca0031..f1cc9861031f 100644
--- a/arch/arm/mm/cache-nop.S
+++ b/arch/arm/mm/cache-nop.S
@@ -5,11 +5,12 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 
 #include "proc-macros.S"
 
 ENTRY(nop_flush_icache_all)
-       mov     pc, lr
+       ret     lr
 ENDPROC(nop_flush_icache_all)
 
        .globl nop_flush_kern_cache_all
@@ -29,7 +30,7 @@ ENDPROC(nop_flush_icache_all)
 
 ENTRY(nop_coherent_user_range)
        mov     r0, 0
-       mov     pc, lr
+       ret     lr
 ENDPROC(nop_coherent_user_range)
 
        .globl nop_flush_kern_dcache_area
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
index a7ba68f59f0c..91e3adf155cb 100644
--- a/arch/arm/mm/cache-v4.S
+++ b/arch/arm/mm/cache-v4.S
@@ -9,6 +9,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/page.h>
 #include "proc-macros.S"
 
@@ -18,7 +19,7 @@
  *     Unconditionally clean and invalidate the entire icache.
  */
 ENTRY(v4_flush_icache_all)
-       mov     pc, lr
+       ret     lr
 ENDPROC(v4_flush_icache_all)
 
 /*
@@ -40,7 +41,7 @@ ENTRY(v4_flush_kern_cache_all)
 #ifdef CONFIG_CPU_CP15
        mov     r0, #0
        mcr     p15, 0, r0, c7, c7, 0           @ flush ID cache
-       mov     pc, lr
+       ret     lr
 #else
        /* FALLTHROUGH */
 #endif
@@ -59,7 +60,7 @@ ENTRY(v4_flush_user_cache_range)
 #ifdef CONFIG_CPU_CP15
        mov     ip, #0
        mcr     p15, 0, ip, c7, c7, 0           @ flush ID cache
-       mov     pc, lr
+       ret     lr
 #else
        /* FALLTHROUGH */
 #endif
@@ -89,7 +90,7 @@ ENTRY(v4_coherent_kern_range)
  */
 ENTRY(v4_coherent_user_range)
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -116,7 +117,7 @@ ENTRY(v4_dma_flush_range)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c7, 0           @ flush ID cache
 #endif
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_unmap_area(start, size, dir)
@@ -136,7 +137,7 @@ ENTRY(v4_dma_unmap_area)
  *     - dir   - DMA direction
  */
 ENTRY(v4_dma_map_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(v4_dma_unmap_area)
 ENDPROC(v4_dma_map_area)
 
diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
index cd4945321407..2522f8c8fbb1 100644
--- a/arch/arm/mm/cache-v4wb.S
+++ b/arch/arm/mm/cache-v4wb.S
@@ -9,6 +9,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/memory.h>
 #include <asm/page.h>
 #include "proc-macros.S"
@@ -58,7 +59,7 @@ flush_base:
 ENTRY(v4wb_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(v4wb_flush_icache_all)
 
 /*
@@ -94,7 +95,7 @@ __flush_whole_cache:
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -122,7 +123,7 @@ ENTRY(v4wb_flush_user_cache_range)
        blo     1b
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c10, 4          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -170,7 +171,7 @@ ENTRY(v4wb_coherent_user_range)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 
 /*
@@ -195,7 +196,7 @@ v4wb_dma_inv_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -212,7 +213,7 @@ v4wb_dma_clean_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -248,7 +249,7 @@ ENDPROC(v4wb_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(v4wb_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(v4wb_dma_unmap_area)
 
        .globl  v4wb_flush_kern_cache_louis
diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S
index 11e5e5838bc5..a0982ce49007 100644
--- a/arch/arm/mm/cache-v4wt.S
+++ b/arch/arm/mm/cache-v4wt.S
@@ -13,6 +13,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/page.h>
 #include "proc-macros.S"
 
@@ -48,7 +49,7 @@
 ENTRY(v4wt_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(v4wt_flush_icache_all)
 
 /*
@@ -71,7 +72,7 @@ __flush_whole_cache:
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, ip, c7, c6, 0           @ invalidate D cache
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -94,7 +95,7 @@ ENTRY(v4wt_flush_user_cache_range)
        add     r0, r0, #CACHE_DLINESIZE
        cmp     r0, r1
        blo     1b
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -126,7 +127,7 @@ ENTRY(v4wt_coherent_user_range)
        cmp     r0, r1
        blo     1b
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -160,7 +161,7 @@ v4wt_dma_inv_range:
        add     r0, r0, #CACHE_DLINESIZE
        cmp     r0, r1
        blo     1b
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -192,7 +193,7 @@ ENTRY(v4wt_dma_unmap_area)
  *     - dir   - DMA direction
  */
 ENTRY(v4wt_dma_map_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(v4wt_dma_unmap_area)
 ENDPROC(v4wt_dma_map_area)
 
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index d8fd4d4bd3d4..24659952c278 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -51,7 +51,7 @@ ENTRY(v6_flush_icache_all)
 #else
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I-cache
 #endif
-       mov     pc, lr
+       ret     lr
 ENDPROC(v6_flush_icache_all)
 
 /*
@@ -73,7 +73,7 @@ ENTRY(v6_flush_kern_cache_all)
 #else
        mcr     p15, 0, r0, c7, c15, 0          @ Cache clean+invalidate
 #endif
-       mov     pc, lr
+       ret     lr
 
 /*
  *     v6_flush_cache_all()
@@ -98,7 +98,7 @@ ENTRY(v6_flush_user_cache_all)
  *     - we have a VIPT cache.
  */
 ENTRY(v6_flush_user_cache_range)
-       mov     pc, lr
+       ret     lr
 
 /*
  *     v6_coherent_kern_range(start,end)
@@ -150,7 +150,7 @@ ENTRY(v6_coherent_user_range)
 #else
        mcr     p15, 0, r0, c7, c5, 6           @ invalidate BTB
 #endif
-       mov     pc, lr
+       ret     lr
 
 /*
  * Fault handling for the cache operation above. If the virtual address in r0
@@ -158,7 +158,7 @@ ENTRY(v6_coherent_user_range)
  */
 9001:
        mov     r0, #-EFAULT
-       mov     pc, lr
+       ret     lr
  UNWIND(.fnend         )
 ENDPROC(v6_coherent_user_range)
 ENDPROC(v6_coherent_kern_range)
@@ -188,7 +188,7 @@ ENTRY(v6_flush_kern_dcache_area)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c10, 4
 #endif
-       mov     pc, lr
+       ret     lr
 
 
 /*
@@ -239,7 +239,7 @@ v6_dma_inv_range:
        blo     1b
        mov     r0, #0
        mcr     p15, 0, r0, c7, c10, 4          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     v6_dma_clean_range(start,end)
@@ -262,7 +262,7 @@ v6_dma_clean_range:
        blo     1b
        mov     r0, #0
        mcr     p15, 0, r0, c7, c10, 4          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     v6_dma_flush_range(start,end)
@@ -290,7 +290,7 @@ ENTRY(v6_dma_flush_range)
        blo     1b
        mov     r0, #0
        mcr     p15, 0, r0, c7, c10, 4          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -323,7 +323,7 @@ ENTRY(v6_dma_unmap_area)
        teq     r2, #DMA_TO_DEVICE
        bne     v6_dma_inv_range
 #endif
-       mov     pc, lr
+       ret     lr
 ENDPROC(v6_dma_unmap_area)
 
        .globl  v6_flush_kern_cache_louis
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 615c99e38ba1..b966656d2c2d 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -61,7 +61,7 @@ ENTRY(v7_invalidate_l1)
        bgt     1b
        dsb     st
        isb
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_invalidate_l1)
 
 /*
@@ -76,7 +76,7 @@ ENTRY(v7_flush_icache_all)
        mov     r0, #0
        ALT_SMP(mcr     p15, 0, r0, c7, c1, 0)          @ invalidate I-cache inner shareable
        ALT_UP(mcr      p15, 0, r0, c7, c5, 0)          @ I+BTB cache invalidate
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_flush_icache_all)
 
  /*
@@ -94,7 +94,7 @@ ENTRY(v7_flush_dcache_louis)
        ALT_UP(ands     r3, r0, #(7 << 27))     @ extract LoUU from clidr
 #ifdef CONFIG_ARM_ERRATA_643719
        ALT_SMP(mrceq   p15, 0, r2, c0, c0, 0)  @ read main ID register
-       ALT_UP(moveq    pc, lr)                 @ LoUU is zero, so nothing to do
+       ALT_UP(reteq    lr)                     @ LoUU is zero, so nothing to do
        ldreq   r1, =0x410fc090                 @ ID of ARM Cortex A9 r0p?
        biceq   r2, r2, #0x0000000f             @ clear minor revision number
        teqeq   r2, r1                          @ test for errata affected core and if so...
@@ -102,7 +102,7 @@ ENTRY(v7_flush_dcache_louis)
 #endif
        ALT_SMP(mov     r3, r3, lsr #20)        @ r3 = LoUIS * 2
        ALT_UP(mov      r3, r3, lsr #26)        @ r3 = LoUU * 2
-       moveq   pc, lr                          @ return if level == 0
+       reteq   lr                              @ return if level == 0
        mov     r10, #0                         @ r10 (starting level) = 0
        b       flush_levels                    @ start flushing cache levels
 ENDPROC(v7_flush_dcache_louis)
@@ -168,7 +168,7 @@ finished:
        mcr     p15, 2, r10, c0, c0, 0          @ select current cache level in cssr
        dsb     st
        isb
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_flush_dcache_all)
 
 /*
@@ -191,7 +191,7 @@ ENTRY(v7_flush_kern_cache_all)
        ALT_UP(mcr      p15, 0, r0, c7, c5, 0)  @ I+BTB cache invalidate
  ARM(  ldmfd   sp!, {r4-r5, r7, r9-r11, lr}    )
  THUMB(        ldmfd   sp!, {r4-r7, r9-r11, lr}        )
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_flush_kern_cache_all)
 
  /*
@@ -209,7 +209,7 @@ ENTRY(v7_flush_kern_cache_louis)
        ALT_UP(mcr      p15, 0, r0, c7, c5, 0)  @ I+BTB cache invalidate
  ARM(  ldmfd   sp!, {r4-r5, r7, r9-r11, lr}    )
  THUMB(        ldmfd   sp!, {r4-r7, r9-r11, lr}        )
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_flush_kern_cache_louis)
 
 /*
@@ -235,7 +235,7 @@ ENTRY(v7_flush_user_cache_all)
  *     - we have a VIPT cache.
  */
 ENTRY(v7_flush_user_cache_range)
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_flush_user_cache_all)
 ENDPROC(v7_flush_user_cache_range)
 
@@ -296,7 +296,7 @@ ENTRY(v7_coherent_user_range)
        ALT_UP(mcr      p15, 0, r0, c7, c5, 6)  @ invalidate BTB
        dsb     ishst
        isb
-       mov     pc, lr
+       ret     lr
 
 /*
  * Fault handling for the cache operation above. If the virtual address in r0
@@ -307,7 +307,7 @@ ENTRY(v7_coherent_user_range)
        dsb
 #endif
        mov     r0, #-EFAULT
-       mov     pc, lr
+       ret     lr
  UNWIND(.fnend         )
 ENDPROC(v7_coherent_kern_range)
 ENDPROC(v7_coherent_user_range)
@@ -336,7 +336,7 @@ ENTRY(v7_flush_kern_dcache_area)
        cmp     r0, r1
        blo     1b
        dsb     st
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_flush_kern_dcache_area)
 
 /*
@@ -369,7 +369,7 @@ v7_dma_inv_range:
        cmp     r0, r1
        blo     1b
        dsb     st
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_dma_inv_range)
 
 /*
@@ -391,7 +391,7 @@ v7_dma_clean_range:
        cmp     r0, r1
        blo     1b
        dsb     st
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_dma_clean_range)
 
 /*
@@ -413,7 +413,7 @@ ENTRY(v7_dma_flush_range)
        cmp     r0, r1
        blo     1b
        dsb     st
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_dma_flush_range)
 
 /*
@@ -439,7 +439,7 @@ ENTRY(v7_dma_unmap_area)
        add     r1, r1, r0
        teq     r2, #DMA_TO_DEVICE
        bne     v7_dma_inv_range
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7_dma_unmap_area)
 
        __INITDATA
diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S
index 99b05f21a59a..fda415e4ca8f 100644
--- a/arch/arm/mm/l2c-l2x0-resume.S
+++ b/arch/arm/mm/l2c-l2x0-resume.S
@@ -6,6 +6,7 @@
  * This code can only be used if you are running in the secure world.
  */
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/hardware/cache-l2x0.h>
 
        .text
@@ -27,7 +28,7 @@ ENTRY(l2c310_early_resume)
 
        @ Check that the address has been initialised
        teq     r1, #0
-       moveq   pc, lr
+       reteq   lr
 
        @ The prefetch and power control registers are revision dependent
        @ and can be written whether or not the L2 cache is enabled
@@ -41,7 +42,7 @@ ENTRY(l2c310_early_resume)
        @ Don't setup the L2 cache if it is already enabled
        ldr     r0, [r1, #L2X0_CTRL]
        tst     r0, #L2X0_CTRL_EN
-       movne   pc, lr
+       retne   lr
 
        str     r3, [r1, #L310_TAG_LATENCY_CTRL]
        str     r4, [r1, #L310_DATA_LATENCY_CTRL]
@@ -51,7 +52,7 @@ ENTRY(l2c310_early_resume)
        str     r2, [r1, #L2X0_AUX_CTRL]
        mov     r9, #L2X0_CTRL_EN
        str     r9, [r1, #L2X0_CTRL]
-       mov     pc, lr
+       ret     lr
 ENDPROC(l2c310_early_resume)
 
        .align
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index d1a2d05971e0..86ee5d47ce3c 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -73,7 +73,7 @@
  * cpu_arm1020_proc_init()
  */
 ENTRY(cpu_arm1020_proc_init)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm1020_proc_fin()
@@ -83,7 +83,7 @@ ENTRY(cpu_arm1020_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x000e                 @ ............wca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm1020_reset(loc)
@@ -107,7 +107,7 @@ ENTRY(cpu_arm1020_reset)
        bic     ip, ip, #0x000f                 @ ............wcam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_arm1020_reset)
        .popsection
 
@@ -117,7 +117,7 @@ ENDPROC(cpu_arm1020_reset)
        .align  5
 ENTRY(cpu_arm1020_do_idle)
        mcr     p15, 0, r0, c7, c0, 4           @ Wait for interrupt
-       mov     pc, lr
+       ret     lr
 
 /* ================================= CACHE ================================ */
 
@@ -133,7 +133,7 @@ ENTRY(arm1020_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
 #endif
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm1020_flush_icache_all)
 
 /*
@@ -169,7 +169,7 @@ __flush_whole_cache:
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
 #endif
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -200,7 +200,7 @@ ENTRY(arm1020_flush_user_cache_range)
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
 #endif
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -242,7 +242,7 @@ ENTRY(arm1020_coherent_user_range)
        blo     1b
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -264,7 +264,7 @@ ENTRY(arm1020_flush_kern_dcache_area)
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -297,7 +297,7 @@ arm1020_dma_inv_range:
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -320,7 +320,7 @@ arm1020_dma_clean_range:
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -342,7 +342,7 @@ ENTRY(arm1020_dma_flush_range)
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -365,7 +365,7 @@ ENDPROC(arm1020_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(arm1020_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm1020_dma_unmap_area)
 
        .globl  arm1020_flush_kern_cache_louis
@@ -384,7 +384,7 @@ ENTRY(cpu_arm1020_dcache_clean_area)
        subs    r1, r1, #CACHE_DLINESIZE
        bhi     1b
 #endif
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -423,7 +423,7 @@ ENTRY(cpu_arm1020_switch_mm)
        mcr     p15, 0, r0, c2, c0, 0           @ load page table pointer
        mcr     p15, 0, r1, c8, c7, 0           @ invalidate I & D TLBs
 #endif /* CONFIG_MMU */
-       mov     pc, lr
+       ret     lr
         
 /*
  * cpu_arm1020_set_pte(ptep, pte)
@@ -441,7 +441,7 @@ ENTRY(cpu_arm1020_set_pte_ext)
 #endif
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
 #endif /* CONFIG_MMU */
-       mov     pc, lr
+       ret     lr
 
        .type   __arm1020_setup, #function
 __arm1020_setup:
@@ -460,7 +460,7 @@ __arm1020_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
        orr     r0, r0, #0x4000                 @ .R.. .... .... ....
 #endif
-       mov     pc, lr
+       ret     lr
        .size   __arm1020_setup, . - __arm1020_setup
 
        /*
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index 9d89405c3d03..a6331d78601f 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -73,7 +73,7 @@
  * cpu_arm1020e_proc_init()
  */
 ENTRY(cpu_arm1020e_proc_init)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm1020e_proc_fin()
@@ -83,7 +83,7 @@ ENTRY(cpu_arm1020e_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x000e                 @ ............wca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm1020e_reset(loc)
@@ -107,7 +107,7 @@ ENTRY(cpu_arm1020e_reset)
        bic     ip, ip, #0x000f                 @ ............wcam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_arm1020e_reset)
        .popsection
 
@@ -117,7 +117,7 @@ ENDPROC(cpu_arm1020e_reset)
        .align  5
 ENTRY(cpu_arm1020e_do_idle)
        mcr     p15, 0, r0, c7, c0, 4           @ Wait for interrupt
-       mov     pc, lr
+       ret     lr
 
 /* ================================= CACHE ================================ */
 
@@ -133,7 +133,7 @@ ENTRY(arm1020e_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
 #endif
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm1020e_flush_icache_all)
 
 /*
@@ -168,7 +168,7 @@ __flush_whole_cache:
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
 #endif
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -197,7 +197,7 @@ ENTRY(arm1020e_flush_user_cache_range)
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
 #endif
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -236,7 +236,7 @@ ENTRY(arm1020e_coherent_user_range)
        blo     1b
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -257,7 +257,7 @@ ENTRY(arm1020e_flush_kern_dcache_area)
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -286,7 +286,7 @@ arm1020e_dma_inv_range:
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -308,7 +308,7 @@ arm1020e_dma_clean_range:
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -328,7 +328,7 @@ ENTRY(arm1020e_dma_flush_range)
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -351,7 +351,7 @@ ENDPROC(arm1020e_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(arm1020e_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm1020e_dma_unmap_area)
 
        .globl  arm1020e_flush_kern_cache_louis
@@ -369,7 +369,7 @@ ENTRY(cpu_arm1020e_dcache_clean_area)
        subs    r1, r1, #CACHE_DLINESIZE
        bhi     1b
 #endif
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -407,7 +407,7 @@ ENTRY(cpu_arm1020e_switch_mm)
        mcr     p15, 0, r0, c2, c0, 0           @ load page table pointer
        mcr     p15, 0, r1, c8, c7, 0           @ invalidate I & D TLBs
 #endif
-       mov     pc, lr
+       ret     lr
         
 /*
  * cpu_arm1020e_set_pte(ptep, pte)
@@ -423,7 +423,7 @@ ENTRY(cpu_arm1020e_set_pte_ext)
        mcr     p15, 0, r0, c7, c10, 1          @ clean D entry
 #endif
 #endif /* CONFIG_MMU */
-       mov     pc, lr
+       ret     lr
 
        .type   __arm1020e_setup, #function
 __arm1020e_setup:
@@ -441,7 +441,7 @@ __arm1020e_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
        orr     r0, r0, #0x4000                 @ .R.. .... .... ....
 #endif
-       mov     pc, lr
+       ret     lr
        .size   __arm1020e_setup, . - __arm1020e_setup
 
        /*
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index 6f01a0ae3b30..a126b7a59928 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -62,7 +62,7 @@
  * cpu_arm1022_proc_init()
  */
 ENTRY(cpu_arm1022_proc_init)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm1022_proc_fin()
@@ -72,7 +72,7 @@ ENTRY(cpu_arm1022_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x000e                 @ ............wca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm1022_reset(loc)
@@ -96,7 +96,7 @@ ENTRY(cpu_arm1022_reset)
        bic     ip, ip, #0x000f                 @ ............wcam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_arm1022_reset)
        .popsection
 
@@ -106,7 +106,7 @@ ENDPROC(cpu_arm1022_reset)
        .align  5
 ENTRY(cpu_arm1022_do_idle)
        mcr     p15, 0, r0, c7, c0, 4           @ Wait for interrupt
-       mov     pc, lr
+       ret     lr
 
 /* ================================= CACHE ================================ */
 
@@ -122,7 +122,7 @@ ENTRY(arm1022_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
 #endif
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm1022_flush_icache_all)
 
 /*
@@ -156,7 +156,7 @@ __flush_whole_cache:
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
 #endif
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -185,7 +185,7 @@ ENTRY(arm1022_flush_user_cache_range)
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
 #endif
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -225,7 +225,7 @@ ENTRY(arm1022_coherent_user_range)
        blo     1b
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -246,7 +246,7 @@ ENTRY(arm1022_flush_kern_dcache_area)
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -275,7 +275,7 @@ arm1022_dma_inv_range:
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -297,7 +297,7 @@ arm1022_dma_clean_range:
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -317,7 +317,7 @@ ENTRY(arm1022_dma_flush_range)
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -340,7 +340,7 @@ ENDPROC(arm1022_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(arm1022_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm1022_dma_unmap_area)
 
        .globl  arm1022_flush_kern_cache_louis
@@ -358,7 +358,7 @@ ENTRY(cpu_arm1022_dcache_clean_area)
        subs    r1, r1, #CACHE_DLINESIZE
        bhi     1b
 #endif
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -389,7 +389,7 @@ ENTRY(cpu_arm1022_switch_mm)
        mcr     p15, 0, r0, c2, c0, 0           @ load page table pointer
        mcr     p15, 0, r1, c8, c7, 0           @ invalidate I & D TLBs
 #endif
-       mov     pc, lr
+       ret     lr
         
 /*
  * cpu_arm1022_set_pte_ext(ptep, pte, ext)
@@ -405,7 +405,7 @@ ENTRY(cpu_arm1022_set_pte_ext)
        mcr     p15, 0, r0, c7, c10, 1          @ clean D entry
 #endif
 #endif /* CONFIG_MMU */
-       mov     pc, lr
+       ret     lr
 
        .type   __arm1022_setup, #function
 __arm1022_setup:
@@ -423,7 +423,7 @@ __arm1022_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
        orr     r0, r0, #0x4000                 @ .R..............
 #endif
-       mov     pc, lr
+       ret     lr
        .size   __arm1022_setup, . - __arm1022_setup
 
        /*
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 4799a24b43e6..fc294067e977 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -62,7 +62,7 @@
  * cpu_arm1026_proc_init()
  */
 ENTRY(cpu_arm1026_proc_init)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm1026_proc_fin()
@@ -72,7 +72,7 @@ ENTRY(cpu_arm1026_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x000e                 @ ............wca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm1026_reset(loc)
@@ -96,7 +96,7 @@ ENTRY(cpu_arm1026_reset)
        bic     ip, ip, #0x000f                 @ ............wcam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_arm1026_reset)
        .popsection
 
@@ -106,7 +106,7 @@ ENDPROC(cpu_arm1026_reset)
        .align  5
 ENTRY(cpu_arm1026_do_idle)
        mcr     p15, 0, r0, c7, c0, 4           @ Wait for interrupt
-       mov     pc, lr
+       ret     lr
 
 /* ================================= CACHE ================================ */
 
@@ -122,7 +122,7 @@ ENTRY(arm1026_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
 #endif
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm1026_flush_icache_all)
 
 /*
@@ -151,7 +151,7 @@ __flush_whole_cache:
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
 #endif
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -180,7 +180,7 @@ ENTRY(arm1026_flush_user_cache_range)
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
 #endif
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -219,7 +219,7 @@ ENTRY(arm1026_coherent_user_range)
        blo     1b
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -240,7 +240,7 @@ ENTRY(arm1026_flush_kern_dcache_area)
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -269,7 +269,7 @@ arm1026_dma_inv_range:
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -291,7 +291,7 @@ arm1026_dma_clean_range:
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -311,7 +311,7 @@ ENTRY(arm1026_dma_flush_range)
        blo     1b
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -334,7 +334,7 @@ ENDPROC(arm1026_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(arm1026_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm1026_dma_unmap_area)
 
        .globl  arm1026_flush_kern_cache_louis
@@ -352,7 +352,7 @@ ENTRY(cpu_arm1026_dcache_clean_area)
        subs    r1, r1, #CACHE_DLINESIZE
        bhi     1b
 #endif
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -378,7 +378,7 @@ ENTRY(cpu_arm1026_switch_mm)
        mcr     p15, 0, r0, c2, c0, 0           @ load page table pointer
        mcr     p15, 0, r1, c8, c7, 0           @ invalidate I & D TLBs
 #endif
-       mov     pc, lr
+       ret     lr
         
 /*
  * cpu_arm1026_set_pte_ext(ptep, pte, ext)
@@ -394,7 +394,7 @@ ENTRY(cpu_arm1026_set_pte_ext)
        mcr     p15, 0, r0, c7, c10, 1          @ clean D entry
 #endif
 #endif /* CONFIG_MMU */
-       mov     pc, lr
+       ret     lr
 
        .type   __arm1026_setup, #function
 __arm1026_setup:
@@ -417,7 +417,7 @@ __arm1026_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
        orr     r0, r0, #0x4000                 @ .R.. .... .... ....
 #endif
-       mov     pc, lr
+       ret     lr
        .size   __arm1026_setup, . - __arm1026_setup
 
        /*
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index d42c37f9f5bc..2baa66b3ac9b 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -51,14 +51,14 @@
  */
 ENTRY(cpu_arm720_dcache_clean_area)
 ENTRY(cpu_arm720_proc_init)
-               mov     pc, lr
+               ret     lr
 
 ENTRY(cpu_arm720_proc_fin)
                mrc     p15, 0, r0, c1, c0, 0
                bic     r0, r0, #0x1000                 @ ...i............
                bic     r0, r0, #0x000e                 @ ............wca.
                mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-               mov     pc, lr
+               ret     lr
 
 /*
  * Function: arm720_proc_do_idle(void)
@@ -66,7 +66,7 @@ ENTRY(cpu_arm720_proc_fin)
  * Purpose : put the processor in proper idle mode
  */
 ENTRY(cpu_arm720_do_idle)
-               mov     pc, lr
+               ret     lr
 
 /*
  * Function: arm720_switch_mm(unsigned long pgd_phys)
@@ -81,7 +81,7 @@ ENTRY(cpu_arm720_switch_mm)
                mcr     p15, 0, r0, c2, c0, 0           @ update page table ptr
                mcr     p15, 0, r1, c8, c7, 0           @ flush TLB (v4)
 #endif
-               mov     pc, lr
+               ret     lr
 
 /*
  * Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext)
@@ -94,7 +94,7 @@ ENTRY(cpu_arm720_set_pte_ext)
 #ifdef CONFIG_MMU
        armv3_set_pte_ext wc_disable=0
 #endif
-       mov     pc, lr
+       ret     lr
 
 /*
  * Function: arm720_reset
@@ -112,7 +112,7 @@ ENTRY(cpu_arm720_reset)
                bic     ip, ip, #0x000f                 @ ............wcam
                bic     ip, ip, #0x2100                 @ ..v....s........
                mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-               mov     pc, r0
+               ret     r0
 ENDPROC(cpu_arm720_reset)
                .popsection
 
@@ -128,7 +128,7 @@ __arm710_setup:
        bic     r0, r0, r5
        ldr     r5, arm710_cr1_set
        orr     r0, r0, r5
-       mov     pc, lr                          @ __ret (head.S)
+       ret     lr                              @ __ret (head.S)
        .size   __arm710_setup, . - __arm710_setup
 
        /*
@@ -156,7 +156,7 @@ __arm720_setup:
        mrc     p15, 0, r0, c1, c0              @ get control register
        bic     r0, r0, r5
        orr     r0, r0, r6
-       mov     pc, lr                          @ __ret (head.S)
+       ret     lr                              @ __ret (head.S)
        .size   __arm720_setup, . - __arm720_setup
 
        /*
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index 9b0ae90cbf17..ac1ea6b3bce4 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -32,7 +32,7 @@ ENTRY(cpu_arm740_proc_init)
 ENTRY(cpu_arm740_do_idle)
 ENTRY(cpu_arm740_dcache_clean_area)
 ENTRY(cpu_arm740_switch_mm)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm740_proc_fin()
@@ -42,7 +42,7 @@ ENTRY(cpu_arm740_proc_fin)
        bic     r0, r0, #0x3f000000             @ bank/f/lock/s
        bic     r0, r0, #0x0000000c             @ w-buffer/cache
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm740_reset(loc)
@@ -56,7 +56,7 @@ ENTRY(cpu_arm740_reset)
        mrc     p15, 0, ip, c1, c0, 0           @ get ctrl register
        bic     ip, ip, #0x0000000c             @ ............wc..
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_arm740_reset)
        .popsection
 
@@ -115,7 +115,7 @@ __arm740_setup:
                                                @ need some benchmark
        orr     r0, r0, #0x0000000d             @ MPU/Cache/WB
 
-       mov     pc, lr
+       ret     lr
 
        .size   __arm740_setup, . - __arm740_setup
 
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index f6cc3f63ce39..bf6ba4bc30ff 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -32,13 +32,13 @@ ENTRY(cpu_arm7tdmi_proc_init)
 ENTRY(cpu_arm7tdmi_do_idle)
 ENTRY(cpu_arm7tdmi_dcache_clean_area)
 ENTRY(cpu_arm7tdmi_switch_mm)
-               mov     pc, lr
+               ret     lr
 
 /*
  * cpu_arm7tdmi_proc_fin()
  */
 ENTRY(cpu_arm7tdmi_proc_fin)
-               mov     pc, lr
+               ret     lr
 
 /*
  * Function: cpu_arm7tdmi_reset(loc)
@@ -47,13 +47,13 @@ ENTRY(cpu_arm7tdmi_proc_fin)
  */
                .pushsection    .idmap.text, "ax"
 ENTRY(cpu_arm7tdmi_reset)
-               mov     pc, r0
+               ret     r0
 ENDPROC(cpu_arm7tdmi_reset)
                .popsection
 
                .type   __arm7tdmi_setup, #function
 __arm7tdmi_setup:
-               mov     pc, lr
+               ret     lr
                .size   __arm7tdmi_setup, . - __arm7tdmi_setup
 
                __INITDATA
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 549557df6d57..22bf8dde4f84 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -63,7 +63,7 @@
  * cpu_arm920_proc_init()
  */
 ENTRY(cpu_arm920_proc_init)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm920_proc_fin()
@@ -73,7 +73,7 @@ ENTRY(cpu_arm920_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x000e                 @ ............wca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm920_reset(loc)
@@ -97,7 +97,7 @@ ENTRY(cpu_arm920_reset)
        bic     ip, ip, #0x000f                 @ ............wcam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_arm920_reset)
        .popsection
 
@@ -107,7 +107,7 @@ ENDPROC(cpu_arm920_reset)
        .align  5
 ENTRY(cpu_arm920_do_idle)
        mcr     p15, 0, r0, c7, c0, 4           @ Wait for interrupt
-       mov     pc, lr
+       ret     lr
 
 
 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -120,7 +120,7 @@ ENTRY(cpu_arm920_do_idle)
 ENTRY(arm920_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm920_flush_icache_all)
 
 /*
@@ -151,7 +151,7 @@ __flush_whole_cache:
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -177,7 +177,7 @@ ENTRY(arm920_flush_user_cache_range)
        blo     1b
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -211,7 +211,7 @@ ENTRY(arm920_coherent_user_range)
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -231,7 +231,7 @@ ENTRY(arm920_flush_kern_dcache_area)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -257,7 +257,7 @@ arm920_dma_inv_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -276,7 +276,7 @@ arm920_dma_clean_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -293,7 +293,7 @@ ENTRY(arm920_dma_flush_range)
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -316,7 +316,7 @@ ENDPROC(arm920_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(arm920_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm920_dma_unmap_area)
 
        .globl  arm920_flush_kern_cache_louis
@@ -332,7 +332,7 @@ ENTRY(cpu_arm920_dcache_clean_area)
        add     r0, r0, #CACHE_DLINESIZE
        subs    r1, r1, #CACHE_DLINESIZE
        bhi     1b
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -367,7 +367,7 @@ ENTRY(cpu_arm920_switch_mm)
        mcr     p15, 0, r0, c2, c0, 0           @ load page table pointer
        mcr     p15, 0, ip, c8, c7, 0           @ invalidate I & D TLBs
 #endif
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm920_set_pte(ptep, pte, ext)
@@ -382,7 +382,7 @@ ENTRY(cpu_arm920_set_pte_ext)
        mcr     p15, 0, r0, c7, c10, 1          @ clean D entry
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
 #endif
-       mov     pc, lr
+       ret     lr
 
 /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
 .globl cpu_arm920_suspend_size
@@ -423,7 +423,7 @@ __arm920_setup:
        mrc     p15, 0, r0, c1, c0              @ get control register v4
        bic     r0, r0, r5
        orr     r0, r0, r6
-       mov     pc, lr
+       ret     lr
        .size   __arm920_setup, . - __arm920_setup
 
        /*
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 2a758b06c6f6..0c6d5ac5a6d4 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -65,7 +65,7 @@
  * cpu_arm922_proc_init()
  */
 ENTRY(cpu_arm922_proc_init)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm922_proc_fin()
@@ -75,7 +75,7 @@ ENTRY(cpu_arm922_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x000e                 @ ............wca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm922_reset(loc)
@@ -99,7 +99,7 @@ ENTRY(cpu_arm922_reset)
        bic     ip, ip, #0x000f                 @ ............wcam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_arm922_reset)
        .popsection
 
@@ -109,7 +109,7 @@ ENDPROC(cpu_arm922_reset)
        .align  5
 ENTRY(cpu_arm922_do_idle)
        mcr     p15, 0, r0, c7, c0, 4           @ Wait for interrupt
-       mov     pc, lr
+       ret     lr
 
 
 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -122,7 +122,7 @@ ENTRY(cpu_arm922_do_idle)
 ENTRY(arm922_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm922_flush_icache_all)
 
 /*
@@ -153,7 +153,7 @@ __flush_whole_cache:
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -179,7 +179,7 @@ ENTRY(arm922_flush_user_cache_range)
        blo     1b
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -213,7 +213,7 @@ ENTRY(arm922_coherent_user_range)
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -233,7 +233,7 @@ ENTRY(arm922_flush_kern_dcache_area)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -259,7 +259,7 @@ arm922_dma_inv_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -278,7 +278,7 @@ arm922_dma_clean_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -295,7 +295,7 @@ ENTRY(arm922_dma_flush_range)
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -318,7 +318,7 @@ ENDPROC(arm922_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(arm922_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm922_dma_unmap_area)
 
        .globl  arm922_flush_kern_cache_louis
@@ -336,7 +336,7 @@ ENTRY(cpu_arm922_dcache_clean_area)
        subs    r1, r1, #CACHE_DLINESIZE
        bhi     1b
 #endif
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -371,7 +371,7 @@ ENTRY(cpu_arm922_switch_mm)
        mcr     p15, 0, r0, c2, c0, 0           @ load page table pointer
        mcr     p15, 0, ip, c8, c7, 0           @ invalidate I & D TLBs
 #endif
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm922_set_pte_ext(ptep, pte, ext)
@@ -386,7 +386,7 @@ ENTRY(cpu_arm922_set_pte_ext)
        mcr     p15, 0, r0, c7, c10, 1          @ clean D entry
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
 #endif /* CONFIG_MMU */
-       mov     pc, lr
+       ret     lr
 
        .type   __arm922_setup, #function
 __arm922_setup:
@@ -401,7 +401,7 @@ __arm922_setup:
        mrc     p15, 0, r0, c1, c0              @ get control register v4
        bic     r0, r0, r5
        orr     r0, r0, r6
-       mov     pc, lr
+       ret     lr
        .size   __arm922_setup, . - __arm922_setup
 
        /*
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index ba0d58e1a2a2..c32d073282ea 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -86,7 +86,7 @@
  * cpu_arm925_proc_init()
  */
 ENTRY(cpu_arm925_proc_init)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm925_proc_fin()
@@ -96,7 +96,7 @@ ENTRY(cpu_arm925_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x000e                 @ ............wca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm925_reset(loc)
@@ -129,7 +129,7 @@ ENDPROC(cpu_arm925_reset)
        bic     ip, ip, #0x000f                 @ ............wcam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 
 /*
  * cpu_arm925_do_idle()
@@ -145,7 +145,7 @@ ENTRY(cpu_arm925_do_idle)
        mcr     p15, 0, r2, c1, c0, 0           @ Disable I cache
        mcr     p15, 0, r0, c7, c0, 4           @ Wait for interrupt
        mcr     p15, 0, r1, c1, c0, 0           @ Restore ICache enable
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_icache_all()
@@ -155,7 +155,7 @@ ENTRY(cpu_arm925_do_idle)
 ENTRY(arm925_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm925_flush_icache_all)
 
 /*
@@ -188,7 +188,7 @@ __flush_whole_cache:
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -225,7 +225,7 @@ ENTRY(arm925_flush_user_cache_range)
        blo     1b
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -259,7 +259,7 @@ ENTRY(arm925_coherent_user_range)
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -279,7 +279,7 @@ ENTRY(arm925_flush_kern_dcache_area)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -307,7 +307,7 @@ arm925_dma_inv_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -328,7 +328,7 @@ arm925_dma_clean_range:
        blo     1b
 #endif
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -350,7 +350,7 @@ ENTRY(arm925_dma_flush_range)
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -373,7 +373,7 @@ ENDPROC(arm925_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(arm925_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm925_dma_unmap_area)
 
        .globl  arm925_flush_kern_cache_louis
@@ -390,7 +390,7 @@ ENTRY(cpu_arm925_dcache_clean_area)
        bhi     1b
 #endif
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -419,7 +419,7 @@ ENTRY(cpu_arm925_switch_mm)
        mcr     p15, 0, r0, c2, c0, 0           @ load page table pointer
        mcr     p15, 0, ip, c8, c7, 0           @ invalidate I & D TLBs
 #endif
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm925_set_pte_ext(ptep, pte, ext)
@@ -436,7 +436,7 @@ ENTRY(cpu_arm925_set_pte_ext)
 #endif
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
 #endif /* CONFIG_MMU */
-       mov     pc, lr
+       ret     lr
 
        .type   __arm925_setup, #function
 __arm925_setup:
@@ -469,7 +469,7 @@ __arm925_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
        orr     r0, r0, #0x4000                 @ .1.. .... .... ....
 #endif
-       mov     pc, lr
+       ret     lr
        .size   __arm925_setup, . - __arm925_setup
 
        /*
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 0f098f407c9f..252b2503038d 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -55,7 +55,7 @@
  * cpu_arm926_proc_init()
  */
 ENTRY(cpu_arm926_proc_init)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm926_proc_fin()
@@ -65,7 +65,7 @@ ENTRY(cpu_arm926_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x000e                 @ ............wca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm926_reset(loc)
@@ -89,7 +89,7 @@ ENTRY(cpu_arm926_reset)
        bic     ip, ip, #0x000f                 @ ............wcam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_arm926_reset)
        .popsection
 
@@ -111,7 +111,7 @@ ENTRY(cpu_arm926_do_idle)
        mcr     p15, 0, r0, c7, c0, 4           @ Wait for interrupt
        mcr     p15, 0, r1, c1, c0, 0           @ Restore ICache enable
        msr     cpsr_c, r3                      @ Restore FIQ state
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_icache_all()
@@ -121,7 +121,7 @@ ENTRY(cpu_arm926_do_idle)
 ENTRY(arm926_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm926_flush_icache_all)
 
 /*
@@ -151,7 +151,7 @@ __flush_whole_cache:
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -188,7 +188,7 @@ ENTRY(arm926_flush_user_cache_range)
        blo     1b
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -222,7 +222,7 @@ ENTRY(arm926_coherent_user_range)
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -242,7 +242,7 @@ ENTRY(arm926_flush_kern_dcache_area)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -270,7 +270,7 @@ arm926_dma_inv_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -291,7 +291,7 @@ arm926_dma_clean_range:
        blo     1b
 #endif
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -313,7 +313,7 @@ ENTRY(arm926_dma_flush_range)
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -336,7 +336,7 @@ ENDPROC(arm926_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(arm926_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm926_dma_unmap_area)
 
        .globl  arm926_flush_kern_cache_louis
@@ -353,7 +353,7 @@ ENTRY(cpu_arm926_dcache_clean_area)
        bhi     1b
 #endif
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -380,7 +380,7 @@ ENTRY(cpu_arm926_switch_mm)
        mcr     p15, 0, r0, c2, c0, 0           @ load page table pointer
        mcr     p15, 0, ip, c8, c7, 0           @ invalidate I & D TLBs
 #endif
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm926_set_pte_ext(ptep, pte, ext)
@@ -397,7 +397,7 @@ ENTRY(cpu_arm926_set_pte_ext)
 #endif
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
 #endif
-       mov     pc, lr
+       ret     lr
 
 /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
 .globl cpu_arm926_suspend_size
@@ -448,7 +448,7 @@ __arm926_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
        orr     r0, r0, #0x4000                 @ .1.. .... .... ....
 #endif
-       mov     pc, lr
+       ret     lr
        .size   __arm926_setup, . - __arm926_setup
 
        /*
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index 1c39a704ff6e..e5212d489377 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -31,7 +31,7 @@
  */
 ENTRY(cpu_arm940_proc_init)
 ENTRY(cpu_arm940_switch_mm)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm940_proc_fin()
@@ -41,7 +41,7 @@ ENTRY(cpu_arm940_proc_fin)
        bic     r0, r0, #0x00001000             @ i-cache
        bic     r0, r0, #0x00000004             @ d-cache
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm940_reset(loc)
@@ -58,7 +58,7 @@ ENTRY(cpu_arm940_reset)
        bic     ip, ip, #0x00000005             @ .............c.p
        bic     ip, ip, #0x00001000             @ i-cache
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_arm940_reset)
        .popsection
 
@@ -68,7 +68,7 @@ ENDPROC(cpu_arm940_reset)
        .align  5
 ENTRY(cpu_arm940_do_idle)
        mcr     p15, 0, r0, c7, c0, 4           @ Wait for interrupt
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_icache_all()
@@ -78,7 +78,7 @@ ENTRY(cpu_arm940_do_idle)
 ENTRY(arm940_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm940_flush_icache_all)
 
 /*
@@ -122,7 +122,7 @@ ENTRY(arm940_flush_user_cache_range)
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -170,7 +170,7 @@ ENTRY(arm940_flush_kern_dcache_area)
        bcs     1b                              @ segments 7 to 0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -191,7 +191,7 @@ arm940_dma_inv_range:
        subs    r1, r1, #1 << 4
        bcs     1b                              @ segments 7 to 0
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -215,7 +215,7 @@ ENTRY(cpu_arm940_dcache_clean_area)
        bcs     1b                              @ segments 7 to 0
 #endif
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -241,7 +241,7 @@ ENTRY(arm940_dma_flush_range)
        subs    r1, r1, #1 << 4
        bcs     1b                              @ segments 7 to 0
        mcr     p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -264,7 +264,7 @@ ENDPROC(arm940_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(arm940_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm940_dma_unmap_area)
 
        .globl  arm940_flush_kern_cache_louis
@@ -337,7 +337,7 @@ __arm940_setup:
        orr     r0, r0, #0x00001000             @ I-cache
        orr     r0, r0, #0x00000005             @ MPU/D-cache
 
-       mov     pc, lr
+       ret     lr
 
        .size   __arm940_setup, . - __arm940_setup
 
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 0289cd905e73..b3dd9b2d0b8e 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -38,7 +38,7 @@
  */
 ENTRY(cpu_arm946_proc_init)
 ENTRY(cpu_arm946_switch_mm)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm946_proc_fin()
@@ -48,7 +48,7 @@ ENTRY(cpu_arm946_proc_fin)
        bic     r0, r0, #0x00001000             @ i-cache
        bic     r0, r0, #0x00000004             @ d-cache
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_arm946_reset(loc)
@@ -65,7 +65,7 @@ ENTRY(cpu_arm946_reset)
        bic     ip, ip, #0x00000005             @ .............c.p
        bic     ip, ip, #0x00001000             @ i-cache
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_arm946_reset)
        .popsection
 
@@ -75,7 +75,7 @@ ENDPROC(cpu_arm946_reset)
        .align  5
 ENTRY(cpu_arm946_do_idle)
        mcr     p15, 0, r0, c7, c0, 4           @ Wait for interrupt
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_icache_all()
@@ -85,7 +85,7 @@ ENTRY(cpu_arm946_do_idle)
 ENTRY(arm946_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm946_flush_icache_all)
 
 /*
@@ -117,7 +117,7 @@ __flush_whole_cache:
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c5, 0           @ flush I cache
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -156,7 +156,7 @@ ENTRY(arm946_flush_user_cache_range)
        blo     1b
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -191,7 +191,7 @@ ENTRY(arm946_coherent_user_range)
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -212,7 +212,7 @@ ENTRY(arm946_flush_kern_dcache_area)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -239,7 +239,7 @@ arm946_dma_inv_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -260,7 +260,7 @@ arm946_dma_clean_range:
        blo     1b
 #endif
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -284,7 +284,7 @@ ENTRY(arm946_dma_flush_range)
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -307,7 +307,7 @@ ENDPROC(arm946_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(arm946_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(arm946_dma_unmap_area)
 
        .globl  arm946_flush_kern_cache_louis
@@ -324,7 +324,7 @@ ENTRY(cpu_arm946_dcache_clean_area)
        bhi     1b
 #endif
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
        .type   __arm946_setup, #function
 __arm946_setup:
@@ -392,7 +392,7 @@ __arm946_setup:
 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
        orr     r0, r0, #0x00004000             @ .1.. .... .... ....
 #endif
-       mov     pc, lr
+       ret     lr
 
        .size   __arm946_setup, . - __arm946_setup
 
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index f51197ba754a..8227322bbb8f 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -32,13 +32,13 @@ ENTRY(cpu_arm9tdmi_proc_init)
 ENTRY(cpu_arm9tdmi_do_idle)
 ENTRY(cpu_arm9tdmi_dcache_clean_area)
 ENTRY(cpu_arm9tdmi_switch_mm)
-               mov     pc, lr
+               ret     lr
 
 /*
  * cpu_arm9tdmi_proc_fin()
  */
 ENTRY(cpu_arm9tdmi_proc_fin)
-               mov     pc, lr
+               ret     lr
 
 /*
  * Function: cpu_arm9tdmi_reset(loc)
@@ -47,13 +47,13 @@ ENTRY(cpu_arm9tdmi_proc_fin)
  */
                .pushsection    .idmap.text, "ax"
 ENTRY(cpu_arm9tdmi_reset)
-               mov     pc, r0
+               ret     r0
 ENDPROC(cpu_arm9tdmi_reset)
                .popsection
 
                .type   __arm9tdmi_setup, #function
 __arm9tdmi_setup:
-               mov     pc, lr
+               ret     lr
                .size   __arm9tdmi_setup, . - __arm9tdmi_setup
 
                __INITDATA
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index 2dfc0f1d3bfd..c494886892ba 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -32,7 +32,7 @@
  * cpu_fa526_proc_init()
  */
 ENTRY(cpu_fa526_proc_init)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_fa526_proc_fin()
@@ -44,7 +44,7 @@ ENTRY(cpu_fa526_proc_fin)
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
        nop
        nop
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_fa526_reset(loc)
@@ -72,7 +72,7 @@ ENTRY(cpu_fa526_reset)
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
        nop
        nop
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_fa526_reset)
        .popsection
 
@@ -81,7 +81,7 @@ ENDPROC(cpu_fa526_reset)
  */
        .align  4
 ENTRY(cpu_fa526_do_idle)
-       mov     pc, lr
+       ret     lr
 
 
 ENTRY(cpu_fa526_dcache_clean_area)
@@ -90,7 +90,7 @@ ENTRY(cpu_fa526_dcache_clean_area)
        subs    r1, r1, #CACHE_DLINESIZE
        bhi     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -117,7 +117,7 @@ ENTRY(cpu_fa526_switch_mm)
        mcr     p15, 0, r0, c2, c0, 0           @ load page table pointer
        mcr     p15, 0, ip, c8, c7, 0           @ invalidate UTLB
 #endif
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_fa526_set_pte_ext(ptep, pte, ext)
@@ -133,7 +133,7 @@ ENTRY(cpu_fa526_set_pte_ext)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
 #endif
-       mov     pc, lr
+       ret     lr
 
        .type   __fa526_setup, #function
 __fa526_setup:
@@ -162,7 +162,7 @@ __fa526_setup:
        bic     r0, r0, r5
        ldr     r5, fa526_cr1_set
        orr     r0, r0, r5
-       mov     pc, lr
+       ret     lr
        .size   __fa526_setup, . - __fa526_setup
 
        /*
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index db79b62c92fb..03a1b75f2e16 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -69,7 +69,7 @@ ENTRY(cpu_feroceon_proc_init)
        movne   r2, r2, lsr #2                  @ turned into # of sets
        sub     r2, r2, #(1 << 5)
        stmia   r1, {r2, r3}
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_feroceon_proc_fin()
@@ -86,7 +86,7 @@ ENTRY(cpu_feroceon_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x000e                 @ ............wca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_feroceon_reset(loc)
@@ -110,7 +110,7 @@ ENTRY(cpu_feroceon_reset)
        bic     ip, ip, #0x000f                 @ ............wcam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_feroceon_reset)
        .popsection
 
@@ -124,7 +124,7 @@ ENTRY(cpu_feroceon_do_idle)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c10, 4          @ Drain write buffer
        mcr     p15, 0, r0, c7, c0, 4           @ Wait for interrupt
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_icache_all()
@@ -134,7 +134,7 @@ ENTRY(cpu_feroceon_do_idle)
 ENTRY(feroceon_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(feroceon_flush_icache_all)
 
 /*
@@ -169,7 +169,7 @@ __flush_whole_cache:
        mov     ip, #0
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -198,7 +198,7 @@ ENTRY(feroceon_flush_user_cache_range)
        tst     r2, #VM_EXEC
        mov     ip, #0
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -233,7 +233,7 @@ ENTRY(feroceon_coherent_user_range)
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -254,7 +254,7 @@ ENTRY(feroceon_flush_kern_dcache_area)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
        .align  5
 ENTRY(feroceon_range_flush_kern_dcache_area)
@@ -268,7 +268,7 @@ ENTRY(feroceon_range_flush_kern_dcache_area)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -295,7 +295,7 @@ feroceon_dma_inv_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
        .align  5
 feroceon_range_dma_inv_range:
@@ -311,7 +311,7 @@ feroceon_range_dma_inv_range:
        mcr     p15, 5, r0, c15, c14, 0         @ D inv range start
        mcr     p15, 5, r1, c15, c14, 1         @ D inv range top
        msr     cpsr_c, r2                      @ restore interrupts
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -331,7 +331,7 @@ feroceon_dma_clean_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
        .align  5
 feroceon_range_dma_clean_range:
@@ -344,7 +344,7 @@ feroceon_range_dma_clean_range:
        mcr     p15, 5, r1, c15, c13, 1         @ D clean range top
        msr     cpsr_c, r2                      @ restore interrupts
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -362,7 +362,7 @@ ENTRY(feroceon_dma_flush_range)
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
        .align  5
 ENTRY(feroceon_range_dma_flush_range)
@@ -375,7 +375,7 @@ ENTRY(feroceon_range_dma_flush_range)
        mcr     p15, 5, r1, c15, c15, 1         @ D clean/inv range top
        msr     cpsr_c, r2                      @ restore interrupts
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -412,7 +412,7 @@ ENDPROC(feroceon_range_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(feroceon_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(feroceon_dma_unmap_area)
 
        .globl  feroceon_flush_kern_cache_louis
@@ -461,7 +461,7 @@ ENTRY(cpu_feroceon_dcache_clean_area)
        bhi     1b
 #endif
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -490,9 +490,9 @@ ENTRY(cpu_feroceon_switch_mm)
 
        mcr     p15, 0, r0, c2, c0, 0           @ load page table pointer
        mcr     p15, 0, ip, c8, c7, 0           @ invalidate I & D TLBs
-       mov     pc, r2
+       ret     r2
 #else
-       mov     pc, lr
+       ret     lr
 #endif
 
 /*
@@ -512,7 +512,7 @@ ENTRY(cpu_feroceon_set_pte_ext)
 #endif
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
 #endif
-       mov     pc, lr
+       ret     lr
 
 /* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */
 .globl cpu_feroceon_suspend_size
@@ -554,7 +554,7 @@ __feroceon_setup:
        mrc     p15, 0, r0, c1, c0              @ get control register v4
        bic     r0, r0, r5
        orr     r0, r0, r6
-       mov     pc, lr
+       ret     lr
        .size   __feroceon_setup, . - __feroceon_setup
 
        /*
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index 40acba595731..53d393455f13 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -45,7 +45,7 @@
  * cpu_mohawk_proc_init()
  */
 ENTRY(cpu_mohawk_proc_init)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_mohawk_proc_fin()
@@ -55,7 +55,7 @@ ENTRY(cpu_mohawk_proc_fin)
        bic     r0, r0, #0x1800                 @ ...iz...........
        bic     r0, r0, #0x0006                 @ .............ca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_mohawk_reset(loc)
@@ -79,7 +79,7 @@ ENTRY(cpu_mohawk_reset)
        bic     ip, ip, #0x0007                 @ .............cam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_mohawk_reset)
        .popsection
 
@@ -93,7 +93,7 @@ ENTRY(cpu_mohawk_do_idle)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c10, 4          @ drain write buffer
        mcr     p15, 0, r0, c7, c0, 4           @ wait for interrupt
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_icache_all()
@@ -103,7 +103,7 @@ ENTRY(cpu_mohawk_do_idle)
 ENTRY(mohawk_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(mohawk_flush_icache_all)
 
 /*
@@ -128,7 +128,7 @@ __flush_whole_cache:
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate I cache
        mcrne   p15, 0, ip, c7, c10, 0          @ drain write buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, flags)
@@ -158,7 +158,7 @@ ENTRY(mohawk_flush_user_cache_range)
        blo     1b
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -194,7 +194,7 @@ ENTRY(mohawk_coherent_user_range)
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
        mov     r0, #0
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -214,7 +214,7 @@ ENTRY(mohawk_flush_kern_dcache_area)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -240,7 +240,7 @@ mohawk_dma_inv_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -259,7 +259,7 @@ mohawk_dma_clean_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -277,7 +277,7 @@ ENTRY(mohawk_dma_flush_range)
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -300,7 +300,7 @@ ENDPROC(mohawk_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(mohawk_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(mohawk_dma_unmap_area)
 
        .globl  mohawk_flush_kern_cache_louis
@@ -315,7 +315,7 @@ ENTRY(cpu_mohawk_dcache_clean_area)
        subs    r1, r1, #CACHE_DLINESIZE
        bhi     1b
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_mohawk_switch_mm(pgd)
@@ -333,7 +333,7 @@ ENTRY(cpu_mohawk_switch_mm)
        orr     r0, r0, #0x18                   @ cache the page table in L2
        mcr     p15, 0, r0, c2, c0, 0           @ load page table pointer
        mcr     p15, 0, ip, c8, c7, 0           @ invalidate I & D TLBs
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_mohawk_set_pte_ext(ptep, pte, ext)
@@ -346,7 +346,7 @@ ENTRY(cpu_mohawk_set_pte_ext)
        mov     r0, r0
        mcr     p15, 0, r0, c7, c10, 1          @ clean D entry
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
-       mov     pc, lr
+       ret     lr
 
 .globl cpu_mohawk_suspend_size
 .equ   cpu_mohawk_suspend_size, 4 * 6
@@ -400,7 +400,7 @@ __mohawk_setup:
        mrc     p15, 0, r0, c1, c0              @ get control register
        bic     r0, r0, r5
        orr     r0, r0, r6
-       mov     pc, lr
+       ret     lr
 
        .size   __mohawk_setup, . - __mohawk_setup
 
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index c45319c8f1d9..8008a0461cf5 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -38,7 +38,7 @@
 ENTRY(cpu_sa110_proc_init)
        mov     r0, #0
        mcr     p15, 0, r0, c15, c1, 2          @ Enable clock switching
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_sa110_proc_fin()
@@ -50,7 +50,7 @@ ENTRY(cpu_sa110_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x000e                 @ ............wca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_sa110_reset(loc)
@@ -74,7 +74,7 @@ ENTRY(cpu_sa110_reset)
        bic     ip, ip, #0x000f                 @ ............wcam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_sa110_reset)
        .popsection
 
@@ -103,7 +103,7 @@ ENTRY(cpu_sa110_do_idle)
        mov     r0, r0                          @ safety
        mov     r0, r0                          @ safety
        mcr     p15, 0, r0, c15, c1, 2          @ enable clock switching
-       mov     pc, lr
+       ret     lr
 
 /* ================================= CACHE ================================ */
 
@@ -121,7 +121,7 @@ ENTRY(cpu_sa110_dcache_clean_area)
        add     r0, r0, #DCACHELINESIZE
        subs    r1, r1, #DCACHELINESIZE
        bhi     1b
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -141,7 +141,7 @@ ENTRY(cpu_sa110_switch_mm)
        mcr     p15, 0, ip, c8, c7, 0           @ invalidate I & D TLBs
        ldr     pc, [sp], #4
 #else
-       mov     pc, lr
+       ret     lr
 #endif
 
 /*
@@ -157,7 +157,7 @@ ENTRY(cpu_sa110_set_pte_ext)
        mcr     p15, 0, r0, c7, c10, 1          @ clean D entry
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
 #endif
-       mov     pc, lr
+       ret     lr
 
        .type   __sa110_setup, #function
 __sa110_setup:
@@ -173,7 +173,7 @@ __sa110_setup:
        mrc     p15, 0, r0, c1, c0              @ get control register v4
        bic     r0, r0, r5
        orr     r0, r0, r6
-       mov     pc, lr
+       ret     lr
        .size   __sa110_setup, . - __sa110_setup
 
        /*
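
One pc write in proc-sa110.S (and its twin in proc-sa1100.S below) is
deliberately left alone: cpu_sa110_switch_mm returns by popping the
saved return address straight into pc, and only "mov pc, reg" sequences
are being converted. The untouched sequence looks roughly like this:

	str	lr, [sp, #-4]!		@ save lr across the cache flush call
	...
	ldr	pc, [sp], #4		@ pop the return address directly into pc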
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 09d241ae2dbe..89f97ac648a9 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -43,7 +43,7 @@ ENTRY(cpu_sa1100_proc_init)
        mov     r0, #0
        mcr     p15, 0, r0, c15, c1, 2          @ Enable clock switching
        mcr     p15, 0, r0, c9, c0, 5           @ Allow read-buffer operations from userland
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_sa1100_proc_fin()
@@ -58,7 +58,7 @@ ENTRY(cpu_sa1100_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x000e                 @ ............wca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_sa1100_reset(loc)
@@ -82,7 +82,7 @@ ENTRY(cpu_sa1100_reset)
        bic     ip, ip, #0x000f                 @ ............wcam
        bic     ip, ip, #0x1100                 @ ...i...s........
        mcr     p15, 0, ip, c1, c0, 0           @ ctrl register
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_sa1100_reset)
        .popsection
 
@@ -113,7 +113,7 @@ ENTRY(cpu_sa1100_do_idle)
        mcr     p15, 0, r0, c15, c8, 2          @ wait for interrupt
        mov     r0, r0                          @ safety
        mcr     p15, 0, r0, c15, c1, 2          @ enable clock switching
-       mov     pc, lr
+       ret     lr
 
 /* ================================= CACHE ================================ */
 
@@ -131,7 +131,7 @@ ENTRY(cpu_sa1100_dcache_clean_area)
        add     r0, r0, #DCACHELINESIZE
        subs    r1, r1, #DCACHELINESIZE
        bhi     1b
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -152,7 +152,7 @@ ENTRY(cpu_sa1100_switch_mm)
        mcr     p15, 0, ip, c8, c7, 0           @ invalidate I & D TLBs
        ldr     pc, [sp], #4
 #else
-       mov     pc, lr
+       ret     lr
 #endif
 
 /*
@@ -168,7 +168,7 @@ ENTRY(cpu_sa1100_set_pte_ext)
        mcr     p15, 0, r0, c7, c10, 1          @ clean D entry
        mcr     p15, 0, r0, c7, c10, 4          @ drain WB
 #endif
-       mov     pc, lr
+       ret     lr
 
 .globl cpu_sa1100_suspend_size
 .equ   cpu_sa1100_suspend_size, 4 * 3
@@ -211,7 +211,7 @@ __sa1100_setup:
        mrc     p15, 0, r0, c1, c0              @ get control register v4
        bic     r0, r0, r5
        orr     r0, r0, r6
-       mov     pc, lr
+       ret     lr
        .size   __sa1100_setup, . - __sa1100_setup
 
        /*
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 32b3558321c4..d0390f4b3f18 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -36,14 +36,14 @@
 #define PMD_FLAGS_SMP  PMD_SECT_WBWA|PMD_SECT_S
 
 ENTRY(cpu_v6_proc_init)
-       mov     pc, lr
+       ret     lr
 
 ENTRY(cpu_v6_proc_fin)
        mrc     p15, 0, r0, c1, c0, 0           @ ctrl register
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x0006                 @ .............ca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  *     cpu_v6_reset(loc)
@@ -62,7 +62,7 @@ ENTRY(cpu_v6_reset)
        mcr     p15, 0, r1, c1, c0, 0           @ disable MMU
        mov     r1, #0
        mcr     p15, 0, r1, c7, c5, 4           @ ISB
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_v6_reset)
        .popsection
 
@@ -77,14 +77,14 @@ ENTRY(cpu_v6_do_idle)
        mov     r1, #0
        mcr     p15, 0, r1, c7, c10, 4          @ DWB - WFI may enter a low-power mode
        mcr     p15, 0, r1, c7, c0, 4           @ wait for interrupt
-       mov     pc, lr
+       ret     lr
 
 ENTRY(cpu_v6_dcache_clean_area)
 1:     mcr     p15, 0, r0, c7, c10, 1          @ clean D entry
        add     r0, r0, #D_CACHE_LINE_SIZE
        subs    r1, r1, #D_CACHE_LINE_SIZE
        bhi     1b
-       mov     pc, lr
+       ret     lr
 
 /*
  *     cpu_v6_switch_mm(pgd_phys, tsk)
@@ -113,7 +113,7 @@ ENTRY(cpu_v6_switch_mm)
 #endif
        mcr     p15, 0, r1, c13, c0, 1          @ set context ID
 #endif
-       mov     pc, lr
+       ret     lr
 
 /*
  *     cpu_v6_set_pte_ext(ptep, pte, ext)
@@ -131,7 +131,7 @@ ENTRY(cpu_v6_set_pte_ext)
 #ifdef CONFIG_MMU
        armv6_set_pte_ext cpu_v6
 #endif
-       mov     pc, lr
+       ret     lr
 
 /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */
 .globl cpu_v6_suspend_size
@@ -241,7 +241,7 @@ __v6_setup:
        mcreq   p15, 0, r5, c1, c0, 1           @ write aux control reg
        orreq   r0, r0, #(1 << 21)              @ low interrupt latency configuration
 #endif
-       mov     pc, lr                          @ return to head.S:__ret
+       ret     lr                              @ return to head.S:__ret
 
        /*
         *         V X F   I D LR
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 1f52915f2b28..ed448d8a596b 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -59,7 +59,7 @@ ENTRY(cpu_v7_switch_mm)
        mcr     p15, 0, r0, c2, c0, 0           @ set TTB 0
        isb
 #endif
-       mov     pc, lr
+       bx      lr
 ENDPROC(cpu_v7_switch_mm)
 
 /*
@@ -106,7 +106,7 @@ ENTRY(cpu_v7_set_pte_ext)
        ALT_SMP(W(nop))
        ALT_UP (mcr     p15, 0, r0, c7, c10, 1)         @ flush_pte
 #endif
-       mov     pc, lr
+       bx      lr
 ENDPROC(cpu_v7_set_pte_ext)
 
        /*
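
Note that proc-v7-2level.S above uses a literal "bx lr" where the rest
of the series uses "ret lr". This file is only built for ARMv7, where
the macro would expand to the very same instruction, so the two
spellings produce identical code here:

	ret	lr			@ with __LINUX_ARM_ARCH__ >= 6: assembles as bx lr
	bx	lr			@ the same encoding, written out directly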
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 22e3ad63500c..564f4b934ceb 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -19,6 +19,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
+#include <asm/assembler.h>
 
 #define TTB_IRGN_NC    (0 << 8)
 #define TTB_IRGN_WBWA  (1 << 8)
@@ -61,7 +62,7 @@ ENTRY(cpu_v7_switch_mm)
        mcrr    p15, 0, rpgdl, rpgdh, c2                @ set TTB 0
        isb
 #endif
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7_switch_mm)
 
 #ifdef __ARMEB__
@@ -92,7 +93,7 @@ ENTRY(cpu_v7_set_pte_ext)
        ALT_SMP(W(nop))
        ALT_UP (mcr     p15, 0, r0, c7, c10, 1)         @ flush_pte
 #endif
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7_set_pte_ext)
 
        /*
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3db2c2f04a30..71abb60c4222 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -26,7 +26,7 @@
 #endif
 
 ENTRY(cpu_v7_proc_init)
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7_proc_init)
 
 ENTRY(cpu_v7_proc_fin)
@@ -34,7 +34,7 @@ ENTRY(cpu_v7_proc_fin)
        bic     r0, r0, #0x1000                 @ ...i............
        bic     r0, r0, #0x0006                 @ .............ca.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7_proc_fin)
 
 /*
@@ -71,20 +71,20 @@ ENDPROC(cpu_v7_reset)
 ENTRY(cpu_v7_do_idle)
        dsb                                     @ WFI may enter a low-power mode
        wfi
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7_do_idle)
 
 ENTRY(cpu_v7_dcache_clean_area)
        ALT_SMP(W(nop))                 @ MP extensions imply L1 PTW
        ALT_UP_B(1f)
-       mov     pc, lr
+       ret     lr
 1:     dcache_line_size r2, r3
 2:     mcr     p15, 0, r0, c7, c10, 1          @ clean D entry
        add     r0, r0, r2
        subs    r1, r1, r2
        bhi     2b
        dsb     ishst
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7_dcache_clean_area)
 
        string  cpu_v7_name, "ARMv7 Processor"
@@ -163,7 +163,7 @@ ENTRY(cpu_pj4b_do_idle)
        dsb                                     @ WFI may enter a low-power mode
        wfi
        dsb                                     @barrier
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_pj4b_do_idle)
 #else
        globl_equ       cpu_pj4b_do_idle,       cpu_v7_do_idle
@@ -407,7 +407,7 @@ __v7_setup:
        bic     r0, r0, r5                      @ clear bits them
        orr     r0, r0, r6                      @ set them
  THUMB(        orr     r0, r0, #1 << 30        )       @ Thumb exceptions
-       mov     pc, lr                          @ return to head.S:__ret
+       ret     lr                              @ return to head.S:__ret
 ENDPROC(__v7_setup)
 
        .align  2
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 1ca37c72f12f..d1e68b553d3b 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -16,11 +16,11 @@
 #include "proc-macros.S"
 
 ENTRY(cpu_v7m_proc_init)
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7m_proc_init)
 
 ENTRY(cpu_v7m_proc_fin)
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7m_proc_fin)
 
 /*
@@ -34,7 +34,7 @@ ENDPROC(cpu_v7m_proc_fin)
  */
        .align  5
 ENTRY(cpu_v7m_reset)
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_v7m_reset)
 
 /*
@@ -46,18 +46,18 @@ ENDPROC(cpu_v7m_reset)
  */
 ENTRY(cpu_v7m_do_idle)
        wfi
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7m_do_idle)
 
 ENTRY(cpu_v7m_dcache_clean_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7m_dcache_clean_area)
 
 /*
  * There is no MMU, so here is nothing to do.
  */
 ENTRY(cpu_v7m_switch_mm)
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7m_switch_mm)
 
 .globl cpu_v7m_suspend_size
@@ -65,11 +65,11 @@ ENDPROC(cpu_v7m_switch_mm)
 
 #ifdef CONFIG_ARM_CPU_SUSPEND
 ENTRY(cpu_v7m_do_suspend)
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7m_do_suspend)
 
 ENTRY(cpu_v7m_do_resume)
-       mov     pc, lr
+       ret     lr
 ENDPROC(cpu_v7m_do_resume)
 #endif
 
@@ -120,7 +120,7 @@ __v7m_setup:
        ldr     r12, [r0, V7M_SCB_CCR]  @ system control register
        orr     r12, #V7M_SCB_CCR_STKALIGN
        str     r12, [r0, V7M_SCB_CCR]
-       mov     pc, lr
+       ret     lr
 ENDPROC(__v7m_setup)
 
        .align 2
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index dc1645890042..f8acdfece036 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -83,7 +83,7 @@
  * Nothing too exciting at the moment
  */
 ENTRY(cpu_xsc3_proc_init)
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_xsc3_proc_fin()
@@ -93,7 +93,7 @@ ENTRY(cpu_xsc3_proc_fin)
        bic     r0, r0, #0x1800                 @ ...IZ...........
        bic     r0, r0, #0x0006                 @ .............CA.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_xsc3_reset(loc)
@@ -119,7 +119,7 @@ ENTRY(cpu_xsc3_reset)
        @ CAUTION: MMU turned off from this point.  We count on the pipeline
        @ already containing those two last instructions to survive.
        mcr     p15, 0, ip, c8, c7, 0           @ invalidate I and D TLBs
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_xsc3_reset)
        .popsection
 
@@ -138,7 +138,7 @@ ENDPROC(cpu_xsc3_reset)
 ENTRY(cpu_xsc3_do_idle)
        mov     r0, #1
        mcr     p14, 0, r0, c7, c0, 0           @ go to idle
-       mov     pc, lr
+       ret     lr
 
 /* ================================= CACHE ================================ */
 
@@ -150,7 +150,7 @@ ENTRY(cpu_xsc3_do_idle)
 ENTRY(xsc3_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(xsc3_flush_icache_all)
 
 /*
@@ -176,7 +176,7 @@ __flush_whole_cache:
        mcrne   p15, 0, ip, c7, c5, 0           @ invalidate L1 I cache and BTB
        mcrne   p15, 0, ip, c7, c10, 4          @ data write barrier
        mcrne   p15, 0, ip, c7, c5, 4           @ prefetch flush
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, vm_flags)
@@ -205,7 +205,7 @@ ENTRY(xsc3_flush_user_cache_range)
        mcrne   p15, 0, ip, c7, c5, 6           @ invalidate BTB
        mcrne   p15, 0, ip, c7, c10, 4          @ data write barrier
        mcrne   p15, 0, ip, c7, c5, 4           @ prefetch flush
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -232,7 +232,7 @@ ENTRY(xsc3_coherent_user_range)
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate L1 I cache and BTB
        mcr     p15, 0, r0, c7, c10, 4          @ data write barrier
        mcr     p15, 0, r0, c7, c5, 4           @ prefetch flush
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -253,7 +253,7 @@ ENTRY(xsc3_flush_kern_dcache_area)
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate L1 I cache and BTB
        mcr     p15, 0, r0, c7, c10, 4          @ data write barrier
        mcr     p15, 0, r0, c7, c5, 4           @ prefetch flush
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -277,7 +277,7 @@ xsc3_dma_inv_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ data write barrier
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -294,7 +294,7 @@ xsc3_dma_clean_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ data write barrier
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -311,7 +311,7 @@ ENTRY(xsc3_dma_flush_range)
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ data write barrier
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -334,7 +334,7 @@ ENDPROC(xsc3_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(xsc3_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(xsc3_dma_unmap_area)
 
        .globl  xsc3_flush_kern_cache_louis
@@ -348,7 +348,7 @@ ENTRY(cpu_xsc3_dcache_clean_area)
        add     r0, r0, #CACHELINESIZE
        subs    r1, r1, #CACHELINESIZE
        bhi     1b
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -406,7 +406,7 @@ ENTRY(cpu_xsc3_set_pte_ext)
        orr     r2, r2, ip
 
        xscale_set_pte_ext_epilogue
-       mov     pc, lr
+       ret     lr
 
        .ltorg
        .align
@@ -478,7 +478,7 @@ __xsc3_setup:
        bic     r0, r0, r5                      @ ..V. ..R. .... ..A.
        orr     r0, r0, r6                      @ ..VI Z..S .... .C.M (mmu)
                                                @ ...I Z..S .... .... (uc)
-       mov     pc, lr
+       ret     lr
 
        .size   __xsc3_setup, . - __xsc3_setup
 
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index d19b1cfcad91..23259f104c66 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -118,7 +118,7 @@ ENTRY(cpu_xscale_proc_init)
        mrc     p15, 0, r1, c1, c0, 1
        bic     r1, r1, #1
        mcr     p15, 0, r1, c1, c0, 1
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_xscale_proc_fin()
@@ -128,7 +128,7 @@ ENTRY(cpu_xscale_proc_fin)
        bic     r0, r0, #0x1800                 @ ...IZ...........
        bic     r0, r0, #0x0006                 @ .............CA.
        mcr     p15, 0, r0, c1, c0, 0           @ disable caches
-       mov     pc, lr
+       ret     lr
 
 /*
  * cpu_xscale_reset(loc)
@@ -160,7 +160,7 @@ ENTRY(cpu_xscale_reset)
        @ CAUTION: MMU turned off from this point. We count on the pipeline
        @ already containing those two last instructions to survive.
        mcr     p15, 0, ip, c8, c7, 0           @ invalidate I & D TLBs
-       mov     pc, r0
+       ret     r0
 ENDPROC(cpu_xscale_reset)
        .popsection
 
@@ -179,7 +179,7 @@ ENDPROC(cpu_xscale_reset)
 ENTRY(cpu_xscale_do_idle)
        mov     r0, #1
        mcr     p14, 0, r0, c7, c0, 0           @ Go to IDLE
-       mov     pc, lr
+       ret     lr
 
 /* ================================= CACHE ================================ */
 
@@ -191,7 +191,7 @@ ENTRY(cpu_xscale_do_idle)
 ENTRY(xscale_flush_icache_all)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ invalidate I cache
-       mov     pc, lr
+       ret     lr
 ENDPROC(xscale_flush_icache_all)
 
 /*
@@ -216,7 +216,7 @@ __flush_whole_cache:
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c5, 0           @ Invalidate I cache & BTB
        mcrne   p15, 0, ip, c7, c10, 4          @ Drain Write (& Fill) Buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_user_cache_range(start, end, vm_flags)
@@ -245,7 +245,7 @@ ENTRY(xscale_flush_user_cache_range)
        tst     r2, #VM_EXEC
        mcrne   p15, 0, ip, c7, c5, 6           @ Invalidate BTB
        mcrne   p15, 0, ip, c7, c10, 4          @ Drain Write (& Fill) Buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_kern_range(start, end)
@@ -269,7 +269,7 @@ ENTRY(xscale_coherent_kern_range)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ Invalidate I cache & BTB
        mcr     p15, 0, r0, c7, c10, 4          @ Drain Write (& Fill) Buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     coherent_user_range(start, end)
@@ -291,7 +291,7 @@ ENTRY(xscale_coherent_user_range)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 6           @ Invalidate BTB
        mcr     p15, 0, r0, c7, c10, 4          @ Drain Write (& Fill) Buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     flush_kern_dcache_area(void *addr, size_t size)
@@ -312,7 +312,7 @@ ENTRY(xscale_flush_kern_dcache_area)
        mov     r0, #0
        mcr     p15, 0, r0, c7, c5, 0           @ Invalidate I cache & BTB
        mcr     p15, 0, r0, c7, c10, 4          @ Drain Write (& Fill) Buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_inv_range(start, end)
@@ -336,7 +336,7 @@ xscale_dma_inv_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ Drain Write (& Fill) Buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_clean_range(start, end)
@@ -353,7 +353,7 @@ xscale_dma_clean_range:
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ Drain Write (& Fill) Buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_flush_range(start, end)
@@ -371,7 +371,7 @@ ENTRY(xscale_dma_flush_range)
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r0, c7, c10, 4          @ Drain Write (& Fill) Buffer
-       mov     pc, lr
+       ret     lr
 
 /*
  *     dma_map_area(start, size, dir)
@@ -407,7 +407,7 @@ ENDPROC(xscale_80200_A0_A1_dma_map_area)
  *     - dir   - DMA direction
  */
 ENTRY(xscale_dma_unmap_area)
-       mov     pc, lr
+       ret     lr
 ENDPROC(xscale_dma_unmap_area)
 
        .globl  xscale_flush_kern_cache_louis
@@ -458,7 +458,7 @@ ENTRY(cpu_xscale_dcache_clean_area)
        add     r0, r0, #CACHELINESIZE
        subs    r1, r1, #CACHELINESIZE
        bhi     1b
-       mov     pc, lr
+       ret     lr
 
 /* =============================== PageTable ============================== */
 
@@ -521,7 +521,7 @@ ENTRY(cpu_xscale_set_pte_ext)
        orr     r2, r2, ip
 
        xscale_set_pte_ext_epilogue
-       mov     pc, lr
+       ret     lr
 
        .ltorg
        .align
@@ -572,7 +572,7 @@ __xscale_setup:
        mrc     p15, 0, r0, c1, c0, 0           @ get control register
        bic     r0, r0, r5
        orr     r0, r0, r6
-       mov     pc, lr
+       ret     lr
        .size   __xscale_setup, . - __xscale_setup
 
        /*
diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S
index d3ddcf9a76ca..d2d9ecbe0aac 100644
--- a/arch/arm/mm/tlb-fa.S
+++ b/arch/arm/mm/tlb-fa.S
@@ -18,6 +18,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/tlbflush.h>
 #include "proc-macros.S"
@@ -37,7 +38,7 @@ ENTRY(fa_flush_user_tlb_range)
        vma_vm_mm ip, r2
        act_mm  r3                              @ get current->active_mm
        eors    r3, ip, r3                      @ == mm ?
-       movne   pc, lr                          @ no, we dont do anything
+       retne   lr                              @ no, we dont do anything
        mov     r3, #0
        mcr     p15, 0, r3, c7, c10, 4          @ drain WB
        bic     r0, r0, #0x0ff
@@ -47,7 +48,7 @@ ENTRY(fa_flush_user_tlb_range)
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, r3, c7, c10, 4          @ data write barrier
-       mov     pc, lr
+       ret     lr
 
 
 ENTRY(fa_flush_kern_tlb_range)
@@ -61,7 +62,7 @@ ENTRY(fa_flush_kern_tlb_range)
        blo     1b
        mcr     p15, 0, r3, c7, c10, 4          @ data write barrier
        mcr     p15, 0, r3, c7, c5, 4           @ prefetch flush (isb)
-       mov     pc, lr
+       ret     lr
 
        __INITDATA
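
The TLB flush paths are where the conditional variants of the macro
matter: the "movne pc, lr" early exit (taken when the VMA's mm is not
the current active_mm) becomes "retne lr", preserving the NE
predication. Note also that tlb-fa.S and the tlb-v4*.S/tlb-v6.S files
below each gain #include <asm/assembler.h>; the ret/retne macros live
there, so any file converted to them must now pull that header in.
The converted early exit reads:

	eors	r3, ip, r3		@ == mm ?
	retne	lr			@ bxne lr on v6+, movne pc, lr on older cores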
 
diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S
index 17a025ade573..a2b5dca42048 100644
--- a/arch/arm/mm/tlb-v4.S
+++ b/arch/arm/mm/tlb-v4.S
@@ -14,6 +14,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/tlbflush.h>
 #include "proc-macros.S"
@@ -33,7 +34,7 @@ ENTRY(v4_flush_user_tlb_range)
        vma_vm_mm ip, r2
        act_mm  r3                              @ get current->active_mm
        eors    r3, ip, r3                              @ == mm ?
-       movne   pc, lr                          @ no, we dont do anything
+       retne   lr                              @ no, we dont do anything
 .v4_flush_kern_tlb_range:
        bic     r0, r0, #0x0ff
        bic     r0, r0, #0xf00
@@ -41,7 +42,7 @@ ENTRY(v4_flush_user_tlb_range)
        add     r0, r0, #PAGE_SZ
        cmp     r0, r1
        blo     1b
-       mov     pc, lr
+       ret     lr
 
 /*
  *     v4_flush_kern_tlb_range(start, end)
diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S
index c04598fa4d4a..5a093b458dbc 100644
--- a/arch/arm/mm/tlb-v4wb.S
+++ b/arch/arm/mm/tlb-v4wb.S
@@ -14,6 +14,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/tlbflush.h>
 #include "proc-macros.S"
@@ -33,7 +34,7 @@ ENTRY(v4wb_flush_user_tlb_range)
        vma_vm_mm ip, r2
        act_mm  r3                              @ get current->active_mm
        eors    r3, ip, r3                              @ == mm ?
-       movne   pc, lr                          @ no, we dont do anything
+       retne   lr                              @ no, we dont do anything
        vma_vm_flags r2, r2
        mcr     p15, 0, r3, c7, c10, 4          @ drain WB
        tst     r2, #VM_EXEC
@@ -44,7 +45,7 @@ ENTRY(v4wb_flush_user_tlb_range)
        add     r0, r0, #PAGE_SZ
        cmp     r0, r1
        blo     1b
-       mov     pc, lr
+       ret     lr
 
 /*
  *     v4_flush_kern_tlb_range(start, end)
@@ -65,7 +66,7 @@ ENTRY(v4wb_flush_kern_tlb_range)
        add     r0, r0, #PAGE_SZ
        cmp     r0, r1
        blo     1b
-       mov     pc, lr
+       ret     lr
 
        __INITDATA
 
diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S
index 1f6062b6c1c1..058861548f68 100644
--- a/arch/arm/mm/tlb-v4wbi.S
+++ b/arch/arm/mm/tlb-v4wbi.S
@@ -14,6 +14,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/tlbflush.h>
 #include "proc-macros.S"
@@ -32,7 +33,7 @@ ENTRY(v4wbi_flush_user_tlb_range)
        vma_vm_mm ip, r2
        act_mm  r3                              @ get current->active_mm
        eors    r3, ip, r3                      @ == mm ?
-       movne   pc, lr                          @ no, we dont do anything
+       retne   lr                              @ no, we dont do anything
        mov     r3, #0
        mcr     p15, 0, r3, c7, c10, 4          @ drain WB
        vma_vm_flags r2, r2
@@ -44,7 +45,7 @@ ENTRY(v4wbi_flush_user_tlb_range)
        add     r0, r0, #PAGE_SZ
        cmp     r0, r1
        blo     1b
-       mov     pc, lr
+       ret     lr
 
 ENTRY(v4wbi_flush_kern_tlb_range)
        mov     r3, #0
@@ -56,7 +57,7 @@ ENTRY(v4wbi_flush_kern_tlb_range)
        add     r0, r0, #PAGE_SZ
        cmp     r0, r1
        blo     1b
-       mov     pc, lr
+       ret     lr
 
        __INITDATA
 
diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S
index eca07f550a0b..6f689be638bd 100644
--- a/arch/arm/mm/tlb-v6.S
+++ b/arch/arm/mm/tlb-v6.S
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
+#include <asm/assembler.h>
 #include <asm/page.h>
 #include <asm/tlbflush.h>
 #include "proc-macros.S"
@@ -55,7 +56,7 @@ ENTRY(v6wbi_flush_user_tlb_range)
        cmp     r0, r1
        blo     1b
        mcr     p15, 0, ip, c7, c10, 4          @ data synchronization barrier
-       mov     pc, lr
+       ret     lr
 
 /*
  *     v6wbi_flush_kern_tlb_range(start,end)
@@ -84,7 +85,7 @@ ENTRY(v6wbi_flush_kern_tlb_range)
        blo     1b
        mcr     p15, 0, r2, c7, c10, 4          @ data synchronization barrier
        mcr     p15, 0, r2, c7, c5, 4           @ prefetch flush (isb)
-       mov     pc, lr
+       ret     lr
 
        __INIT
 
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index 355308767bae..e5101a3bc57c 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -57,7 +57,7 @@ ENTRY(v7wbi_flush_user_tlb_range)
        cmp     r0, r1
        blo     1b
        dsb     ish
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7wbi_flush_user_tlb_range)
 
 /*
@@ -86,7 +86,7 @@ ENTRY(v7wbi_flush_kern_tlb_range)
        blo     1b
        dsb     ish
        isb
-       mov     pc, lr
+       ret     lr
 ENDPROC(v7wbi_flush_kern_tlb_range)
 
        __INIT
diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S
index d18dde95b8aa..5d65be1f1e8a 100644
--- a/arch/arm/nwfpe/entry.S
+++ b/arch/arm/nwfpe/entry.S
@@ -19,7 +19,7 @@
     along with this program; if not, write to the Free Software
     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
-
+#include <asm/assembler.h>
 #include <asm/opcodes.h>
 
 /* This is the kernel's entry point into the floating point emulator.
@@ -92,7 +92,7 @@ emulate:
        mov     r0, r6                  @ prepare for EmulateAll()
        bl      EmulateAll              @ emulate the instruction
        cmp     r0, #0                  @ was emulation successful
-       moveq   pc, r4                  @ no, return failure
+       reteq   r4                      @ no, return failure
 
 next:
 .Lx1:  ldrt    r6, [r5], #4            @ get the next instruction and
@@ -102,7 +102,7 @@ next:
        teq     r2, #0x0C000000
        teqne   r2, #0x0D000000
        teqne   r2, #0x0E000000
-       movne   pc, r9                  @ return ok if not a fp insn
+       retne   r9                      @ return ok if not a fp insn
 
        str     r5, [sp, #S_PC]         @ update PC copy in regs
 
@@ -115,7 +115,7 @@ next:
        @ plain LDR instruction.  Weird, but it seems harmless.
        .pushsection .fixup,"ax"
        .align  2
-.Lfix: mov     pc, r9                  @ let the user eat segfaults
+.Lfix: ret     r9                      @ let the user eat segfaults
        .popsection
 
        .pushsection __ex_table,"a"
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index fe6ca574d093..2e78760f3495 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -34,7 +34,7 @@ ENDPROC(do_vfp)
 
 ENTRY(vfp_null_entry)
        dec_preempt_count_ti r10, r4
-       mov     pc, lr
+       ret     lr
 ENDPROC(vfp_null_entry)
 
        .align  2
@@ -49,7 +49,7 @@ ENTRY(vfp_testing_entry)
        dec_preempt_count_ti r10, r4
        ldr     r0, VFP_arch_address
        str     r0, [r0]                @ set to non-zero value
-       mov     pc, r9                  @ we have handled the fault
+       ret     r9                      @ we have handled the fault
 ENDPROC(vfp_testing_entry)
 
        .align  2
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index be807625ed8c..cda654cbf2c2 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -183,7 +183,7 @@ vfp_hw_state_valid:
                                        @ always subtract 4 from the following
                                        @ instruction address.
        dec_preempt_count_ti r10, r4
-       mov     pc, r9                  @ we think we have handled things
+       ret     r9                      @ we think we have handled things
 
 
 look_for_VFP_exceptions:
@@ -202,7 +202,7 @@ look_for_VFP_exceptions:
 
        DBGSTR  "not VFP"
        dec_preempt_count_ti r10, r4
-       mov     pc, lr
+       ret     lr
 
 process_exception:
        DBGSTR  "bounce"
@@ -234,7 +234,7 @@ ENTRY(vfp_save_state)
        VFPFMRX r12, FPINST2            @ FPINST2 if needed (and present)
 1:
        stmia   r0, {r1, r2, r3, r12}   @ save FPEXC, FPSCR, FPINST, FPINST2
-       mov     pc, lr
+       ret     lr
 ENDPROC(vfp_save_state)
 
        .align
@@ -245,7 +245,7 @@ vfp_current_hw_state_address:
 #ifdef CONFIG_THUMB2_KERNEL
        adr     \tmp, 1f
        add     \tmp, \tmp, \base, lsl \shift
-       mov     pc, \tmp
+       ret     \tmp
 #else
        add     pc, pc, \base, lsl \shift
        mov     r0, r0
@@ -257,10 +257,10 @@ ENTRY(vfp_get_float)
        tbl_branch r0, r3, #3
        .irp    dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:     mrc     p10, 0, r0, c\dr, c0, 0 @ fmrs  r0, s0
-       mov     pc, lr
+       ret     lr
        .org    1b + 8
 1:     mrc     p10, 0, r0, c\dr, c0, 4 @ fmrs  r0, s1
-       mov     pc, lr
+       ret     lr
        .org    1b + 8
        .endr
 ENDPROC(vfp_get_float)
@@ -269,10 +269,10 @@ ENTRY(vfp_put_float)
        tbl_branch r1, r3, #3
        .irp    dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:     mcr     p10, 0, r0, c\dr, c0, 0 @ fmsr  r0, s0
-       mov     pc, lr
+       ret     lr
        .org    1b + 8
 1:     mcr     p10, 0, r0, c\dr, c0, 4 @ fmsr  r0, s1
-       mov     pc, lr
+       ret     lr
        .org    1b + 8
        .endr
 ENDPROC(vfp_put_float)
@@ -281,14 +281,14 @@ ENTRY(vfp_get_double)
        tbl_branch r0, r3, #3
        .irp    dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:     fmrrd   r0, r1, d\dr
-       mov     pc, lr
+       ret     lr
        .org    1b + 8
        .endr
 #ifdef CONFIG_VFPv3
        @ d16 - d31 registers
        .irp    dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:     mrrc    p11, 3, r0, r1, c\dr    @ fmrrd r0, r1, d\dr
-       mov     pc, lr
+       ret     lr
        .org    1b + 8
        .endr
 #endif
@@ -296,21 +296,21 @@ ENTRY(vfp_get_double)
        @ virtual register 16 (or 32 if VFPv3) for compare with zero
        mov     r0, #0
        mov     r1, #0
-       mov     pc, lr
+       ret     lr
 ENDPROC(vfp_get_double)
 
 ENTRY(vfp_put_double)
        tbl_branch r2, r3, #3
        .irp    dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:     fmdrr   d\dr, r0, r1
-       mov     pc, lr
+       ret     lr
        .org    1b + 8
        .endr
 #ifdef CONFIG_VFPv3
        @ d16 - d31 registers
        .irp    dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:     mcrr    p11, 3, r0, r1, c\dr    @ fmdrr r0, r1, d\dr
-       mov     pc, lr
+       ret     lr
        .org    1b + 8
        .endr
 #endif
diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S
index 44e3a5f10c4c..f00e08075938 100644
--- a/arch/arm/xen/hypercall.S
+++ b/arch/arm/xen/hypercall.S
@@ -58,7 +58,7 @@
 ENTRY(HYPERVISOR_##hypercall)                  \
        mov r12, #__HYPERVISOR_##hypercall;     \
        __HVC(XEN_IMM);                                         \
-       mov pc, lr;                                                     \
+       ret lr;                                 \
 ENDPROC(HYPERVISOR_##hypercall)
 
 #define HYPERCALL0 HYPERCALL_SIMPLE
@@ -74,7 +74,7 @@ ENTRY(HYPERVISOR_##hypercall)                 \
        mov r12, #__HYPERVISOR_##hypercall;     \
        __HVC(XEN_IMM);                                         \
        ldm sp!, {r4}                                           \
-       mov pc, lr                                                      \
+       ret lr                                  \
 ENDPROC(HYPERVISOR_##hypercall)
 
                 .text
@@ -101,5 +101,5 @@ ENTRY(privcmd_call)
        ldr r4, [sp, #4]
        __HVC(XEN_IMM)
        ldm sp!, {r4}
-       mov pc, lr
+       ret lr
 ENDPROC(privcmd_call);
-- 
1.8.3.1
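
For anyone curious what the new macro family looks like: the definitions
go into arch/arm/include/asm/assembler.h. Roughly, one macro is generated
per condition code, emitting "bx" only on ARMv6+ and only for returns
through lr, and falling back to "mov pc" everywhere else. A minimal sketch
of that idea follows (the in-tree definition may differ in detail):

	@ sketch: generate ret, reteq, retne, ... wrappers
	.irp	c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
	.macro	ret\c, reg
#if __LINUX_ARM_ARCH__ < 6
	mov\c	pc, \reg		@ pre-v6 has no bx; plain mov is the only option
#else
	.ifeqs	"\reg", "lr"
	bx\c	\reg			@ v6+: return via lr through the return predictor
	.else
	mov\c	pc, \reg		@ other registers keep the mov form for now
	.endif
#endif
	.endm
	.endr

This keeps every call site a one-word change ("mov pc, X" becomes
"ret X") while concentrating the per-architecture decision in one place.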

