[1/2] Add Thumb2 support to ARM assembler memcpy

Submitted by Andre McCurdy on Sept. 13, 2019, 6:44 p.m.

Details

Message ID 20190913184432.29753-1-armccurdy@gmail.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Andre McCurdy Sept. 13, 2019, 6:44 p.m.
For Thumb2 compatibility, replace two instances of a single
instruction "orr with a variable shift" with the two instruction
equivalent. Neither of the replacements are in a performance critical
loop.
---
 src/string/arm/memcpy.c    |  2 +-
 src/string/arm/memcpy_le.S | 17 ++++++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
index f703c9bd..041614f4 100644
--- a/src/string/arm/memcpy.c
+++ b/src/string/arm/memcpy.c
@@ -1,3 +1,3 @@ 
-#if __ARMEB__ || __thumb__
+#if __ARMEB__
 #include "../memcpy.c"
 #endif
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
index 9cfbcb2a..64bc5f9e 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy_le.S
@@ -1,4 +1,4 @@ 
-#if !__ARMEB__ && !__thumb__
+#if !__ARMEB__
 
 /*
  * Copyright (C) 2008 The Android Open Source Project
@@ -40,8 +40,9 @@ 
  * This file has been modified from the original for use in musl libc.
  * The main changes are: addition of .type memcpy,%function to make the
  * code safely callable from thumb mode, adjusting the return
- * instructions to be compatible with pre-thumb ARM cpus, and removal
- * of prefetch code that is not compatible with older cpus.
+ * instructions to be compatible with pre-thumb ARM cpus, removal of
+ * prefetch code that is not compatible with older cpus and support for
+ * building as thumb 2.
  */
 
 .syntax unified
@@ -241,8 +242,9 @@  non_congruent:
 	beq     2f
 	ldr     r5, [r1], #4
 	sub     r2, r2, #4
-	orr     r4, r3, r5,             lsl lr
-	mov     r3, r5,                 lsr r12
+	mov     r4, r5, lsl lr
+	orr     r4, r4, r3
+	mov     r3, r5, lsr r12
 	str     r4, [r0], #4
 	cmp     r2, #4
 	bhs     1b
@@ -348,8 +350,9 @@  less_than_thirtytwo:
 
 1:      ldr     r5, [r1], #4
 	sub     r2, r2, #4
-	orr     r4, r3, r5,             lsl lr
-	mov     r3,     r5,                     lsr r12
+	mov     r4, r5, lsl lr
+	orr     r4, r4, r3
+	mov     r3, r5, lsr r12
 	str     r4, [r0], #4
 	cmp     r2, #4
 	bhs     1b