Redo THUMB support.

Discussed with and tested by: Saleem Abdulrasool

llvm-svn: 213481
This commit is contained in:
Joerg Sonnenberger 2014-07-20 20:53:37 +00:00
parent 09f45ca39b
commit 9720fcf4bf
3 changed files with 76 additions and 7 deletions

View File

@ -17,6 +17,18 @@
.syntax unified
.text
/* Assemble what follows as Thumb code when the target reports Thumb-2. */
#if __ARM_ARCH_ISA_THUMB == 2
.thumb
#endif
/*
 * IT(cond) / ITT(cond) expand to an "it" / "itt" instruction when building
 * for Thumb-2 and to nothing otherwise, so the conditional instructions
 * placed after them (e.g. the addhs/subhs pair in block()) can be written
 * once for both instruction sets.
 */
#if __ARM_ARCH_ISA_THUMB == 2
#define IT(cond) it cond
#define ITT(cond) itt cond
#else
#define IT(cond)
#define ITT(cond)
#endif
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#if __ARM_ARCH_EXT_IDIV__
@ -42,6 +54,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
* r0 and (r1 << I) have the highest bit set in the same position.
* At the time of JMP, ip := .Ldiv0block - 12 * I.
* This depends on the fixed instruction size of block.
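* For ARM mode, this is 12 bytes; for THUMB mode it is 14 bytes, so the
* THUMB path computes ip := .Ldiv0block + 1 - 14 * I instead (the extra
* "sub ip, ip, r3, lsl #1" supplies the additional 2 * I, and the + 1 on
* the adr keeps bit 0 set for the following "bx ip").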
*
* block(shift) implements the test-and-update-quotient core.
* It assumes (r0 << shift) can be computed without overflow and
@ -53,12 +66,20 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
clz r3, r1
/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
sub r3, r3, ip
# if __ARM_ARCH_ISA_THUMB == 2
adr ip, LOCAL_LABEL(div0block) + 1
sub ip, ip, r3, lsl #1
# else
adr ip, LOCAL_LABEL(div0block)
# endif
sub ip, ip, r3, lsl #2
sub ip, ip, r3, lsl #3
mov r3, #0
bx ip
# else
# if __ARM_ARCH_ISA_THUMB == 2
# error THUMB mode requires CLZ or UDIV
# endif
str r4, [sp, #-8]!
mov r4, r0
@ -98,8 +119,9 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#define block(shift) \
cmp r0, r1, lsl IMM shift; \
addhs r3, r3, IMM (1 << shift); \
subhs r0, r0, r1, lsl IMM shift
ITT(hs); \
addhs.w r3, r3, IMM (1 << shift); \
subhs.w r0, r0, r1, lsl IMM shift
block(31)
block(30)

View File

@ -17,6 +17,18 @@
.syntax unified
.text
/* Assemble what follows as Thumb code when the target reports Thumb-2. */
#if __ARM_ARCH_ISA_THUMB == 2
.thumb
#endif
/*
 * IT(cond) / ITT(cond) expand to an "it" / "itt" instruction when building
 * for Thumb-2 and to nothing otherwise, so the conditional instructions
 * placed after them (e.g. "ITT(cc); movcc r0, #0; JMPc(lr, cc)") can be
 * written once for both instruction sets.
 */
#if __ARM_ARCH_ISA_THUMB == 2
#define IT(cond) it cond
#define ITT(cond) itt cond
#else
#define IT(cond)
#define ITT(cond)
#endif
.p2align 2
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
@ -30,8 +42,10 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#else
cmp r1, #1
bcc LOCAL_LABEL(divby0)
IT(eq)
JMPc(lr, eq)
cmp r0, r1
ITT(cc)
movcc r0, #0
JMPc(lr, cc)
/*
@ -43,6 +57,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
* r0 and (r1 << I) have the highest bit set in the same position.
* At the time of JMP, ip := .Ldiv0block - 12 * I.
* This depends on the fixed instruction size of block.
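* For ARM mode, this is 12 bytes; for THUMB mode it is 14 bytes, so the
* THUMB path computes ip := .Ldiv0block + 1 - 14 * I instead (the extra
* "sub ip, ip, r3, lsl #1" supplies the additional 2 * I, and the + 1 on
* the adr keeps bit 0 set for the following "bx ip").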
*
* block(shift) implements the test-and-update-quotient core.
* It assumes (r0 << shift) can be computed without overflow and
@ -54,12 +69,20 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
clz r3, r1
/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
sub r3, r3, ip
# if __ARM_ARCH_ISA_THUMB == 2
adr ip, LOCAL_LABEL(div0block) + 1
sub ip, ip, r3, lsl #1
# else
adr ip, LOCAL_LABEL(div0block)
# endif
sub ip, ip, r3, lsl #2
sub ip, ip, r3, lsl #3
mov r3, #0
bx ip
# else
# if __ARM_ARCH_ISA_THUMB == 2
# error THUMB mode requires CLZ or UDIV
# endif
mov r2, r0
adr ip, LOCAL_LABEL(div0block)
@ -96,8 +119,9 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#define block(shift) \
cmp r0, r1, lsl IMM shift; \
addhs r3, r3, IMM (1 << shift); \
subhs r0, r0, r1, lsl IMM shift
ITT(hs); \
addhs.w r3, r3, IMM (1 << shift); \
subhs.w r0, r0, r1, lsl IMM shift
block(31)
block(30)

View File

@ -16,6 +16,17 @@
.syntax unified
.text
/* Assemble what follows as Thumb code when the target reports Thumb-2. */
#if __ARM_ARCH_ISA_THUMB == 2
.thumb
#endif
/*
 * IT(cond) / ITT(cond) expand to an "it" / "itt" instruction when building
 * for Thumb-2 and to nothing otherwise, so the conditional instructions
 * placed after them (e.g. "ITT(eq); moveq r0, #0; JMPc(lr, eq)") can be
 * written once for both instruction sets.
 */
#if __ARM_ARCH_ISA_THUMB == 2
#define IT(cond) it cond
#define ITT(cond) itt cond
#else
#define IT(cond)
#define ITT(cond)
#endif
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
@ -30,9 +41,11 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#else
cmp r1, #1
bcc LOCAL_LABEL(divby0)
ITT(eq)
moveq r0, #0
JMPc(lr, eq)
cmp r0, r1
IT(cc)
JMPc(lr, cc)
/*
* Implement division using binary long division algorithm.
@ -43,6 +56,7 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
* r0 and (r1 << I) have the highest bit set in the same position.
* At the time of JMP, ip := .Ldiv0block - 8 * I.
* This depends on the fixed instruction size of block.
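* For ARM mode, this is 8 bytes; for THUMB mode it is 10 bytes, so the
* THUMB path computes ip := .Ldiv0block + 1 - 10 * I instead (the extra
* "sub ip, ip, r3, lsl #1" supplies the additional 2 * I, and the + 1 on
* the adr keeps bit 0 set for the following "bx ip").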
*
* block(shift) implements the test-and-update-quotient core.
* It assumes (r0 << shift) can be computed without overflow and
@ -54,10 +68,18 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
clz r3, r1
/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
sub r3, r3, ip
# if __ARM_ARCH_ISA_THUMB == 2
adr ip, LOCAL_LABEL(div0block) + 1
sub ip, ip, r3, lsl #1
# else
adr ip, LOCAL_LABEL(div0block)
# endif
sub ip, ip, r3, lsl #3
bx ip
# else
# if __ARM_ARCH_ISA_THUMB == 2
# error THUMB mode requires CLZ or UDIV
# endif
mov r2, r0
adr ip, LOCAL_LABEL(div0block)
@ -90,9 +112,10 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#define IMM #
#define block(shift) \
cmp r0, r1, lsl IMM shift; \
subhs r0, r0, r1, lsl IMM shift
#define block(shift) \
cmp r0, r1, lsl IMM shift; \
IT(hs); \
subhs.w r0, r0, r1, lsl IMM shift
block(31)
block(30)