forked from OSchip/llvm-project
162 lines
3.0 KiB
ArmAsm
162 lines
3.0 KiB
ArmAsm
/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
 * modulus) function for the ARM 32-bit architecture.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

        .syntax unified

        .text
        .p2align 2

/*
 * __udivmodsi4 - 32-bit unsigned divide producing quotient and remainder.
 *
 * Register usage, as established by the code below:
 *   In:   r0 = numerator
 *         r1 = denominator
 *         r2 = address the remainder is stored to
 *   Out:  r0 = quotient
 *         [r2] = remainder
 *   Registers that may be written: r0, r3, ip, flags, plus r1 on the udiv
 *   path. r4 is used by the non-CLZ shift search but is saved and restored.
 *
 * A zero denominator branches to divby0, which sets r0 to 0 and does not
 * store anything to [r2] in this function.
 *
 * DEFINE_COMPILERRT_FUNCTION, END_COMPILERRT_FUNCTION, LOCAL_LABEL and JMP
 * come from assembly.h, which is not part of this file.
 */
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#if __ARM_ARCH_EXT_IDIV__
        /* Path built on the udiv instruction. */
        tst     r1, r1
        beq     LOCAL_LABEL(divby0)     /* denominator == 0 */
        mov     r3, r0                  /* keep the numerator, r0 gets the quotient */
        udiv    r0, r3, r1              /* r0 = numerator / denominator */
        mls     r1, r0, r1, r3          /* r1 = numerator - quotient * denominator */
        str     r1, [r2]                /* store the remainder */
        bx      lr
#else
        /* Dispose of the trivial cases before the long division. */
        cmp     r1, #1
        bcc     LOCAL_LABEL(divby0)     /* unsigned r1 < 1, i.e. denominator == 0 */
        beq     LOCAL_LABEL(divby1)     /* denominator == 1 */
        cmp     r0, r1
        bcc     LOCAL_LABEL(quotient0)  /* numerator < denominator */
        /*
         * Implement division using binary long division algorithm.
         *
         * r0 is the numerator, r1 the denominator.
         *
         * The code before JMP computes the correct shift I, so that
         * r0 and (r1 << I) have the highest bit set in the same position.
         * At the time of JMP, ip := .Ldiv0block - 12 * I.
         * This depends on the fixed instruction size of block.
         *
         * block(shift) implements the test-and-update-quotient core.
         * It assumes (r1 << shift) can be computed without overflow and
         * that r0 < 2 * (r1 << shift). The quotient is stored in r3.
         */

#  ifdef __ARM_FEATURE_CLZ
        clz     ip, r0
        clz     r3, r1
        /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
        sub     r3, r3, ip              /* r3 = I = clz(r1) - clz(r0) */
        adr     ip, LOCAL_LABEL(div0block)
        sub     ip, ip, r3, lsl #2      /* ip -= 4 * I */
        sub     ip, ip, r3, lsl #3      /* ip -= 8 * I, 12 * I in total */
        mov     r3, #0                  /* quotient accumulator starts at 0 */
        bx      ip
#  else
        /*
         * No CLZ: find the shift with a binary search over 16, 8, 4, 2, 1.
         * Each step checks whether the running value r4 shifted right by k is
         * still >= r1. If so the shifted value is kept and the entry point
         * moves k blocks (k * 12 bytes) earlier.
         *
         * r4 is saved below sp first. The pre-decrement is 8 bytes although
         * only one word is stored - presumably to keep sp 8-byte aligned
         * (assumption, confirm against the target ABI).
         */
        str     r4, [sp, #-8]!

        mov     r4, r0
        adr     ip, LOCAL_LABEL(div0block)

        lsr     r3, r4, #16
        cmp     r3, r1
        movhs   r4, r3
        subhs   ip, ip, #(16 * 12)

        lsr     r3, r4, #8
        cmp     r3, r1
        movhs   r4, r3
        subhs   ip, ip, #(8 * 12)

        lsr     r3, r4, #4
        cmp     r3, r1
        movhs   r4, r3
        subhs   ip, ip, #(4 * 12)

        lsr     r3, r4, #2
        cmp     r3, r1
        movhs   r4, r3
        subhs   ip, ip, #(2 * 12)

        /* Last block, no need to update r3 or r4. */
        cmp     r1, r4, lsr #1
        subls   ip, ip, #(1 * 12)

        ldr     r4, [sp], #8            /* restore r4, we are done with it. */
        mov     r3, #0                  /* quotient accumulator starts at 0 */

        JMP(ip)
#  endif

/*
 * IMM keeps the '#' immediate prefix out of the body of the function-like
 * macro below, where the preprocessor would treat '#' as its stringizing
 * operator.
 */
#define IMM     #

/*
 * One long-division step: if r0 >= (r1 << shift), set bit 'shift' of the
 * quotient in r3 and subtract (r1 << shift) from r0. It expands to exactly
 * three instructions; the entry-point arithmetic above relies on that size.
 */
#define block(shift)                                                           \
        cmp     r0, r1, lsl IMM shift;                                         \
        addhs   r3, r3, IMM (1 << shift);                                      \
        subhs   r0, r0, r1, lsl IMM shift

        block(31)
        block(30)
        block(29)
        block(28)
        block(27)
        block(26)
        block(25)
        block(24)
        block(23)
        block(22)
        block(21)
        block(20)
        block(19)
        block(18)
        block(17)
        block(16)
        block(15)
        block(14)
        block(13)
        block(12)
        block(11)
        block(10)
        block(9)
        block(8)
        block(7)
        block(6)
        block(5)
        block(4)
        block(3)
        block(2)
        block(1)
LOCAL_LABEL(div0block):
        block(0)

        str     r0, [r2]                /* what is left in r0 is the remainder */
        mov     r0, r3                  /* return the accumulated quotient */
        JMP(lr)

        /* numerator < denominator: quotient 0, remainder = numerator. */
LOCAL_LABEL(quotient0):
        str     r0, [r2]
        mov     r0, #0
        JMP(lr)

        /* denominator == 1: r0 already holds the quotient, remainder is 0. */
LOCAL_LABEL(divby1):
        mov     r3, #0
        str     r3, [r2]
        JMP(lr)
#endif /* __ARM_ARCH_EXT_IDIV__ */

        /*
         * denominator == 0: r0 is set to 0 and [r2] is left unwritten here.
         * With __ARM_EABI__ defined control is handed to __aeabi_idiv0 by a
         * plain branch (lr is not changed); otherwise the function returns.
         */
LOCAL_LABEL(divby0):
        mov     r0, #0
#ifdef __ARM_EABI__
        b       __aeabi_idiv0
#else
        JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__udivmodsi4)