//===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the __udivsi3 (32-bit unsigned integer divide)
// function for the ARM 32-bit architecture.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#include "../assembly.h"
|
|
|
|
// Assembler set-up: unified ARM/Thumb syntax, code goes into .text.
// DEFINE_CODE_STATE is provided by assembly.h (presumably it selects the
// ARM or Thumb instruction set for this file -- confirm in that header).
        .syntax unified
        .text
        DEFINE_CODE_STATE

        .p2align 2
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)

@ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the quotient of the (unsigned) division.
@
@   In:       r0 = dividend, r1 = divisor
@   Out:      r0 = quotient
@   Divide by zero:
@             r0 is set to 0. With __ARM_EABI__ defined, control then goes
@             to __aeabi_idiv0 (branch in the hardware-divide build, call
@             in the software build); otherwise 0 is returned.
@   Scratch:  the software path uses r2, r3 and ip and changes the flags.

DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#if __ARM_ARCH_EXT_IDIV__
        // Hardware divide is available: only a zero divisor needs handling.
        tst     r1, r1
        beq     LOCAL_LABEL(divby0)
        udiv    r0, r0, r1
        bx      lr

LOCAL_LABEL(divby0):
        // Use the flag-setting movs so this also assembles for Thumb-only
        // cores that have UDIV (e.g. v8-m.base). The flags are dead here.
        movs    r0, #0
#  ifdef __ARM_EABI__
        b       __aeabi_idiv0
#  else
        JMP(lr)
#  endif

#else // ! __ARM_ARCH_EXT_IDIV__
        cmp     r1, #1
        bcc     LOCAL_LABEL(divby0)     // r1 < 1 (unsigned): divisor is 0
#if defined(USE_THUMB_1)
        bne     LOCAL_LABEL(num_neq_denom)
        JMP(lr)                         // divisor == 1: r0 already is the quotient
LOCAL_LABEL(num_neq_denom):
#else
        IT(eq)
        JMPc(lr, eq)                    // divisor == 1: r0 already is the quotient
#endif
        cmp     r0, r1
#if defined(USE_THUMB_1)
        bhs     LOCAL_LABEL(num_ge_denom)
        movs    r0, #0                  // dividend < divisor: quotient is 0
        JMP(lr)
LOCAL_LABEL(num_ge_denom):
#else
        ITT(cc)
        movcc   r0, #0                  // dividend < divisor: quotient is 0
        JMPc(lr, cc)
#endif

        // Implement division using binary long division algorithm.
        //
        // r0 is the numerator, r1 the denominator.
        //
        // The code before JMP computes the correct shift I, so that
        // r0 and (r1 << I) have the highest bit set in the same position.
        // At the time of JMP, the jump register holds
        //   .Ldiv0block - (size of one block) * I
        // so execution enters the unrolled chain below at block(I).
        // This depends on the fixed instruction size of block:
        // 12 bytes in ARM mode, 14 bytes in Thumb-2 mode and 10 bytes
        // (BLOCK_SIZE) in the Thumb-1 variant.
        //
        // block(shift) implements the test-and-update-quotient core.
        // It assumes (r1 << shift) can be computed without overflow and
        // that r0 < 2 * (r1 << shift). The quotient is stored in r3.

#  if defined(__ARM_FEATURE_CLZ)
        clz     ip, r0
        clz     r3, r1
        // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
        sub     r3, r3, ip              // r3 = I
#    if defined(USE_THUMB_2)
        // +1 sets bit 0 so that the bx below stays in Thumb state.
        adr     ip, LOCAL_LABEL(div0block) + 1
        sub     ip, ip, r3, lsl #1      // Thumb-2 only: 2 * I (14 * I in total)
#    else
        adr     ip, LOCAL_LABEL(div0block)
#    endif
        sub     ip, ip, r3, lsl #2      // 4 * I
        sub     ip, ip, r3, lsl #3      // 8 * I
        mov     r3, #0                  // quotient accumulator starts at 0
        bx      ip
#  else // No CLZ Feature
#    if defined(USE_THUMB_2)
#    error THUMB mode requires CLZ or UDIV
#    endif
#    if defined(USE_THUMB_1)
#      define BLOCK_SIZE 10
#    else
#      define BLOCK_SIZE 12
#    endif

        // Find I with a 16/8/4/2/1 binary search on a copy of the numerator
        // (r2), moving the jump target back one block per bit of shift.
        mov     r2, r0
#    if defined(USE_THUMB_1)
        // Thumb-1: the numerator is parked in ip and r0 accumulates the jump
        // target (bit 0 set to mark a Thumb address).
        mov     ip, r0
        adr     r0, LOCAL_LABEL(div0block)
        adds    r0, #1
#    else
        adr     ip, LOCAL_LABEL(div0block)
#    endif
        lsrs    r3, r2, #16
        cmp     r3, r1
#    if defined(USE_THUMB_1)
        blo     LOCAL_LABEL(skip_16)
        movs    r2, r3
        subs    r0, r0, #(16 * BLOCK_SIZE)
LOCAL_LABEL(skip_16):
#    else
        movhs   r2, r3
        subhs   ip, ip, #(16 * BLOCK_SIZE)
#    endif

        lsrs    r3, r2, #8
        cmp     r3, r1
#    if defined(USE_THUMB_1)
        blo     LOCAL_LABEL(skip_8)
        movs    r2, r3
        subs    r0, r0, #(8 * BLOCK_SIZE)
LOCAL_LABEL(skip_8):
#    else
        movhs   r2, r3
        subhs   ip, ip, #(8 * BLOCK_SIZE)
#    endif

        lsrs    r3, r2, #4
        cmp     r3, r1
#    if defined(USE_THUMB_1)
        blo     LOCAL_LABEL(skip_4)
        movs    r2, r3
        subs    r0, r0, #(4 * BLOCK_SIZE)
LOCAL_LABEL(skip_4):
#    else
        movhs   r2, r3
        subhs   ip, ip, #(4 * BLOCK_SIZE)
#    endif

        lsrs    r3, r2, #2
        cmp     r3, r1
#    if defined(USE_THUMB_1)
        blo     LOCAL_LABEL(skip_2)
        movs    r2, r3
        subs    r0, r0, #(2 * BLOCK_SIZE)
LOCAL_LABEL(skip_2):
#    else
        movhs   r2, r3
        subhs   ip, ip, #(2 * BLOCK_SIZE)
#    endif

        // Last block, no need to update r2 or r3.
#    if defined(USE_THUMB_1)
        lsrs    r3, r2, #1
        cmp     r3, r1
        blo     LOCAL_LABEL(skip_1)
        subs    r0, r0, #(1 * BLOCK_SIZE)
LOCAL_LABEL(skip_1):
        movs    r2, r0                  // r2 = jump target
        mov     r0, ip                  // restore the numerator
        movs    r3, #0                  // quotient accumulator starts at 0
        JMP(r2)

#    else
        cmp     r1, r2, lsr #1
        subls   ip, ip, #(1 * BLOCK_SIZE)

        movs    r3, #0                  // quotient accumulator starts at 0

        JMP(ip)
#    endif
#  endif // __ARM_FEATURE_CLZ


#define IMM #
        // due to the range limit of branch in Thumb1, we have to place the
        // block closer
LOCAL_LABEL(divby0):
        movs    r0, #0
#  if defined(__ARM_EABI__)
        push    {r7, lr}
        bl      __aeabi_idiv0 // due to relocation limit, can't use b.
        pop     {r7, pc}
#  else
        JMP(lr)
#  endif


#if defined(USE_THUMB_1)
#define block(shift) \
        lsls    r2, r1, IMM shift; \
        cmp     r0, r2; \
        blo     LOCAL_LABEL(block_skip_##shift); \
        subs    r0, r0, r2; \
        LOCAL_LABEL(block_skip_##shift) :; \
        adcs    r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2.

        // TODO: if current location counter is not word aligned, we don't
        // need the .p2align and nop
        // Label div0block must be word-aligned. First align block 31
        .p2align 2
        nop // Padding to align div0block as 31 blocks = 310 bytes

#else
#define block(shift) \
        cmp     r0, r1, lsl IMM shift; \
        ITT(hs); \
        WIDE(addhs) r3, r3, IMM (1 << shift); \
        WIDE(subhs) r0, r0, r1, lsl IMM shift
#endif

        // Unrolled long division, highest shift first. The computed jump
        // above enters this chain at block(I) and falls through to block(0).
        block(31)
        block(30)
        block(29)
        block(28)
        block(27)
        block(26)
        block(25)
        block(24)
        block(23)
        block(22)
        block(21)
        block(20)
        block(19)
        block(18)
        block(17)
        block(16)
        block(15)
        block(14)
        block(13)
        block(12)
        block(11)
        block(10)
        block(9)
        block(8)
        block(7)
        block(6)
        block(5)
        block(4)
        block(3)
        block(2)
        block(1)
LOCAL_LABEL(div0block):
        block(0)

        mov     r0, r3                  // return the accumulated quotient
        JMP(lr)
#endif // __ARM_ARCH_EXT_IDIV__

END_COMPILERRT_FUNCTION(__udivsi3)

NO_EXEC_STACK_DIRECTIVE
|
|
|