forked from OSchip/llvm-project
Carefully written implementations of the 32-bit integer divide and modulus functions for ARM. These are still using a naive digit-by-digit algorithm, but the core loop has been carefully written.
llvm-svn: 127882
This commit is contained in:
parent
96a4bddefb
commit
5abb5c14c4
|
@ -0,0 +1,47 @@
|
|||
/*===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===//
|
||||
*
|
||||
* The LLVM Compiler Infrastructure
|
||||
*
|
||||
* This file is dual licensed under the MIT and the University of Illinois Open
|
||||
* Source Licenses. See LICENSE.TXT for details.
|
||||
*
|
||||
*===----------------------------------------------------------------------===//
|
||||
*
|
||||
* This file implements the __divmodsi4 (32-bit signed integer divide and
|
||||
* modulus) function for the ARM architecture as a wrapper around the
|
||||
* unsigned routine (__udivmodsi4).
|
||||
*
|
||||
*===----------------------------------------------------------------------===*/
|
||||
|
||||
#include "../assembly.h"
|
||||
|
||||
// ESTABLISH_FRAME saves r4-r7 and lr, then sets r7 = sp + 12.  Because push
// stores the lowest-numbered register at the lowest address, sp + 12 is the
// slot holding the saved r7, so r7 serves as the frame pointer.
#define ESTABLISH_FRAME \
|
||||
push {r4-r7, lr} ;\
|
||||
add r7, sp, #12
|
||||
// CLEAR_FRAME_AND_RETURN restores r4-r7 and returns by popping the saved lr
// value directly into pc.
#define CLEAR_FRAME_AND_RETURN \
|
||||
pop {r4-r7, pc}
|
||||
|
||||
.syntax unified
|
||||
.align 3
|
||||
// __divmodsi4: signed 32-bit divide that also produces the remainder.
//   In:  r0 = a (dividend), r1 = b (divisor), r2 = address that receives the
//        remainder.
//   Out: r0 = quotient; the word at the r2 address holds the remainder.
// Presumably the C prototype is int __divmodsi4(int a, int b, int *rem) -
// TODO confirm against the C reference implementation.
// Values kept across the call: r4 = a ^ b (bit 31 is the quotient sign),
// r5 = a (bit 31 is the remainder sign), r6 = remainder address.
// NOTE(review): the callee below is named with a literal extra leading
// underscore; verify that this matches the symbol that
// DEFINE_COMPILERRT_FUNCTION emits on every supported target.
DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
|
||||
ESTABLISH_FRAME
|
||||
// Set aside the sign of the quotient and modulus, and the address for the
|
||||
// modulus.
|
||||
eor r4, r0, r1 // r4 = a ^ b; bit 31 set when the signs differ
|
||||
mov r5, r0 // r5 = a; the remainder takes the sign of a
|
||||
mov r6, r2 // r6 = remainder address, preserved across the call
|
||||
// Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
|
||||
eor ip, r0, r0, asr #31 // ip = a ^ (a >> 31)
|
||||
eor lr, r1, r1, asr #31 // lr = b ^ (b >> 31); lr was saved by ESTABLISH_FRAME
|
||||
sub r0, ip, r0, asr #31 // r0 = abs(a)
|
||||
sub r1, lr, r1, asr #31 // r1 = abs(b)
|
||||
// Unsigned divmod:
// r2 has not been modified, so the callee receives the original remainder
// address; its result is read back through r6 below.
|
||||
bl ___udivmodsi4
|
||||
// Apply the sign of quotient and modulus
// Each eor/sub pair computes (x ^ m) - m where m is 0 or -1, which negates x
// exactly when m is -1.
|
||||
ldr r1, [r6] // r1 = unsigned remainder left by the callee
|
||||
eor r0, r0, r4, asr #31
|
||||
sub r0, r0, r4, asr #31 // r0 = quotient, negated if a and b differ in sign
|
||||
eor r1, r1, r5, asr #31
|
||||
sub r1, r1, r5, asr #31 // r1 = remainder, negated if a is negative
|
||||
str r1, [r6] // write the signed remainder back
|
||||
CLEAR_FRAME_AND_RETURN
|
|
@ -0,0 +1,39 @@
|
|||
/*===-- divsi3.S - 32-bit signed integer divide ---------------------------===//
|
||||
*
|
||||
* The LLVM Compiler Infrastructure
|
||||
*
|
||||
* This file is dual licensed under the MIT and the University of Illinois Open
|
||||
* Source Licenses. See LICENSE.TXT for details.
|
||||
*
|
||||
*===----------------------------------------------------------------------===//
|
||||
*
|
||||
* This file implements the __divsi3 (32-bit signed integer divide) function
|
||||
* for the ARM architecture as a wrapper around the unsigned routine.
|
||||
*
|
||||
*===----------------------------------------------------------------------===*/
|
||||
|
||||
#include "../assembly.h"
|
||||
|
||||
// ESTABLISH_FRAME saves r4, r7 and lr, then sets r7 = sp + 4, the address of
// the saved r7 slot (push stores the lowest-numbered register lowest).
#define ESTABLISH_FRAME \
|
||||
push {r4, r7, lr} ;\
|
||||
add r7, sp, #4
|
||||
// CLEAR_FRAME_AND_RETURN restores r4 and r7 and returns by popping the saved
// lr value into pc.
#define CLEAR_FRAME_AND_RETURN \
|
||||
pop {r4, r7, pc}
|
||||
|
||||
.syntax unified
|
||||
.align 3
|
||||
// __divsi3: signed 32-bit divide built on the unsigned divide routine.
//   In:  r0 = a (dividend), r1 = b (divisor).
//   Out: r0 = quotient with its sign applied.
// r4 holds a ^ b across the call; r2 and r3 are used as scratch.
// NOTE(review): the callee below is named with a literal extra leading
// underscore; verify that this matches the symbol that
// DEFINE_COMPILERRT_FUNCTION emits on every supported target.
DEFINE_COMPILERRT_FUNCTION(__divsi3)
|
||||
ESTABLISH_FRAME
|
||||
// Set aside the sign of the quotient.
|
||||
eor r4, r0, r1 // bit 31 of r4 is set when a and b differ in sign
|
||||
// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
|
||||
eor r2, r0, r0, asr #31 // r2 = a ^ (a >> 31)
|
||||
eor r3, r1, r1, asr #31 // r3 = b ^ (b >> 31)
|
||||
sub r0, r2, r0, asr #31 // r0 = abs(a)
|
||||
sub r1, r3, r1, asr #31 // r1 = abs(b)
|
||||
// abs(a) / abs(b)
|
||||
bl ___udivsi3
|
||||
// Apply sign of quotient to result and return.
// (q ^ m) - m with m = r4 >> 31 (0 or -1) negates q exactly when m is -1.
|
||||
eor r0, r0, r4, asr #31
|
||||
sub r0, r0, r4, asr #31
|
||||
CLEAR_FRAME_AND_RETURN
|
|
@ -1,36 +1,39 @@
|
|||
//===-------- modsi3.S - Implement modsi3 ---------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is dual licensed under the MIT and the University of Illinois Open
|
||||
// Source Licenses. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/*===-- modsi3.S - 32-bit signed integer modulus --------------------------===//
|
||||
*
|
||||
* The LLVM Compiler Infrastructure
|
||||
*
|
||||
* This file is dual licensed under the MIT and the University of Illinois Open
|
||||
* Source Licenses. See LICENSE.TXT for details.
|
||||
*
|
||||
*===----------------------------------------------------------------------===//
|
||||
*
|
||||
* This file implements the __modsi3 (32-bit signed integer modulus) function
|
||||
* for the ARM architecture as a wrapper around the unsigned routine.
|
||||
*
|
||||
*===----------------------------------------------------------------------===*/
|
||||
|
||||
#include "../assembly.h"
|
||||
|
||||
//
|
||||
// extern int32_t __modsi3(int32_t a, int32_t b);
|
||||
//
|
||||
// Returns the remainder when dividing two 32-bit signed integers.
|
||||
// Conceptually, the function is: { return a - (a / b) * b; }
|
||||
// But if you write that in C, llvm compiles it to a call to __modsi3...
|
||||
//
|
||||
.align 2
|
||||
// ESTABLISH_FRAME saves r4, r7 and lr, then sets r7 = sp + 4, the address of
// the saved r7 slot (push stores the lowest-numbered register lowest).
#define ESTABLISH_FRAME \
|
||||
push {r4, r7, lr} ;\
|
||||
add r7, sp, #4
|
||||
// CLEAR_FRAME_AND_RETURN restores r4 and r7 and returns by popping the saved
// lr value into pc.
#define CLEAR_FRAME_AND_RETURN \
|
||||
pop {r4, r7, pc}
|
||||
|
||||
.syntax unified
|
||||
.align 3
|
||||
// __modsi3: signed 32-bit remainder.
//   In:  r0 = a (dividend), r1 = b (divisor).
//   Out: r0 = remainder.
// NOTE(review): two bodies follow under this one label.  This listing looks
// like a rendered diff whose +/- markers were lost, so the first body (which
// calls ___divsi3) is presumably the removed version and the second (which
// calls ___umodsi3) its replacement - confirm against the repository.  As
// literally written, the first body returns by popping pc, so control never
// reaches the second one.
DEFINE_COMPILERRT_FUNCTION(__modsi3)
|
||||
// First body: remainder computed as a - (a / b) * b.
push {r4, r5, r7, lr}
|
||||
add r7, sp, #8 // set stack frame: r7 = address of the saved r7 slot
|
||||
mov r5, r0 // save a
|
||||
mov r4, r1 // save b
|
||||
bl ___divsi3 // compute a/b
|
||||
#if __ARM_ARCH_7A__
|
||||
mls r0, r4, r0, r5 // r0 = a - b * (a/b): multiply the result by b and subtract from a
|
||||
#else
|
||||
// Fallback for when __ARM_ARCH_7A__ is not set: do not rely on the "mls" instruction.
|
||||
mul r3, r0, r4 // r3 = (a/b) * b
|
||||
sub r0, r5, r3 // r0 = a - (a/b) * b
|
||||
#endif
|
||||
pop {r4, r5, r7, pc}
|
||||
|
||||
|
||||
|
||||
// Second body: remainder of the absolute values, then the sign of the
// dividend is applied.  r4 holds a across the call; r2 and r3 are scratch.
ESTABLISH_FRAME
|
||||
// Set aside the sign of the dividend.
|
||||
mov r4, r0 // bit 31 of r4 is the sign of a
|
||||
// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
|
||||
eor r2, r0, r0, asr #31 // r2 = a ^ (a >> 31)
|
||||
eor r3, r1, r1, asr #31 // r3 = b ^ (b >> 31)
|
||||
sub r0, r2, r0, asr #31 // r0 = abs(a)
|
||||
sub r1, r3, r1, asr #31 // r1 = abs(b)
|
||||
// abs(a) % abs(b)
|
||||
bl ___umodsi3
|
||||
// Apply sign of dividend to result and return.
// (x ^ m) - m with m = r4 >> 31 (0 or -1) negates x exactly when m is -1.
|
||||
eor r0, r0, r4, asr #31
|
||||
sub r0, r0, r4, asr #31
|
||||
CLEAR_FRAME_AND_RETURN
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
|
||||
*
|
||||
* The LLVM Compiler Infrastructure
|
||||
*
|
||||
* This file is dual licensed under the MIT and the University of Illinois Open
|
||||
* Source Licenses. See LICENSE.TXT for details.
|
||||
*
|
||||
*===----------------------------------------------------------------------===//
|
||||
*
|
||||
* This file implements the __udivmodsi4 (32-bit unsigned integer divide and
|
||||
* modulus) function for the ARM architecture. A naive digit-by-digit
|
||||
* computation is employed for simplicity.
|
||||
*
|
||||
*===----------------------------------------------------------------------===*/
|
||||
|
||||
#include "../assembly.h"
|
||||
|
||||
// ESTABLISH_FRAME saves r4, r7 and lr, then sets r7 = sp + 4, the address of
// the saved r7 slot (push stores the lowest-numbered register lowest).
#define ESTABLISH_FRAME \
|
||||
push {r4, r7, lr} ;\
|
||||
add r7, sp, #4
|
||||
// CLEAR_FRAME_AND_RETURN restores r4 and r7 and returns by popping the saved
// lr value into pc.
#define CLEAR_FRAME_AND_RETURN \
|
||||
pop {r4, r7, pc}
|
||||
|
||||
// Register aliases used by the divide loop:
//   a   (r0) dividend on entry, running remainder afterwards
//   b   (r1) divisor
//   i   (r3) current shift amount / quotient bit index
//   r   (r4) trial difference a - (b << i); r4 is saved by ESTABLISH_FRAME
//   q   (ip) quotient being accumulated
//   one (lr) the constant 1; lr is saved by ESTABLISH_FRAME
#define a r0
|
||||
#define b r1
|
||||
#define i r3
|
||||
#define r r4
|
||||
#define q ip
|
||||
#define one lr
|
||||
|
||||
.syntax unified
|
||||
.align 3
|
||||
// __udivmodsi4: unsigned 32-bit divide that also produces the remainder.
//   In:  r0 = a (dividend), r1 = b (divisor), r2 = address that receives the
//        remainder.
//   Out: r0 = quotient; the remainder is stored to the r2 address.
// When b is zero, or when the MSB of a lies below the MSB of b, the quotient
// is 0 and the unmodified a is stored as the remainder.
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
|
||||
// We use a simple digit by digit algorithm; before we get into the actual
|
||||
// divide loop, we must calculate the left-shift amount necessary to align
|
||||
// the MSB of the divisor with that of the dividend (If this shift is
|
||||
// negative, then the result is zero, and we early out). We also conjure a
|
||||
// bit mask of 1 to use in constructing the quotient, and initialize the
|
||||
// quotient to zero.
|
||||
ESTABLISH_FRAME
|
||||
clz r4, a // r4 = clz(a); r4 is reused as r once the loop starts
|
||||
tst b, b // detect divide-by-zero
|
||||
clz r3, b // r3 = clz(b); clz and mov leave the tst flags intact
|
||||
mov q, #0
|
||||
beq L_return // return 0 if b is zero.
|
||||
mov one, #1
|
||||
subs i, r3, r4 // i = clz(b) - clz(a), the shift that aligns the MSBs
|
||||
blt L_return // return 0 if MSB(a) < MSB(b)
|
||||
|
||||
L_mainLoop:
|
||||
// This loop basically implements the following:
|
||||
//
|
||||
// do {
|
||||
// if (a >= b << i) {
|
||||
// a -= b << i;
|
||||
// q |= 1 << i;
|
||||
// if (a == 0) break;
|
||||
// }
|
||||
// } while (--i)
|
||||
//
|
||||
// Note that this does not perform the final iteration (i == 0); by doing it
|
||||
// this way, we can merge the two branches which is a substantial win for
|
||||
// such a tight loop on current ARM architectures.
|
||||
subs r, a, b, lsl i // r = a - (b << i); C set (hs) if a >= b << i, Z set if equal
|
||||
orrhs q, q,one, lsl i // if a >= b << i: q |= 1 << i (flags unchanged)
|
||||
movhs a, r // if a >= b << i: a = r (flags unchanged)
|
||||
subsne i, i, #1 // if r != 0: --i and set flags; skipped when a became 0
|
||||
bhi L_mainLoop // loop while the new i > 0; not taken when a became 0 (Z still set)
|
||||
|
||||
// Do the final test subtraction and update of quotient (i == 0), as it is
|
||||
// not performed in the main loop.
|
||||
subs r, a, b
|
||||
orrhs q, #1 // if a >= b: set quotient bit 0
|
||||
movhs a, r // if a >= b: a = a - b
|
||||
|
||||
L_return:
|
||||
// Store the remainder, and move the quotient to r0, then return.
|
||||
str a, [r2]
|
||||
mov r0, q
|
||||
CLEAR_FRAME_AND_RETURN
|
|
@ -0,0 +1,78 @@
|
|||
/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
|
||||
*
|
||||
* The LLVM Compiler Infrastructure
|
||||
*
|
||||
* This file is dual licensed under the MIT and the University of Illinois Open
|
||||
* Source Licenses. See LICENSE.TXT for details.
|
||||
*
|
||||
*===----------------------------------------------------------------------===//
|
||||
*
|
||||
* This file implements the __udivsi3 (32-bit unsigned integer divide)
|
||||
* function for the ARM architecture. A naive digit-by-digit computation is
|
||||
* employed for simplicity.
|
||||
*
|
||||
*===----------------------------------------------------------------------===*/
|
||||
|
||||
#include "../assembly.h"
|
||||
|
||||
// ESTABLISH_FRAME saves r7 and lr, then sets r7 = sp, the address of the
// saved r7 slot (push stores the lowest-numbered register lowest).
#define ESTABLISH_FRAME \
|
||||
push {r7, lr} ;\
|
||||
mov r7, sp
|
||||
// CLEAR_FRAME_AND_RETURN restores r7 and returns by popping the saved lr
// value into pc.
#define CLEAR_FRAME_AND_RETURN \
|
||||
pop {r7, pc}
|
||||
|
||||
// Register aliases used by the divide loop:
//   a   (r0) dividend on entry, running remainder afterwards
//   b   (r1) divisor
//   r   (r2) trial difference a - (b << i)
//   i   (r3) current shift amount / quotient bit index
//   q   (ip) quotient being accumulated
//   one (lr) the constant 1; lr is saved by ESTABLISH_FRAME
#define a r0
|
||||
#define b r1
|
||||
#define r r2
|
||||
#define i r3
|
||||
#define q ip
|
||||
#define one lr
|
||||
|
||||
.syntax unified
|
||||
.align 3
|
||||
// __udivsi3: unsigned 32-bit divide.
//   In:  r0 = a (dividend), r1 = b (divisor).
//   Out: r0 = quotient; 0 when b is zero or when the MSB of a lies below the
//        MSB of b.
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
|
||||
// We use a simple digit by digit algorithm; before we get into the actual
|
||||
// divide loop, we must calculate the left-shift amount necessary to align
|
||||
// the MSB of the divisor with that of the dividend (If this shift is
|
||||
// negative, then the result is zero, and we early out). We also conjure a
|
||||
// bit mask of 1 to use in constructing the quotient, and initialize the
|
||||
// quotient to zero.
|
||||
ESTABLISH_FRAME
|
||||
clz r2, a // r2 = clz(a); r2 is reused as r once the loop starts
|
||||
tst b, b // detect divide-by-zero
|
||||
clz r3, b // r3 = clz(b); clz and mov leave the tst flags intact
|
||||
mov q, #0
|
||||
beq L_return // return 0 if b is zero.
|
||||
mov one, #1
|
||||
subs i, r3, r2 // i = clz(b) - clz(a), the shift that aligns the MSBs
|
||||
blt L_return // return 0 if MSB(a) < MSB(b)
|
||||
|
||||
L_mainLoop:
|
||||
// This loop basically implements the following:
|
||||
//
|
||||
// do {
|
||||
// if (a >= b << i) {
|
||||
// a -= b << i;
|
||||
// q |= 1 << i;
|
||||
// if (a == 0) break;
|
||||
// }
|
||||
// } while (--i)
|
||||
//
|
||||
// Note that this does not perform the final iteration (i == 0); by doing it
|
||||
// this way, we can merge the two branches which is a substantial win for
|
||||
// such a tight loop on current ARM architectures.
|
||||
subs r, a, b, lsl i // r = a - (b << i); C set (hs) if a >= b << i, Z set if equal
|
||||
orrhs q, q,one, lsl i // if a >= b << i: q |= 1 << i (flags unchanged)
|
||||
movhs a, r // if a >= b << i: a = r (flags unchanged)
|
||||
subsne i, i, #1 // if r != 0: --i and set flags; skipped when a became 0
|
||||
bhi L_mainLoop // loop while the new i > 0; not taken when a became 0 (Z still set)
|
||||
|
||||
// Do the final test subtraction and update of quotient (i == 0), as it is
|
||||
// not performed in the main loop.
|
||||
subs r, a, b
|
||||
orrhs q, #1 // if a >= b: set quotient bit 0
|
||||
|
||||
L_return:
|
||||
// Move the quotient to r0 and return.
|
||||
mov r0, q
|
||||
CLEAR_FRAME_AND_RETURN
|
|
@ -0,0 +1,58 @@
|
|||
/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
|
||||
*
|
||||
* The LLVM Compiler Infrastructure
|
||||
*
|
||||
* This file is dual licensed under the MIT and the University of Illinois Open
|
||||
* Source Licenses. See LICENSE.TXT for details.
|
||||
*
|
||||
*===----------------------------------------------------------------------===//
|
||||
*
|
||||
* This file implements the __umodsi3 (32-bit unsigned integer modulus)
|
||||
* function for the ARM architecture. A naive digit-by-digit computation is
|
||||
* employed for simplicity.
|
||||
*
|
||||
*===----------------------------------------------------------------------===*/
|
||||
|
||||
#include "../assembly.h"
|
||||
|
||||
// Register aliases used by the modulus loop:
//   a (r0) dividend on entry, running remainder and return value afterwards
//   b (r1) divisor
//   r (r2) trial difference a - (b << i)
//   i (r3) current shift amount
#define a r0
|
||||
#define b r1
|
||||
#define r r2
|
||||
#define i r3
|
||||
|
||||
.syntax unified
|
||||
.align 3
|
||||
// __umodsi3: unsigned 32-bit remainder.
//   In:  r0 = a (dividend), r1 = b (divisor).
//   Out: r0 = remainder; a is returned unchanged when b is zero or when the
//        MSB of a lies below the MSB of b.
// Leaf routine: nothing is pushed, only r0-r3 are written, and every exit is
// a bx lr.
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
|
||||
// We use a simple digit by digit algorithm; before we get into the actual
|
||||
// divide loop, we must calculate the left-shift amount necessary to align
|
||||
// the MSB of the divisor with that of the dividend.
|
||||
clz r2, a // r2 = clz(a); r2 is reused as r once the loop starts
|
||||
tst b, b // detect b == 0
|
||||
clz r3, b // r3 = clz(b); clz leaves the tst flags intact
|
||||
bxeq lr // return a if b == 0
|
||||
subs i, r3, r2 // i = clz(b) - clz(a), the shift that aligns the MSBs
|
||||
bxlt lr // return a if MSB(a) < MSB(b)
|
||||
|
||||
L_mainLoop:
|
||||
// This loop basically implements the following:
|
||||
//
|
||||
// do {
|
||||
// if (a >= b << i) {
|
||||
// a -= b << i;
|
||||
// if (a == 0) break;
|
||||
// }
|
||||
// } while (--i)
|
||||
//
|
||||
// Note that this does not perform the final iteration (i == 0); by doing it
|
||||
// this way, we can merge the two branches which is a substantial win for
|
||||
// such a tight loop on current ARM architectures.
|
||||
subs r, a, b, lsl i // r = a - (b << i); C set (hs) if a >= b << i, Z set if equal
|
||||
movhs a, r // if a >= b << i: a = r (flags unchanged)
|
||||
subsne i, i, #1 // if r != 0: --i and set flags; skipped when a became 0
|
||||
bhi L_mainLoop // loop while the new i > 0; not taken when a became 0 (Z still set)
|
||||
|
||||
// Do the final test subtraction and update of remainder (i == 0), as it is
|
||||
// not performed in the main loop.
|
||||
subs r, a, b
|
||||
movhs a, r // if a >= b: a = a - b
|
||||
bx lr
|
Loading…
Reference in New Issue