diff --git a/compiler-rt/lib/arm/divmodsi4.S b/compiler-rt/lib/arm/divmodsi4.S
new file mode 100644
index 000000000000..6e72eabbd9ee
--- /dev/null
+++ b/compiler-rt/lib/arm/divmodsi4.S
@@ -0,0 +1,47 @@
+/*===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __divmodsi4 (32-bit signed integer divide and
+ * modulus) function for the ARM architecture as a wrapper around the unsigned
+ * routine: quotient is returned in r0, modulus is stored at the address in r2.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+    push {r4-r7, lr} ;\
+    add r7, sp, #12
+#define CLEAR_FRAME_AND_RETURN \
+    pop {r4-r7, pc}
+
+.syntax unified
+.align 3
+DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
+    ESTABLISH_FRAME
+// Set aside the sign of the quotient and modulus, and the address for the
+// modulus. r4-r6 are saved by ESTABLISH_FRAME and restored on return.
+    eor r4, r0, r1 // sign bit of r4 = sign of the quotient (a ^ b)
+    mov r5, r0 // sign bit of r5 = sign of the modulus (same as a)
+    mov r6, r2 // r6 = address where the modulus is stored
+// Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+    eor ip, r0, r0, asr #31
+    eor lr, r1, r1, asr #31
+    sub r0, ip, r0, asr #31
+    sub r1, lr, r1, asr #31
+// Unsigned divmod (r2 is untouched, so it is still the modulus address):
+    bl ___udivmodsi4 // NOTE(review): literal '_'-prefixed symbol name; confirm it suits every target
+// Apply the sign of quotient and modulus via (x ^ s) - s, where s = sign >> 31.
+    ldr r1, [r6] // reload the unsigned modulus from the address saved in r6
+    eor r0, r0, r4, asr #31
+    sub r0, r0, r4, asr #31
+    eor r1, r1, r5, asr #31
+    sub r1, r1, r5, asr #31
+    str r1, [r6] // write the signed modulus back to the same address
+    CLEAR_FRAME_AND_RETURN
diff --git a/compiler-rt/lib/arm/divsi3.S b/compiler-rt/lib/arm/divsi3.S
new file mode 100644
index 000000000000..69ef20c4a763
--- /dev/null
+++ b/compiler-rt/lib/arm/divsi3.S
@@ -0,0 +1,39 @@
+/*===-- divsi3.S - 32-bit signed integer divide ---------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __divsi3 (32-bit signed integer divide) function
+ * for the ARM architecture as a wrapper around the unsigned routine.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+    push {r4, r7, lr} ;\
+    add r7, sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+    pop {r4, r7, pc}
+
+.syntax unified
+.align 3
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+    ESTABLISH_FRAME
+// Set aside the sign of the quotient (r4 is saved by ESTABLISH_FRAME).
+    eor r4, r0, r1 // sign bit of r4 = sign of a ^ b
+// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+    eor r2, r0, r0, asr #31
+    eor r3, r1, r1, asr #31
+    sub r0, r2, r0, asr #31
+    sub r1, r3, r1, asr #31
+// abs(a) / abs(b)
+    bl ___udivsi3 // NOTE(review): literal '_'-prefixed symbol name; confirm it suits every target
+// Apply sign of quotient to result via (q ^ s) - s, s = r4 >> 31, and return.
+    eor r0, r0, r4, asr #31
+    sub r0, r0, r4, asr #31
+    CLEAR_FRAME_AND_RETURN
diff --git a/compiler-rt/lib/arm/modsi3.S b/compiler-rt/lib/arm/modsi3.S
index 40ba856e3551..97573395ddf9 100644
--- a/compiler-rt/lib/arm/modsi3.S
+++ b/compiler-rt/lib/arm/modsi3.S
@@ -1,36 +1,39 @@
-//===-------- modsi3.S - Implement modsi3 ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
+/*===-- modsi3.S - 32-bit signed integer modulus --------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __modsi3 (32-bit signed integer modulus) function
+ * for the ARM architecture as a wrapper around the unsigned routine.
+ *
+ *===----------------------------------------------------------------------===*/
 
 #include "../assembly.h"
 
-//
-// extern int32_t __modsi3(int32_t a, int32_t b);
-//
-// Returns the remainder when dividing two 32-bit signed integers.
-// Conceptually, the function is: { return a - (a / b) * b; }
-// But if you write that in C, llvm compiles it to a call to __modsi3...
-//
-    .align 2
+#define ESTABLISH_FRAME \
+    push {r4, r7, lr} ;\
+    add r7, sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+    pop {r4, r7, pc}
+
+.syntax unified
+.align 3
 DEFINE_COMPILERRT_FUNCTION(__modsi3)
-    push {r4, r5, r7, lr}
-    add r7, sp, #8 // set stack frame
-    mov r5, r0 // save a
-    mov r4, r1 // save b
-    bl ___divsi3 // compute a/b
-#if __ARM_ARCH_7A__
-    mls r0, r4, r0, r5 // mulitple result * b and subtract from a
-#else
-    // before armv7, does not have "mls" instruction
-    mul r3, r0, r4 // multiple result * b
-    sub r0, r5, r3 // a - result
-#endif
-    pop {r4, r5, r7, pc}
-
-
-
+    ESTABLISH_FRAME
+    // Set aside the sign of the dividend; the result takes the sign of a.
+    mov r4, r0 // r4 is saved by ESTABLISH_FRAME and restored on return
+    // Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+    eor r2, r0, r0, asr #31
+    eor r3, r1, r1, asr #31
+    sub r0, r2, r0, asr #31
+    sub r1, r3, r1, asr #31
+    // abs(a) % abs(b)
+    bl ___umodsi3 // NOTE(review): literal '_'-prefixed symbol name; confirm it suits every target
+    // Apply sign of dividend to result via (m ^ s) - s, s = r4 >> 31, and return.
+    eor r0, r0, r4, asr #31
+    sub r0, r0, r4, asr #31
+    CLEAR_FRAME_AND_RETURN
diff --git a/compiler-rt/lib/arm/udivmodsi4.S b/compiler-rt/lib/arm/udivmodsi4.S
new file mode 100644
index 000000000000..a2bda6312f80
--- /dev/null
+++ b/compiler-rt/lib/arm/udivmodsi4.S
@@ -0,0 +1,80 @@
+/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
+ * modulus) function for the ARM architecture. A naive digit-by-digit
+ * computation is employed for simplicity.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+    push {r4, r7, lr} ;\
+    add r7, sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+    pop {r4, r7, pc}
+
+#define a r0
+#define b r1
+#define i r3
+#define r r4
+#define q ip
+#define one lr
+
+.syntax unified
+.align 3
+DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
+// We use a simple digit by digit algorithm; before we get into the actual
+// divide loop, we must calculate the left-shift amount necessary to align
+// the MSB of the divisor with that of the dividend (If this shift is
+// negative, then the result is zero, and we early out). We also conjure a
+// bit mask of 1 to use in constructing the quotient, and initialize the
+// quotient to zero. The remainder is stored through the pointer passed in r2.
+    ESTABLISH_FRAME
+    clz r4, a // r4 = clz(a); r4 is reused as r inside the loop
+    tst b, b // detect divide-by-zero
+    clz r3, b // clz does not alter the flags set by tst
+    mov q, #0
+    beq L_return // b == 0: quotient 0, remainder stored is a
+    mov one, #1
+    subs i, r3, r4 // i = clz(b) - clz(a)
+    blt L_return // MSB(a) < MSB(b): quotient 0, remainder stored is a
+
+L_mainLoop:
+// This loop basically implements the following:
+//
+//  do {
+//      if (a >= b << i) {
+//          a -= b << i;
+//          q |= 1 << i;
+//          if (a == 0) break;
+//      }
+//  } while (--i)
+//
+// Note that this does not perform the final iteration (i == 0); by doing it
+// this way, we can merge the two branches which is a substantial win for
+// such a tight loop on current ARM architectures.
+    subs r, a, b, lsl i // r = a - (b << i); hs means no borrow
+    orrhs q, q,one, lsl i // no borrow: set bit i of the quotient
+    movhs a, r // no borrow: commit the subtraction
+    subsne i, i, #1 // skipped when r == 0, leaving Z set
+    bhi L_mainLoop // taken only if the decrement ran and left i > 0
+
+// Do the final test subtraction and update of quotient (i == 0), as it is
+// not performed in the main loop.
+    subs r, a, b
+    orrhs q, #1
+    movhs a, r
+
+L_return:
+// Store the remainder, and move the quotient to r0, then return.
+    str a, [r2] // a holds the remainder at this point
+    mov r0, q
+    CLEAR_FRAME_AND_RETURN
diff --git a/compiler-rt/lib/arm/udivsi3.S b/compiler-rt/lib/arm/udivsi3.S
new file mode 100644
index 000000000000..d721b6299b1b
--- /dev/null
+++ b/compiler-rt/lib/arm/udivsi3.S
@@ -0,0 +1,78 @@
+/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __udivsi3 (32-bit unsigned integer divide)
+ * function for the ARM architecture. A naive digit-by-digit computation is
+ * employed for simplicity.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+    push {r7, lr} ;\
+    mov r7, sp
+#define CLEAR_FRAME_AND_RETURN \
+    pop {r7, pc}
+
+#define a r0
+#define b r1
+#define r r2
+#define i r3
+#define q ip
+#define one lr
+
+.syntax unified
+.align 3
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+// We use a simple digit by digit algorithm; before we get into the actual
+// divide loop, we must calculate the left-shift amount necessary to align
+// the MSB of the divisor with that of the dividend (If this shift is
+// negative, then the result is zero, and we early out). We also conjure a
+// bit mask of 1 to use in constructing the quotient, and initialize the
+// quotient to zero.
+    ESTABLISH_FRAME
+    clz r2, a // r2 = clz(a); r2 is reused as r inside the loop
+    tst b, b // detect divide-by-zero
+    clz r3, b // clz does not alter the flags set by tst
+    mov q, #0
+    beq L_return // return 0 if b is zero.
+    mov one, #1
+    subs i, r3, r2 // i = clz(b) - clz(a)
+    blt L_return // return 0 if MSB(a) < MSB(b)
+
+L_mainLoop:
+// This loop basically implements the following:
+//
+//  do {
+//      if (a >= b << i) {
+//          a -= b << i;
+//          q |= 1 << i;
+//          if (a == 0) break;
+//      }
+//  } while (--i)
+//
+// Note that this does not perform the final iteration (i == 0); by doing it
+// this way, we can merge the two branches which is a substantial win for
+// such a tight loop on current ARM architectures.
+    subs r, a, b, lsl i // r = a - (b << i); hs means no borrow
+    orrhs q, q,one, lsl i // no borrow: set bit i of the quotient
+    movhs a, r // no borrow: commit the subtraction
+    subsne i, i, #1 // skipped when r == 0, leaving Z set
+    bhi L_mainLoop // taken only if the decrement ran and left i > 0
+
+// Do the final test subtraction and update of quotient (i == 0), as it is
+// not performed in the main loop.
+    subs r, a, b // only the flags matter here; the remainder is not returned
+    orrhs q, #1
+
+L_return:
+// Move the quotient to r0 and return.
+    mov r0, q
+    CLEAR_FRAME_AND_RETURN
diff --git a/compiler-rt/lib/arm/umodsi3.S b/compiler-rt/lib/arm/umodsi3.S
new file mode 100644
index 000000000000..e3e391bafcf5
--- /dev/null
+++ b/compiler-rt/lib/arm/umodsi3.S
@@ -0,0 +1,58 @@
+/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __umodsi3 (32-bit unsigned integer modulus)
+ * function for the ARM architecture. A naive digit-by-digit computation is
+ * employed for simplicity.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define a r0
+#define b r1
+#define r r2
+#define i r3
+
+.syntax unified
+.align 3
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+// We use a simple digit by digit algorithm; before we get into the actual
+// divide loop, we must calculate the left-shift amount necessary to align
+// the MSB of the divisor with that of the dividend. No frame is set up here.
+    clz r2, a // r2 = clz(a); r2 is reused as r inside the loop
+    tst b, b // detect b == 0
+    clz r3, b // clz does not alter the flags set by tst
+    bxeq lr // return a if b == 0
+    subs i, r3, r2 // i = clz(b) - clz(a)
+    bxlt lr // return a if MSB(a) < MSB(b)
+
+L_mainLoop:
+// This loop basically implements the following:
+//
+//  do {
+//      if (a >= b << i) {
+//          a -= b << i;
+//          if (a == 0) break;
+//      }
+//  } while (--i)
+//
+// Note that this does not perform the final iteration (i == 0); by doing it
+// this way, we can merge the two branches which is a substantial win for
+// such a tight loop on current ARM architectures.
+    subs r, a, b, lsl i // r = a - (b << i); hs means no borrow
+    movhs a, r // no borrow: commit the subtraction
+    subsne i, i, #1 // skipped when r == 0, leaving Z set
+    bhi L_mainLoop // taken only if the decrement ran and left i > 0
+
+// Do the final test subtraction and update of remainder (i == 0), as it is
+// not performed in the main loop.
+    subs r, a, b
+    movhs a, r
+    bx lr // the remainder is returned in r0 (a)