forked from OSchip/llvm-project
278 lines
6.9 KiB
ArmAsm
278 lines
6.9 KiB
ArmAsm
/*===-- addsf3.S - Adds two single precision floating point numbers -------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __addsf3 function (single precision floating point
 * addition with the IEEE-754 default rounding: to nearest, ties to even) for
 * the ARM Thumb1 ISA.
 *
 *===----------------------------------------------------------------------===*/
|
|
|
|
#include "../assembly.h"

// Field widths of the single precision format handled in this file; every
// shift count in the routine is derived from these two values.
#define typeWidth 32
#define significandBits 23

  // Unified (UAL) syntax, Thumb encoding, code section, 2^2-byte alignment.
  .syntax unified
  .thumb
  .text
  .p2align 2

// Macro from assembly.h; presumably exports __aeabi_fadd as an alias of
// __addsf3 (confirm against the macro definition).
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3)
|
|
|
|
/*
 * __addsf3: single precision addition, round to nearest, ties to even.
 *
 * In:   r0 = aRep, r1 = bRep (raw 32-bit patterns of the two operands)
 * Out:  r0 = bit pattern of a + b
 * Uses: r1-r3 as scratch. r4-r7 and lr are pushed in the prologue and every
 *       return path pops them again (lr straight into pc).
 * Calls __clzsi2 when a subtraction cancels leading significand bits.
 *
 * Main path register roles once the operands are ordered:
 *   r0/r1 = aRep/bRep with aAbs >= bAbs
 *   r2/r3 = aExponent/bExponent (biased)
 *   r4/r5 = aSignificand/bSignificand, shifted left by 3 so that the low
 *           three bits hold round, guard and sticky
 */
DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
  push {r4, r5, r6, r7, lr}

  // Get the absolute value of a and b (shift the sign bit out and back in).
  lsls r2, r0, #1
  lsls r3, r1, #1
  lsrs r2, r2, #1  /* aAbs */
  beq  LOCAL_LABEL(a_zero_nan_inf)  // a is +/-0; bAbs is completed there.
  lsrs r3, r3, #1  /* bAbs */
  beq  LOCAL_LABEL(zero_nan_inf)

  // Detect if a or b is infinity or NaN (biased exponent == 0xFF).
  lsrs r6, r2, #(significandBits)
  lsrs r7, r3, #(significandBits)
  cmp  r6, #0xFF
  beq  LOCAL_LABEL(zero_nan_inf)
  cmp  r7, #0xFF
  beq  LOCAL_LABEL(zero_nan_inf)

  // Swap Rep and Abs so that a and aAbs have the larger absolute value.
  cmp  r2, r3
  bhs  LOCAL_LABEL(no_swap)
  movs r4, r0
  movs r5, r2
  movs r0, r1
  movs r2, r3
  movs r1, r4
  movs r3, r5
LOCAL_LABEL(no_swap):

  // Get the significands and shift them to give us round, guard and sticky.
  lsls r4, r0, #(typeWidth - significandBits)
  lsrs r4, r4, #(typeWidth - significandBits - 3) /* aSignificand << 3 */
  lsls r5, r1, #(typeWidth - significandBits)
  lsrs r5, r5, #(typeWidth - significandBits - 3) /* bSignificand << 3 */

  // Get the implicitBit (already shifted left by 3 like the significands).
  movs r6, #1
  lsls r6, r6, #(significandBits + 3)

  // Get aExponent and set implicit bit if necessary (exponent != 0).
  lsrs r2, r2, #(significandBits)
  beq  LOCAL_LABEL(a_done_implicit_bit)
  orrs r4, r6
LOCAL_LABEL(a_done_implicit_bit):

  // Get bExponent and set implicit bit if necessary (exponent != 0).
  lsrs r3, r3, #(significandBits)
  beq  LOCAL_LABEL(b_done_implicit_bit)
  orrs r5, r6
LOCAL_LABEL(b_done_implicit_bit):

  // Get the difference in exponents.
  subs r6, r2, r3
  beq  LOCAL_LABEL(done_align)

  // If b is denormal, then a must be normal as align > 0, and we only need to
  // right shift bSignificand by (align - 1) bits.
  cmp  r3, #0
  bne  1f
  subs r6, r6, #1
1:

  // No longer needs bExponent. r3 is dead here.
  // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
  movs r3, #(typeWidth)
  subs r3, r3, r6
  movs r7, r5
  lsls r7, r3
  beq  1f
  movs r7, #1
1:

  // bSignificand = bSignificand >> align | sticky;
  lsrs r5, r6
  orrs r5, r7
  bne  LOCAL_LABEL(done_align)
  movs r5, #1 // sticky; b is known to be non-zero.

LOCAL_LABEL(done_align):
  // isSubtraction = (aRep ^ bRep) >> 31;
  movs r7, r0
  eors r7, r1
  lsrs r7, #31
  bne  LOCAL_LABEL(do_subtraction)

  // Same sign, do addition.

  // aSignificand += bSignificand;
  adds r4, r4, r5

  // aExponent == 0 means a is denormal, and so is b because aAbs >= bAbs.
  // Neither significand carries the implicit bit, so the sum stays below
  // implicitBit << 4 and can never take the carry path below. It can still
  // reach the implicit bit position, though, and form_result masks that bit
  // off. Move it into the exponent instead:
  //   aExponent = aSignificand >> (significandBits + 3)
  // which is 1 when the sum became a normal number and 0 otherwise.
  cmp  r2, #0
  bne  LOCAL_LABEL(check_carry)
  lsrs r2, r4, #(significandBits + 3)
  b    LOCAL_LABEL(form_result)

LOCAL_LABEL(check_carry):
  // Check carry bit (one position above the implicit bit).
  movs r6, #1
  lsls r6, r6, #(significandBits + 3 + 1)
  movs r7, r4
  ands r7, r6
  beq  LOCAL_LABEL(form_result)
  // If the addition carried up, we need to right-shift the result and
  // adjust the exponent.
  movs r7, r4
  movs r6, #1
  ands r7, r6 // sticky = aSignificand & 1;
  lsrs r4, #1
  orrs r4, r7 // result Significand
  adds r2, #1 // result Exponent
  // If we have overflowed the type, return +/- infinity.
  cmp  r2, #0xFF
  beq  LOCAL_LABEL(ret_inf)

LOCAL_LABEL(form_result):
  // Shift the sign, exponent and significand into place.
  lsrs r0, #(typeWidth - 1)
  lsls r0, #(typeWidth - 1) // Get Sign.
  lsls r2, #(significandBits)
  orrs r0, r2
  movs r1, r4 // keep a copy for the round/guard/sticky bits
  // Drop the implicit bit (shifted out at the top) and the three rounding
  // bits (shifted out at the bottom).
  lsls r4, #(typeWidth - significandBits - 3)
  lsrs r4, #(typeWidth - significandBits)
  orrs r0, r4

  // Final rounding. The result may overflow to infinity, but that is the
  // correct result in that case.
  // roundGuardSticky = aSignificand & 0x7;
  movs r2, #0x7
  ands r1, r2

  // if (roundGuardSticky > 0x4) result++;
  cmp  r1, #0x4
  blt  LOCAL_LABEL(done_round)
  beq  1f
  adds r0, #1
  pop  {r4, r5, r6, r7, pc}
1:

  // if (roundGuardSticky == 0x4) result += result & 1;
  movs r1, r0
  lsrs r1, #1 // carry flag = lowest bit of result
  bcc  LOCAL_LABEL(done_round)
  adds r0, r0, #1
LOCAL_LABEL(done_round):
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(do_subtraction):
  subs r4, r4, r5 // aSignificand -= bSignificand;
  beq  LOCAL_LABEL(ret_zero)
  movs r6, r4
  cmp  r2, #0
  beq  LOCAL_LABEL(form_result) // if the exponent of a is 0, no need to normalize.
  // If partial cancellation occurred, we need to left-shift the result
  // and adjust the exponent:
  lsrs r6, r6, #(significandBits + 3)
  bne  LOCAL_LABEL(form_result) // implicit bit still set, nothing to do.

  // Keep r0-r3 across the call; r0 carries the argument and the result.
  // NOTE(review): together with the five words pushed in the prologue the
  // frame is nine words here, so sp is not 8-byte aligned at the call if it
  // was on entry. Confirm that __clzsi2 does not rely on that alignment.
  push {r0, r1, r2, r3}
  movs r0, r4
  bl   __clzsi2
  movs r5, r0
  pop  {r0, r1, r2, r3}
  // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
  subs r5, r5, #(typeWidth - significandBits - 3 - 1)
  // aSignificand <<= shift; aExponent -= shift;
  lsls r4, r5
  subs r2, r2, r5
  bgt  LOCAL_LABEL(form_result)

  // Denormalize the result if aExponent <= 0.
  movs r6, #1
  subs r6, r6, r2 // shift = 1 - aExponent;
  movs r2, #0 // aExponent = 0;
  movs r3, #(typeWidth) // bExponent is dead.
  subs r3, r3, r6
  movs r7, r4
  lsls r7, r3 // stickyBit = (bool)(aSignificand << (typeWidth - shift))
  beq  1f
  movs r7, #1
1:
  lsrs r4, r6 /* aSignificand >> shift */
  orrs r4, r7
  b    LOCAL_LABEL(form_result)

LOCAL_LABEL(ret_zero):
  // The significands cancelled exactly: return +0.
  movs r0, #0
  pop  {r4, r5, r6, r7, pc}


LOCAL_LABEL(a_zero_nan_inf):
  // Reached with aAbs == 0 before bAbs was finished; finish it now.
  lsrs r3, r3, #1

LOCAL_LABEL(zero_nan_inf):
  // Here r2 has aAbs, r3 has bAbs
  movs r4, #0xFF
  lsls r4, r4, #(significandBits) // Make +inf.

  // Anything above the +inf pattern is a NaN.
  cmp  r2, r4
  bhi  LOCAL_LABEL(a_is_nan)
  cmp  r3, r4
  bhi  LOCAL_LABEL(b_is_nan)

  cmp  r2, r4
  bne  LOCAL_LABEL(a_is_rational)
  // aAbs is INF.
  eors r1, r0 // aRep ^ bRep.
  movs r6, #1
  lsls r6, r6, #(typeWidth - 1) // get sign mask.
  cmp  r1, r6 // if they only differ on sign bit, this is -INF + INF
  beq  LOCAL_LABEL(a_is_nan)
  pop  {r4, r5, r6, r7, pc} // otherwise return a (the infinity).

LOCAL_LABEL(a_is_rational):
  cmp  r3, r4
  bne  LOCAL_LABEL(b_is_rational)
  movs r0, r1 // b is INF and a is finite: return b.
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(b_is_rational):
  // either a or b or both are zero.
  adds r4, r2, r3
  beq  LOCAL_LABEL(both_zero)
  cmp  r2, #0 // is absA 0 ?
  beq  LOCAL_LABEL(ret_b)
  pop  {r4, r5, r6, r7, pc} // b is zero: return a.

LOCAL_LABEL(both_zero):
  ands r0, r1 // +0 + -0 = +0
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(ret_b):
  movs r0, r1
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(b_is_nan):
  movs r0, r1
LOCAL_LABEL(a_is_nan):
  // Return the value in r0 with the quiet bit set.
  movs r1, #1
  lsls r1, r1, #(significandBits -1) // r1 is quiet bit.
  orrs r0, r1
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(ret_inf):
  // Keep the sign of a, force the exponent to all ones and clear the
  // significand field.
  movs r4, #0xFF
  lsls r4, r4, #(significandBits)
  orrs r0, r4
  lsrs r0, r0, #(significandBits)
  lsls r0, r0, #(significandBits)
  pop  {r4, r5, r6, r7, pc}


END_COMPILERRT_FUNCTION(__addsf3)

NO_EXEC_STACK_DIRECTIVE
|