llvm-project/compiler-rt/lib/builtins/hexagon/dfaddsub.S

397 lines
8.1 KiB
ArmAsm

//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Double Precision Add and Subtract
// Register, constant and macro aliases used by the add/sub routine below.
//
// A and B are the two double-precision operands as they are read at function
// entry; every exit path also leaves its result in A before `jumpr r31`.
// The *H / *L names are the high and low 32-bit halves of the same pair.
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
// Exponent fields extracted from AH and BH.  EXPB_A names both registers as
// one pair so that the two exponents can be exchanged with a single combine().
#define EXPA r4
#define EXPB r5
#define EXPB_A r5:4
// ZTMP carries pairs of small constants loaded with combine(), e.g. (62,1)
// and (54,2045); the halves are read individually as ZTMPH / ZTMPL.
#define ZTMP r7:6
#define ZTMPH r7
#define ZTMPL r6
// ATMP / BTMP hold the 64-bit fixed-point significands built from A and B.
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define BTMP r9:8
#define BTMPH r9
#define BTMPL r8
// ATMP2 is a second 64-bit scratch pair (negated ATMP, shifted-out bits of
// BTMP, and the A-B difference at different points of the routine).
#define ATMP2 r11:10
#define ATMP2H r11
#define ATMP2L r10
// EXPDIFF and EXTRACTOFF are the high and low halves of EXTRACTAMT, which is
// used as the register operand of extractu().  These names are #undef'd
// further down and r15:14 is reused under the name ZERO.
#define EXPDIFF r15
#define EXTRACTOFF r14
#define EXTRACTAMT r15:14
// 32-bit scratch register.
#define TMP r28
// Field geometry used with insert()/extractu(): MANTBITS is the width of the
// mantissa field, HI_MANTBITS the bit offset of the exponent field within the
// high word, EXPBITS the width of the exponent field.
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
// Exponent adjustment constant; used below as #-BIAS-60 and ##BIAS+BIAS-2.
#define BIAS 1024
// MANTISSA_TO_INT_BIAS and SR_BIT_INEXACT are not referenced by the code in
// this file as shown.
#define MANTISSA_TO_INT_BIAS 52
#define SR_BIT_INEXACT 5
// Bit offset, within the value read from USR, of the 2-bit rounding-mode
// field.  May be overridden by defining SR_ROUND_OFF before this point.
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif
// Predicate aliases for the entry packets only; both are #undef'd before
// .Ladd_continue, where p3/p2 get new names.
#define NORMAL p3
#define BIGB p2
// Alias helpers: each emits a .global for the alias name and .set's it equal
// to __hexagon_<TAG>.
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
// Emits the .size directive for TAG, measured from TAG to the current location.
#define END(TAG) .size TAG,.-TAG
// Code section, exported entry points and their alias symbols.
.text
// Both entry points are exported and typed as functions.
.global __hexagon_adddf3
.global __hexagon_subdf3
.type __hexagon_adddf3, @function
.type __hexagon_subdf3, @function
// For each routine, also export the __qdsp_*, __hexagon_fast_* and
// __hexagon_fast2_* names as aliases of the __hexagon_* symbol.
Q6_ALIAS(adddf3)
FAST_ALIAS(adddf3)
FAST2_ALIAS(adddf3)
Q6_ALIAS(subdf3)
FAST_ALIAS(subdf3)
FAST2_ALIAS(subdf3)
// Align the entry point that follows to a 2^5 = 32-byte boundary.
.p2align 5
// __hexagon_adddf3: double-precision addition.  Operands are read from A and
// B; the sum is left in A.
//
// Entry sequence: extract both exponents, divert to .Ladd_abnormal unless
// both operands pass dfclass(...,#2), order the operands so that A carries
// the larger exponent, and build the two fixed-point significands in
// ATMP / BTMP.  Falls through into .Ladd_continue.
__hexagon_adddf3:
{
// Pull the EXPBITS-wide exponent field out of each high word.
EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
// Sets bit 61 of the pair: the position immediately above the mantissa
// field that insert() fills (bits 9..60) two packets below.
ATMP = combine(##0x20000000,#0)
}
{
// Two writes to the same predicate in one packet: NORMAL ends up true only
// when both A and B satisfy the class test.
NORMAL = dfclass(A,#2)
NORMAL = dfclass(B,#2)
// Give BTMP the same bit-61 template as ATMP.
BTMP = ATMP
BIGB = cmp.gtu(EXPB,EXPA) // Does B have the larger exponent field?
}
{
if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code
// The three conditional moves below all read pre-packet values, so together
// they exchange A with B and EXPA with EXPB.
if (BIGB) A = B // if B >> A, swap A and B
if (BIGB) B = A // If B >> A, swap A and B
if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents
}
{
// Drop each MANTBITS-wide mantissa into its template at bit offset
// EXPBITS-2, below the bit set by the combine() above.
ATMP = insert(A,#MANTBITS,#EXPBITS-2) // Q1.62
BTMP = insert(B,#MANTBITS,#EXPBITS-2) // Q1.62
// Alignment distance for B's significand.
EXPDIFF = sub(EXPA,EXPB)
// ZTMPH = 62 (clamp for EXPDIFF), ZTMPL = 1 (sticky bit).
ZTMP = combine(#62,#1)
}
// From here on p3/p2/p1 are used under new names: the signs of B and A and
// the "no bits were shifted out" flag.
#undef BIGB
#undef NORMAL
#define B_POS p3
#define A_POS p2
#define NO_STICKIES p1
// .Ladd_continue: common add path.  Reached by fall-through from the entry
// sequence and by a jump from .Ladd_abnormal.
// Expects on entry:
//   ATMP, BTMP = fixed-point significands of A and B (magnitudes)
//   EXPA       = exponent of A, EXPDIFF = EXPA - EXPB
//   ZTMP       = (62, 1)
//   AH, BH     = still carry the original sign bits
.Ladd_continue:
{
EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60,
// will collapse to sticky bit
// Prepare the negated significand in case A is negative.
ATMP2 = neg(ATMP)
// Sign test: true when AH, read as signed, is >= 0.
A_POS = cmp.gt(AH,#-1)
// Low half of EXTRACTAMT; the high half is EXPDIFF.
EXTRACTOFF = #0
}
{
// Apply A's sign to its significand.
if (!A_POS) ATMP = ATMP2
// Field of BTMP selected by EXTRACTAMT (EXPDIFF paired with a zero
// EXTRACTOFF) -- presumably the low EXPDIFF bits, i.e. exactly the bits the
// shift on the next line discards.
ATMP2 = extractu(BTMP,EXTRACTAMT)
// Align B's significand to A's exponent.
BTMP = ASR(BTMP,EXPDIFF)
// r15:14 is written here under its new name; the reads of EXPDIFF /
// EXTRACTAMT above are in the same packet as this write.
#undef EXTRACTAMT
#undef EXPDIFF
#undef EXTRACTOFF
#define ZERO r15:14
ZERO = #0
}
{
// If any shifted-out bit was set, fold it into BTMP's lowest bit
// (ZTMPL holds 1).
NO_STICKIES = cmp.eq(ATMP2,ZERO)
if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)
// Exponent adjustment that is added to the converted result below.
EXPB = add(EXPA,#-BIAS-60)
// Sign test: true when BH, read as signed, is >= 0.
B_POS = cmp.gt(BH,#-1)
}
{
// Compute both candidates in parallel; both read the pre-packet ATMP.
ATMP = add(ATMP,BTMP) // ADD!!!
ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!!
// ZTMPH = 54, ZTMPL = 2045: bounds for the EXPA range check.
ZTMP = combine(#54,##2045)
}
{
// p0 = (EXPA > 54) AND NOT (EXPA > 2045), unsigned.  Outside that range
// take the slower path that handles overflow and denormal results.
p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation
p0 = !cmp.gtu(EXPA,ZTMPL)
if (!p0.new) jump:nt .Ladd_ovf_unf
if (!B_POS) ATMP = ATMP2 // if B neg, pick difference
}
{
A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice!
// p0 = both halves of the 64-bit sum are zero (full cancellation).
p0 = cmp.eq(ATMPH,#0)
p0 = cmp.eq(ATMPL,#0)
if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly?
}
{
// Add the exponent adjustment into the exponent field of the result.
AH += asl(EXPB,#HI_MANTBITS)
jumpr r31
}
.falign
// __hexagon_subdf3: double-precision subtraction, implemented as A + (-B).
__hexagon_subdf3:
{
// Flip bit 31 of B's high word (the sign bit tested by the add routine).
BH = togglebit(BH,#31)
// Tail-jump into the add routine through its __qdsp_ alias, which
// Q6_ALIAS(adddf3) sets equal to __hexagon_adddf3.
jump __qdsp_adddf3
}
.falign
// .Ladd_zero: the 64-bit fixed-point sum was exactly zero (full cancellation).
// Reached from .Ladd_continue and .Ladd_ovf_unf.
// Result is +0, except when the rounding-mode field read from USR equals 2
// (round towards negative infinity), in which case the result is -0.
.Ladd_zero:
{
TMP = USR
A = #0
BH = #1
}
{
// Isolate the 2-bit rounding-mode field.  Use SR_ROUND_OFF, as .Ladd_ovf and
// .Lzero_plus_zero do, instead of a literal 22, so that an overridden
// SR_ROUND_OFF is honoured on this path as well.
TMP = extractu(TMP,#2,#SR_ROUND_OFF)
// BH = 1 << 31: mask for the sign bit of the high word.
BH = asl(BH,#31)
}
{
p0 = cmp.eq(TMP,#2)
// A is zero here, so the XOR simply sets the sign bit, giving -0.
if (p0.new) AH = xor(AH,BH)
jumpr r31
}
.falign
.Ladd_ovf_unf:
// Reached from .Ladd_continue when EXPA is outside the range (54, 2045].
// Overflow or Denormal is possible
// Good news: Underflow flag is not possible!
// ATMP has 2's complement value
//
// EXPA has A's exponent, EXPB has EXPA-BIAS-60
//
// Convert, extract exponent, add adjustment.
// If > 2046, overflow
// If <= 0, denormal
//
// Note that we've not done our zero check yet, so do that too
{
A = convert_d2df(ATMP)
// p0 = both halves of the sum are zero.
p0 = cmp.eq(ATMPH,#0)
p0 = cmp.eq(ATMPL,#0)
if (p0.new) jump:nt .Ladd_zero
}
{
// TMP = exponent field of the converted value; this read and the update of
// AH on the next line are in the same packet, so TMP sees the unadjusted AH.
TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)
// Speculatively apply the adjustment; this is the final answer if the
// resulting exponent turns out to be in range.
AH += asl(EXPB,#HI_MANTBITS)
}
{
// EXPB = converted exponent + adjustment.
EXPB = add(EXPB,TMP)
// B = bit 52 only: the position just above the MANTBITS-wide mantissa
// field, used by the denormal path below.
B = combine(##0x00100000,#0)
}
{
// Larger than BIAS+BIAS-2 (= 2046): overflow.
p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
if (p0.new) jump:nt .Ladd_ovf
}
{
// Positive exponent: A as adjusted above is the result.
p0 = cmp.gt(EXPB,#0)
if (p0.new) jumpr:t r31
// Otherwise TMP = 1 - EXPB is the right-shift count for the denormal.
TMP = sub(#1,EXPB)
}
{
// B = bit 52 with A's MANTBITS-wide mantissa inserted below it.
B = insert(A,#MANTBITS,#0)
// A = the 2's complement sum; only its top bit survives the insert below.
A = ATMP
}
{
// Shift the significand down into denormal position.
B = lsr(B,TMP)
}
{
// Replace the low 63 bits of A with the shifted value, leaving bit 63.
A = insert(B,#63,#0)
jumpr r31
}
.falign
.Ladd_ovf:
// Reached from .Ladd_ovf_unf when the computed exponent exceeds BIAS+BIAS-2.
// We get either max finite value or infinity. Either way, overflow+inexact
{
// All three statements read pre-packet values: A receives the sum that was
// in ATMP before ATMP is overwritten with the max-finite pattern.
A = ATMP // 2's complement value
TMP = USR
ATMP = combine(##0x7fefffff,#-1) // positive max finite
}
{
EXPB = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
TMP = or(TMP,#0x28) // inexact + overflow
BTMP = combine(##0x7ff00000,#0) // positive infinity
}
{
// Write the updated flag bits back to USR.
USR = TMP
// XOR bit 31 of AH (the sign of the sum) into the rounding-mode value.
EXPB ^= lsr(AH,#31) // Does sign match rounding?
// Same packet as the XOR above, so TMP receives the pre-XOR EXPB.
TMP = EXPB // unmodified rounding mode
}
{
// Both predicate writes must hold for the move to infinity to happen;
// otherwise ATMP keeps the max-finite pattern.
p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
p0 = !cmp.eq(EXPB,#2) // Not rounding the other way,
if (p0.new) ATMP = BTMP // we should get infinity
}
{
A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
}
{
// NOTE(review): p0 is not consumed before the return; the purpose of this
// self-compare is not evident from this file -- confirm before removing.
p0 = dfcmp.eq(A,A)
jumpr r31
}
// .Ladd_abnormal: reached from the entry sequence when at least one operand
// fails dfclass(...,#2).  Sorts the operands by magnitude, dispatches the
// NaN, infinity and zero cases, and otherwise prepares a subnormal operand
// for the common path at .Ladd_continue.
.Ladd_abnormal:
{
ATMP = extractu(A,#63,#0) // strip off sign
BTMP = extractu(B,#63,#0) // strip off sign
}
{
// Unsigned compare of the sign-stripped bit patterns.  When A is not
// strictly larger, exchange A and B (both moves read pre-packet values).
p3 = cmp.gtu(ATMP,BTMP)
if (!p3.new) A = B // sort values
if (!p3.new) B = A // sort values
}
{
// Any NaN --> NaN, possibly raise invalid if sNaN
// Only A is tested; presumably a NaN operand, if any, has been sorted into
// A by the compare above.
p0 = dfclass(A,#0x0f) // A not NaN?
if (!p0.new) jump:nt .Linvalid_nan_add
// Keep the sign-stripped copies in the same order as A and B.
if (!p3) ATMP = BTMP
if (!p3) BTMP = ATMP
}
{
// Infinity + non-infinity number is infinity
// Infinity + infinity --> inf or nan
p1 = dfclass(A,#0x08) // A is infinity
if (p1.new) jump:nt .Linf_add
}
{
p2 = dfclass(B,#0x01) // B is zero
if (p2.new) jump:nt .LB_zero // so return A or special 0+0
// .LB_zero compares A against this zero.
ATMP = #0
}
// We are left with adding one or more subnormals
{
// Class mask #4 on the larger operand: if it matches, take the path that
// handles two subnormal operands.
p0 = dfclass(A,#4)
if (p0.new) jump:nt .Ladd_two_subnormal
// Same bit-61 template the entry sequence uses for ATMP.
ATMP = combine(##0x20000000,#0)
}
{
EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
// B is treated as having exponent 1 for the alignment computation.
EXPB = #1
// BTMP already ABS(B)
// Shift it to the same bit offset that insert() uses for the mantissa;
// unlike ATMP, no template bit is set above it.
BTMP = asl(BTMP,#EXPBITS-2)
}
// r15:14 goes back to its EXPDIFF / EXTRACTOFF naming for the packet below
// and for .Ladd_continue.
#undef ZERO
#define EXTRACTOFF r14
#define EXPDIFF r15
{
// Same setup as the last entry packet, then join the common path.
ATMP = insert(A,#MANTBITS,#EXPBITS-2)
EXPDIFF = sub(EXPA,EXPB)
ZTMP = combine(#62,#1)
jump .Ladd_continue
}
// .Ladd_two_subnormal: reached from .Ladd_abnormal when the larger operand
// matches dfclass(...,#4).  The operands are added as 64-bit integers:
// convert sign-magnitude to 2's complement, add, and convert back.
.Ladd_two_subnormal:
{
// Magnitudes: low 63 bits of each operand.
ATMP = extractu(A,#63,#0)
BTMP = extractu(B,#63,#0)
}
{
// Negated magnitudes, used for operands whose sign bit is set.
ATMP = neg(ATMP)
BTMP = neg(BTMP)
// p0 / p1 = sign bit of A / B is clear.
p0 = cmp.gt(AH,#-1)
p1 = cmp.gt(BH,#-1)
}
{
// Non-negative operands are used as-is (their bit pattern is the magnitude).
if (p0) ATMP = A
if (p1) BTMP = B
}
{
// Signed integer sum.
ATMP = add(ATMP,BTMP)
}
{
// BTMP = magnitude to use if the sum is negative.
BTMP = neg(ATMP)
// p0 = sum is non-negative.
p0 = cmp.gt(ATMPH,#-1)
B = #0
}
{
// A = |sum|.
if (!p0) A = BTMP
if (p0) A = ATMP
// B becomes the pattern with only bit 63 set; BH doubles as the sign mask.
BH = ##0x80000000
}
{
// Restore the sign for a negative sum.  This reads the p0 from before the
// packet; the p0 written on the next line is consumed only via .new.
if (!p0) AH = or(AH,BH)
// Floating-point compare of the pre-packet A against B.
p0 = dfcmp.eq(A,B)
if (p0.new) jump:nt .Lzero_plus_zero
}
{
jumpr r31
}
// .Linvalid_nan_add: reached from .Ladd_abnormal when A fails the "not NaN"
// class test.  Runs each NaN operand through convert_df2sf for its flag side
// effect and returns the all-ones bit pattern in A.
.Linvalid_nan_add:
{
// The converted value in TMP is not used afterwards.
TMP = convert_df2sf(A) // will generate invalid if sNaN
p0 = dfclass(B,#0x0f) // if B is not NaN
if (p0.new) B = A // make it whatever A is
}
{
// Again only the side effect matters; BL is not read after this.
BL = convert_df2sf(B) // will generate invalid if sNaN
// Result: every bit of the 64-bit pair set.
A = #-1
jumpr r31
}
.falign
// .LB_zero: reached from .Ladd_abnormal when B is zero.  ATMP was set to 0
// in the packet that jumps here.
.LB_zero:
{
p0 = dfcmp.eq(ATMP,A) // is A also zero?
if (!p0.new) jumpr:t r31 // If not, just return A
}
// 0 + 0 is special
// if equal integral values, they have the same sign, which is fine for all rounding
// modes.
// If unequal in sign, we get +0 for all rounding modes except round down
// Also entered by a jump from .Ladd_two_subnormal.
.Lzero_plus_zero:
{
// 64-bit integer compare of the two bit patterns.
p0 = cmp.eq(A,B)
if (p0.new) jumpr:t r31
}
{
TMP = USR
}
{
// Isolate the 2-bit rounding-mode field.
TMP = extractu(TMP,#2,#SR_ROUND_OFF)
// Default result: +0.
A = #0
}
{
// Rounding-mode value 2 (round down, per the comment above): return -0.
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = ##0x80000000
jumpr r31
}
// .Linf_add: reached from .Ladd_abnormal when A is infinite.
.Linf_add:
// adding infinities is only OK if they are equal
{
// Both predicate writes must hold: the high words differ AND B is infinite.
// If that is not the case, A (the infinity) is returned unchanged.
p0 = !cmp.eq(AH,BH) // Do they have different signs
p0 = dfclass(B,#8) // And is B also infinite?
if (!p0.new) jumpr:t r31 // If not, just a normal inf
}
{
// Single-precision pattern fed to the conversion below.
BL = ##0x7f800001 // sNAN
}
{
A = convert_sf2df(BL) // trigger invalid, set NaN
jumpr r31
}
// Emits .size for __hexagon_adddf3, measured from its label to this point.
END(__hexagon_adddf3)