forked from OSchip/llvm-project
492 lines
9.6 KiB
ArmAsm
492 lines
9.6 KiB
ArmAsm
//===----------------------Hexagon builtin routine ------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Double Precision Divide
|
|
|
|
// Register and predicate aliases used by __hexagon_divdf3.
// A (r1:0) and B (r3:2) are the two 64-bit operand register pairs; the
// routine writes its result to A before returning.
#define A r1:0
|
|
// High / low 32-bit halves of A.
#define AH r1
|
|
#define AL r0
|
|
|
|
#define B r3:2
|
|
// High / low 32-bit halves of B.
#define BH r3
|
|
#define BL r2
|
|
|
|
// Q: 64-bit integer quotient accumulator (and its halves).
#define Q r5:4
|
|
#define QH r5
|
|
#define QL r4
|
|
|
|
// PROD: 64-bit product / scratch pair (and its halves).
#define PROD r7:6
|
|
#define PRODHI r7
|
|
#define PRODLO r6
|
|
|
|
// Single-precision working registers for the reciprocal estimate:
// constant "one", denominator, error term and reciprocal.
#define SFONE r8
|
|
#define SFDEN r9
|
|
#define SFERROR r10
|
|
#define SFRECIP r11
|
|
|
|
// EXPBA pairs EXPB (r13, high) with EXPA (r12, low).
#define EXPBA r13:12
|
|
#define EXPB r13
|
|
#define EXPA r12
|
|
|
|
// Scratch pair for the product subtracted from the remainder in DIV_ITER1B.
#define REMSUB2 r15:14
|
|
|
|
|
|
|
|
|
|
// SIGN holds xor(AH,BH); its bit 31 is the sign of the result.
#define SIGN r28
|
|
|
|
// Predicate aliases. Q_POSITIVE: result sign is positive.
#define Q_POSITIVE p3
|
|
// NORMAL: set from dfclass(...,DFCLASS_NORMAL) on both operands.
#define NORMAL p2
|
|
// NO_OVF_UNF: exponent difference is inside the range tested in the main path.
#define NO_OVF_UNF p1
|
|
// P_TMP: scratch predicate.
#define P_TMP p0
|
|
|
|
// Bit offset at which the single-precision reciprocal mantissa is placed
// inside the 32-bit RECIPEST register.
#define RECIPEST_SHIFT 3
|
|
// Amount subtracted from the exponent difference (EXPA) before it is added
// to the exponent of the converted integer quotient.
#define QADJ 61
|
|
|
|
// Class masks passed as the immediate operand of dfclass.
#define DFCLASS_NORMAL 0x02
|
|
// NOTE(review): 0x0F presumably covers every non-NaN class; confirm against
// the dfclass definition in the ISA manual.
#define DFCLASS_NUMBER 0x0F
|
|
#define DFCLASS_INFINITE 0x08
|
|
#define DFCLASS_ZERO 0x01
|
|
// Any "number" class except zero / except infinite.
#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
|
|
#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
|
|
|
|
// Field widths of the double- and single-precision formats, and the
// double-precision exponent bias.
#define DF_MANTBITS 52
|
|
#define DF_EXPBITS 11
|
|
#define SF_MANTBITS 23
|
|
#define SF_EXPBITS 8
|
|
#define DF_BIAS 0x3ff
|
|
|
|
// Bit offset in USR of the 2-bit field read in the overflow path to pick
// the rounding-dependent overflow result.
#define SR_ROUND_OFF 22
|
|
|
|
// Export __qdsp_<TAG> as a global alias of __hexagon_<TAG>.
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
|
|
// Export __hexagon_fast_<TAG> as a global alias of __hexagon_<TAG>.
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
|
|
// Export __hexagon_fast2_<TAG> as a global alias of __hexagon_<TAG>.
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
|
|
// Emit the symbol size of TAG (distance from TAG to the current location).
#define END(TAG) .size TAG,.-TAG
|
|
|
|
// __hexagon_divdf3: double-precision divide.
// In:  A (r1:0) = numerator, B (r3:2) = denominator.
// Out: A (r1:0) = quotient; returns with jumpr r31.
// Also exported under the __qdsp_, __hexagon_fast_ and __hexagon_fast2_
// prefixes by the alias macros below.
.text
|
|
.global __hexagon_divdf3
|
|
.type __hexagon_divdf3,@function
|
|
Q6_ALIAS(divdf3)
|
|
FAST_ALIAS(divdf3)
|
|
FAST2_ALIAS(divdf3)
|
|
.p2align 5
|
|
__hexagon_divdf3:
|
|
{
|
|
// NORMAL is written by both dfclass tests in this packet; the code relies on
// it ending up set only when A and B are both normal.
// NOTE(review): relies on same-packet predicate writes being ANDed - confirm
// against the ISA manual.
NORMAL = dfclass(A,#DFCLASS_NORMAL)
|
|
NORMAL = dfclass(B,#DFCLASS_NORMAL)
|
|
// EXPB = high word of B, EXPA = high word of A (exponent fields extracted later).
EXPBA = combine(BH,AH)
|
|
// Bit 31 of SIGN is the sign of the quotient.
SIGN = xor(AH,BH)
|
|
}
|
|
// From here on r1:0 is treated as the running remainder and r3:2 as the
// denominator mantissa, so the operand names are retired.
#undef A
|
|
#undef AH
|
|
#undef AL
|
|
#undef B
|
|
#undef BH
|
|
#undef BL
|
|
#define REM r1:0
|
|
#define REMHI r1
|
|
#define REMLO r0
|
|
#define DENOM r3:2
|
|
#define DENOMHI r3
|
|
#define DENOMLO r2
|
|
{
|
|
// Anything other than normal/normal is handled out of line.
if (!NORMAL) jump .Ldiv_abnormal
|
|
// Top SF_MANTBITS bits of the denominator's mantissa (bits 29..51).
PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
|
|
// 1.0f bit pattern with the mantissa LSB set.
SFONE = ##0x3f800001
|
|
}
|
|
{
|
|
// Single-precision stand-in for the denominator: exponent of 1.0f plus the
// denominator's leading mantissa bits (LSB forced on by SFONE).
SFDEN = or(SFONE,PRODLO)
|
|
// Reduce EXPB / EXPA from raw high words to the 11-bit biased exponent fields.
EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
|
|
EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
|
|
// True when SIGN >= 0 as a signed word, i.e. bit 31 of SIGN is clear.
Q_POSITIVE = cmp.gt(SIGN,#-1)
|
|
}
|
|
// The sign now lives in Q_POSITIVE; r28 is reused for the constant 1.
#undef SIGN
|
|
#define ONE r28
|
|
// Common path for normal operands and for (de)normal operands that
// .Ldiv_abnormal has normalised. Expects SFONE, SFDEN, EXPA, EXPB,
// Q_POSITIVE and the mantissas in REM / DENOM to be set up.
// Builds a reciprocal estimate of the denominator in RECIPEST.
.Ldenorm_continue:
|
|
{
|
|
// Initial reciprocal approximation of SFDEN.
SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
|
|
// SFONE with its LSB cleared (0x3f800000).
SFERROR = and(SFONE,#-2)
|
|
ONE = #1
|
|
// Exponent difference of the two operands.
EXPA = sub(EXPA,EXPB)
|
|
}
|
|
// r13 stops being EXPB and becomes the integer reciprocal estimate.
#undef EXPB
|
|
#define RECIPEST r13
|
|
{
|
|
// error = SFERROR - recip * den
SFERROR -= sfmpy(SFRECIP,SFDEN):lib
|
|
// Overwrite the top 12 bits (sign + exponent) of the numerator's high word
// with 0x001, leaving the mantissa with an explicit leading 1.
REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
|
|
// Bit 23 (0x00800000) pre-shifted by RECIPEST_SHIFT; the reciprocal's
// mantissa is inserted below it later.
RECIPEST = ##0x00800000 << RECIPEST_SHIFT
|
|
}
|
|
{
|
|
// First refinement: recip += recip * error
SFRECIP += sfmpy(SFRECIP,SFERROR):lib
|
|
// Same sign/exponent replacement for the denominator's high word.
DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
|
|
// Reload the constant for the second error computation.
SFERROR = and(SFONE,#-2)
|
|
}
|
|
{
|
|
SFERROR -= sfmpy(SFRECIP,SFDEN):lib
|
|
// Bounds for the exponent-difference range check below.
QH = #-DF_BIAS+1
|
|
QL = #DF_BIAS-1
|
|
}
|
|
{
|
|
// Second refinement step.
SFRECIP += sfmpy(SFRECIP,SFERROR):lib
|
|
// Both compares write NO_OVF_UNF: EXPA > -DF_BIAS+1 and not EXPA > DF_BIAS-1.
NO_OVF_UNF = cmp.gt(EXPA,QH)
|
|
NO_OVF_UNF = !cmp.gt(EXPA,QL)
|
|
}
|
|
{
|
|
// Place the low SF_MANTBITS bits of the reciprocal at bit RECIPEST_SHIFT.
RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
|
|
Q = #0
|
|
EXPA = add(EXPA,#-QADJ)
|
|
}
|
|
// r10 / r11 are free again and become general temporaries.
#undef SFERROR
|
|
#undef SFRECIP
|
|
#define TMP r10
|
|
#define TMP1 r11
|
|
{
|
|
// Lower the estimate by 3 units (scaled by RECIPEST_SHIFT).
// NOTE(review): presumably biases the estimate low so partial quotients
// never overshoot - confirm.
RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
|
|
}
|
|
|
|
// DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA): one quotient-refinement step.
//   packet 1: PROD = RECIPEST * REMHI (unsigned); REM <<= REMSHIFT
//   packet 2: PRODLO = 0; REM -= PRODHI * DENOMLO; REMSUB2 = PRODHI * DENOMHI
//   packet 3: Q += QSHIFTINSN(PROD, QSHIFT); REM -= REMSUB2 << 32; EXTRA
// PRODHI is the partial quotient; the two 32x32 products together subtract
// PRODHI * DENOM from the shifted remainder. EXTRA is an optional extra
// instruction placed in the last packet.
#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
|
|
{ \
|
|
PROD = mpyu(RECIPEST,REMHI); \
|
|
REM = asl(REM,# ## ( REMSHIFT )); \
|
|
}; \
|
|
{ \
|
|
PRODLO = # ## 0; \
|
|
REM -= mpyu(PRODHI,DENOMLO); \
|
|
REMSUB2 = mpyu(PRODHI,DENOMHI); \
|
|
}; \
|
|
{ \
|
|
Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
|
|
REM -= asl(REMSUB2, # ## 32); \
|
|
EXTRA \
|
|
}
|
|
|
|
|
|
|
|
// Four steps. Each shifts the remainder left by 15 and adds its partial
// quotient into Q 15 bits lower than the previous one (PRODHI:0 shifted
// left 14, then right 1, 16 and 31). The last step also clears PROD.
DIV_ITER1B(ASL,14,15,)
|
|
DIV_ITER1B(ASR,1,15,)
|
|
DIV_ITER1B(ASR,16,15,)
|
|
DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
|
|
|
|
// Final correction of the integer quotient, sign application, conversion to
// double and the normal-case return.
// r15:14 becomes a scratch pair and r13 becomes EXPB again.
#undef REMSUB2
|
|
#define TMPPAIR r15:14
|
|
#define TMPPAIRHI r15
|
|
#define TMPPAIRLO r14
|
|
#undef RECIPEST
|
|
#define EXPB r13
|
|
{
|
|
// compare or sub with carry
|
|
TMPPAIR = sub(REM,DENOM)
|
|
// P_TMP is set when the remainder is already smaller than the denominator.
P_TMP = cmp.gtu(DENOM,REM)
|
|
// set up amt to add to q
|
|
// PROD was cleared by the last DIV_ITER1B, so PROD becomes 2 or stays 0.
if (!P_TMP.new) PRODLO = #2
|
|
}
|
|
{
|
|
Q = add(Q,PROD)
|
|
// Take the reduced remainder when one more denominator fitted.
if (!P_TMP) REM = TMPPAIR
|
|
TMPPAIR = #0
|
|
}
|
|
{
|
|
// Non-zero remainder: set the low bit of Q so the later conversion sees the
// quotient as inexact.
P_TMP = cmp.eq(REM,TMPPAIR)
|
|
if (!P_TMP.new) QL = or(QL,ONE)
|
|
}
|
|
{
|
|
PROD = neg(Q)
|
|
}
|
|
{
|
|
// Negative result: use the two's-complement negation.
if (!Q_POSITIVE) Q = PROD
|
|
}
|
|
// Remainder / denominator names are retired; r1:0 and r3:2 go back to
// being called A and B.
#undef REM
|
|
#undef REMHI
|
|
#undef REMLO
|
|
#undef DENOM
|
|
#undef DENOMLO
|
|
#undef DENOMHI
|
|
#define A r1:0
|
|
#define AH r1
|
|
#define AL r0
|
|
#define B r3:2
|
|
#define BH r3
|
|
#define BL r2
|
|
{
|
|
// Signed 64-bit integer quotient -> double.
A = convert_d2df(Q)
|
|
// Exponent difference outside the range tested earlier: handle out of line.
if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
|
|
}
|
|
{
|
|
// Add the adjusted exponent difference into the exponent field and return.
AH += asl(EXPA,#DF_MANTBITS-32)
|
|
jumpr r31
|
|
}
|
|
|
|
// Reached when the exponent difference was outside the range tested in the
// main path. Decides between overflow, a possibly-underflowing normal
// result, and a true underflow (handled inline below).
.Ldiv_ovf_unf:
|
|
{
|
|
AH += asl(EXPA,#DF_MANTBITS-32)
|
|
// NOTE(review): read in the same packet that updates AH; presumably this
// sees the exponent produced by convert_d2df, before EXPA is added - confirm
// packet read-before-write semantics.
EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
|
|
}
|
|
{
|
|
PROD = abs(Q)
|
|
// EXPA = adjusted exponent difference + exponent of the converted quotient.
EXPA = add(EXPA,EXPB)
|
|
}
|
|
{
|
|
P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow
|
|
if (P_TMP.new) jump:nt .Ldiv_ovf
|
|
}
|
|
{
|
|
P_TMP = cmp.gt(EXPA,#0)
|
|
if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible...
|
|
}
|
|
// Underflow
|
|
// We know what the infinite range exponent should be (EXPA)
|
|
// Q is 2's complement, PROD is abs(Q)
|
|
// Normalize Q, shift right, add a high bit, convert, change exponent
|
|
|
|
|
|
#define FUDGE1 7 // how much to shift right
|
|
#define FUDGE2 4 // how many guard/round to keep at lsbs
|
|
|
|
|
|
{
|
|
EXPB = add(clb(PROD),#-1) // doesn't need to be added in since
|
|
EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent
|
|
// Snapshot of the status register; TMP1 = 63 caps the right-shift amount.
TMP = USR
|
|
TMP1 = #63
|
|
}
|
|
{
|
|
// Right-shift amount: FUDGE1 - exponent, limited to 63.
EXPB = min(EXPA,TMP1)
|
|
// Candidate USR value with bits 0x30 set (used only if bits are lost).
TMP1 = or(TMP,#0x030)
|
|
// Normalise abs(Q).
// NOTE(review): shift count is presumably the clb-based EXPB from the
// previous packet, not the min() written in this packet - confirm.
PROD = asl(PROD,EXPB)
|
|
EXPA = #0
|
|
}
|
|
{
|
|
TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out
|
|
PROD = lsr(PROD,EXPB) // shift out bits
|
|
B = #1
|
|
}
|
|
{
|
|
// 1 > TMPPAIR (unsigned) means no non-zero bits were shifted out; otherwise
// OR a sticky 1 into the low word.
P_TMP = cmp.gtu(B,TMPPAIR)
|
|
if (!P_TMP.new) PRODLO = or(BL,PRODLO)
|
|
// The "add a high bit" step from the header comment.
PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
|
|
}
|
|
{
|
|
Q = neg(PROD)
|
|
// If any of the low FUDGE2 guard bits are set, switch to the USR value
// that has 0x30 set.
P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
|
|
if (!P_TMP.new) TMP = TMP1
|
|
}
|
|
{
|
|
USR = TMP
|
|
// Q was set to -PROD above; a positive result takes PROD instead.
if (Q_POSITIVE) Q = PROD
|
|
// Exponent adjustment added to the converted value below.
TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
|
|
}
|
|
{
|
|
A = convert_d2df(Q)
|
|
}
|
|
{
|
|
AH += asl(TMP,#DF_MANTBITS-32)
|
|
jumpr r31
|
|
}
|
|
|
|
|
|
// Result exponent is positive but the range check failed: the value in A is
// returned as-is, with extra status bits raised in one special case.
.Lpossible_unf:
|
|
// If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
|
|
// The answer is correct, but we need to raise Underflow
|
|
{
|
|
// B = A with the sign bit dropped.
B = extractu(A,#63,#0)
|
|
TMPPAIR = combine(##0x00100000,#0) // min normal
|
|
// Bit mask tested against PRODHI (high word of abs(Q)) below.
TMP = #0x7FFF
|
|
}
|
|
{
|
|
P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value...
|
|
P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)?
|
|
}
|
|
|
|
|
|
// For architecture version 60 the USR read and the conditional return are
// issued as separate instructions; otherwise they share one packet.
#if (__HEXAGON_ARCH__ == 60)
|
|
TMP = USR // If not, just return
|
|
if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact
|
|
// Note that inexact is already set...
|
|
#else
|
|
{
|
|
if (!P_TMP) jumpr r31 // If not, just return
|
|
TMP = USR // Else, we want to set Unf+Inexact
|
|
} // Note that inexact is already set...
|
|
#endif
|
|
{
|
|
// Set the Unf+Inexact bits (0x30) in the USR copy.
TMP = or(TMP,#0x30)
|
|
}
|
|
{
|
|
USR = TMP
|
|
}
|
|
{
|
|
// NOTE(review): result in p0 is not used; presumably issued for its
// exception side effect, like the dfcmp in .Ldiv_ovf - confirm.
p0 = dfcmp.eq(A,A)
|
|
jumpr r31
|
|
}
|
|
|
|
// Overflow: set status bits in USR and return either the largest finite
// magnitude or infinity, with the result sign.
.Ldiv_ovf:
|
|
|
|
|
|
// Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
|
|
|
|
|
|
{
|
|
TMP = USR
|
|
// Default answer: largest finite double magnitude.
B = combine(##0x7fefffff,#-1)
|
|
// AH = 0 for a positive result, all ones for a negative one; only its sign
// bit survives the final insert.
AH = mux(Q_POSITIVE,#0,#-1)
|
|
}
|
|
{
|
|
// Alternative answer: infinity.
PROD = combine(##0x7ff00000,#0)
|
|
// 2-bit field of USR at SR_ROUND_OFF.
QH = extractu(TMP,#2,#SR_ROUND_OFF)
|
|
// NOTE(review): 0x28 is presumably the overflow + inexact flag bits -
// confirm against the USR layout.
TMP = or(TMP,#0x28)
|
|
}
|
|
{
|
|
USR = TMP
|
|
// Flip the low bit of the extracted field when the result is negative.
QH ^= lsr(AH,#31)
|
|
// NOTE(review): QL presumably receives QH's value from before the xor in
// this packet - confirm packet semantics.
QL = QH
|
|
}
|
|
{
|
|
p0 = !cmp.eq(QL,#1) // if not round-to-zero
|
|
p0 = !cmp.eq(QH,#2) // and not rounding the other way
|
|
if (p0.new) B = PROD // go to inf
|
|
p0 = dfcmp.eq(B,B) // get exceptions
|
|
}
|
|
{
|
|
// Keep A's sign bit, take the low 63 bits from the chosen magnitude.
A = insert(B,#63,#0)
|
|
jumpr r31
|
|
}
|
|
|
|
// Out-of-line handling for operands that are not both normal.
// Entered from the second packet of the routine, so r28 still holds SIGN
// and SFONE has already been loaded by that packet.
#undef ONE
|
|
#define SIGN r28
|
|
// p2 / p1 are reused here as "zero" and "infinite" classification predicates.
#undef NORMAL
|
|
#undef NO_OVF_UNF
|
|
#define P_INF p1
|
|
#define P_ZERO p2
|
|
.Ldiv_abnormal:
|
|
{
|
|
// Each pair of same-predicate dfclass tests below is relied on to be set
// only when both operands match.
// NOTE(review): confirm same-packet predicate AND in the ISA manual.
P_TMP = dfclass(A,#DFCLASS_NUMBER)
|
|
P_TMP = dfclass(B,#DFCLASS_NUMBER)
|
|
Q_POSITIVE = cmp.gt(SIGN,#-1)
|
|
}
|
|
{
|
|
// Both operands infinite.
P_INF = dfclass(A,#DFCLASS_INFINITE)
|
|
P_INF = dfclass(B,#DFCLASS_INFINITE)
|
|
}
|
|
{
|
|
// Both operands zero.
P_ZERO = dfclass(A,#DFCLASS_ZERO)
|
|
P_ZERO = dfclass(B,#DFCLASS_ZERO)
|
|
}
|
|
{
|
|
// Either operand outside the "number" classes -> NaN handling;
// inf / inf -> invalid.
if (!P_TMP) jump .Ldiv_nan
|
|
if (P_INF) jump .Ldiv_invalid
|
|
}
|
|
{
|
|
// 0 / 0 -> invalid.
if (P_ZERO) jump .Ldiv_invalid
|
|
}
|
|
{
|
|
// P_ZERO now means "A is nonzero and B is not infinite"; when it is false
// the quotient is zero.
P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero
|
|
P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite
|
|
}
|
|
{
|
|
// P_INF now means "A is not infinite and B is nonzero"; when it is false
// the quotient is infinite.
P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
|
|
P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
|
|
}
|
|
{
|
|
if (!P_ZERO) jump .Ldiv_zero_result
|
|
if (!P_INF) jump .Ldiv_inf_result
|
|
}
|
|
// Now we've narrowed it down to (de)normal / (de)normal
|
|
// Set up A/EXPA B/EXPB and go back
|
|
#undef P_ZERO
|
|
#undef P_INF
|
|
#define P_TMP2 p1
|
|
{
|
|
// Remember which operands are normal (the others are treated as denormal).
P_TMP = dfclass(A,#DFCLASS_NORMAL)
|
|
P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
|
|
// Hidden-bit position in the high word.
TMP = ##0x00100000
|
|
}
|
|
{
|
|
// Keep the raw high words for exponent extraction below.
EXPBA = combine(BH,AH)
|
|
AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
|
|
BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
|
|
}
|
|
{
|
|
if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit
|
|
if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit
|
|
}
|
|
{
|
|
// Left-shift counts that bring each mantissa's leading 1 to the hidden-bit
// position.
QH = add(clb(A),#-DF_EXPBITS)
|
|
QL = add(clb(B),#-DF_EXPBITS)
|
|
TMP = #1
|
|
}
|
|
{
|
|
EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
|
|
EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
|
|
}
|
|
{
|
|
// Normalise the mantissas; a denormal operand gets effective exponent
// 1 - shift.
A = asl(A,QH)
|
|
B = asl(B,QL)
|
|
if (!P_TMP) EXPA = sub(TMP,QH)
|
|
if (!P_TMP2) EXPB = sub(TMP,QL)
|
|
} // recreate values needed by resume code
|
|
{
|
|
// Same denominator-mantissa extraction as in the main path.
PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
|
|
}
|
|
{
|
|
SFDEN = or(SFONE,PRODLO)
|
|
jump .Ldenorm_continue
|
|
}
|
|
|
|
// Zero quotient: return a zero carrying the xor of the operand signs.
.Ldiv_zero_result:
|
|
{
|
|
// Bit 31 of AH becomes the result sign.
AH = xor(AH,BH)
|
|
B = #0
|
|
}
|
|
{
|
|
// Keep the sign bit of A and clear the remaining 63 bits.
A = insert(B,#63,#0)
|
|
jumpr r31
|
|
}
|
|
// Infinite quotient: return a signed infinity. When the denominator is zero
// and the numerator is not infinite, the DBZ bit is also set in USR.
.Ldiv_inf_result:
|
|
{
|
|
// p2: B is zero and A is not infinite.
// NOTE(review): relies on same-packet predicate writes being ANDed - confirm.
p2 = dfclass(B,#DFCLASS_ZERO)
|
|
p2 = dfclass(A,#DFCLASS_NONINFINITE)
|
|
}
|
|
{
|
|
TMP = USR
|
|
// Skip the flag update when this is not a divide by zero; the sign xor in
// this packet happens on both paths.
if (!p2) jump 1f
|
|
AH = xor(AH,BH)
|
|
}
|
|
{
|
|
TMP = or(TMP,#0x04) // DBZ
|
|
}
|
|
{
|
|
USR = TMP
|
|
}
|
|
1:
|
|
{
|
|
// Infinity bit pattern (sign applied by the insert below).
B = combine(##0x7ff00000,#0)
|
|
p0 = dfcmp.uo(B,B) // take possible exception
|
|
}
|
|
{
|
|
// Keep the sign bit of A, take the low 63 bits from B.
A = insert(B,#63,#0)
|
|
jumpr r31
|
|
}
|
|
// Reached when at least one operand failed the DFCLASS_NUMBER test.
// Returns the all-ones bit pattern in A.
.Ldiv_nan:
|
|
{
|
|
// NOTE(review): class mask 0x10 is the one bit outside DFCLASS_NUMBER,
// presumably "NaN" - confirm against the dfclass definition.
p0 = dfclass(A,#0x10)
|
|
p1 = dfclass(B,#0x10)
|
|
// Replace whichever operand is not in class 0x10 with the other operand.
if (!p0.new) A = B
|
|
if (!p1.new) B = A
|
|
}
|
|
{
|
|
// Results in QH / QL are not read afterwards; the conversions are issued
// for their exception side effects.
QH = convert_df2sf(A) // get possible invalid exceptions
|
|
QL = convert_df2sf(B)
|
|
}
|
|
{
|
|
// All-ones result (exponent all ones, nonzero mantissa).
A = #-1
|
|
jumpr r31
|
|
}
|
|
|
|
// Invalid operation (inf / inf or 0 / 0): produce the result by converting a
// single-precision value whose exponent is all ones and mantissa is nonzero.
.Ldiv_invalid:
|
|
{
|
|
TMP = ##0x7f800001
|
|
}
|
|
{
|
|
A = convert_sf2df(TMP) // get invalid, get DF qNaN
|
|
jumpr r31
|
|
}
|
|
// Record the size of the routine for the symbol table.
END(__hexagon_divdf3)
|