GlobalISel: Handle llvm.roundeven

I still think it's highly questionable that we have two intrinsics
with identical behavior that vary only by the name of the libcall used
if they happen to be lowered that way, but try to reduce the feature
delta between SDAG and GlobalISel for recently added intrinsics. I'm
not sure which opcode should be considered the canonical one, but
lower roundeven back to rint.
This commit is contained in:
Matt Arsenault 2020-07-19 09:56:15 -04:00 committed by Matt Arsenault
parent 71059257bd
commit 0da582d9b6
11 changed files with 710 additions and 11 deletions

View File

@ -228,6 +228,8 @@ private:
ArrayRef<Register> Src1Regs,
ArrayRef<Register> Src2Regs, LLT NarrowTy);
void changeOpcode(MachineInstr &MI, unsigned NewOpcode);
public:
/// Return the alignment to use for a stack temporary object with the given
/// type.

View File

@ -667,6 +667,15 @@ public:
Types2);
}
/// The instruction is emitted as a library call.
///
/// Predicate-less form: marks every type index as covered and registers the
/// Libcall action under the `always` predicate. Returns the rule set
/// reference handed back by actionIf so further rules can be chained.
LegalizeRuleSet &libcall() {
// NOTE(review): no name from LegalizeMutations is visibly used in this
// body -- confirm whether this using-directive is actually required.
using namespace LegalizeMutations;
// We have no choice but conservatively assume that a predicate-less libcall
// properly handles all type indices by design:
markAllIdxsAsCovered();
return actionIf(LegalizeAction::Libcall, always);
}
/// Like legalIf, but for the Libcall action.
LegalizeRuleSet &libcallIf(LegalityPredicate Predicate) {
// We have no choice but conservatively assume that a libcall with a

View File

@ -297,6 +297,9 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUND)
/// INTRINSIC round to integer intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_LRINT)
/// INTRINSIC roundeven intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUNDEVEN)
/// INTRINSIC readcyclecounter
HANDLE_TARGET_OPCODE(G_READCYCLECOUNTER)

View File

@ -918,6 +918,12 @@ def G_INTRINSIC_LRINT : GenericInstruction {
let hasSideEffects = 0;
}
// Generic opcode for the roundeven intrinsic. It has a single result ($dst)
// and a single source operand ($src1); both are constrained to type index 0,
// i.e. the result has the same type as the input. The instruction is declared
// to have no side effects.
def G_INTRINSIC_ROUNDEVEN : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
let hasSideEffects = 0;
}
def G_READCYCLECOUNTER : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins);

View File

@ -1280,6 +1280,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FRINT;
case Intrinsic::round:
return TargetOpcode::G_INTRINSIC_ROUND;
case Intrinsic::roundeven:
return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
case Intrinsic::sin:
return TargetOpcode::G_FSIN;
case Intrinsic::sqrt:

View File

@ -77,6 +77,8 @@ static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
return Type::getFloatTy(Ctx);
case 64:
return Type::getDoubleTy(Ctx);
case 80:
return Type::getX86_FP80Ty(Ctx);
case 128:
return Type::getFP128Ty(Ctx);
default:
@ -386,7 +388,7 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
}
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE(LibcallPrefix) \
#define RTLIBCASE_INT(LibcallPrefix) \
do { \
switch (Size) { \
case 32: \
@ -400,19 +402,33 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
} \
} while (0)
assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
#define RTLIBCASE(LibcallPrefix) \
do { \
switch (Size) { \
case 32: \
return RTLIB::LibcallPrefix##32; \
case 64: \
return RTLIB::LibcallPrefix##64; \
case 80: \
return RTLIB::LibcallPrefix##80; \
case 128: \
return RTLIB::LibcallPrefix##128; \
default: \
llvm_unreachable("unexpected size"); \
} \
} while (0)
switch (Opcode) {
case TargetOpcode::G_SDIV:
RTLIBCASE(SDIV_I);
RTLIBCASE_INT(SDIV_I);
case TargetOpcode::G_UDIV:
RTLIBCASE(UDIV_I);
RTLIBCASE_INT(UDIV_I);
case TargetOpcode::G_SREM:
RTLIBCASE(SREM_I);
RTLIBCASE_INT(SREM_I);
case TargetOpcode::G_UREM:
RTLIBCASE(UREM_I);
RTLIBCASE_INT(UREM_I);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
RTLIBCASE(CTLZ_I);
RTLIBCASE_INT(CTLZ_I);
case TargetOpcode::G_FADD:
RTLIBCASE(ADD_F);
case TargetOpcode::G_FSUB:
@ -455,6 +471,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(RINT_F);
case TargetOpcode::G_FNEARBYINT:
RTLIBCASE(NEARBYINT_F);
case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
RTLIBCASE(ROUNDEVEN_F);
}
llvm_unreachable("Unknown libcall function");
}
@ -670,10 +688,11 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FSQRT:
case TargetOpcode::G_FRINT:
case TargetOpcode::G_FNEARBYINT: {
case TargetOpcode::G_FNEARBYINT:
case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
if (!HLTy || (Size != 32 && Size != 64 && Size != 128)) {
LLVM_DEBUG(dbgs() << "No libcall available for size " << Size << ".\n");
if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
return UnableToLegalize;
}
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
@ -2163,6 +2182,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FPOW:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
assert(TypeIdx == 0);
Observer.changingInstr(MI);
@ -2363,6 +2383,13 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
}
}
// Legalize \p MI by rewriting it in place to \p NewOpcode. Only the
// instruction descriptor is swapped; the operands are not touched here. The
// mutation is bracketed by changingInstr/changedInstr so the observer is told
// about the change before and after it happens.
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
Observer.changingInstr(MI);
// Look up the descriptor for the replacement opcode and install it.
const auto &NewDesc = MIRBuilder.getTII().get(NewOpcode);
MI.setDesc(NewDesc);
Observer.changedInstr(MI);
}
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
using namespace TargetOpcode;
@ -2461,6 +2488,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerFFloor(MI);
case TargetOpcode::G_INTRINSIC_ROUND:
return lowerIntrinsicRound(MI);
case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
// Since round even is the assumed rounding mode for unconstrained FP
// operations, rint and roundeven are the same operation.
changeOpcode(MI, TargetOpcode::G_FRINT);
return Legalized;
}
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Register OldValRes = MI.getOperand(0).getReg();
Register SuccessRes = MI.getOperand(1).getReg();
@ -3557,6 +3590,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FFLOOR:
case G_FRINT:
case G_INTRINSIC_ROUND:
case G_INTRINSIC_ROUNDEVEN:
case G_INTRINSIC_TRUNC:
case G_FCOS:
case G_FSIN:

View File

@ -711,7 +711,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.lower();
getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
// Lower roundeven into G_FRINT
getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
.scalarize(0)
.lower();

View File

@ -70,6 +70,11 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
setLegalizerInfoAVX512DQ();
setLegalizerInfoAVX512BW();
getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN)
.scalarize(0)
.minScalar(0, LLT::scalar(32))
.libcall();
setLegalizeScalarToDifferentSizeStrategy(G_PHI, 0, widen_1);
for (unsigned BinOp : {G_SUB, G_MUL, G_AND, G_OR, G_XOR})
setLegalizeScalarToDifferentSizeStrategy(BinOp, 0, widen_1);

View File

@ -132,6 +132,9 @@
# DEBUG-NEXT: G_INTRINSIC_LRINT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_INTRINSIC_ROUNDEVEN (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_READCYCLECOUNTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined

View File

@ -0,0 +1,566 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
define float @v_roundeven_f32(float %x) {
; GFX6-LABEL: v_roundeven_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_rndne_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_rndne_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f32_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%roundeven = call float @llvm.roundeven.f32(float %x)
ret float %roundeven
}
define <2 x float> @v_roundeven_v2f32(<2 x float> %x) {
; GFX6-LABEL: v_roundeven_v2f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_rndne_f32_e32 v0, v0
; GFX6-NEXT: v_rndne_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_rndne_f32_e32 v0, v0
; GFX7-NEXT: v_rndne_f32_e32 v1, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f32_e32 v0, v0
; GFX8-NEXT: v_rndne_f32_e32 v1, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f32_e32 v0, v0
; GFX9-NEXT: v_rndne_f32_e32 v1, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%roundeven = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %x)
ret <2 x float> %roundeven
}
define <3 x float> @v_roundeven_v3f32(<3 x float> %x) {
; GFX6-LABEL: v_roundeven_v3f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_rndne_f32_e32 v0, v0
; GFX6-NEXT: v_rndne_f32_e32 v1, v1
; GFX6-NEXT: v_rndne_f32_e32 v2, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v3f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_rndne_f32_e32 v0, v0
; GFX7-NEXT: v_rndne_f32_e32 v1, v1
; GFX7-NEXT: v_rndne_f32_e32 v2, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v3f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f32_e32 v0, v0
; GFX8-NEXT: v_rndne_f32_e32 v1, v1
; GFX8-NEXT: v_rndne_f32_e32 v2, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v3f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f32_e32 v0, v0
; GFX9-NEXT: v_rndne_f32_e32 v1, v1
; GFX9-NEXT: v_rndne_f32_e32 v2, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
%roundeven = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
ret <3 x float> %roundeven
}
define <4 x float> @v_roundeven_v4f32(<4 x float> %x) {
; GFX6-LABEL: v_roundeven_v4f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_rndne_f32_e32 v0, v0
; GFX6-NEXT: v_rndne_f32_e32 v1, v1
; GFX6-NEXT: v_rndne_f32_e32 v2, v2
; GFX6-NEXT: v_rndne_f32_e32 v3, v3
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v4f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_rndne_f32_e32 v0, v0
; GFX7-NEXT: v_rndne_f32_e32 v1, v1
; GFX7-NEXT: v_rndne_f32_e32 v2, v2
; GFX7-NEXT: v_rndne_f32_e32 v3, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v4f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f32_e32 v0, v0
; GFX8-NEXT: v_rndne_f32_e32 v1, v1
; GFX8-NEXT: v_rndne_f32_e32 v2, v2
; GFX8-NEXT: v_rndne_f32_e32 v3, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v4f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f32_e32 v0, v0
; GFX9-NEXT: v_rndne_f32_e32 v1, v1
; GFX9-NEXT: v_rndne_f32_e32 v2, v2
; GFX9-NEXT: v_rndne_f32_e32 v3, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
%roundeven = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x)
ret <4 x float> %roundeven
}
define half @v_roundeven_f16(half %x) {
; GFX6-LABEL: v_roundeven_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_rndne_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_rndne_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f16_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f16_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%roundeven = call half @llvm.roundeven.f16(half %x)
ret half %roundeven
}
define <2 x half> @v_roundeven_v2f16(<2 x half> %x) {
; GFX6-LABEL: v_roundeven_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_rndne_f32_e32 v0, v0
; GFX6-NEXT: v_rndne_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_rndne_f32_e32 v0, v0
; GFX7-NEXT: v_rndne_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f16_e32 v1, v0
; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f16_e32 v1, v0
; GFX9-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x)
ret <2 x half> %roundeven
}
define <2 x half> @v_roundeven_v2f16_fneg(<2 x half> %x) {
; GFX6-LABEL: v_roundeven_v2f16_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v0
; GFX6-NEXT: v_rndne_f32_e32 v0, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_rndne_f32_e32 v1, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f16_fneg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v0
; GFX7-NEXT: v_rndne_f32_e32 v0, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_rndne_f32_e32 v1, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f16_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT: v_rndne_f16_e32 v1, v0
; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16_fneg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX9-NEXT: v_rndne_f16_e32 v1, v0
; GFX9-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%x.fneg = fneg <2 x half> %x
%roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x.fneg)
ret <2 x half> %roundeven
}
define <4 x half> @v_roundeven_v4f16(<4 x half> %x) {
; GFX6-LABEL: v_roundeven_v4f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_rndne_f32_e32 v0, v0
; GFX6-NEXT: v_rndne_f32_e32 v1, v1
; GFX6-NEXT: v_rndne_f32_e32 v2, v2
; GFX6-NEXT: v_rndne_f32_e32 v3, v3
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v4f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT: v_rndne_f32_e32 v0, v0
; GFX7-NEXT: v_rndne_f32_e32 v1, v1
; GFX7-NEXT: v_rndne_f32_e32 v2, v2
; GFX7-NEXT: v_rndne_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f16_e32 v2, v0
; GFX8-NEXT: v_rndne_f16_e32 v3, v1
; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v4, 16
; GFX8-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v4f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f16_e32 v2, v0
; GFX9-NEXT: v_rndne_f16_e32 v3, v1
; GFX9-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_and_or_b32 v0, v2, v4, v0
; GFX9-NEXT: v_and_or_b32 v1, v3, v4, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x)
ret <4 x half> %roundeven
}
define float @v_roundeven_f32_fabs(float %x) {
; GFX6-LABEL: v_roundeven_f32_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_rndne_f32_e64 v0, |v0|
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fabs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_rndne_f32_e64 v0, |v0|
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fabs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f32_e64 v0, |v0|
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fabs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f32_e64 v0, |v0|
; GFX9-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
%roundeven = call float @llvm.roundeven.f32(float %fabs.x)
ret float %roundeven
}
define amdgpu_ps float @s_roundeven_f32(float inreg %x) {
; GFX6-LABEL: s_roundeven_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_rndne_f32_e32 v0, s0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_roundeven_f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_rndne_f32_e32 v0, s0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_roundeven_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_rndne_f32_e32 v0, s0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_roundeven_f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_rndne_f32_e32 v0, s0
; GFX9-NEXT: ; return to shader part epilog
%roundeven = call float @llvm.roundeven.f32(float %x)
ret float %roundeven
}
define float @v_roundeven_f32_fneg(float %x) {
; GFX6-LABEL: v_roundeven_f32_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_rndne_f32_e64 v0, -v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fneg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_rndne_f32_e64 v0, -v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f32_e64 v0, -v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fneg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f32_e64 v0, -v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg float %x
%roundeven = call float @llvm.roundeven.f32(float %neg.x)
ret float %roundeven
}
define double @v_roundeven_f64(double %x) {
; GFX6-LABEL: v_roundeven_f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v3, 0x80000000, v1
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT: v_add_f64 v[4:5], v[0:1], v[2:3]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
; GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
%roundeven = call double @llvm.roundeven.f64(double %x)
ret double %roundeven
}
define double @v_roundeven_f64_fneg(double %x) {
; GFX6-LABEL: v_roundeven_f64_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
; GFX6-NEXT: v_and_b32_e32 v4, 0x80000000, v2
; GFX6-NEXT: v_mov_b32_e32 v3, 0
; GFX6-NEXT: v_or_b32_e32 v4, 0x43300000, v4
; GFX6-NEXT: v_add_f64 v[5:6], -v[0:1], v[3:4]
; GFX6-NEXT: v_mov_b32_e32 v1, v0
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
; GFX6-NEXT: v_add_f64 v[3:4], v[5:6], -v[3:4]
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[1:2]|, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64_fneg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64_fneg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg double %x
%roundeven = call double @llvm.roundeven.f64(double %neg.x)
ret double %roundeven
}
define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
; GFX6-LABEL: v_roundeven_v2f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_brev_b32 s6, 1
; GFX6-NEXT: s_mov_b32 s7, 0x43300000
; GFX6-NEXT: v_and_b32_e32 v5, s6, v1
; GFX6-NEXT: v_mov_b32_e32 v4, 0
; GFX6-NEXT: v_or_b32_e32 v5, s7, v5
; GFX6-NEXT: v_add_f64 v[6:7], v[0:1], v[4:5]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
; GFX6-NEXT: v_add_f64 v[5:6], v[6:7], -v[4:5]
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
; GFX6-NEXT: v_and_b32_e32 v5, s6, v3
; GFX6-NEXT: v_or_b32_e32 v5, s7, v5
; GFX6-NEXT: v_add_f64 v[7:8], v[2:3], v[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
; GFX6-NEXT: v_add_f64 v[4:5], v[7:8], -v[4:5]
; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT: v_rndne_f64_e32 v[2:3], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
%roundeven = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x)
ret <2 x double> %roundeven
}
declare half @llvm.roundeven.f16(half) #0
declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) #0
declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) #0
declare float @llvm.roundeven.f32(float) #0
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) #0
declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) #0
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0
declare double @llvm.roundeven.f64(double) #0
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) #0
declare half @llvm.fabs.f16(half) #0
declare float @llvm.fabs.f32(float) #0
attributes #0 = { nounwind readnone speculatable willreturn }

View File

@ -0,0 +1,68 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=x86_64-linux-gnu < %s | FileCheck %s
; FIXME: Calling convention lowering fails
; define half @roundeven_f16(half %x) {
; %roundeven = call half @llvm.roundeven.f16(half %x)
; ret half %roundeven
; }
define float @roundeven_f32(float %x) {
; CHECK-LABEL: roundeven_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq roundevenf
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%roundeven = call float @llvm.roundeven.f32(float %x)
ret float %roundeven
}
define double @roundeven_f64(double %x) {
; CHECK-LABEL: roundeven_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq roundeven
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%roundeven = call double @llvm.roundeven.f64(double %x)
ret double %roundeven
}
; FIXME: Insert fails
; define x86_fp80 @roundeven_fp80(x86_fp80 %x) {
; %roundeven = call x86_fp80 @llvm.roundeven.f80(x86_fp80 %x)
; ret x86_fp80 %roundeven
; }
define fp128 @roundeven_f128(fp128 %x) {
; CHECK-LABEL: roundeven_f128:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq roundevenl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%roundeven = call fp128 @llvm.roundeven.f128(fp128 %x)
ret fp128 %roundeven
}
; FIXME: Fails on build_vector
; define <4 x float> @roundeven_v4f32(<4 x float> %x) {
; %roundeven = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x)
; ret <4 x float> %roundeven
; }
declare half @llvm.roundeven.f16(half) #0
declare float @llvm.roundeven.f32(float) #0
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0
declare double @llvm.roundeven.f64(double) #0
declare x86_fp80 @llvm.roundeven.f80(x86_fp80) #0
declare fp128 @llvm.roundeven.f128(fp128) #0
attributes #0 = { nounwind readnone speculatable willreturn }