forked from OSchip/llvm-project
[PowerPC] Respect rounding mode in the back end
Currently, the floating point instructions that depend on rounding mode are correctly marked in the PPC back end with an implicit use of the RM register. Similarly, instructions that explicitly define the register are marked with an implicit def of the same register. So for the most part, RM-using code won't be moved across RM-setting instructions. However, calls are not marked as RM-setting instructions so code can be moved across calls. This is generally desired, but so is the ability to turn off this behaviour with an appropriate option - and -frounding-math really should be that option. This patch provides a set of call instructions (for direct and indirect calls) that are marked with an implicit def of the RM register. These will be used for calls that are marked with the strictfp attribute. Differential revision: https://reviews.llvm.org/D111433
This commit is contained in:
parent
ba2ac9c97c
commit
5840f7197d
|
@ -312,7 +312,7 @@ def : InstRW<[P10W_BR_2C, P10W_DISP_ANY],
|
|||
(instrs
|
||||
BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
|
||||
BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
|
||||
BL, BL8, BL8_NOP, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_TLS, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_TLS
|
||||
BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS
|
||||
)>;
|
||||
|
||||
// 2 Cycles Branch operations, 1 input operands
|
||||
|
@ -320,9 +320,9 @@ def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
|
|||
(instrs
|
||||
B, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, CTRL_DEP, TAILB, TAILB8,
|
||||
BA, TAILBA, TAILBA8,
|
||||
BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL_LWZinto_toc, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
|
||||
BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
|
||||
BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
|
||||
BLA, BLA8, BLA8_NOP
|
||||
BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
|
||||
)>;
|
||||
|
||||
// 2 Cycles Branch operations, 3 input operands
|
||||
|
|
|
@ -1302,15 +1302,15 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
|
|||
(instregex "BCCTR(L)?(8)?(n)?$"),
|
||||
(instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
|
||||
(instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
|
||||
(instregex "BL(_TLS|_NOP)?$"),
|
||||
(instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
|
||||
(instregex "BLA(8|8_NOP)?$"),
|
||||
(instregex "BL(_TLS|_NOP)?(_RM)?$"),
|
||||
(instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
|
||||
(instregex "BLA(8|8_NOP)?(_RM)?$"),
|
||||
(instregex "BLR(8|L)?$"),
|
||||
(instregex "TAILB(A)?(8)?$"),
|
||||
(instregex "TAILBCTR(8)?$"),
|
||||
(instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
|
||||
(instregex "BCLR(L)?(n)?$"),
|
||||
(instregex "BCTR(L)?(8)?$"),
|
||||
(instregex "BCTR(L)?(8)?(_RM)?$"),
|
||||
B,
|
||||
BA,
|
||||
BC,
|
||||
|
@ -1321,6 +1321,8 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
|
|||
BCLn,
|
||||
BCTRL8_LDinto_toc,
|
||||
BCTRL_LWZinto_toc,
|
||||
BCTRL8_LDinto_toc_RM,
|
||||
BCTRL_LWZinto_toc_RM,
|
||||
BCn,
|
||||
CTRL_DEP
|
||||
)>;
|
||||
|
|
|
@ -1630,9 +1630,19 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case PPCISD::CALL: return "PPCISD::CALL";
|
||||
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
|
||||
case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
|
||||
case PPCISD::CALL_RM:
|
||||
return "PPCISD::CALL_RM";
|
||||
case PPCISD::CALL_NOP_RM:
|
||||
return "PPCISD::CALL_NOP_RM";
|
||||
case PPCISD::CALL_NOTOC_RM:
|
||||
return "PPCISD::CALL_NOTOC_RM";
|
||||
case PPCISD::MTCTR: return "PPCISD::MTCTR";
|
||||
case PPCISD::BCTRL: return "PPCISD::BCTRL";
|
||||
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
|
||||
case PPCISD::BCTRL_RM:
|
||||
return "PPCISD::BCTRL_RM";
|
||||
case PPCISD::BCTRL_LOAD_TOC_RM:
|
||||
return "PPCISD::BCTRL_LOAD_TOC_RM";
|
||||
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
|
||||
case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
|
||||
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
|
||||
|
@ -5172,13 +5182,14 @@ static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
|
|||
}
|
||||
|
||||
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
|
||||
const Function &Caller,
|
||||
const SDValue &Callee,
|
||||
const Function &Caller, const SDValue &Callee,
|
||||
const PPCSubtarget &Subtarget,
|
||||
const TargetMachine &TM) {
|
||||
const TargetMachine &TM,
|
||||
bool IsStrictFPCall = false) {
|
||||
if (CFlags.IsTailCall)
|
||||
return PPCISD::TC_RETURN;
|
||||
|
||||
unsigned RetOpc = 0;
|
||||
// This is a call through a function pointer.
|
||||
if (CFlags.IsIndirect) {
|
||||
// AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
|
||||
|
@ -5189,28 +5200,46 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
|
|||
// immediately followed by a load of the TOC pointer from the stack save
|
||||
// slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
|
||||
// as it is not saved or used.
|
||||
return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
|
||||
RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
|
||||
: PPCISD::BCTRL;
|
||||
}
|
||||
|
||||
if (Subtarget.isUsingPCRelativeCalls()) {
|
||||
} else if (Subtarget.isUsingPCRelativeCalls()) {
|
||||
assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
|
||||
return PPCISD::CALL_NOTOC;
|
||||
}
|
||||
|
||||
RetOpc = PPCISD::CALL_NOTOC;
|
||||
} else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
|
||||
// The ABIs that maintain a TOC pointer across calls need to have a nop
|
||||
// immediately following the call instruction if the caller and callee may
|
||||
// have different TOC bases. At link time if the linker determines the calls
|
||||
// may not share a TOC base, the call is redirected to a trampoline inserted
|
||||
// by the linker. The trampoline will (among other things) save the caller's
|
||||
// TOC pointer at an ABI designated offset in the linkage area and the linker
|
||||
// will rewrite the nop to be a load of the TOC pointer from the linkage area
|
||||
// into gpr2.
|
||||
if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
|
||||
return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
|
||||
// TOC pointer at an ABI designated offset in the linkage area and the
|
||||
// linker will rewrite the nop to be a load of the TOC pointer from the
|
||||
// linkage area into gpr2.
|
||||
RetOpc = callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
|
||||
: PPCISD::CALL_NOP;
|
||||
|
||||
return PPCISD::CALL;
|
||||
else
|
||||
RetOpc = PPCISD::CALL;
|
||||
if (IsStrictFPCall) {
|
||||
switch (RetOpc) {
|
||||
default:
|
||||
llvm_unreachable("Unknown call opcode");
|
||||
case PPCISD::BCTRL_LOAD_TOC:
|
||||
RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
|
||||
break;
|
||||
case PPCISD::BCTRL:
|
||||
RetOpc = PPCISD::BCTRL_RM;
|
||||
break;
|
||||
case PPCISD::CALL_NOTOC:
|
||||
RetOpc = PPCISD::CALL_NOTOC_RM;
|
||||
break;
|
||||
case PPCISD::CALL:
|
||||
RetOpc = PPCISD::CALL_RM;
|
||||
break;
|
||||
case PPCISD::CALL_NOP:
|
||||
RetOpc = PPCISD::CALL_NOP_RM;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return RetOpc;
|
||||
}
|
||||
|
||||
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
|
||||
|
@ -5506,7 +5535,7 @@ SDValue PPCTargetLowering::FinishCall(
|
|||
|
||||
unsigned CallOpc =
|
||||
getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
|
||||
Subtarget, DAG.getTarget());
|
||||
Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
|
||||
|
||||
if (!CFlags.IsIndirect)
|
||||
Callee = transformCallee(Callee, DAG, dl, Subtarget);
|
||||
|
|
|
@ -200,6 +200,14 @@ namespace llvm {
|
|||
/// and 64-bit AIX.
|
||||
BCTRL_LOAD_TOC,
|
||||
|
||||
/// The variants that implicitly define rounding mode for calls with
|
||||
/// strictfp semantics.
|
||||
CALL_RM,
|
||||
CALL_NOP_RM,
|
||||
CALL_NOTOC_RM,
|
||||
BCTRL_RM,
|
||||
BCTRL_LOAD_TOC_RM,
|
||||
|
||||
/// Return with a flag operand, matched by 'blr'
|
||||
RET_FLAG,
|
||||
|
||||
|
|
|
@ -178,6 +178,39 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8], hasSideEffects = 0 in {
|
|||
}
|
||||
}
|
||||
|
||||
let isCall = 1, PPC970_Unit = 7, Defs = [LR8, RM], hasSideEffects = 0,
|
||||
isCodeGenOnly = 1, Uses = [RM] in {
|
||||
// Convenient aliases for call instructions
|
||||
def BL8_RM : IForm<18, 0, 1, (outs), (ins calltarget:$func),
|
||||
"bl $func", IIC_BrB, []>; // See Pat patterns below.
|
||||
|
||||
def BLA8_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
|
||||
"bla $func", IIC_BrB, [(PPCcall_rm (i64 imm:$func))]>;
|
||||
def BL8_NOP_RM : IForm_and_DForm_4_zero<18, 0, 1, 24,
|
||||
(outs), (ins calltarget:$func),
|
||||
"bl $func\n\tnop", IIC_BrB, []>;
|
||||
|
||||
def BLA8_NOP_RM : IForm_and_DForm_4_zero<18, 1, 1, 24,
|
||||
(outs), (ins abscalltarget:$func),
|
||||
"bla $func\n\tnop", IIC_BrB,
|
||||
[(PPCcall_nop_rm (i64 imm:$func))]>;
|
||||
let Predicates = [PCRelativeMemops] in {
|
||||
// BL8_NOTOC means that the caller does not use the TOC pointer and if
|
||||
// it does use R2 then it is just a caller saved register. Therefore it is
|
||||
// safe to emit only the bl and not the nop for this instruction. The
|
||||
// linker will not try to restore R2 after the call.
|
||||
def BL8_NOTOC_RM : IForm<18, 0, 1, (outs),
|
||||
(ins calltarget:$func),
|
||||
"bl $func", IIC_BrB, []>;
|
||||
}
|
||||
let Uses = [CTR8, RM] in {
|
||||
let isPredicable = 1 in
|
||||
def BCTRL8_RM : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
|
||||
"bctrl", IIC_BrB, [(PPCbctrl_rm)]>,
|
||||
Requires<[In64BitMode]>;
|
||||
}
|
||||
}
|
||||
|
||||
let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
|
||||
Defs = [LR8, X2], Uses = [CTR8, RM], RST = 2 in {
|
||||
def BCTRL8_LDinto_toc :
|
||||
|
@ -188,6 +221,16 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
|
|||
Requires<[In64BitMode]>;
|
||||
}
|
||||
|
||||
let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
|
||||
Defs = [LR8, X2, RM], Uses = [CTR8, RM], RST = 2 in {
|
||||
def BCTRL8_LDinto_toc_RM :
|
||||
XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
|
||||
(ins memrix:$src),
|
||||
"bctrl\n\tld 2, $src", IIC_BrB,
|
||||
[(PPCbctrl_load_toc_rm iaddrX4:$src)]>,
|
||||
Requires<[In64BitMode]>;
|
||||
}
|
||||
|
||||
} // Interpretation64Bit
|
||||
|
||||
// FIXME: Duplicating this for the asm parser should be unnecessary, but the
|
||||
|
@ -214,12 +257,32 @@ def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)),
|
|||
def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)),
|
||||
(BL8_NOTOC texternalsym:$dst)>;
|
||||
|
||||
def : Pat<(PPCcall_rm (i64 tglobaladdr:$dst)),
|
||||
(BL8_RM tglobaladdr:$dst)>;
|
||||
def : Pat<(PPCcall_nop_rm (i64 tglobaladdr:$dst)),
|
||||
(BL8_NOP_RM tglobaladdr:$dst)>;
|
||||
|
||||
def : Pat<(PPCcall_rm (i64 texternalsym:$dst)),
|
||||
(BL8_RM texternalsym:$dst)>;
|
||||
def : Pat<(PPCcall_nop_rm (i64 texternalsym:$dst)),
|
||||
(BL8_NOP_RM texternalsym:$dst)>;
|
||||
|
||||
def : Pat<(PPCcall_notoc_rm (i64 tglobaladdr:$dst)),
|
||||
(BL8_NOTOC_RM tglobaladdr:$dst)>;
|
||||
def : Pat<(PPCcall_notoc_rm (i64 texternalsym:$dst)),
|
||||
(BL8_NOTOC_RM texternalsym:$dst)>;
|
||||
|
||||
// Calls for AIX
|
||||
def : Pat<(PPCcall (i64 mcsym:$dst)),
|
||||
(BL8 mcsym:$dst)>;
|
||||
def : Pat<(PPCcall_nop (i64 mcsym:$dst)),
|
||||
(BL8_NOP mcsym:$dst)>;
|
||||
|
||||
def : Pat<(PPCcall_rm (i64 mcsym:$dst)),
|
||||
(BL8_RM mcsym:$dst)>;
|
||||
def : Pat<(PPCcall_nop_rm (i64 mcsym:$dst)),
|
||||
(BL8_NOP_RM mcsym:$dst)>;
|
||||
|
||||
// Atomic operations
|
||||
// FIXME: some of these might be used with constant operands. This will result
|
||||
// in constant materialization instructions that may be redundant. We currently
|
||||
|
|
|
@ -2246,11 +2246,13 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
|
|||
|
||||
return true;
|
||||
} else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
|
||||
OpC == PPC::BCTRL8) {
|
||||
OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
|
||||
OpC == PPC::BCTRL8_RM) {
|
||||
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
|
||||
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
|
||||
|
||||
bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
|
||||
bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
|
||||
OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
|
||||
bool isPPC64 = Subtarget.isPPC64();
|
||||
|
||||
if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
|
||||
|
@ -2274,6 +2276,9 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
|
|||
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
|
||||
.addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
|
||||
.addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
|
||||
if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
|
||||
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
|
||||
.addReg(PPC::RM, RegState::ImplicitDefine);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -316,6 +316,24 @@ def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC",
|
|||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
|
||||
// Call nodes for strictfp calls (that define RM).
|
||||
def PPCcall_rm : SDNode<"PPCISD::CALL_RM", SDT_PPCCall,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
def PPCcall_nop_rm : SDNode<"PPCISD::CALL_NOP_RM", SDT_PPCCall,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
def PPCcall_notoc_rm : SDNode<"PPCISD::CALL_NOTOC_RM", SDT_PPCCall,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
def PPCbctrl_rm : SDNode<"PPCISD::BCTRL_RM", SDTNone,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
def PPCbctrl_load_toc_rm : SDNode<"PPCISD::BCTRL_LOAD_TOC_RM",
|
||||
SDTypeProfile<0, 1, []>,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
|
||||
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
|
||||
|
||||
|
@ -1892,6 +1910,26 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
|
|||
}
|
||||
}
|
||||
|
||||
let isCall = 1, PPC970_Unit = 7, Defs = [LR, RM], isCodeGenOnly = 1 in {
|
||||
// Convenient aliases for call instructions
|
||||
let Uses = [RM] in {
|
||||
def BL_RM : IForm<18, 0, 1, (outs), (ins calltarget:$func),
|
||||
"bl $func", IIC_BrB, []>; // See Pat patterns below.
|
||||
def BLA_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
|
||||
"bla $func", IIC_BrB, [(PPCcall_rm (i32 imm:$func))]>;
|
||||
|
||||
def BL_NOP_RM : IForm_and_DForm_4_zero<18, 0, 1, 24,
|
||||
(outs), (ins calltarget:$func),
|
||||
"bl $func\n\tnop", IIC_BrB, []>;
|
||||
}
|
||||
let Uses = [CTR, RM] in {
|
||||
let isPredicable = 1 in
|
||||
def BCTRL_RM : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
|
||||
"bctrl", IIC_BrB, [(PPCbctrl_rm)]>,
|
||||
Requires<[In32BitMode]>;
|
||||
}
|
||||
}
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
|
||||
def TCRETURNdi :PPCEmitTimePseudo< (outs),
|
||||
(ins calltarget:$dst, i32imm:$offset),
|
||||
|
@ -1918,6 +1956,14 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
|
|||
|
||||
}
|
||||
|
||||
let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
|
||||
Defs = [LR, R2, RM], Uses = [CTR, RM], RST = 2 in {
|
||||
def BCTRL_LWZinto_toc_RM:
|
||||
XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 1, 32, (outs),
|
||||
(ins memri:$src), "bctrl\n\tlwz 2, $src", IIC_BrB,
|
||||
[(PPCbctrl_load_toc_rm iaddr:$src)]>, Requires<[In32BitMode]>;
|
||||
|
||||
}
|
||||
|
||||
let isCodeGenOnly = 1, hasSideEffects = 0 in {
|
||||
|
||||
|
@ -3435,6 +3481,12 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
|
|||
def : Pat<(PPCcall (i32 texternalsym:$dst)),
|
||||
(BL texternalsym:$dst)>;
|
||||
|
||||
def : Pat<(PPCcall_rm (i32 tglobaladdr:$dst)),
|
||||
(BL_RM tglobaladdr:$dst)>;
|
||||
|
||||
def : Pat<(PPCcall_rm (i32 texternalsym:$dst)),
|
||||
(BL_RM texternalsym:$dst)>;
|
||||
|
||||
// Calls for AIX only
|
||||
def : Pat<(PPCcall (i32 mcsym:$dst)),
|
||||
(BL mcsym:$dst)>;
|
||||
|
@ -3445,6 +3497,15 @@ def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
|
|||
def : Pat<(PPCcall_nop (i32 texternalsym:$dst)),
|
||||
(BL_NOP texternalsym:$dst)>;
|
||||
|
||||
def : Pat<(PPCcall_rm (i32 mcsym:$dst)),
|
||||
(BL_RM mcsym:$dst)>;
|
||||
|
||||
def : Pat<(PPCcall_nop_rm (i32 mcsym:$dst)),
|
||||
(BL_NOP_RM mcsym:$dst)>;
|
||||
|
||||
def : Pat<(PPCcall_nop_rm (i32 texternalsym:$dst)),
|
||||
(BL_NOP_RM texternalsym:$dst)>;
|
||||
|
||||
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
|
||||
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
|
||||
|
||||
|
|
|
@ -906,16 +906,13 @@ let hasSideEffects = 0 in {
|
|||
// Rounding Instructions respecting current rounding mode
|
||||
def XSRDPIC : XX2Form<60, 107,
|
||||
(outs vsfrc:$XT), (ins vsfrc:$XB),
|
||||
"xsrdpic $XT, $XB", IIC_VecFP,
|
||||
[(set f64:$XT, (fnearbyint f64:$XB))]>;
|
||||
"xsrdpic $XT, $XB", IIC_VecFP, []>;
|
||||
def XVRDPIC : XX2Form<60, 235,
|
||||
(outs vsrc:$XT), (ins vsrc:$XB),
|
||||
"xvrdpic $XT, $XB", IIC_VecFP,
|
||||
[(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
|
||||
"xvrdpic $XT, $XB", IIC_VecFP, []>;
|
||||
def XVRSPIC : XX2Form<60, 171,
|
||||
(outs vsrc:$XT), (ins vsrc:$XB),
|
||||
"xvrspic $XT, $XB", IIC_VecFP,
|
||||
[(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
|
||||
"xvrspic $XT, $XB", IIC_VecFP, []>;
|
||||
// Max/Min Instructions
|
||||
let isCommutable = 1 in {
|
||||
def XSMAXDP : XX3Form<60, 160,
|
||||
|
@ -2783,9 +2780,6 @@ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be ForceXForm:$src)), (LXVD2X ForceXForm:$s
|
|||
def : Pat<(f32 (any_fround f32:$S)),
|
||||
(f32 (COPY_TO_REGCLASS (XSRDPI
|
||||
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
|
||||
def : Pat<(f32 (fnearbyint f32:$S)),
|
||||
(f32 (COPY_TO_REGCLASS (XSRDPIC
|
||||
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
|
||||
def : Pat<(f32 (any_ffloor f32:$S)),
|
||||
(f32 (COPY_TO_REGCLASS (XSRDPIM
|
||||
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
|
||||
|
@ -2804,6 +2798,19 @@ def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
|
|||
def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>;
|
||||
def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
|
||||
|
||||
// Rounding without exceptions (nearbyint). Due to strange tblgen behaviour,
|
||||
// these need to be defined after the any_frint versions so ISEL will correctly
|
||||
// add the chain to the strict versions.
|
||||
def : Pat<(f32 (fnearbyint f32:$S)),
|
||||
(f32 (COPY_TO_REGCLASS (XSRDPIC
|
||||
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
|
||||
def : Pat<(f64 (fnearbyint f64:$S)),
|
||||
(f64 (XSRDPIC $S))>;
|
||||
def : Pat<(v2f64 (fnearbyint v2f64:$S)),
|
||||
(v2f64 (XVRDPIC $S))>;
|
||||
def : Pat<(v4f32 (fnearbyint v4f32:$S)),
|
||||
(v4f32 (XVRSPIC $S))>;
|
||||
|
||||
// Materialize a zero-vector of long long
|
||||
def : Pat<(v2i64 immAllZerosV),
|
||||
(v2i64 (XXLXORz))>;
|
||||
|
|
|
@ -0,0 +1,127 @@
|
|||
; The non-strictfp version of test/CodeGen/PowerPC/respect-rounding-mode.ll
|
||||
; Without strictfp, CSE should be free to eliminate the repeated multiply
|
||||
; and conversion instructions.
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names < %s | grep 'xvrdpic' | count 2
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s | grep 'xvrdpic' | count 2
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names < %s | grep 'xvrdpic' | count 2
|
||||
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names < %s | grep 'xvmuldp' | count 2
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s | grep 'xvmuldp' | count 2
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names < %s | grep 'xvmuldp' | count 2
|
||||
@IndirectCallPtr = dso_local local_unnamed_addr global void (...)* null, align 8
|
||||
|
||||
define dso_local signext i32 @func1() local_unnamed_addr #0 {
|
||||
entry:
|
||||
tail call void bitcast (void (...)* @directCall to void ()*)() #0
|
||||
%0 = tail call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> <double -9.990000e+01, double 9.990000e+01>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext = extractelement <2 x double> %0, i32 0
|
||||
%sub = tail call double @llvm.experimental.constrained.fsub.f64(double %vecext, double -9.900000e+01, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %sub, metadata !"fpexcept.ignore") #0
|
||||
tail call void bitcast (void (...)* @directCall to void ()*)() #0
|
||||
%1 = tail call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> <double -9.990000e+01, double 9.990000e+01>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext3 = extractelement <2 x double> %1, i32 1
|
||||
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %vecext3, double 9.900000e+01, metadata !"une", metadata !"fpexcept.ignore") #0
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %entry
|
||||
tail call void @exit(i32 signext 2) #0
|
||||
unreachable
|
||||
|
||||
if.end: ; preds = %entry
|
||||
ret i32 %conv
|
||||
}
|
||||
|
||||
declare void @directCall(...) local_unnamed_addr
|
||||
|
||||
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
|
||||
|
||||
declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
|
||||
|
||||
declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
|
||||
|
||||
declare void @exit(i32 signext) local_unnamed_addr
|
||||
|
||||
define dso_local signext i32 @func2() local_unnamed_addr #0 {
|
||||
entry:
|
||||
%call = tail call <2 x double> bitcast (<2 x double> (...)* @getvector1 to <2 x double> ()*)() #0
|
||||
%call1 = tail call <2 x double> bitcast (<2 x double> (...)* @getvector2 to <2 x double> ()*)() #0
|
||||
tail call void bitcast (void (...)* @directCall to void ()*)() #0
|
||||
%mul = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %call, <2 x double> %call1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext = extractelement <2 x double> %mul, i32 0
|
||||
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %vecext, double 4.000000e+00, metadata !"oeq", metadata !"fpexcept.ignore") #0
|
||||
br i1 %cmp, label %cleanup, label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
tail call void bitcast (void (...)* @directCall to void ()*)() #0
|
||||
%mul10 = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %call, <2 x double> %call1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%0 = tail call i32 @llvm.ppc.vsx.xvcmpeqdp.p(i32 2, <2 x double> %mul, <2 x double> %mul10) #0
|
||||
br label %cleanup
|
||||
|
||||
cleanup: ; preds = %entry, %if.end
|
||||
%retval.0 = phi i32 [ %0, %if.end ], [ 11, %entry ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
declare <2 x double> @getvector1(...) local_unnamed_addr
|
||||
|
||||
declare <2 x double> @getvector2(...) local_unnamed_addr
|
||||
|
||||
declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
|
||||
|
||||
declare i32 @llvm.ppc.vsx.xvcmpeqdp.p(i32, <2 x double>, <2 x double>)
|
||||
|
||||
define dso_local signext i32 @func3() local_unnamed_addr #0 {
|
||||
entry:
|
||||
%0 = load void ()*, void ()** bitcast (void (...)** @IndirectCallPtr to void ()**), align 8
|
||||
tail call void %0() #0
|
||||
%1 = tail call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> <double -9.990000e+01, double 9.990000e+01>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext = extractelement <2 x double> %1, i32 0
|
||||
%sub = tail call double @llvm.experimental.constrained.fsub.f64(double %vecext, double -9.900000e+01, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %sub, metadata !"fpexcept.ignore") #0
|
||||
%2 = load void ()*, void ()** bitcast (void (...)** @IndirectCallPtr to void ()**), align 8
|
||||
tail call void %2() #0
|
||||
%3 = tail call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> <double -9.990000e+01, double 9.990000e+01>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext4 = extractelement <2 x double> %3, i32 1
|
||||
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %vecext4, double 9.900000e+01, metadata !"une", metadata !"fpexcept.ignore") #0
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %entry
|
||||
tail call void @exit(i32 signext 2) #0
|
||||
unreachable
|
||||
|
||||
if.end: ; preds = %entry
|
||||
ret i32 %conv
|
||||
}
|
||||
|
||||
define dso_local signext i32 @func4() local_unnamed_addr #0 {
|
||||
entry:
|
||||
%call = tail call <2 x double> bitcast (<2 x double> (...)* @getvector1 to <2 x double> ()*)() #0
|
||||
%call1 = tail call <2 x double> bitcast (<2 x double> (...)* @getvector2 to <2 x double> ()*)() #0
|
||||
%0 = load void ()*, void ()** bitcast (void (...)** @IndirectCallPtr to void ()**), align 8
|
||||
tail call void %0() #0
|
||||
%mul = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %call, <2 x double> %call1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext = extractelement <2 x double> %mul, i32 0
|
||||
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %vecext, double 4.000000e+00, metadata !"oeq", metadata !"fpexcept.ignore") #0
|
||||
br i1 %cmp, label %cleanup, label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
%1 = load void ()*, void ()** bitcast (void (...)** @IndirectCallPtr to void ()**), align 8
|
||||
tail call void %1() #0
|
||||
%mul11 = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %call, <2 x double> %call1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%2 = tail call i32 @llvm.ppc.vsx.xvcmpeqdp.p(i32 2, <2 x double> %mul, <2 x double> %mul11) #0
|
||||
br label %cleanup
|
||||
|
||||
cleanup: ; preds = %entry, %if.end
|
||||
%retval.0 = phi i32 [ %2, %if.end ], [ 11, %entry ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -0,0 +1,128 @@
|
|||
; The strictfp version of test/CodeGen/PowerPC/cse-despite-rounding-mode.ll
|
||||
; With strictfp, the MachineIR optimizations need to assume that a call
|
||||
; can change the rounding mode and must not move/eliminate the repeated
|
||||
; multiply/convert instructions in this test.
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names < %s | grep 'xvrdpic' | count 4
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s | grep 'xvrdpic' | count 4
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names < %s | grep 'xvrdpic' | count 4
|
||||
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names < %s | grep 'xvmuldp' | count 4
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s | grep 'xvmuldp' | count 4
|
||||
; RUN: llc -verify-machineinstrs --mtriple powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names < %s | grep 'xvmuldp' | count 4
|
||||
@IndirectCallPtr = dso_local local_unnamed_addr global void (...)* null, align 8
|
||||
|
||||
; func1: direct-call variant of the rint case.
; The same constrained rint (round.dynamic) of one constant vector is issued
; twice, each time right after a direct call to @directCall. The function
; carries attribute group #0, so per the header of this test the second rint
; must not be merged with or moved across the call that precedes it.
; NOTE(review): presumably each rint lowers to one xvrdpic, which is what the
; RUN lines count - confirm against llc output.
define dso_local signext i32 @func1() local_unnamed_addr #0 {
|
||||
entry:
|
||||
tail call void bitcast (void (...)* @directCall to void ()*)() #0
|
||||
%0 = tail call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> <double -9.990000e+01, double 9.990000e+01>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext = extractelement <2 x double> %0, i32 0
|
||||
%sub = tail call double @llvm.experimental.constrained.fsub.f64(double %vecext, double -9.900000e+01, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %sub, metadata !"fpexcept.ignore") #0
|
||||
; Second call followed by a rint with operands identical to the first one.
tail call void bitcast (void (...)* @directCall to void ()*)() #0
|
||||
%1 = tail call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> <double -9.990000e+01, double 9.990000e+01>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext3 = extractelement <2 x double> %1, i32 1
|
||||
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %vecext3, double 9.900000e+01, metadata !"une", metadata !"fpexcept.ignore") #0
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %entry
|
||||
tail call void @exit(i32 signext 2) #0
|
||||
unreachable
|
||||
|
||||
if.end: ; preds = %entry
|
||||
; Returns the integer derived from element 0 of the first rint result.
ret i32 %conv
|
||||
}
|
||||
|
||||
declare void @directCall(...) local_unnamed_addr
|
||||
|
||||
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
|
||||
|
||||
declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
|
||||
|
||||
declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
|
||||
|
||||
declare void @exit(i32 signext) local_unnamed_addr
|
||||
|
||||
; func2: direct-call variant of the multiply case.
; Two vectors are fetched once via @getvector1/@getvector2, then multiplied
; with a constrained fmul (round.dynamic) after a direct call to @directCall.
; On the %if.end path the same multiply is repeated after another call and the
; two products are passed to the xvcmpeqdp.p intrinsic. Per the header of this
; test, the repeated multiply must not be eliminated or moved across the call.
; NOTE(review): presumably each fmul lowers to one xvmuldp, which is what the
; RUN lines count - confirm against llc output.
define dso_local signext i32 @func2() local_unnamed_addr #0 {
|
||||
entry:
|
||||
%call = tail call <2 x double> bitcast (<2 x double> (...)* @getvector1 to <2 x double> ()*)() #0
|
||||
%call1 = tail call <2 x double> bitcast (<2 x double> (...)* @getvector2 to <2 x double> ()*)() #0
|
||||
tail call void bitcast (void (...)* @directCall to void ()*)() #0
|
||||
%mul = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %call, <2 x double> %call1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext = extractelement <2 x double> %mul, i32 0
|
||||
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %vecext, double 4.000000e+00, metadata !"oeq", metadata !"fpexcept.ignore") #0
|
||||
br i1 %cmp, label %cleanup, label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
; Call again, then recompute the product from the same two operands.
tail call void bitcast (void (...)* @directCall to void ()*)() #0
|
||||
%mul10 = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %call, <2 x double> %call1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%0 = tail call i32 @llvm.ppc.vsx.xvcmpeqdp.p(i32 2, <2 x double> %mul, <2 x double> %mul10) #0
|
||||
br label %cleanup
|
||||
|
||||
cleanup: ; preds = %entry, %if.end
|
||||
; 11 when branching straight from %entry, otherwise the xvcmpeqdp.p result.
%retval.0 = phi i32 [ %0, %if.end ], [ 11, %entry ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
declare <2 x double> @getvector1(...) local_unnamed_addr
|
||||
|
||||
declare <2 x double> @getvector2(...) local_unnamed_addr
|
||||
|
||||
declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
|
||||
|
||||
declare i32 @llvm.ppc.vsx.xvcmpeqdp.p(i32, <2 x double>, <2 x double>)
|
||||
|
||||
; func3: indirect-call variant of the rint case.
; Same shape as the direct-call rint test, except that each call goes through
; a function pointer freshly loaded from @IndirectCallPtr. The constrained
; rint (round.dynamic) of the same constant vector follows each indirect call;
; per the header of this test, the second rint must not be merged with the
; first or moved across the call.
define dso_local signext i32 @func3() local_unnamed_addr #0 {
|
||||
entry:
|
||||
%0 = load void ()*, void ()** bitcast (void (...)** @IndirectCallPtr to void ()**), align 8
|
||||
tail call void %0() #0
|
||||
%1 = tail call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> <double -9.990000e+01, double 9.990000e+01>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext = extractelement <2 x double> %1, i32 0
|
||||
%sub = tail call double @llvm.experimental.constrained.fsub.f64(double %vecext, double -9.900000e+01, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %sub, metadata !"fpexcept.ignore") #0
|
||||
; Reload the pointer, call through it again, then repeat the identical rint.
%2 = load void ()*, void ()** bitcast (void (...)** @IndirectCallPtr to void ()**), align 8
|
||||
tail call void %2() #0
|
||||
%3 = tail call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> <double -9.990000e+01, double 9.990000e+01>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext4 = extractelement <2 x double> %3, i32 1
|
||||
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %vecext4, double 9.900000e+01, metadata !"une", metadata !"fpexcept.ignore") #0
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %entry
|
||||
tail call void @exit(i32 signext 2) #0
|
||||
unreachable
|
||||
|
||||
if.end: ; preds = %entry
|
||||
; Returns the integer derived from element 0 of the first rint result.
ret i32 %conv
|
||||
}
|
||||
|
||||
; func4: indirect-call variant of the multiply case.
; Two vectors are fetched once via @getvector1/@getvector2. A constrained fmul
; (round.dynamic) of them is computed after an indirect call through
; @IndirectCallPtr, and on the %if.end path it is recomputed after a second
; indirect call; both products feed the xvcmpeqdp.p intrinsic. Per the header
; of this test, the repeated multiply must not be eliminated or moved across
; the call.
define dso_local signext i32 @func4() local_unnamed_addr #0 {
|
||||
entry:
|
||||
%call = tail call <2 x double> bitcast (<2 x double> (...)* @getvector1 to <2 x double> ()*)() #0
|
||||
%call1 = tail call <2 x double> bitcast (<2 x double> (...)* @getvector2 to <2 x double> ()*)() #0
|
||||
%0 = load void ()*, void ()** bitcast (void (...)** @IndirectCallPtr to void ()**), align 8
|
||||
tail call void %0() #0
|
||||
%mul = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %call, <2 x double> %call1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%vecext = extractelement <2 x double> %mul, i32 0
|
||||
%cmp = tail call i1 @llvm.experimental.constrained.fcmp.f64(double %vecext, double 4.000000e+00, metadata !"oeq", metadata !"fpexcept.ignore") #0
|
||||
br i1 %cmp, label %cleanup, label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
; Reload the pointer, call through it, then recompute the same product.
%1 = load void ()*, void ()** bitcast (void (...)** @IndirectCallPtr to void ()**), align 8
|
||||
tail call void %1() #0
|
||||
%mul11 = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %call, <2 x double> %call1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
|
||||
%2 = tail call i32 @llvm.ppc.vsx.xvcmpeqdp.p(i32 2, <2 x double> %mul, <2 x double> %mul11) #0
|
||||
br label %cleanup
|
||||
|
||||
cleanup: ; preds = %entry, %if.end
|
||||
; 11 when branching straight from %entry, otherwise the xvcmpeqdp.p result.
%retval.0 = phi i32 [ %2, %if.end ], [ 11, %entry ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
|
||||
|
||||
attributes #0 = { nounwind strictfp }
|
|
@ -4631,14 +4631,14 @@ entry:
|
|||
define <4 x double> @constrained_vector_rint_v4f64(<4 x double> %x) #0 {
|
||||
; PC64LE-LABEL: constrained_vector_rint_v4f64:
|
||||
; PC64LE: # %bb.0: # %entry
|
||||
; PC64LE-NEXT: xvrdpic 34, 34
|
||||
; PC64LE-NEXT: xvrdpic 35, 35
|
||||
; PC64LE-NEXT: xvrdpic 34, 34
|
||||
; PC64LE-NEXT: blr
|
||||
;
|
||||
; PC64LE9-LABEL: constrained_vector_rint_v4f64:
|
||||
; PC64LE9: # %bb.0: # %entry
|
||||
; PC64LE9-NEXT: xvrdpic 34, 34
|
||||
; PC64LE9-NEXT: xvrdpic 35, 35
|
||||
; PC64LE9-NEXT: xvrdpic 34, 34
|
||||
; PC64LE9-NEXT: blr
|
||||
entry:
|
||||
%rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
|
||||
|
|
Loading…
Reference in New Issue