forked from OSchip/llvm-project
[SelectionDAG][PowerPC][AArch64][X86][ARM] Add chain input and output to the ISD::FLT_ROUNDS_ node
This node reads the rounding control, which means it needs to be ordered properly with respect to operations that change the rounding control, so it needs to be chained to maintain that order. This patch adds a chain input and output to the node and connects it to the chain in SelectionDAGBuilder. I've updated all in-tree targets to connect their chain through their lowering code. Differential Revision: https://reviews.llvm.org/D75132
This commit is contained in:
parent
28d38a25e9
commit
735d27dc40
|
@ -609,6 +609,7 @@ namespace ISD {
|
|||
/// 1 Round to nearest
|
||||
/// 2 Round to +inf
|
||||
/// 3 Round to -inf
|
||||
/// Result is rounding mode and chain. Input is a chain.
|
||||
FLT_ROUNDS_,
|
||||
|
||||
/// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
|
||||
|
|
|
@ -619,7 +619,7 @@ def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
|
|||
|
||||
let TargetPrefix = "aarch64" in {
|
||||
class FPCR_Get_Intrinsic
|
||||
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
|
||||
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>;
|
||||
}
|
||||
|
||||
// FPCR
|
||||
|
|
|
@ -2823,6 +2823,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
|||
}
|
||||
case ISD::FLT_ROUNDS_:
|
||||
Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
|
||||
Results.push_back(Node->getOperand(0));
|
||||
break;
|
||||
case ISD::EH_RETURN:
|
||||
case ISD::EH_LABEL:
|
||||
|
|
|
@ -563,7 +563,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
|
|||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
||||
SDLoc dl(N);
|
||||
|
||||
return DAG.getNode(N->getOpcode(), dl, NVT);
|
||||
SDValue Res =
|
||||
DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, N->getOperand(0));
|
||||
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
return Res;
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
|
||||
|
@ -2744,10 +2750,15 @@ void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
|
|||
unsigned NBitWidth = NVT.getSizeInBits();
|
||||
|
||||
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
|
||||
Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT);
|
||||
Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0));
|
||||
SDValue Chain = Lo.getValue(1);
|
||||
// The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
|
||||
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
|
||||
DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
|
||||
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Chain);
|
||||
}
|
||||
|
||||
void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
|
||||
|
|
|
@ -6630,7 +6630,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
|||
case Intrinsic::gcwrite:
|
||||
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
|
||||
case Intrinsic::flt_rounds:
|
||||
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
|
||||
Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
|
||||
setValue(&I, Res);
|
||||
DAG.setRoot(Res.getValue(1));
|
||||
return;
|
||||
|
||||
case Intrinsic::expect:
|
||||
|
|
|
@ -2869,16 +2869,19 @@ SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
|
|||
// so that the shift + and get folded into a bitfield extract.
|
||||
SDLoc dl(Op);
|
||||
|
||||
SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
|
||||
DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
|
||||
MVT::i64));
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue FPCR_64 = DAG.getNode(
|
||||
ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
|
||||
{Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
|
||||
Chain = FPCR_64.getValue(1);
|
||||
SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
|
||||
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
|
||||
DAG.getConstant(1U << 22, dl, MVT::i32));
|
||||
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
|
||||
DAG.getConstant(22, dl, MVT::i32));
|
||||
return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
|
||||
DAG.getConstant(3, dl, MVT::i32));
|
||||
SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
|
||||
DAG.getConstant(3, dl, MVT::i32));
|
||||
return DAG.getMergeValues({AND, Chain}, dl);
|
||||
}
|
||||
|
||||
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
|
||||
|
|
|
@ -5948,16 +5948,20 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
|
|||
// The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
|
||||
// so that the shift + and get folded into a bitfield extract.
|
||||
SDLoc dl(Op);
|
||||
SDValue Ops[] = { DAG.getEntryNode(),
|
||||
DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue Ops[] = {Chain,
|
||||
DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
|
||||
|
||||
SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
|
||||
SDValue FPSCR =
|
||||
DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
|
||||
Chain = FPSCR.getValue(1);
|
||||
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
|
||||
DAG.getConstant(1U << 22, dl, MVT::i32));
|
||||
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
|
||||
DAG.getConstant(22, dl, MVT::i32));
|
||||
return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
|
||||
DAG.getConstant(3, dl, MVT::i32));
|
||||
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
|
||||
DAG.getConstant(3, dl, MVT::i32));
|
||||
return DAG.getMergeValues({And, Chain}, dl);
|
||||
}
|
||||
|
||||
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
|
||||
|
|
|
@ -8306,22 +8306,20 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
|
|||
EVT PtrVT = getPointerTy(MF.getDataLayout());
|
||||
|
||||
// Save FP Control Word to register
|
||||
EVT NodeTys[] = {
|
||||
MVT::f64, // return register
|
||||
MVT::Glue // unused in this context
|
||||
};
|
||||
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
|
||||
Chain = MFFS.getValue(1);
|
||||
|
||||
// Save FP register to stack slot
|
||||
int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
|
||||
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
|
||||
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
|
||||
MachinePointerInfo());
|
||||
Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
|
||||
|
||||
// Load FP Control Word from low 32 bits of stack slot.
|
||||
SDValue Four = DAG.getConstant(4, dl, PtrVT);
|
||||
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
|
||||
SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
|
||||
SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
|
||||
Chain = CWD.getValue(1);
|
||||
|
||||
// Transform as necessary
|
||||
SDValue CWD1 =
|
||||
|
@ -8338,8 +8336,11 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
|
|||
SDValue RetVal =
|
||||
DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
|
||||
|
||||
return DAG.getNode((VT.getSizeInBits() < 16 ?
|
||||
ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
|
||||
RetVal =
|
||||
DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
|
||||
dl, VT, RetVal);
|
||||
|
||||
return DAG.getMergeValues({RetVal, Chain}, dl);
|
||||
}
|
||||
|
||||
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
|
|
@ -155,7 +155,8 @@ def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>;
|
|||
|
||||
// Extract FPSCR (not modeled at the DAG level).
|
||||
def PPCmffs : SDNode<"PPCISD::MFFS",
|
||||
SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
|
||||
SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
|
||||
[SDNPHasChain]>;
|
||||
|
||||
// Perform FADD in round-to-zero mode.
|
||||
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
|
||||
|
|
|
@ -25647,14 +25647,15 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
|
|||
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
|
||||
MachineMemOperand::MOStore, 2, 2);
|
||||
|
||||
SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
|
||||
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
|
||||
DAG.getVTList(MVT::Other),
|
||||
Ops, MVT::i16, MMO);
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue Ops[] = {Chain, StackSlot};
|
||||
Chain = DAG.getMemIntrinsicNode(
|
||||
X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO);
|
||||
|
||||
// Load FP Control Word from stack slot
|
||||
SDValue CWD =
|
||||
DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo());
|
||||
Chain = CWD.getValue(1);
|
||||
|
||||
// Mask and turn the control bits into a shift for the lookup table.
|
||||
SDValue Shift =
|
||||
|
@ -25670,7 +25671,9 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
|
|||
DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
|
||||
DAG.getConstant(3, DL, MVT::i32));
|
||||
|
||||
return DAG.getZExtOrTrunc(RetVal, DL, VT);
|
||||
RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT);
|
||||
|
||||
return DAG.getMergeValues({RetVal, Chain}, DL);
|
||||
}
|
||||
|
||||
// Split an unary integer op into 2 half sized ops.
|
||||
|
|
|
@ -49,25 +49,49 @@ define i32 @multiple_flt_rounds() nounwind {
|
|||
; X86-NEXT: shrl $9, %ecx
|
||||
; X86-NEXT: andb $6, %cl
|
||||
; X86-NEXT: movl $45, %esi
|
||||
; X86-NEXT: movl $45, %eax
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrl %cl, %esi
|
||||
; X86-NEXT: andl $3, %esi
|
||||
; X86-NEXT: shrl %cl, %eax
|
||||
; X86-NEXT: andl $3, %eax
|
||||
; X86-NEXT: xorl %ebx, %ebx
|
||||
; X86-NEXT: cmpl $3, %esi
|
||||
; X86-NEXT: cmpl $3, %eax
|
||||
; X86-NEXT: setne %bl
|
||||
; X86-NEXT: movl $0, (%esp)
|
||||
; X86-NEXT: calll fesetround
|
||||
; X86-NEXT: movl $3072, (%esp) # imm = 0xC00
|
||||
; X86-NEXT: calll fesetround
|
||||
; X86-NEXT: cmpl $1, %esi
|
||||
; X86-NEXT: leal 1(%ebx), %eax
|
||||
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: shrl $9, %ecx
|
||||
; X86-NEXT: andb $6, %cl
|
||||
; X86-NEXT: movl $45, %eax
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrl %cl, %eax
|
||||
; X86-NEXT: andl $3, %eax
|
||||
; X86-NEXT: cmpl $1, %eax
|
||||
; X86-NEXT: je .LBB1_2
|
||||
; X86-NEXT: # %bb.1: # %entry
|
||||
; X86-NEXT: movl %eax, %ebx
|
||||
; X86-NEXT: incl %ebx
|
||||
; X86-NEXT: .LBB1_2: # %entry
|
||||
; X86-NEXT: movl $3072, (%esp) # imm = 0xC00
|
||||
; X86-NEXT: calll fesetround
|
||||
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: shrl $9, %ecx
|
||||
; X86-NEXT: andb $6, %cl
|
||||
; X86-NEXT: movl $45, %eax
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrl %cl, %eax
|
||||
; X86-NEXT: andl $3, %eax
|
||||
; X86-NEXT: cmpl $1, %eax
|
||||
; X86-NEXT: sbbl $-1, %ebx
|
||||
; X86-NEXT: movl $2048, (%esp) # imm = 0x800
|
||||
; X86-NEXT: calll fesetround
|
||||
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: shrl $9, %ecx
|
||||
; X86-NEXT: andb $6, %cl
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrl %cl, %esi
|
||||
; X86-NEXT: andl $3, %esi
|
||||
; X86-NEXT: xorl %ecx, %ecx
|
||||
; X86-NEXT: cmpl $2, %esi
|
||||
; X86-NEXT: setne %cl
|
||||
|
@ -92,25 +116,50 @@ define i32 @multiple_flt_rounds() nounwind {
|
|||
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
|
||||
; X64-NEXT: shrl $9, %ecx
|
||||
; X64-NEXT: andb $6, %cl
|
||||
; X64-NEXT: movl $45, %ebx
|
||||
; X64-NEXT: movl $45, %r14d
|
||||
; X64-NEXT: movl $45, %eax
|
||||
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NEXT: shrl %cl, %ebx
|
||||
; X64-NEXT: andl $3, %ebx
|
||||
; X64-NEXT: xorl %r14d, %r14d
|
||||
; X64-NEXT: cmpl $3, %ebx
|
||||
; X64-NEXT: setne %r14b
|
||||
; X64-NEXT: shrl %cl, %eax
|
||||
; X64-NEXT: andl $3, %eax
|
||||
; X64-NEXT: xorl %ebx, %ebx
|
||||
; X64-NEXT: cmpl $3, %eax
|
||||
; X64-NEXT: setne %bl
|
||||
; X64-NEXT: xorl %edi, %edi
|
||||
; X64-NEXT: callq fesetround
|
||||
; X64-NEXT: leal 1(%r14), %ebp
|
||||
; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
|
||||
; X64-NEXT: shrl $9, %ecx
|
||||
; X64-NEXT: andb $6, %cl
|
||||
; X64-NEXT: movl $45, %eax
|
||||
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NEXT: shrl %cl, %eax
|
||||
; X64-NEXT: andl $3, %eax
|
||||
; X64-NEXT: leal 1(%rbx), %ebp
|
||||
; X64-NEXT: cmpl $1, %eax
|
||||
; X64-NEXT: cmovel %ebx, %ebp
|
||||
; X64-NEXT: movl $3072, %edi # imm = 0xC00
|
||||
; X64-NEXT: callq fesetround
|
||||
; X64-NEXT: cmpl $1, %ebx
|
||||
; X64-NEXT: cmovel %r14d, %ebp
|
||||
; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
|
||||
; X64-NEXT: shrl $9, %ecx
|
||||
; X64-NEXT: andb $6, %cl
|
||||
; X64-NEXT: movl $45, %eax
|
||||
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NEXT: shrl %cl, %eax
|
||||
; X64-NEXT: andl $3, %eax
|
||||
; X64-NEXT: cmpl $1, %eax
|
||||
; X64-NEXT: sbbl $-1, %ebp
|
||||
; X64-NEXT: movl $2048, %edi # imm = 0x800
|
||||
; X64-NEXT: callq fesetround
|
||||
; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
|
||||
; X64-NEXT: shrl $9, %ecx
|
||||
; X64-NEXT: andb $6, %cl
|
||||
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NEXT: shrl %cl, %r14d
|
||||
; X64-NEXT: andl $3, %r14d
|
||||
; X64-NEXT: xorl %ecx, %ecx
|
||||
; X64-NEXT: cmpl $2, %ebx
|
||||
; X64-NEXT: cmpl $2, %r14d
|
||||
; X64-NEXT: setne %cl
|
||||
; X64-NEXT: negl %ecx
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
|
|
Loading…
Reference in New Issue