[SelectionDAG][PowerPC][AArch64][X86][ARM] Add chain input and output to ISD::FLT_ROUNDS_

This node reads the rounding control, which means it must be ordered properly with respect to operations that change the rounding control. It therefore needs to be chained to maintain that order.

This patch adds a chain input and output to the node and connects it to the chain in SelectionDAGBuilder. I've updated all in-tree targets to connect their chain through their lowering code.

Differential Revision: https://reviews.llvm.org/D75132
This commit is contained in:
Craig Topper 2020-02-25 16:57:42 -08:00
parent 28d38a25e9
commit 735d27dc40
11 changed files with 124 additions and 48 deletions

View File

@ -609,6 +609,7 @@ namespace ISD {
/// 1 Round to nearest
/// 2 Round to +inf
/// 3 Round to -inf
/// Result is rounding mode and chain. Input is a chain.
FLT_ROUNDS_,
/// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.

View File

@ -619,7 +619,7 @@ def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
let TargetPrefix = "aarch64" in {
class FPCR_Get_Intrinsic
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>;
}
// FPCR

View File

@ -2823,6 +2823,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::FLT_ROUNDS_:
Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
Results.push_back(Node->getOperand(0));
break;
case ISD::EH_RETURN:
case ISD::EH_LABEL:

View File

@ -563,7 +563,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
return DAG.getNode(N->getOpcode(), dl, NVT);
SDValue Res =
DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, N->getOperand(0));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
@ -2744,10 +2750,15 @@ void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
unsigned NBitWidth = NVT.getSizeInBits();
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT);
Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0));
SDValue Chain = Lo.getValue(1);
// The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Chain);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,

View File

@ -6630,7 +6630,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
case Intrinsic::expect:

View File

@ -2869,16 +2869,19 @@ SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// so that the shift + and get folded into a bitfield extract.
SDLoc dl(Op);
SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
MVT::i64));
SDValue Chain = Op.getOperand(0);
SDValue FPCR_64 = DAG.getNode(
ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
{Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
Chain = FPCR_64.getValue(1);
SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32));
return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32));
SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32));
return DAG.getMergeValues({AND, Chain}, dl);
}
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {

View File

@ -5948,16 +5948,20 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
// so that the shift + and get folded into a bitfield extract.
SDLoc dl(Op);
SDValue Ops[] = { DAG.getEntryNode(),
DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
SDValue Chain = Op.getOperand(0);
SDValue Ops[] = {Chain,
DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
SDValue FPSCR =
DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
Chain = FPSCR.getValue(1);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32));
return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32));
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32));
return DAG.getMergeValues({And, Chain}, dl);
}
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,

View File

@ -8306,22 +8306,20 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
EVT PtrVT = getPointerTy(MF.getDataLayout());
// Save FP Control Word to register
EVT NodeTys[] = {
MVT::f64, // return register
MVT::Glue // unused in this context
};
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
SDValue Chain = Op.getOperand(0);
SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
Chain = MFFS.getValue(1);
// Save FP register to stack slot
int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
MachinePointerInfo());
Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
// Load FP Control Word from low 32 bits of stack slot.
SDValue Four = DAG.getConstant(4, dl, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
Chain = CWD.getValue(1);
// Transform as necessary
SDValue CWD1 =
@ -8338,8 +8336,11 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue RetVal =
DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
return DAG.getNode((VT.getSizeInBits() < 16 ?
ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
RetVal =
DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
dl, VT, RetVal);
return DAG.getMergeValues({RetVal, Chain}, dl);
}
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {

View File

@ -155,7 +155,8 @@ def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>;
// Extract FPSCR (not modeled at the DAG level).
def PPCmffs : SDNode<"PPCISD::MFFS",
SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
[SDNPHasChain]>;
// Perform FADD in round-to-zero mode.
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;

View File

@ -25647,14 +25647,15 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
MachineMemOperand::MOStore, 2, 2);
SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
DAG.getVTList(MVT::Other),
Ops, MVT::i16, MMO);
SDValue Chain = Op.getOperand(0);
SDValue Ops[] = {Chain, StackSlot};
Chain = DAG.getMemIntrinsicNode(
X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO);
// Load FP Control Word from stack slot
SDValue CWD =
DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo());
Chain = CWD.getValue(1);
// Mask and turn the control bits into a shift for the lookup table.
SDValue Shift =
@ -25670,7 +25671,9 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
DAG.getConstant(3, DL, MVT::i32));
return DAG.getZExtOrTrunc(RetVal, DL, VT);
RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT);
return DAG.getMergeValues({RetVal, Chain}, DL);
}
// Split an unary integer op into 2 half sized ops.

View File

@ -49,25 +49,49 @@ define i32 @multiple_flt_rounds() nounwind {
; X86-NEXT: shrl $9, %ecx
; X86-NEXT: andb $6, %cl
; X86-NEXT: movl $45, %esi
; X86-NEXT: movl $45, %eax
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrl %cl, %esi
; X86-NEXT: andl $3, %esi
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: andl $3, %eax
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: cmpl $3, %esi
; X86-NEXT: cmpl $3, %eax
; X86-NEXT: setne %bl
; X86-NEXT: movl $0, (%esp)
; X86-NEXT: calll fesetround
; X86-NEXT: movl $3072, (%esp) # imm = 0xC00
; X86-NEXT: calll fesetround
; X86-NEXT: cmpl $1, %esi
; X86-NEXT: leal 1(%ebx), %eax
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shrl $9, %ecx
; X86-NEXT: andb $6, %cl
; X86-NEXT: movl $45, %eax
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: andl $3, %eax
; X86-NEXT: cmpl $1, %eax
; X86-NEXT: je .LBB1_2
; X86-NEXT: # %bb.1: # %entry
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: incl %ebx
; X86-NEXT: .LBB1_2: # %entry
; X86-NEXT: movl $3072, (%esp) # imm = 0xC00
; X86-NEXT: calll fesetround
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shrl $9, %ecx
; X86-NEXT: andb $6, %cl
; X86-NEXT: movl $45, %eax
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: andl $3, %eax
; X86-NEXT: cmpl $1, %eax
; X86-NEXT: sbbl $-1, %ebx
; X86-NEXT: movl $2048, (%esp) # imm = 0x800
; X86-NEXT: calll fesetround
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shrl $9, %ecx
; X86-NEXT: andb $6, %cl
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrl %cl, %esi
; X86-NEXT: andl $3, %esi
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl $2, %esi
; X86-NEXT: setne %cl
@ -92,25 +116,50 @@ define i32 @multiple_flt_rounds() nounwind {
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
; X64-NEXT: shrl $9, %ecx
; X64-NEXT: andb $6, %cl
; X64-NEXT: movl $45, %ebx
; X64-NEXT: movl $45, %r14d
; X64-NEXT: movl $45, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %ebx
; X64-NEXT: andl $3, %ebx
; X64-NEXT: xorl %r14d, %r14d
; X64-NEXT: cmpl $3, %ebx
; X64-NEXT: setne %r14b
; X64-NEXT: shrl %cl, %eax
; X64-NEXT: andl $3, %eax
; X64-NEXT: xorl %ebx, %ebx
; X64-NEXT: cmpl $3, %eax
; X64-NEXT: setne %bl
; X64-NEXT: xorl %edi, %edi
; X64-NEXT: callq fesetround
; X64-NEXT: leal 1(%r14), %ebp
; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
; X64-NEXT: shrl $9, %ecx
; X64-NEXT: andb $6, %cl
; X64-NEXT: movl $45, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %eax
; X64-NEXT: andl $3, %eax
; X64-NEXT: leal 1(%rbx), %ebp
; X64-NEXT: cmpl $1, %eax
; X64-NEXT: cmovel %ebx, %ebp
; X64-NEXT: movl $3072, %edi # imm = 0xC00
; X64-NEXT: callq fesetround
; X64-NEXT: cmpl $1, %ebx
; X64-NEXT: cmovel %r14d, %ebp
; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
; X64-NEXT: shrl $9, %ecx
; X64-NEXT: andb $6, %cl
; X64-NEXT: movl $45, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %eax
; X64-NEXT: andl $3, %eax
; X64-NEXT: cmpl $1, %eax
; X64-NEXT: sbbl $-1, %ebp
; X64-NEXT: movl $2048, %edi # imm = 0x800
; X64-NEXT: callq fesetround
; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
; X64-NEXT: shrl $9, %ecx
; X64-NEXT: andb $6, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %r14d
; X64-NEXT: andl $3, %r14d
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl $2, %ebx
; X64-NEXT: cmpl $2, %r14d
; X64-NEXT: setne %cl
; X64-NEXT: negl %ecx
; X64-NEXT: xorl %eax, %eax