[SelectionDAG][PowerPC][AArch64][X86][ARM] Add chain input and output to ISD::FLT_ROUNDS_

This node reads the rounding control which means it needs to be ordered properly with operations that change the rounding control. So it needs to be chained to maintain order.

This patch adds a chain input and output to the node and connects it to the chain in SelectionDAGBuilder. I've updated all in-tree targets to connect their chain through their lowering code.

Differential Revision: https://reviews.llvm.org/D75132
This commit is contained in:
Craig Topper 2020-02-25 16:57:42 -08:00
parent 28d38a25e9
commit 735d27dc40
11 changed files with 124 additions and 48 deletions

View File

@ -609,6 +609,7 @@ namespace ISD {
/// 1 Round to nearest /// 1 Round to nearest
/// 2 Round to +inf /// 2 Round to +inf
/// 3 Round to -inf /// 3 Round to -inf
/// Result is rounding mode and chain. Input is a chain.
FLT_ROUNDS_, FLT_ROUNDS_,
/// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type. /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.

View File

@ -619,7 +619,7 @@ def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
let TargetPrefix = "aarch64" in { let TargetPrefix = "aarch64" in {
class FPCR_Get_Intrinsic class FPCR_Get_Intrinsic
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; : Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>;
} }
// FPCR // FPCR

View File

@ -2823,6 +2823,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
} }
case ISD::FLT_ROUNDS_: case ISD::FLT_ROUNDS_:
Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
Results.push_back(Node->getOperand(0));
break; break;
case ISD::EH_RETURN: case ISD::EH_RETURN:
case ISD::EH_LABEL: case ISD::EH_LABEL:

View File

@ -563,7 +563,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N); SDLoc dl(N);
return DAG.getNode(N->getOpcode(), dl, NVT); SDValue Res =
DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, N->getOperand(0));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
} }
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
@ -2744,10 +2750,15 @@ void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
unsigned NBitWidth = NVT.getSizeInBits(); unsigned NBitWidth = NVT.getSizeInBits();
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT); Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0));
SDValue Chain = Lo.getValue(1);
// The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy)); DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Chain);
} }
void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,

View File

@ -6630,7 +6630,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::gcwrite: case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
case Intrinsic::flt_rounds: case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return; return;
case Intrinsic::expect: case Intrinsic::expect:

View File

@ -2869,16 +2869,19 @@ SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// so that the shift + and get folded into a bitfield extract. // so that the shift + and get folded into a bitfield extract.
SDLoc dl(Op); SDLoc dl(Op);
SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64, SDValue Chain = Op.getOperand(0);
DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, SDValue FPCR_64 = DAG.getNode(
MVT::i64)); ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
{Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
Chain = FPCR_64.getValue(1);
SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64); SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32, SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
DAG.getConstant(1U << 22, dl, MVT::i32)); DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32)); DAG.getConstant(22, dl, MVT::i32));
return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32)); DAG.getConstant(3, dl, MVT::i32));
return DAG.getMergeValues({AND, Chain}, dl);
} }
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {

View File

@ -5948,16 +5948,20 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
// so that the shift + and get folded into a bitfield extract. // so that the shift + and get folded into a bitfield extract.
SDLoc dl(Op); SDLoc dl(Op);
SDValue Ops[] = { DAG.getEntryNode(), SDValue Chain = Op.getOperand(0);
DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) }; SDValue Ops[] = {Chain,
DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops); SDValue FPSCR =
DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
Chain = FPSCR.getValue(1);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
DAG.getConstant(1U << 22, dl, MVT::i32)); DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32)); DAG.getConstant(22, dl, MVT::i32));
return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32)); DAG.getConstant(3, dl, MVT::i32));
return DAG.getMergeValues({And, Chain}, dl);
} }
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,

View File

@ -8306,22 +8306,20 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
EVT PtrVT = getPointerTy(MF.getDataLayout()); EVT PtrVT = getPointerTy(MF.getDataLayout());
// Save FP Control Word to register // Save FP Control Word to register
EVT NodeTys[] = { SDValue Chain = Op.getOperand(0);
MVT::f64, // return register SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
MVT::Glue // unused in this context Chain = MFFS.getValue(1);
};
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
// Save FP register to stack slot // Save FP register to stack slot
int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false); int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
MachinePointerInfo());
// Load FP Control Word from low 32 bits of stack slot. // Load FP Control Word from low 32 bits of stack slot.
SDValue Four = DAG.getConstant(4, dl, PtrVT); SDValue Four = DAG.getConstant(4, dl, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo()); SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
Chain = CWD.getValue(1);
// Transform as necessary // Transform as necessary
SDValue CWD1 = SDValue CWD1 =
@ -8338,8 +8336,11 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue RetVal = SDValue RetVal =
DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
return DAG.getNode((VT.getSizeInBits() < 16 ? RetVal =
ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
dl, VT, RetVal);
return DAG.getMergeValues({RetVal, Chain}, dl);
} }
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {

View File

@ -155,7 +155,8 @@ def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>;
// Extract FPSCR (not modeled at the DAG level). // Extract FPSCR (not modeled at the DAG level).
def PPCmffs : SDNode<"PPCISD::MFFS", def PPCmffs : SDNode<"PPCISD::MFFS",
SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>; SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
[SDNPHasChain]>;
// Perform FADD in round-to-zero mode. // Perform FADD in round-to-zero mode.
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;

View File

@ -25647,14 +25647,15 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
MachineMemOperand::MOStore, 2, 2); MachineMemOperand::MOStore, 2, 2);
SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; SDValue Chain = Op.getOperand(0);
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, SDValue Ops[] = {Chain, StackSlot};
DAG.getVTList(MVT::Other), Chain = DAG.getMemIntrinsicNode(
Ops, MVT::i16, MMO); X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO);
// Load FP Control Word from stack slot // Load FP Control Word from stack slot
SDValue CWD = SDValue CWD =
DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo()); DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo());
Chain = CWD.getValue(1);
// Mask and turn the control bits into a shift for the lookup table. // Mask and turn the control bits into a shift for the lookup table.
SDValue Shift = SDValue Shift =
@ -25670,7 +25671,9 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift), DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
DAG.getConstant(3, DL, MVT::i32)); DAG.getConstant(3, DL, MVT::i32));
return DAG.getZExtOrTrunc(RetVal, DL, VT); RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT);
return DAG.getMergeValues({RetVal, Chain}, DL);
} }
// Split an unary integer op into 2 half sized ops. // Split an unary integer op into 2 half sized ops.

View File

@ -49,25 +49,49 @@ define i32 @multiple_flt_rounds() nounwind {
; X86-NEXT: shrl $9, %ecx ; X86-NEXT: shrl $9, %ecx
; X86-NEXT: andb $6, %cl ; X86-NEXT: andb $6, %cl
; X86-NEXT: movl $45, %esi ; X86-NEXT: movl $45, %esi
; X86-NEXT: movl $45, %eax
; X86-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrl %cl, %esi ; X86-NEXT: shrl %cl, %eax
; X86-NEXT: andl $3, %esi ; X86-NEXT: andl $3, %eax
; X86-NEXT: xorl %ebx, %ebx ; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: cmpl $3, %esi ; X86-NEXT: cmpl $3, %eax
; X86-NEXT: setne %bl ; X86-NEXT: setne %bl
; X86-NEXT: movl $0, (%esp) ; X86-NEXT: movl $0, (%esp)
; X86-NEXT: calll fesetround ; X86-NEXT: calll fesetround
; X86-NEXT: movl $3072, (%esp) # imm = 0xC00 ; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: calll fesetround ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl $1, %esi ; X86-NEXT: shrl $9, %ecx
; X86-NEXT: leal 1(%ebx), %eax ; X86-NEXT: andb $6, %cl
; X86-NEXT: movl $45, %eax
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: andl $3, %eax
; X86-NEXT: cmpl $1, %eax
; X86-NEXT: je .LBB1_2 ; X86-NEXT: je .LBB1_2
; X86-NEXT: # %bb.1: # %entry ; X86-NEXT: # %bb.1: # %entry
; X86-NEXT: movl %eax, %ebx ; X86-NEXT: incl %ebx
; X86-NEXT: .LBB1_2: # %entry ; X86-NEXT: .LBB1_2: # %entry
; X86-NEXT: movl $3072, (%esp) # imm = 0xC00
; X86-NEXT: calll fesetround
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shrl $9, %ecx
; X86-NEXT: andb $6, %cl
; X86-NEXT: movl $45, %eax
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: andl $3, %eax
; X86-NEXT: cmpl $1, %eax
; X86-NEXT: sbbl $-1, %ebx ; X86-NEXT: sbbl $-1, %ebx
; X86-NEXT: movl $2048, (%esp) # imm = 0x800 ; X86-NEXT: movl $2048, (%esp) # imm = 0x800
; X86-NEXT: calll fesetround ; X86-NEXT: calll fesetround
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shrl $9, %ecx
; X86-NEXT: andb $6, %cl
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrl %cl, %esi
; X86-NEXT: andl $3, %esi
; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl $2, %esi ; X86-NEXT: cmpl $2, %esi
; X86-NEXT: setne %cl ; X86-NEXT: setne %cl
@ -92,25 +116,50 @@ define i32 @multiple_flt_rounds() nounwind {
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx ; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
; X64-NEXT: shrl $9, %ecx ; X64-NEXT: shrl $9, %ecx
; X64-NEXT: andb $6, %cl ; X64-NEXT: andb $6, %cl
; X64-NEXT: movl $45, %ebx ; X64-NEXT: movl $45, %r14d
; X64-NEXT: movl $45, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %ebx ; X64-NEXT: shrl %cl, %eax
; X64-NEXT: andl $3, %ebx ; X64-NEXT: andl $3, %eax
; X64-NEXT: xorl %r14d, %r14d ; X64-NEXT: xorl %ebx, %ebx
; X64-NEXT: cmpl $3, %ebx ; X64-NEXT: cmpl $3, %eax
; X64-NEXT: setne %r14b ; X64-NEXT: setne %bl
; X64-NEXT: xorl %edi, %edi ; X64-NEXT: xorl %edi, %edi
; X64-NEXT: callq fesetround ; X64-NEXT: callq fesetround
; X64-NEXT: leal 1(%r14), %ebp ; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
; X64-NEXT: shrl $9, %ecx
; X64-NEXT: andb $6, %cl
; X64-NEXT: movl $45, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %eax
; X64-NEXT: andl $3, %eax
; X64-NEXT: leal 1(%rbx), %ebp
; X64-NEXT: cmpl $1, %eax
; X64-NEXT: cmovel %ebx, %ebp
; X64-NEXT: movl $3072, %edi # imm = 0xC00 ; X64-NEXT: movl $3072, %edi # imm = 0xC00
; X64-NEXT: callq fesetround ; X64-NEXT: callq fesetround
; X64-NEXT: cmpl $1, %ebx ; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
; X64-NEXT: cmovel %r14d, %ebp ; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
; X64-NEXT: shrl $9, %ecx
; X64-NEXT: andb $6, %cl
; X64-NEXT: movl $45, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %eax
; X64-NEXT: andl $3, %eax
; X64-NEXT: cmpl $1, %eax
; X64-NEXT: sbbl $-1, %ebp ; X64-NEXT: sbbl $-1, %ebp
; X64-NEXT: movl $2048, %edi # imm = 0x800 ; X64-NEXT: movl $2048, %edi # imm = 0x800
; X64-NEXT: callq fesetround ; X64-NEXT: callq fesetround
; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
; X64-NEXT: shrl $9, %ecx
; X64-NEXT: andb $6, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %r14d
; X64-NEXT: andl $3, %r14d
; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl $2, %ebx ; X64-NEXT: cmpl $2, %r14d
; X64-NEXT: setne %cl ; X64-NEXT: setne %cl
; X64-NEXT: negl %ecx ; X64-NEXT: negl %ecx
; X64-NEXT: xorl %eax, %eax ; X64-NEXT: xorl %eax, %eax