[RISCV] Custom lower (i32 (fptoui/fptosi X)).

I stumbled onto a case where our (sext_inreg (assertzexti32 (fptoui X)), i32)
isel pattern can cause an fcvt.wu and fcvt.lu to be emitted if
the assertzexti32 has an additional user. If we add a one use check
it would just cause a fcvt.lu followed by a sext.w when we only need
a fcvt.wu to satisfy both users.

To mitigate this I've added custom isel and new ISD opcodes for
fcvt.wu. This allows us to know it started life as a conversion
to i32 without needing to match multiple nodes. ComputeNumSignBits
has been taught that these new nodes produce 33 sign bits. To
prevent regressions when we need to zero extend the result of an
(i32 (fptoui X)), I've added a DAG combine to convert it to an
(i64 (fptoui X)) before type legalization. In most cases this would
happen in InstCombine, but a zero_extend can be created for function
returns or arguments.

To keep everything consistent I've added new nodes for fptosi as well.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D106346
This commit is contained in:
Craig Topper 2021-07-24 10:34:49 -07:00
parent c7e69e46c8
commit c63dbd8501
11 changed files with 80 additions and 48 deletions

View File

@ -849,6 +849,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
if (Subtarget.hasStdExtV()) {
setTargetDAGCombine(ISD::FCOPYSIGN);
setTargetDAGCombine(ISD::MGATHER);
@ -4859,20 +4860,30 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool IsStrict = N->isStrictFPOpcode();
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
bool IsStrict = N->isStrictFPOpcode();
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
N->getOpcode() == ISD::STRICT_FP_TO_SINT;
SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
TargetLowering::TypeSoftenFloat) {
// FIXME: Support strict FP.
if (IsStrict)
return;
if (!isTypeLegal(Op0.getValueType()))
return;
unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
// If the FP type needs to be softened, emit a library call using the 'si'
// version. If we left it to default legalization we'd end up with 'di'. If
// the FP type doesn't need to be softened just let generic type
// legalization promote the result type.
if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
TargetLowering::TypeSoftenFloat)
return;
RTLIB::Libcall LC;
if (N->getOpcode() == ISD::FP_TO_SINT ||
N->getOpcode() == ISD::STRICT_FP_TO_SINT)
if (IsSigned)
LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
else
LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
@ -6056,6 +6067,16 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return performXORCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
return performANY_EXTENDCombine(N, DCI, Subtarget);
case ISD::ZERO_EXTEND:
// Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
// type legalization. This is safe because fp_to_uint produces poison if
// it overflows.
if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit() &&
N->getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
isTypeLegal(N->getOperand(0).getOperand(0).getValueType()))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
N->getOperand(0).getOperand(0));
return SDValue();
case RISCVISD::SELECT_CC: {
// Transform
SDValue LHS = N->getOperand(0);
@ -6586,6 +6607,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::UNSHFLW:
case RISCVISD::BCOMPRESSW:
case RISCVISD::BDECOMPRESSW:
case RISCVISD::FCVT_W_RV64:
case RISCVISD::FCVT_WU_RV64:
// TODO: As the result is sign-extended, this is conservatively correct. A
// more precise answer could be calculated for SRAW depending on known
// bits in the shift amount.
@ -8298,6 +8321,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FMV_X_ANYEXTH)
NODE_NAME_CASE(FMV_W_X_RV64)
NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
NODE_NAME_CASE(FCVT_W_RV64)
NODE_NAME_CASE(FCVT_WU_RV64)
NODE_NAME_CASE(READ_CYCLE_WIDE)
NODE_NAME_CASE(GREV)
NODE_NAME_CASE(GREVW)

View File

@ -84,6 +84,10 @@ enum NodeType : unsigned {
FMV_X_ANYEXTH,
FMV_W_X_RV64,
FMV_X_ANYEXTW_RV64,
// FP to 32 bit int conversions for RV64. These are used to keep track of the
// result being sign extended to 64 bit.
FCVT_W_RV64,
FCVT_WU_RV64,
// READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
// (returns (Lo, Hi)). It takes a chain operand.
READ_CYCLE_WIDE,

View File

@ -351,12 +351,11 @@ def : Pat<(f64 (fpimm0)), (FMV_D_X (i64 X0))>;
def : Pat<(bitconvert (i64 GPR:$rs1)), (FMV_D_X GPR:$rs1)>;
def : Pat<(i64 (bitconvert FPR64:$rs1)), (FMV_X_D FPR64:$rs1)>;
// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe
// because fpto[u|s]i produce poison if the value can't fit into the target.
// We match the single case below because fcvt.wu.d sign-extends its result so
// is cheaper than fcvt.lu.d+sext.w.
def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR64:$rs1)), i32),
(FCVT_WU_D $rs1, 0b001)>;
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
def : Pat<(riscv_fcvt_w_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>;
def : Pat<(riscv_fcvt_wu_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>;
// [u]int32->fp
def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>;

View File

@ -19,11 +19,17 @@ def SDT_RISCVFMV_W_X_RV64
: SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>;
// Type profile for FMV_X_ANYEXTW_RV64: one result of type i64 and one operand
// of type f32.
def SDT_RISCVFMV_X_ANYEXTW_RV64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
// Type profile shared by the FCVT_W_RV64 and FCVT_WU_RV64 nodes: one result of
// type i64 and one operand that is only constrained to be a floating-point
// type, so a single profile serves every FP source width.
// NOTE(review): the "STD_" prefix looks like a transposition of the "SDT_"
// prefix used by the neighbouring profiles -- confirm there are no other users
// before renaming.
def STD_RISCVFCVT_W_RV64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisFP<1>]>;
// SelectionDAG node for RISCVISD::FMV_W_X_RV64 (i64 operand, f32 result).
def riscv_fmv_w_x_rv64
: SDNode<"RISCVISD::FMV_W_X_RV64", SDT_RISCVFMV_W_X_RV64>;
// SelectionDAG node for RISCVISD::FMV_X_ANYEXTW_RV64 (f32 operand, i64 result).
def riscv_fmv_x_anyextw_rv64
: SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>;
// SelectionDAG node for RISCVISD::FCVT_W_RV64, the signed FP to 32-bit integer
// conversion on RV64. The result is typed i64 so that isel can keep track of
// the value being sign extended to 64 bits.
def riscv_fcvt_w_rv64
: SDNode<"RISCVISD::FCVT_W_RV64", STD_RISCVFCVT_W_RV64>;
// SelectionDAG node for RISCVISD::FCVT_WU_RV64, the unsigned counterpart of
// riscv_fcvt_w_rv64. It uses the same type profile.
def riscv_fcvt_wu_rv64
: SDNode<"RISCVISD::FCVT_WU_RV64", STD_RISCVFCVT_W_RV64>;
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
@ -391,12 +397,11 @@ def : Pat<(riscv_fmv_x_anyextw_rv64 FPR32:$src), (FMV_X_W FPR32:$src)>;
def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32),
(FMV_X_W FPR32:$src)>;
// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe
// because fpto[u|s]i produces poison if the value can't fit into the target.
// We match the single case below because fcvt.wu.s sign-extends its result so
// is cheaper than fcvt.lu.s+sext.w.
def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR32:$rs1)), i32),
(FCVT_WU_S $rs1, 0b001)>;
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
def : Pat<(riscv_fcvt_w_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
def : Pat<(riscv_fcvt_wu_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
// float->[u]int64. Round-to-zero must be used.
def : Pat<(i64 (fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>;

View File

@ -350,12 +350,11 @@ def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>;
} // Predicates = [HasStdExtZfh, IsRV32]
let Predicates = [HasStdExtZfh, IsRV64] in {
// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe
// because fpto[u|s]i produces poison if the value can't fit into the target.
// We match the single case below because fcvt.wu.s sign-extends its result so
// is cheaper than fcvt.lu.h+sext.w.
def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR16:$rs1)), i32),
(FCVT_WU_H $rs1, 0b001)>;
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
def : Pat<(riscv_fcvt_w_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
def : Pat<(riscv_fcvt_wu_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
// half->[u]int64. Round-to-zero must be used.
def : Pat<(i64 (fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;

View File

@ -64,7 +64,7 @@ define i32 @fcvt_w_d(double %a) nounwind {
; RV64IFD-LABEL: fcvt_w_d:
; RV64IFD: # %bb.0:
; RV64IFD-NEXT: fmv.d.x ft0, a0
; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz
; RV64IFD-NEXT: ret
%1 = fptosi double %a to i32
ret i32 %1
@ -133,7 +133,7 @@ define i32 @fcvt_wu_d(double %a) nounwind {
; RV64IFD-LABEL: fcvt_wu_d:
; RV64IFD: # %bb.0:
; RV64IFD-NEXT: fmv.d.x ft0, a0
; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz
; RV64IFD-NEXT: fcvt.wu.d a0, ft0, rtz
; RV64IFD-NEXT: ret
%1 = fptoui double %a to i32
ret i32 %1
@ -166,7 +166,7 @@ define i32 @fcvt_wu_d_multiple_use(double %x, i32* %y) {
; RV64IFD-NEXT: addi a0, zero, 1
; RV64IFD-NEXT: beqz a1, .LBB5_2
; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz
; RV64IFD-NEXT: mv a0, a1
; RV64IFD-NEXT: .LBB5_2:
; RV64IFD-NEXT: ret
%a = fptoui double %x to i32

View File

@ -16,7 +16,7 @@ define i32 @fcvt_w_s(float %a) nounwind {
; RV64IF-LABEL: fcvt_w_s:
; RV64IF: # %bb.0:
; RV64IF-NEXT: fmv.w.x ft0, a0
; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz
; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz
; RV64IF-NEXT: ret
%1 = fptosi float %a to i32
ret i32 %1
@ -102,7 +102,7 @@ define i32 @fcvt_wu_s(float %a) nounwind {
; RV64IF-LABEL: fcvt_wu_s:
; RV64IF: # %bb.0:
; RV64IF-NEXT: fmv.w.x ft0, a0
; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz
; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz
; RV64IF-NEXT: ret
%1 = fptoui float %a to i32
ret i32 %1
@ -130,7 +130,7 @@ define i32 @fcvt_wu_s_multiple_use(float %x, i32* %y) {
; RV64IF-NEXT: addi a0, zero, 1
; RV64IF-NEXT: beqz a1, .LBB3_2
; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz
; RV64IF-NEXT: mv a0, a1
; RV64IF-NEXT: .LBB3_2:
; RV64IF-NEXT: ret
%a = fptoui float %x to i32

View File

@ -164,7 +164,7 @@ define i32 @fcvt_ui_h_multiple_use(half %x, i32* %y) {
; RV64IZFH-NEXT: addi a0, zero, 1
; RV64IZFH-NEXT: beqz a1, .LBB3_2
; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz
; RV64IZFH-NEXT: mv a0, a1
; RV64IZFH-NEXT: .LBB3_2:
; RV64IZFH-NEXT: ret
;
@ -174,7 +174,7 @@ define i32 @fcvt_ui_h_multiple_use(half %x, i32* %y) {
; RV64IDZFH-NEXT: addi a0, zero, 1
; RV64IDZFH-NEXT: beqz a1, .LBB3_2
; RV64IDZFH-NEXT: # %bb.1:
; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz
; RV64IDZFH-NEXT: mv a0, a1
; RV64IDZFH-NEXT: .LBB3_2:
; RV64IDZFH-NEXT: ret
%a = fptoui half %x to i32
@ -246,12 +246,12 @@ define i32 @fcvt_w_h(half %a) nounwind {
;
; RV64IZFH-LABEL: fcvt_w_h:
; RV64IZFH: # %bb.0:
; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz
; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz
; RV64IZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_w_h:
; RV64IDZFH: # %bb.0:
; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz
; RV64IDZFH-NEXT: fcvt.w.h a0, fa0, rtz
; RV64IDZFH-NEXT: ret
%1 = fptosi half %a to i32
ret i32 %1
@ -400,12 +400,12 @@ define i32 @fcvt_wu_h(half %a) nounwind {
;
; RV64IZFH-LABEL: fcvt_wu_h:
; RV64IZFH: # %bb.0:
; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz
; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz
; RV64IZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_wu_h:
; RV64IDZFH: # %bb.0:
; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz
; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz
; RV64IDZFH-NEXT: ret
%1 = fptoui half %a to i32
ret i32 %1

View File

@ -11,7 +11,7 @@ define i32 @aext_fptosi(double %a) nounwind {
; RV64ID-LABEL: aext_fptosi:
; RV64ID: # %bb.0:
; RV64ID-NEXT: fmv.d.x ft0, a0
; RV64ID-NEXT: fcvt.l.d a0, ft0, rtz
; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz
; RV64ID-NEXT: ret
%1 = fptosi double %a to i32
ret i32 %1
@ -21,7 +21,7 @@ define signext i32 @sext_fptosi(double %a) nounwind {
; RV64ID-LABEL: sext_fptosi:
; RV64ID: # %bb.0:
; RV64ID-NEXT: fmv.d.x ft0, a0
; RV64ID-NEXT: fcvt.l.d a0, ft0, rtz
; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz
; RV64ID-NEXT: ret
%1 = fptosi double %a to i32
ret i32 %1
@ -31,7 +31,7 @@ define zeroext i32 @zext_fptosi(double %a) nounwind {
; RV64ID-LABEL: zext_fptosi:
; RV64ID: # %bb.0:
; RV64ID-NEXT: fmv.d.x ft0, a0
; RV64ID-NEXT: fcvt.l.d a0, ft0, rtz
; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz
; RV64ID-NEXT: slli a0, a0, 32
; RV64ID-NEXT: srli a0, a0, 32
; RV64ID-NEXT: ret
@ -43,7 +43,7 @@ define i32 @aext_fptoui(double %a) nounwind {
; RV64ID-LABEL: aext_fptoui:
; RV64ID: # %bb.0:
; RV64ID-NEXT: fmv.d.x ft0, a0
; RV64ID-NEXT: fcvt.lu.d a0, ft0, rtz
; RV64ID-NEXT: fcvt.wu.d a0, ft0, rtz
; RV64ID-NEXT: ret
%1 = fptoui double %a to i32
ret i32 %1

View File

@ -11,7 +11,7 @@ define i32 @aext_fptosi(float %a) nounwind {
; RV64IF-LABEL: aext_fptosi:
; RV64IF: # %bb.0:
; RV64IF-NEXT: fmv.w.x ft0, a0
; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz
; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz
; RV64IF-NEXT: ret
%1 = fptosi float %a to i32
ret i32 %1
@ -21,7 +21,7 @@ define signext i32 @sext_fptosi(float %a) nounwind {
; RV64IF-LABEL: sext_fptosi:
; RV64IF: # %bb.0:
; RV64IF-NEXT: fmv.w.x ft0, a0
; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz
; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz
; RV64IF-NEXT: ret
%1 = fptosi float %a to i32
ret i32 %1
@ -31,7 +31,7 @@ define zeroext i32 @zext_fptosi(float %a) nounwind {
; RV64IF-LABEL: zext_fptosi:
; RV64IF: # %bb.0:
; RV64IF-NEXT: fmv.w.x ft0, a0
; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz
; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz
; RV64IF-NEXT: slli a0, a0, 32
; RV64IF-NEXT: srli a0, a0, 32
; RV64IF-NEXT: ret
@ -43,7 +43,7 @@ define i32 @aext_fptoui(float %a) nounwind {
; RV64IF-LABEL: aext_fptoui:
; RV64IF: # %bb.0:
; RV64IF-NEXT: fmv.w.x ft0, a0
; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz
; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz
; RV64IF-NEXT: ret
%1 = fptoui float %a to i32
ret i32 %1

View File

@ -10,7 +10,7 @@
define i32 @aext_fptosi(half %a) nounwind {
; RV64IZFH-LABEL: aext_fptosi:
; RV64IZFH: # %bb.0:
; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz
; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz
; RV64IZFH-NEXT: ret
%1 = fptosi half %a to i32
ret i32 %1
@ -19,7 +19,7 @@ define i32 @aext_fptosi(half %a) nounwind {
define signext i32 @sext_fptosi(half %a) nounwind {
; RV64IZFH-LABEL: sext_fptosi:
; RV64IZFH: # %bb.0:
; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz
; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz
; RV64IZFH-NEXT: ret
%1 = fptosi half %a to i32
ret i32 %1
@ -28,7 +28,7 @@ define signext i32 @sext_fptosi(half %a) nounwind {
define zeroext i32 @zext_fptosi(half %a) nounwind {
; RV64IZFH-LABEL: zext_fptosi:
; RV64IZFH: # %bb.0:
; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz
; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz
; RV64IZFH-NEXT: slli a0, a0, 32
; RV64IZFH-NEXT: srli a0, a0, 32
; RV64IZFH-NEXT: ret
@ -39,7 +39,7 @@ define zeroext i32 @zext_fptosi(half %a) nounwind {
define i32 @aext_fptoui(half %a) nounwind {
; RV64IZFH-LABEL: aext_fptoui:
; RV64IZFH: # %bb.0:
; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz
; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz
; RV64IZFH-NEXT: ret
%1 = fptoui half %a to i32
ret i32 %1