[RISCV] Custom type legalize i32 fshl/fshr on RV64 with Zbt.

This adds custom opcodes for FSLW/FSRW so we can type-legalize fshl/fshr without needing to match a sign_extend_inreg. I've used the operand order from fshl/fshr to keep the isel patterns similar to the non-W forms. It was also hard to settle on a different order, since the register instruction takes the shift amount as its second operand but the immediate instruction takes it as its third. Differential Revision: https://reviews.llvm.org/D91479
2020-11-25 09:43:16 -08:00 · 2020-11-25 09:43:16 -08:00 · c26e8697d7
parent 227c8ff189
commit c26e8697d7
4 changed files with 72 additions and 43 deletions
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@ -181,6 +181,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Legal);
    setOperationAction(ISD::FSHR, XLenVT, Legal);
+
+    if (Subtarget.is64Bit()) {
+      setOperationAction(ISD::FSHL, MVT::i32, Custom);
+      setOperationAction(ISD::FSHR, MVT::i32, Custom);
+    }
  }

  ISD::CondCode FPCCToExtend[] = {
@ -1091,6 +1096,26 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
    break;
  }
+  case ISD::FSHL:
+  case ISD::FSHR: {
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
+    SDValue NewOp0 =
+        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+    SDValue NewOp1 =
+        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+    SDValue NewOp2 =
+        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+    // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
+    // Mask the shift amount to 5 bits.
+    NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
+                         DAG.getConstant(0x1f, DL, MVT::i64));
+    unsigned Opc =
+        N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
+    SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
+    break;
+  }
  }
 }

@ -1322,6 +1347,24 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
    }
    break;
  }
+  case RISCVISD::FSLW:
+  case RISCVISD::FSRW: {
+    // Only the lower 32 bits of Values and lower 6 bits of shift amount are
+    // read.
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+    SDValue ShAmt = N->getOperand(2);
+    APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
+    APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
+    if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
+        SimplifyDemandedBits(Op1, OpMask, DCI) ||
+        SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
+      if (N->getOpcode() != ISD::DELETED_NODE)
+        DCI.AddToWorklist(N);
+      return SDValue(N, 0);
+    }
+    break;
+  }
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW: {
    // Only the lower 32 bits of the first operand are read
@ -1454,6 +1497,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
  case RISCVISD::RORW:
  case RISCVISD::GREVIW:
  case RISCVISD::GORCIW:
+  case RISCVISD::FSLW:
+  case RISCVISD::FSRW:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
@ -2951,6 +2996,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
+  NODE_NAME_CASE(FSLW)
+  NODE_NAME_CASE(FSRW)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@ -46,6 +46,10 @@ enum NodeType : unsigned {
  // instructions.
  ROLW,
  RORW,
+  // RV64IB funnel shifts, with the semantics of the named RISC-V instructions,
+  // but the same operand order as fshl/fshr intrinsics.
+  FSRW,
+  FSLW,
  // FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast
  // is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X.
  // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@ -17,8 +17,10 @@
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//

-def riscv_rolw      : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>;
-def riscv_rorw      : SDNode<"RISCVISD::RORW", SDTIntShiftOp>;
+def riscv_rolw : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>;
+def riscv_rorw : SDNode<"RISCVISD::RORW", SDTIntShiftOp>;
+def riscv_fslw : SDNode<"RISCVISD::FSLW", SDTIntShiftDOp>;
+def riscv_fsrw : SDNode<"RISCVISD::FSRW", SDTIntShiftDOp>;

 def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
  let Name = "UImmLog2XLenHalf";
@ -920,21 +922,13 @@ def : Pat<(riscv_gorciw GPR:$rs1, timm:$shamt), (GORCIW GPR:$rs1, timm:$shamt)>;
 } // Predicates = [HasStdExtZbp, IsRV64]

 let Predicates = [HasStdExtZbt, IsRV64] in {
-def : Pat<(sext_inreg (fshl GPR:$rs1, (shl GPR:$rs3, (i64 32)),
-                            (and GPR:$rs2, (i64 31))),
-                      i32),
-          (FSLW GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
-def : Pat<(sext_inreg (fshr GPR:$rs3, (shl GPR:$rs1, (i64 32)),
-                            (or GPR:$rs2, (i64 32))),
-                      i32),
-          (FSRW GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
-def : Pat<(sext_inreg (fshr GPR:$rs3, (shl GPR:$rs1, (i64 32)),
-                            uimm6gt32:$shamt),
-                      i32),
-          (FSRIW GPR:$rs1, GPR:$rs3, (ImmSub32 uimm6gt32:$shamt))>;
-def : Pat<(sext_inreg (fshl GPR:$rs3, (shl GPR:$rs1, (i64 32)),
-                            uimm5:$shamt),
-                      i32),
+def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2),
+          (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, GPR:$rs2),
+          (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
+          (FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>;
+def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
          (FSRIW GPR:$rs1, GPR:$rs3, (ImmROTL2RW uimm5:$shamt))>;
 } // Predicates = [HasStdExtZbt, IsRV64]

--- a/llvm/test/CodeGen/RISCV/rv64Zbt.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbt.ll
@ -134,7 +134,6 @@ define signext i32 @fshl_i32(i32 signext %a, i32 signext %b, i32 signext %c) nou
 }

 ; Similar to fshl_i32 but result is not sign extended.
-; FIXME: This should use fslw
 define void @fshl_i32_nosext(i32 signext %a, i32 signext %b, i32 signext %c, i32* %x) nounwind {
 ; RV64I-LABEL: fshl_i32_nosext:
 ; RV64I:       # %bb.0:
@ -150,19 +149,15 @@ define void @fshl_i32_nosext(i32 signext %a, i32 signext %b, i32 signext %c, i32
 ;
 ; RV64IB-LABEL: fshl_i32_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slli a1, a1, 32
 ; RV64IB-NEXT:    andi a2, a2, 31
-; RV64IB-NEXT:    andi a2, a2, 63
-; RV64IB-NEXT:    fsl a0, a0, a1, a2
+; RV64IB-NEXT:    fslw a0, a0, a1, a2
 ; RV64IB-NEXT:    sw a0, 0(a3)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshl_i32_nosext:
 ; RV64IBT:       # %bb.0:
-; RV64IBT-NEXT:    slli a1, a1, 32
 ; RV64IBT-NEXT:    andi a2, a2, 31
-; RV64IBT-NEXT:    andi a2, a2, 63
-; RV64IBT-NEXT:    fsl a0, a0, a1, a2
+; RV64IBT-NEXT:    fslw a0, a0, a1, a2
 ; RV64IBT-NEXT:    sw a0, 0(a3)
 ; RV64IBT-NEXT:    ret
  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
@ -227,7 +222,6 @@ define signext i32 @fshr_i32(i32 signext %a, i32 signext %b, i32 signext %c) nou
 }

 ; Similar to fshr_i32 but result is not sign extended.
-; FIXME: This should use fsrw
 define void @fshr_i32_nosext(i32 signext %a, i32 signext %b, i32 signext %c, i32* %x) nounwind {
 ; RV64I-LABEL: fshr_i32_nosext:
 ; RV64I:       # %bb.0:
@ -242,19 +236,15 @@ define void @fshr_i32_nosext(i32 signext %a, i32 signext %b, i32 signext %c, i32
 ;
 ; RV64IB-LABEL: fshr_i32_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slli a1, a1, 32
-; RV64IB-NEXT:    ori a2, a2, 32
-; RV64IB-NEXT:    andi a2, a2, 63
-; RV64IB-NEXT:    fsr a0, a1, a0, a2
+; RV64IB-NEXT:    andi a2, a2, 31
+; RV64IB-NEXT:    fsrw a0, a1, a0, a2
 ; RV64IB-NEXT:    sw a0, 0(a3)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshr_i32_nosext:
 ; RV64IBT:       # %bb.0:
-; RV64IBT-NEXT:    slli a1, a1, 32
-; RV64IBT-NEXT:    ori a2, a2, 32
-; RV64IBT-NEXT:    andi a2, a2, 63
-; RV64IBT-NEXT:    fsr a0, a1, a0, a2
+; RV64IBT-NEXT:    andi a2, a2, 31
+; RV64IBT-NEXT:    fsrw a0, a1, a0, a2
 ; RV64IBT-NEXT:    sw a0, 0(a3)
 ; RV64IBT-NEXT:    ret
  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
@ -312,7 +302,6 @@ define signext i32 @fshri_i32(i32 signext %a, i32 signext %b) nounwind {
 }

 ; Similar to fshr_i32 but result is not sign extended.
-; FIXME: This should use fsriw
 define void @fshri_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind {
 ; RV64I-LABEL: fshri_i32_nosext:
 ; RV64I:       # %bb.0:
@ -324,15 +313,13 @@ define void @fshri_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind
 ;
 ; RV64IB-LABEL: fshri_i32_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slli a1, a1, 32
-; RV64IB-NEXT:    fsri a0, a1, a0, 37
+; RV64IB-NEXT:    fsriw a0, a1, a0, 5
 ; RV64IB-NEXT:    sw a0, 0(a2)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshri_i32_nosext:
 ; RV64IBT:       # %bb.0:
-; RV64IBT-NEXT:    slli a1, a1, 32
-; RV64IBT-NEXT:    fsri a0, a1, a0, 37
+; RV64IBT-NEXT:    fsriw a0, a1, a0, 5
 ; RV64IBT-NEXT:    sw a0, 0(a2)
 ; RV64IBT-NEXT:    ret
  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 5)
@ -384,7 +371,6 @@ define signext i32 @fshli_i32(i32 signext %a, i32 signext %b) nounwind {
 }

 ; Similar to fshl_i32 but result is not sign extended.
-; FIXME: This should use fsriw
 define void @fshli_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind {
 ; RV64I-LABEL: fshli_i32_nosext:
 ; RV64I:       # %bb.0:
@ -396,15 +382,13 @@ define void @fshli_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind
 ;
 ; RV64IB-LABEL: fshli_i32_nosext:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slli a1, a1, 32
-; RV64IB-NEXT:    fsri a0, a1, a0, 59
+; RV64IB-NEXT:    fsriw a0, a1, a0, 27
 ; RV64IB-NEXT:    sw a0, 0(a2)
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshli_i32_nosext:
 ; RV64IBT:       # %bb.0:
-; RV64IBT-NEXT:    slli a1, a1, 32
-; RV64IBT-NEXT:    fsri a0, a1, a0, 59
+; RV64IBT-NEXT:    fsriw a0, a1, a0, 27
 ; RV64IBT-NEXT:    sw a0, 0(a2)
 ; RV64IBT-NEXT:    ret
  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 5)