[RISCV] Select (srl (sext_inreg X, i32), uimm5) to SRAIW if only lower 32 bits are used.

SimplifyDemandedBits can turn sra into srl if the bits being shifted
in aren't demanded. This patch can recover the original sra in some cases.

I've renamed the tablegen class for detecting W users since the "overflowing operator"
term I originally borrowed from Operator.h does not include srl.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D109162
This commit is contained in:
Craig Topper 2021-09-16 10:37:55 -07:00
parent b4fa71eed3
commit 73e5b9ea90
5 changed files with 19 additions and 16 deletions

View File

@ -1536,6 +1536,7 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
Node->getOpcode() == ISD::SRL ||
Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
isa<ConstantSDNode>(Node)) &&
"Unexpected opcode");

View File

@ -1358,7 +1358,7 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
// PatFrag to allow ADDW/SUBW/MULW/SLLW to be selected from i64 add/sub/mul/shl
// if only the lower 32 bits of their result are used.
class overflowingbinopw<SDPatternOperator operator>
class binop_allwusers<SDPatternOperator operator>
: PatFrag<(ops node:$lhs, node:$rhs),
(operator node:$lhs, node:$rhs), [{
return hasAllWUsers(Node);
@ -1393,12 +1393,17 @@ def : PatGprGpr<shiftopw<riscv_sllw>, SLLW>;
def : PatGprGpr<shiftopw<riscv_srlw>, SRLW>;
def : PatGprGpr<shiftopw<riscv_sraw>, SRAW>;
// Select W instructions without sext_inreg if only the lower 32 bits of the
// result are used.
def : PatGprGpr<overflowingbinopw<add>, ADDW>;
def : PatGprSimm12<overflowingbinopw<add>, ADDIW>;
def : PatGprGpr<overflowingbinopw<sub>, SUBW>;
def : PatGprImm<overflowingbinopw<shl>, SLLIW, uimm5>;
// Select W instructions if only the lower 32 bits of the result are used.
def : PatGprGpr<binop_allwusers<add>, ADDW>;
def : PatGprSimm12<binop_allwusers<add>, ADDIW>;
def : PatGprGpr<binop_allwusers<sub>, SUBW>;
def : PatGprImm<binop_allwusers<shl>, SLLIW, uimm5>;
// If this is a shr of a value sign extended from i32, and all the users only
// use the lower 32 bits, we can use an sraiw to remove the sext_inreg. This
// occurs because SimplifyDemandedBits prefers srl over sra.
def : Pat<(binop_allwusers<srl> (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
(SRAIW GPR:$rs1, uimm5:$shamt)>;
/// Loads
@ -1441,7 +1446,7 @@ def : Pat<(add GPR:$rs1, (AddiPair:$rs2)),
let Predicates = [IsRV64] in {
// Select W instructions if only the lower 32-bits of the result are used.
def : Pat<(overflowingbinopw<add> GPR:$rs1, (AddiPair:$rs2)),
def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
(ADDIW (ADDIW GPR:$rs1, (AddiPairImmB AddiPair:$rs2)),
(AddiPairImmA AddiPair:$rs2))>;
}

View File

@ -72,9 +72,8 @@ def : PatGprGpr<urem, REMU>;
} // Predicates = [HasStdExtM]
let Predicates = [HasStdExtM, IsRV64] in {
// Select W instructions without sext_inreg if only the lower 32-bits of the
// result are used.
def : PatGprGpr<overflowingbinopw<mul>, MULW>;
// Select W instructions if only the lower 32-bits of the result are used.
def : PatGprGpr<binop_allwusers<mul>, MULW>;
def : PatGprGpr<riscv_divw, DIVW>;
def : PatGprGpr<riscv_divuw, DIVUW>;

View File

@ -1964,8 +1964,7 @@ define zeroext i32 @zext_sraiw_aext(i32 %a) nounwind {
;
; RV64ZBA-LABEL: zext_sraiw_aext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sext.w a0, a0
; RV64ZBA-NEXT: srli a0, a0, 7
; RV64ZBA-NEXT: sraiw a0, a0, 7
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, 7
@ -1999,8 +1998,7 @@ define zeroext i32 @zext_sraiw_zext(i32 zeroext %a) nounwind {
;
; RV64ZBA-LABEL: zext_sraiw_zext:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sext.w a0, a0
; RV64ZBA-NEXT: srli a0, a0, 9
; RV64ZBA-NEXT: sraiw a0, a0, 9
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = ashr i32 %a, 9

View File

@ -53,7 +53,7 @@ define i32 @fold_srem_positive_odd(i32 %x) nounwind {
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: addw a1, a1, a0
; RV64IM-NEXT: srliw a2, a1, 31
; RV64IM-NEXT: srli a1, a1, 6
; RV64IM-NEXT: sraiw a1, a1, 6
; RV64IM-NEXT: addw a1, a1, a2
; RV64IM-NEXT: addi a2, zero, 95
; RV64IM-NEXT: mulw a1, a1, a2