[RISCV] ISel (and (shift X, C1), C2)) to shift pair in more cases

Previously, these isel optimizations were disabled if the AND could
be selected as an ANDI instruction. This patch disables the optimizations
only if the immediate is valid for C.ANDI. If we can't use C.ANDI,
we might be able to compress the shift instructions instead.

I'm not checking the C extension since we have relatively poor test
coverage of the C extension. Without the C extension, the code size
should be equal. My only concern would be if the shift+andi had
better latency/throughput on a particular CPU.

I did have to add a peephole to match SRLIW if the input is zexti32
to prevent a regression in rv64zbp.ll.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D122701
This commit is contained in:
Craig Topper 2022-03-30 11:37:34 -07:00
parent 5fd0925bc9
commit 4477500533
6 changed files with 44 additions and 33 deletions

View File

@ -724,8 +724,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
uint64_t C1 = N1C->getZExtValue();
// Keep track of whether this is an andi.
bool IsANDI = isInt<12>(N1C->getSExtValue());
// Keep track of whether this is a c.andi. If we can't use c.andi, the
// shift pair might offer more compression opportunities.
// TODO: We could check for C extension here, but we don't have many lit
// tests with the C extension enabled so not checking gets better coverage.
// TODO: What if ANDI faster than shift?
bool IsCANDI = isInt<6>(N1C->getSExtValue());
// Clear irrelevant bits in the mask.
if (LeftShift)
@ -776,7 +780,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
if (OneUseOrZExtW && !IsANDI && !Skip) {
if (OneUseOrZExtW && !IsCANDI && !Skip) {
SDNode *SLLI = CurDAG->getMachineNode(
RISCV::SLLI, DL, XLenVT, X,
CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
@ -806,7 +810,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
// (srli (slli c2+c3), c3)
if (OneUseOrZExtW && !IsANDI) {
if (OneUseOrZExtW && !IsCANDI) {
SDNode *SLLI = CurDAG->getMachineNode(
RISCV::SLLI, DL, XLenVT, X,
CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
@ -824,9 +828,16 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!LeftShift && isShiftedMask_64(C1)) {
uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
uint64_t C3 = countTrailingZeros(C1);
if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsANDI) {
if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
unsigned SrliOpc = RISCV::SRLI;
// If the input is zexti32 we should use SRLIW.
if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
SrliOpc = RISCV::SRLIW;
X = X.getOperand(0);
}
SDNode *SRLI = CurDAG->getMachineNode(
RISCV::SRLI, DL, XLenVT, X,
SrliOpc, DL, XLenVT, X,
CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
@ -836,7 +847,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
// If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
OneUseOrZExtW && !IsANDI) {
OneUseOrZExtW && !IsCANDI) {
SDNode *SRLIW = CurDAG->getMachineNode(
RISCV::SRLIW, DL, XLenVT, X,
CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
@ -853,7 +864,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (LeftShift && isShiftedMask_64(C1)) {
uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
uint64_t C3 = countTrailingZeros(C1);
if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsANDI) {
if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
SDNode *SRLI = CurDAG->getMachineNode(
RISCV::SRLI, DL, XLenVT, X,
CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
@ -864,7 +875,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
// If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsANDI) {
if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
SDNode *SRLIW = CurDAG->getMachineNode(
RISCV::SRLIW, DL, XLenVT, X,
CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));

View File

@ -120,8 +120,8 @@ define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
; RV32ZBKB: # %bb.0:
; RV32ZBKB-NEXT: rev8 a0, a0
; RV32ZBKB-NEXT: brev8 a0, a0
; RV32ZBKB-NEXT: srli a0, a0, 21
; RV32ZBKB-NEXT: andi a0, a0, 2040
; RV32ZBKB-NEXT: srli a0, a0, 24
; RV32ZBKB-NEXT: slli a0, a0, 3
; RV32ZBKB-NEXT: rev8 a0, a0
; RV32ZBKB-NEXT: brev8 a0, a0
; RV32ZBKB-NEXT: srli a0, a0, 24
@ -131,8 +131,8 @@ define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: rev8 a0, a0
; RV64ZBKB-NEXT: brev8 a0, a0
; RV64ZBKB-NEXT: srli a0, a0, 53
; RV64ZBKB-NEXT: andi a0, a0, 2040
; RV64ZBKB-NEXT: srli a0, a0, 56
; RV64ZBKB-NEXT: slli a0, a0, 3
; RV64ZBKB-NEXT: rev8 a0, a0
; RV64ZBKB-NEXT: brev8 a0, a0
; RV64ZBKB-NEXT: srli a0, a0, 56
@ -148,8 +148,8 @@ define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
; RV32ZBKB: # %bb.0:
; RV32ZBKB-NEXT: rev8 a0, a0
; RV32ZBKB-NEXT: brev8 a0, a0
; RV32ZBKB-NEXT: srli a0, a0, 9
; RV32ZBKB-NEXT: andi a0, a0, -128
; RV32ZBKB-NEXT: srli a0, a0, 16
; RV32ZBKB-NEXT: slli a0, a0, 7
; RV32ZBKB-NEXT: rev8 a0, a0
; RV32ZBKB-NEXT: brev8 a0, a0
; RV32ZBKB-NEXT: srli a0, a0, 16
@ -159,8 +159,8 @@ define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: rev8 a0, a0
; RV64ZBKB-NEXT: brev8 a0, a0
; RV64ZBKB-NEXT: srli a0, a0, 41
; RV64ZBKB-NEXT: andi a0, a0, -128
; RV64ZBKB-NEXT: srli a0, a0, 48
; RV64ZBKB-NEXT: slli a0, a0, 7
; RV64ZBKB-NEXT: rev8 a0, a0
; RV64ZBKB-NEXT: brev8 a0, a0
; RV64ZBKB-NEXT: srli a0, a0, 48

View File

@ -123,8 +123,8 @@ define i16 @test_bswap_shli_7_bswap_i16(i16 %a) nounwind {
; RV32ZB-LABEL: test_bswap_shli_7_bswap_i16:
; RV32ZB: # %bb.0:
; RV32ZB-NEXT: rev8 a0, a0
; RV32ZB-NEXT: srli a0, a0, 9
; RV32ZB-NEXT: andi a0, a0, -128
; RV32ZB-NEXT: srli a0, a0, 16
; RV32ZB-NEXT: slli a0, a0, 7
; RV32ZB-NEXT: rev8 a0, a0
; RV32ZB-NEXT: srli a0, a0, 16
; RV32ZB-NEXT: ret
@ -132,8 +132,8 @@ define i16 @test_bswap_shli_7_bswap_i16(i16 %a) nounwind {
; RV64ZB-LABEL: test_bswap_shli_7_bswap_i16:
; RV64ZB: # %bb.0:
; RV64ZB-NEXT: rev8 a0, a0
; RV64ZB-NEXT: srli a0, a0, 41
; RV64ZB-NEXT: andi a0, a0, -128
; RV64ZB-NEXT: srli a0, a0, 48
; RV64ZB-NEXT: slli a0, a0, 7
; RV64ZB-NEXT: rev8 a0, a0
; RV64ZB-NEXT: srli a0, a0, 48
; RV64ZB-NEXT: ret

View File

@ -2758,8 +2758,8 @@ define i32 @bswap_rotr_i32(i32 %a) {
; RV64I-NEXT: slli a2, a0, 24
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: srliw a2, a0, 24
; RV64I-NEXT: srliw a0, a0, 8
; RV64I-NEXT: andi a0, a0, -256
; RV64I-NEXT: srliw a0, a0, 16
; RV64I-NEXT: slli a0, a0, 8
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: slliw a0, a0, 16
; RV64I-NEXT: srliw a1, a1, 16
@ -2779,8 +2779,8 @@ define i32 @bswap_rotl_i32(i32 %a) {
; RV64I-LABEL: bswap_rotl_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a1, a0, 24
; RV64I-NEXT: srliw a2, a0, 8
; RV64I-NEXT: andi a2, a2, -256
; RV64I-NEXT: srliw a2, a0, 16
; RV64I-NEXT: slli a2, a2, 8
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: slli a2, a0, 8
; RV64I-NEXT: lui a3, 4080

View File

@ -31,8 +31,8 @@ define i32 @neg_sel_constants(i32 signext %a) {
define i32 @neg_sel_special_constant(i32 signext %a) {
; RV32-LABEL: neg_sel_special_constant:
; RV32: # %bb.0:
; RV32-NEXT: srli a0, a0, 22
; RV32-NEXT: andi a0, a0, 512
; RV32-NEXT: srli a0, a0, 31
; RV32-NEXT: slli a0, a0, 9
; RV32-NEXT: ret
;
; RV64-LABEL: neg_sel_special_constant:
@ -100,8 +100,8 @@ define i32 @pos_sel_special_constant(i32 signext %a) {
; RV32-LABEL: pos_sel_special_constant:
; RV32: # %bb.0:
; RV32-NEXT: not a0, a0
; RV32-NEXT: srli a0, a0, 22
; RV32-NEXT: andi a0, a0, 512
; RV32-NEXT: srli a0, a0, 31
; RV32-NEXT: slli a0, a0, 9
; RV32-NEXT: ret
;
; RV64-LABEL: pos_sel_special_constant:

View File

@ -535,8 +535,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV32MV-NEXT: lw a1, 0(a0)
; RV32MV-NEXT: andi a2, a1, 2047
; RV32MV-NEXT: sh a2, 8(sp)
; RV32MV-NEXT: srli a2, a1, 11
; RV32MV-NEXT: andi a2, a2, 2047
; RV32MV-NEXT: slli a2, a1, 10
; RV32MV-NEXT: srli a2, a2, 21
; RV32MV-NEXT: sh a2, 10(sp)
; RV32MV-NEXT: lb a2, 4(a0)
; RV32MV-NEXT: slli a2, a2, 10
@ -606,8 +606,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64MV-NEXT: sh a2, 12(sp)
; RV64MV-NEXT: andi a2, a1, 2047
; RV64MV-NEXT: sh a2, 8(sp)
; RV64MV-NEXT: srli a1, a1, 11
; RV64MV-NEXT: andi a1, a1, 2047
; RV64MV-NEXT: slli a1, a1, 42
; RV64MV-NEXT: srli a1, a1, 53
; RV64MV-NEXT: sh a1, 10(sp)
; RV64MV-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV64MV-NEXT: addi a1, sp, 8