forked from OSchip/llvm-project
[RISCV] Add more sign-extending ops to MIR sext.w pass.
This patch adds single-bit and bit-counting ops to the list of sign-extending ops. A single-bit write propagates sign-extendedness if the written bit is not among the sign bits. Bit extraction and bit counting always output a small number, so their results are sign-extended. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D121152
This commit is contained in:
parent
53491e4519
commit
7afa44f5f5
|
@ -91,7 +91,7 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
|
|||
case RISCV::FCVT_WU_D:
|
||||
case RISCV::FMV_X_W:
|
||||
// The following aren't W instructions, but are either sign extended from a
|
||||
// smaller size or put zeros in bits 63:31.
|
||||
// smaller size, always output a small integer, or put zeros in bits 63:31.
|
||||
case RISCV::LBU:
|
||||
case RISCV::LHU:
|
||||
case RISCV::LB:
|
||||
|
@ -104,6 +104,11 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
|
|||
case RISCV::SEXT_H:
|
||||
case RISCV::ZEXT_H_RV64:
|
||||
case RISCV::FMV_X_H:
|
||||
case RISCV::BEXT:
|
||||
case RISCV::BEXTI:
|
||||
case RISCV::CLZ:
|
||||
case RISCV::CPOP:
|
||||
case RISCV::CTZ:
|
||||
return true;
|
||||
// shifting right sufficiently makes the value 32-bit sign-extended
|
||||
case RISCV::SRAI:
|
||||
|
@ -167,14 +172,21 @@ static bool isSignExtendedW(const MachineInstr &OrigMI,
|
|||
Worklist.push_back(SrcMI);
|
||||
break;
|
||||
}
|
||||
|
||||
// For these, we just need to check if the 1st operand is sign extended.
|
||||
case RISCV::BCLRI:
|
||||
case RISCV::BINVI:
|
||||
case RISCV::BSETI:
|
||||
if (MI->getOperand(2).getImm() >= 31)
|
||||
return false;
|
||||
LLVM_FALLTHROUGH;
|
||||
case RISCV::REM:
|
||||
case RISCV::ANDI:
|
||||
case RISCV::ORI:
|
||||
case RISCV::XORI: {
|
||||
// |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
|
||||
// DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
|
||||
// Logical operations use a sign extended 12-bit immediate. We just need
|
||||
// to check if the other operand is sign extended.
|
||||
// Logical operations use a sign extended 12-bit immediate.
|
||||
Register SrcReg = MI->getOperand(1).getReg();
|
||||
if (!SrcReg.isVirtual())
|
||||
return false;
|
||||
|
|
|
@ -315,17 +315,108 @@ bb7: ; preds = %bb2
|
|||
}
|
||||
declare float @baz(i32 signext %i3)
|
||||
|
||||
define void @test7(i32 signext %arg, i32 signext %arg1) nounwind {
|
||||
; RV64I-LABEL: test7:
|
||||
; RV64I: # %bb.0: # %bb
|
||||
; RV64I-NEXT: addi sp, sp, -48
|
||||
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: lui a2, %hi(.LCPI6_0)
|
||||
; RV64I-NEXT: ld s0, %lo(.LCPI6_0)(a2)
|
||||
; RV64I-NEXT: lui a2, %hi(.LCPI6_1)
|
||||
; RV64I-NEXT: ld s1, %lo(.LCPI6_1)(a2)
|
||||
; RV64I-NEXT: lui a2, %hi(.LCPI6_2)
|
||||
; RV64I-NEXT: ld s2, %lo(.LCPI6_2)(a2)
|
||||
; RV64I-NEXT: lui a2, %hi(.LCPI6_3)
|
||||
; RV64I-NEXT: ld s3, %lo(.LCPI6_3)(a2)
|
||||
; RV64I-NEXT: sraw a0, a0, a1
|
||||
; RV64I-NEXT: .LBB6_1: # %bb2
|
||||
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; RV64I-NEXT: call foo@plt
|
||||
; RV64I-NEXT: srli a1, a0, 1
|
||||
; RV64I-NEXT: and a1, a1, s0
|
||||
; RV64I-NEXT: sub a0, a0, a1
|
||||
; RV64I-NEXT: and a1, a0, s1
|
||||
; RV64I-NEXT: srli a0, a0, 2
|
||||
; RV64I-NEXT: and a0, a0, s1
|
||||
; RV64I-NEXT: add a0, a1, a0
|
||||
; RV64I-NEXT: srli a1, a0, 4
|
||||
; RV64I-NEXT: add a0, a0, a1
|
||||
; RV64I-NEXT: and a0, a0, s2
|
||||
; RV64I-NEXT: mul a0, a0, s3
|
||||
; RV64I-NEXT: srli a0, a0, 56
|
||||
; RV64I-NEXT: bnez a0, .LBB6_1
|
||||
; RV64I-NEXT: # %bb.2: # %bb7
|
||||
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: addi sp, sp, 48
|
||||
; RV64I-NEXT: ret
|
||||
;
|
||||
; RV64ZBB-LABEL: test7:
|
||||
; RV64ZBB: # %bb.0: # %bb
|
||||
; RV64ZBB-NEXT: addi sp, sp, -16
|
||||
; RV64ZBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; RV64ZBB-NEXT: sraw a0, a0, a1
|
||||
; RV64ZBB-NEXT: .LBB6_1: # %bb2
|
||||
; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; RV64ZBB-NEXT: call foo@plt
|
||||
; RV64ZBB-NEXT: cpop a0, a0
|
||||
; RV64ZBB-NEXT: bnez a0, .LBB6_1
|
||||
; RV64ZBB-NEXT: # %bb.2: # %bb7
|
||||
; RV64ZBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; RV64ZBB-NEXT: addi sp, sp, 16
|
||||
; RV64ZBB-NEXT: ret
|
||||
;
|
||||
; NOREMOVAL-LABEL: test7:
|
||||
; NOREMOVAL: # %bb.0: # %bb
|
||||
; NOREMOVAL-NEXT: addi sp, sp, -16
|
||||
; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; NOREMOVAL-NEXT: sraw a0, a0, a1
|
||||
; NOREMOVAL-NEXT: .LBB6_1: # %bb2
|
||||
; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; NOREMOVAL-NEXT: sext.w a0, a0
|
||||
; NOREMOVAL-NEXT: call foo@plt
|
||||
; NOREMOVAL-NEXT: cpop a0, a0
|
||||
; NOREMOVAL-NEXT: bnez a0, .LBB6_1
|
||||
; NOREMOVAL-NEXT: # %bb.2: # %bb7
|
||||
; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; NOREMOVAL-NEXT: addi sp, sp, 16
|
||||
; NOREMOVAL-NEXT: ret
|
||||
bb:
|
||||
%i = ashr i32 %arg, %arg1
|
||||
br label %bb2
|
||||
|
||||
bb2: ; preds = %bb2, %bb
|
||||
%i3 = phi i32 [ %i, %bb ], [ %i6, %bb2 ]
|
||||
%i4 = tail call signext i64 @foo(i32 signext %i3)
|
||||
%i5 = tail call i64 @llvm.ctpop.i64(i64 %i4)
|
||||
%i6 = trunc i64 %i5 to i32
|
||||
%i7 = icmp eq i32 %i6, 0
|
||||
br i1 %i7, label %bb7, label %bb2
|
||||
|
||||
bb7: ; preds = %bb2
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i64 @llvm.ctpop.i64(i64)
|
||||
|
||||
define void @test8(i32 signext %arg, i32 signext %arg1) nounwind {
|
||||
; CHECK-LABEL: test8:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: addi sp, sp, -16
|
||||
; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: sraw a0, a0, a1
|
||||
; CHECK-NEXT: .LBB6_1: # %bb2
|
||||
; CHECK-NEXT: .LBB7_1: # %bb2
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: call foo@plt
|
||||
; CHECK-NEXT: ori a0, a0, -256
|
||||
; CHECK-NEXT: bnez a0, .LBB6_1
|
||||
; CHECK-NEXT: bnez a0, .LBB7_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb7
|
||||
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi sp, sp, 16
|
||||
|
@ -336,12 +427,12 @@ define void @test8(i32 signext %arg, i32 signext %arg1) nounwind {
|
|||
; NOREMOVAL-NEXT: addi sp, sp, -16
|
||||
; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; NOREMOVAL-NEXT: sraw a0, a0, a1
|
||||
; NOREMOVAL-NEXT: .LBB6_1: # %bb2
|
||||
; NOREMOVAL-NEXT: .LBB7_1: # %bb2
|
||||
; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; NOREMOVAL-NEXT: sext.w a0, a0
|
||||
; NOREMOVAL-NEXT: call foo@plt
|
||||
; NOREMOVAL-NEXT: ori a0, a0, -256
|
||||
; NOREMOVAL-NEXT: bnez a0, .LBB6_1
|
||||
; NOREMOVAL-NEXT: bnez a0, .LBB7_1
|
||||
; NOREMOVAL-NEXT: # %bb.2: # %bb7
|
||||
; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; NOREMOVAL-NEXT: addi sp, sp, 16
|
||||
|
@ -372,12 +463,12 @@ define void @test9(i32 signext %arg, i32 signext %arg1) nounwind {
|
|||
; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: sraw a0, a0, a1
|
||||
; CHECK-NEXT: li s0, 254
|
||||
; CHECK-NEXT: .LBB7_1: # %bb2
|
||||
; CHECK-NEXT: .LBB8_1: # %bb2
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: call bar@plt
|
||||
; CHECK-NEXT: mv a1, a0
|
||||
; CHECK-NEXT: slti a0, a0, 255
|
||||
; CHECK-NEXT: blt s0, a1, .LBB7_1
|
||||
; CHECK-NEXT: blt s0, a1, .LBB8_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb7
|
||||
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
|
||||
|
@ -391,12 +482,12 @@ define void @test9(i32 signext %arg, i32 signext %arg1) nounwind {
|
|||
; NOREMOVAL-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
|
||||
; NOREMOVAL-NEXT: sraw a1, a0, a1
|
||||
; NOREMOVAL-NEXT: li s0, 254
|
||||
; NOREMOVAL-NEXT: .LBB7_1: # %bb2
|
||||
; NOREMOVAL-NEXT: .LBB8_1: # %bb2
|
||||
; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; NOREMOVAL-NEXT: sext.w a0, a1
|
||||
; NOREMOVAL-NEXT: call bar@plt
|
||||
; NOREMOVAL-NEXT: slti a1, a0, 255
|
||||
; NOREMOVAL-NEXT: blt s0, a0, .LBB7_1
|
||||
; NOREMOVAL-NEXT: blt s0, a0, .LBB8_1
|
||||
; NOREMOVAL-NEXT: # %bb.2: # %bb7
|
||||
; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; NOREMOVAL-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
|
||||
|
@ -426,12 +517,12 @@ define void @test10(i32 signext %arg, i32 signext %arg1) nounwind {
|
|||
; CHECK-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
|
||||
; CHECK-NEXT: sraw a0, a0, a1
|
||||
; CHECK-NEXT: fmv.w.x fs0, zero
|
||||
; CHECK-NEXT: .LBB8_1: # %bb2
|
||||
; CHECK-NEXT: .LBB9_1: # %bb2
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: call baz@plt
|
||||
; CHECK-NEXT: feq.s a1, fa0, fs0
|
||||
; CHECK-NEXT: fmv.x.w a0, fa0
|
||||
; CHECK-NEXT: beqz a1, .LBB8_1
|
||||
; CHECK-NEXT: beqz a1, .LBB9_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb7
|
||||
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
|
||||
|
@ -445,13 +536,13 @@ define void @test10(i32 signext %arg, i32 signext %arg1) nounwind {
|
|||
; NOREMOVAL-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
|
||||
; NOREMOVAL-NEXT: sraw a0, a0, a1
|
||||
; NOREMOVAL-NEXT: fmv.w.x fs0, zero
|
||||
; NOREMOVAL-NEXT: .LBB8_1: # %bb2
|
||||
; NOREMOVAL-NEXT: .LBB9_1: # %bb2
|
||||
; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; NOREMOVAL-NEXT: sext.w a0, a0
|
||||
; NOREMOVAL-NEXT: call baz@plt
|
||||
; NOREMOVAL-NEXT: feq.s a1, fa0, fs0
|
||||
; NOREMOVAL-NEXT: fmv.x.w a0, fa0
|
||||
; NOREMOVAL-NEXT: beqz a1, .LBB8_1
|
||||
; NOREMOVAL-NEXT: beqz a1, .LBB9_1
|
||||
; NOREMOVAL-NEXT: # %bb.2: # %bb7
|
||||
; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; NOREMOVAL-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
|
||||
|
|
Loading…
Reference in New Issue