[GlobalISel] Combine shr(shl x, c1), c2 to G_SBFX/G_UBFX

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D107330
This commit is contained in:
Dominik Montada 2021-08-03 09:56:32 +02:00
parent a82c7476a7
commit cc947e29ea
6 changed files with 242 additions and 7 deletions

View File

@ -541,6 +541,10 @@ public:
bool matchBitfieldExtractFromAnd(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
/// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
bool matchBitfieldExtractFromShr(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
/// Reassociate pointer calculations with G_ADD involved, to allow better
/// addressing mode usage.
bool matchReassocPtrAdd(MachineInstr &MI,

View File

@ -652,8 +652,15 @@ def bitfield_extract_from_sext_inreg : GICombineRule<
[{ return Helper.matchBitfieldExtractFromSExtInReg(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
def bitfield_extract_from_shr : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_ASHR, G_LSHR):$root,
[{ return Helper.matchBitfieldExtractFromShr(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
bitfield_extract_from_and]>;
bitfield_extract_from_and,
bitfield_extract_from_shr]>;
def reassoc_ptradd : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_PTR_ADD):$root,

View File

@ -4128,6 +4128,55 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
return true;
}
/// Match shr (shl x, c1), c2 and turn it into a signed/unsigned bitfield
/// extract: sbfx/ubfx x, pos, width with pos = c2 - c1 and width = size - c2.
/// On success, \p MatchInfo builds the replacement instruction.
bool CombinerHelper::matchBitfieldExtractFromShr(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  const unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);

  const Register Dst = MI.getOperand(0).getReg();

  // An arithmetic shift preserves the sign bit, so it maps to the signed
  // extract; a logical shift maps to the unsigned one.
  unsigned ExtrOpcode = TargetOpcode::G_UBFX;
  if (Opcode == TargetOpcode::G_ASHR)
    ExtrOpcode = TargetOpcode::G_SBFX;

  // Only combine when the target can handle the extract for this type.
  const LLT Ty = MRI.getType(Dst);
  const LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
    return false;

  // Look for shr (shl x, c1), c2 where both shift amounts are constants and
  // the inner shl has no other (non-debug) users.
  Register ShlSrc;
  int64_t ShrAmt;
  int64_t ShlAmt;
  if (!mi_match(Dst, MRI,
                m_BinOp(Opcode,
                        m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
                        m_ICst(ShrAmt))))
    return false;

  // The amounts must describe a field that lies inside the value:
  // 0 <= ShlAmt <= ShrAmt < Size.
  const unsigned Size = Ty.getScalarSizeInBits();
  if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
    return false;

  // ashr (shl x, c), c is sign-extension in-register; leave that to the
  // dedicated G_SEXT_INREG combine.
  if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
    return false;

  // Field start and length of the extract.
  const int64_t Pos = ShrAmt - ShlAmt;
  const int64_t Width = Size - ShrAmt;

  MatchInfo = [=](MachineIRBuilder &B) {
    auto WidthCst = B.buildConstant(ExtractTy, Width);
    auto PosCst = B.buildConstant(ExtractTy, Pos);
    B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
  };
  return true;
}
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
MachineInstr &PtrAdd) {
assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);

View File

@ -0,0 +1,178 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
# Check that we can fold a G_ASHR/G_LSHR fed by a G_SHL into a G_SBFX/G_UBFX.
---
# shl by 5 then ashr by 16 folds to G_SBFX with pos = 16 - 5 = 11 and
# width = 32 - 16 = 16 (the C1/C constants in the CHECK lines).
name: apply_ashr_shl_to_sbfx
legalized: true
body: |
bb.0.entry:
; CHECK-LABEL: name: apply_ashr_shl_to_sbfx
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C1]](s32), [[C]]
; CHECK: $w0 = COPY [[SBFX]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 5
%2:_(s32) = G_CONSTANT i32 16
%3:_(s32) = G_SHL %0, %1
%4:_(s32) = G_ASHR %3, %2
$w0 = COPY %4(s32)
...
---
# Minimal-width case: shl 30 / ashr 31 still folds, giving G_SBFX with
# pos = 1 and width = 1 (the single constant 1 reused for both operands).
name: apply_ashr_shl_to_sbfx_lower_bound
legalized: true
body: |
bb.0.entry:
; CHECK-LABEL: name: apply_ashr_shl_to_sbfx_lower_bound
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C]](s32), [[C]]
; CHECK: $w0 = COPY [[SBFX]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 30
%2:_(s32) = G_CONSTANT i32 31
%3:_(s32) = G_SHL %0, %1
%4:_(s32) = G_ASHR %3, %2
$w0 = COPY %4(s32)
...
---
# Maximal-position case: shl 1 / ashr 31 folds to G_SBFX with pos = 30
# and width = 1.
name: apply_ashr_shl_to_sbfx_upper_bound
legalized: true
body: |
bb.0.entry:
; CHECK-LABEL: name: apply_ashr_shl_to_sbfx_upper_bound
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C1]](s32), [[C]]
; CHECK: $w0 = COPY [[SBFX]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 1
%2:_(s32) = G_CONSTANT i32 31
%3:_(s32) = G_SHL %0, %1
%4:_(s32) = G_ASHR %3, %2
$w0 = COPY %4(s32)
...
---
# The logical-shift variant: shl 5 / lshr 16 folds to the unsigned G_UBFX
# with pos = 11 and width = 16.
name: apply_lshr_shl_to_ubfx
legalized: true
body: |
bb.0.entry:
; CHECK-LABEL: name: apply_lshr_shl_to_ubfx
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
; CHECK: $w0 = COPY [[UBFX]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 5
%2:_(s32) = G_CONSTANT i32 16
%3:_(s32) = G_SHL %0, %1
%4:_(s32) = G_LSHR %3, %2
$w0 = COPY %4(s32)
...
---
# Minimal-width unsigned case: shl 30 / lshr 31 folds to G_UBFX with
# pos = 1 and width = 1.
name: apply_lshr_shl_to_ubfx_lower_bound
legalized: true
body: |
bb.0.entry:
; CHECK-LABEL: name: apply_lshr_shl_to_ubfx_lower_bound
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C]](s32), [[C]]
; CHECK: $w0 = COPY [[UBFX]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 30
%2:_(s32) = G_CONSTANT i32 31
%3:_(s32) = G_SHL %0, %1
%4:_(s32) = G_LSHR %3, %2
$w0 = COPY %4(s32)
...
---
# Maximal-position unsigned case: shl 1 / lshr 31 folds to G_UBFX with
# pos = 30 and width = 1.
name: apply_lshr_shl_to_ubfx_upper_bound
legalized: true
body: |
bb.0.entry:
; CHECK-LABEL: name: apply_lshr_shl_to_ubfx_upper_bound
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
; CHECK: $w0 = COPY [[UBFX]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 1
%2:_(s32) = G_CONSTANT i32 31
%3:_(s32) = G_SHL %0, %1
%4:_(s32) = G_LSHR %3, %2
$w0 = COPY %4(s32)
...
---
# Negative case: the shr amount (2) is smaller than the shl amount (5), so
# the would-be extract position is negative; the shifts must be kept as-is.
name: dont_apply_pos_out_of_bounds
legalized: true
body: |
bb.0.entry:
; CHECK-LABEL: name: dont_apply_pos_out_of_bounds
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
; CHECK: $w0 = COPY [[ASHR]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 5
%2:_(s32) = G_CONSTANT i32 2
%3:_(s32) = G_SHL %0, %1
%4:_(s32) = G_ASHR %3, %2
$w0 = COPY %4(s32)
...
---
# Negative case: the shl amount comes from a register (COPY $w1), not a
# constant, so the pattern does not match and no bitfield extract is formed.
name: dont_apply_no_constant
legalized: true
body: |
bb.0.entry:
; CHECK-LABEL: name: dont_apply_no_constant
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
; CHECK: $w0 = COPY [[ASHR]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s32) = G_CONSTANT i32 2
%3:_(s32) = G_SHL %0, %1
%4:_(s32) = G_ASHR %3, %2
$w0 = COPY %4(s32)
...
---
# Negative case: the G_SHL result also feeds the G_MUL, so it has more than
# one use; the combine requires a single (non-debug) use of the shl and must
# leave both shifts in place.
name: dont_apply_more_than_one_use
legalized: true
body: |
bb.0.entry:
; CHECK-LABEL: name: dont_apply_more_than_one_use
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SHL]], [[ASHR]]
; CHECK: $w0 = COPY [[MUL]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 5
%2:_(s32) = G_CONSTANT i32 16
%3:_(s32) = G_SHL %0, %1
%4:_(s32) = G_ASHR %3, %2
%5:_(s32) = G_MUL %3, %4
$w0 = COPY %5(s32)
...

View File

@ -156,8 +156,7 @@ define i64 @test_rev_x_srl32_shift(i64 %a) {
;
; GISEL-LABEL: test_rev_x_srl32_shift:
; GISEL: // %bb.0: // %entry
; GISEL-NEXT: lsl x8, x0, #33
; GISEL-NEXT: lsr x8, x8, #35
; GISEL-NEXT: ubfx x8, x0, #2, #29
; GISEL-NEXT: rev x8, x8
; GISEL-NEXT: lsr x0, x8, #32
; GISEL-NEXT: ret

View File

@ -399,8 +399,7 @@ define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_lshl_b32 s0, s0, 31
; GFX6-NEXT: s_lshr_b32 s0, s0, 31
; GFX6-NEXT: s_bfe_u32 s0, s0, 0x10000
; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
@ -1059,8 +1058,7 @@ define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_lshl_b32 s0, s0, 9
; GFX6-NEXT: s_lshr_b32 s0, s0, 11
; GFX6-NEXT: s_bfe_u32 s0, s0, 0x150002
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm