forked from OSchip/llvm-project
[GlobalISel] Combine shr(shl x, c1), c2 to G_SBFX/G_UBFX
Reviewed By: foad Differential Revision: https://reviews.llvm.org/D107330
This commit is contained in:
parent
a82c7476a7
commit
cc947e29ea
|
@ -541,6 +541,10 @@ public:
|
|||
bool matchBitfieldExtractFromAnd(
|
||||
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
|
||||
|
||||
/// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
/// where pos = k - n and width = (scalar size of the result type) - k.
/// G_ASHR produces G_SBFX and G_LSHR produces G_UBFX. The match requires
/// constant n and k with 0 <= n <= k < size and a shl with a single non-debug
/// use; an arithmetic shift with n == k is rejected so that the G_SEXT_INREG
/// combine can handle it. On success \p MatchInfo is set to the function that
/// builds the replacement instructions.
|
||||
bool matchBitfieldExtractFromShr(
|
||||
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
|
||||
|
||||
/// Reassociate pointer calculations with G_ADD involved, to allow better
|
||||
/// addressing mode usage.
|
||||
bool matchReassocPtrAdd(MachineInstr &MI,
|
||||
|
|
|
@ -652,8 +652,15 @@ def bitfield_extract_from_sext_inreg : GICombineRule<
|
|||
[{ return Helper.matchBitfieldExtractFromSExtInReg(*${root}, ${info}); }]),
|
||||
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
|
||||
|
||||
// Rooted at G_ASHR or G_LSHR: the match step defers to
// Helper.matchBitfieldExtractFromShr, which fills $info with a build function
// when the shift can become a bitfield extract; the apply step then runs that
// build function through Helper.applyBuildFn.
def bitfield_extract_from_shr : GICombineRule<
|
||||
(defs root:$root, build_fn_matchinfo:$info),
|
||||
(match (wip_match_opcode G_ASHR, G_LSHR):$root,
|
||||
[{ return Helper.matchBitfieldExtractFromShr(*${root}, ${info}); }]),
|
||||
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
|
||||
|
||||
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
|
||||
bitfield_extract_from_and]>;
|
||||
bitfield_extract_from_and,
|
||||
bitfield_extract_from_shr]>;
|
||||
def reassoc_ptradd : GICombineRule<
|
||||
(defs root:$root, build_fn_matchinfo:$matchinfo),
|
||||
(match (wip_match_opcode G_PTR_ADD):$root,
|
||||
|
|
|
@ -4128,6 +4128,55 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Recognise `shr (shl x, c1), c2` and describe its replacement by a bitfield
/// extract of x.
///
/// An arithmetic right shift (G_ASHR) maps to G_SBFX and a logical one
/// (G_LSHR) to G_UBFX. The extract starts at bit `c2 - c1` and is
/// `size - c2` bits wide, where `size` is the scalar bit width of the result.
///
/// \param MI the G_ASHR or G_LSHR instruction under consideration.
/// \param MatchInfo set, on success, to a callback that emits the width and
///        position constants followed by the extract into MI's result register.
/// \returns true when the pattern applies and \p MatchInfo has been populated.
bool CombinerHelper::matchBitfieldExtractFromShr(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  const unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);

  // Signed extract for an arithmetic shift, unsigned extract otherwise.
  const bool IsArithShift = Opcode == TargetOpcode::G_ASHR;
  unsigned ExtractOpc = TargetOpcode::G_UBFX;
  if (IsArithShift)
    ExtractOpc = TargetOpcode::G_SBFX;

  // Bail out unless the extract is legal (or custom) for the result type
  // paired with the target's preferred shift-amount type.
  const Register ResultReg = MI.getOperand(0).getReg();
  LLT ResultTy = MRI.getType(ResultReg);
  LLT AmtTy = getTargetLowering().getPreferredShiftAmountTy(ResultTy);
  const bool CanUseExtract =
      LI && LI->isLegalOrCustom({ExtractOpc, {ResultTy, AmtTy}});
  if (!CanUseExtract)
    return false;

  // The shifted value must come from a single-use G_SHL, and both shift
  // amounts must be constants.
  Register ShiftedSrc;
  int64_t LeftAmt;
  int64_t RightAmt;
  const bool IsShrOfShl = mi_match(
      ResultReg, MRI,
      m_BinOp(Opcode,
              m_OneNonDBGUse(m_GShl(m_Reg(ShiftedSrc), m_ICst(LeftAmt))),
              m_ICst(RightAmt)));
  if (!IsShrOfShl)
    return false;

  // The shift amounts have to describe a field that lies inside the value.
  const unsigned Size = ResultTy.getScalarSizeInBits();
  if (LeftAmt < 0)
    return false;
  if (LeftAmt > RightAmt)
    return false;
  if (RightAmt >= Size)
    return false;

  // Equal amounts under an arithmetic shift are left for the G_SEXT_INREG
  // combine.
  if (IsArithShift && LeftAmt == RightAmt)
    return false;

  const int64_t ExtractPos = RightAmt - LeftAmt;
  const int64_t ExtractWidth = Size - RightAmt;

  // Capture by copy: the callback runs after this function has returned.
  MatchInfo = [=](MachineIRBuilder &B) {
    auto WidthCst = B.buildConstant(AmtTy, ExtractWidth);
    auto PosCst = B.buildConstant(AmtTy, ExtractPos);
    B.buildInstr(ExtractOpc, {ResultReg}, {ShiftedSrc, PosCst, WidthCst});
  };
  return true;
}
|
||||
|
||||
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
|
||||
MachineInstr &PtrAdd) {
|
||||
assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
|
||||
|
|
|
@ -0,0 +1,178 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
# Check that we can fold a G_ASHR/G_LSHR fed by a G_SHL into a G_SBFX/G_UBFX.
|
||||
|
||||
# s32: shl by 5 followed by ashr by 16 becomes G_SBFX with pos = 16 - 5 = 11
# and width = 32 - 16 = 16.
---
|
||||
name: apply_ashr_shl_to_sbfx
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: apply_ashr_shl_to_sbfx
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
|
||||
; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C1]](s32), [[C]]
|
||||
; CHECK: $w0 = COPY [[SBFX]](s32)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = G_CONSTANT i32 5
|
||||
%2:_(s32) = G_CONSTANT i32 16
|
||||
%3:_(s32) = G_SHL %0, %1
|
||||
%4:_(s32) = G_ASHR %3, %2
|
||||
$w0 = COPY %4(s32)
|
||||
...
|
||||
|
||||
# s32: shl by 30 followed by ashr by 31 becomes G_SBFX with pos = 31 - 30 = 1
# and width = 32 - 31 = 1; both operands share the single constant 1.
---
|
||||
name: apply_ashr_shl_to_sbfx_lower_bound
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: apply_ashr_shl_to_sbfx_lower_bound
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C]](s32), [[C]]
|
||||
; CHECK: $w0 = COPY [[SBFX]](s32)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = G_CONSTANT i32 30
|
||||
%2:_(s32) = G_CONSTANT i32 31
|
||||
%3:_(s32) = G_SHL %0, %1
|
||||
%4:_(s32) = G_ASHR %3, %2
|
||||
$w0 = COPY %4(s32)
|
||||
...
|
||||
|
||||
# s32: shl by 1 followed by ashr by 31 becomes G_SBFX with pos = 31 - 1 = 30
# and width = 32 - 31 = 1.
---
|
||||
name: apply_ashr_shl_to_sbfx_upper_bound
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: apply_ashr_shl_to_sbfx_upper_bound
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
|
||||
; CHECK: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C1]](s32), [[C]]
|
||||
; CHECK: $w0 = COPY [[SBFX]](s32)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = G_CONSTANT i32 1
|
||||
%2:_(s32) = G_CONSTANT i32 31
|
||||
%3:_(s32) = G_SHL %0, %1
|
||||
%4:_(s32) = G_ASHR %3, %2
|
||||
$w0 = COPY %4(s32)
|
||||
...
|
||||
|
||||
# s32: shl by 5 followed by lshr by 16 becomes G_UBFX with pos = 16 - 5 = 11
# and width = 32 - 16 = 16.
---
|
||||
name: apply_lshr_shl_to_ubfx
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: apply_lshr_shl_to_ubfx
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
|
||||
; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
|
||||
; CHECK: $w0 = COPY [[UBFX]](s32)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = G_CONSTANT i32 5
|
||||
%2:_(s32) = G_CONSTANT i32 16
|
||||
%3:_(s32) = G_SHL %0, %1
|
||||
%4:_(s32) = G_LSHR %3, %2
|
||||
$w0 = COPY %4(s32)
|
||||
...
|
||||
|
||||
# s32: shl by 30 followed by lshr by 31 becomes G_UBFX with pos = 31 - 30 = 1
# and width = 32 - 31 = 1; both operands share the single constant 1.
---
|
||||
name: apply_lshr_shl_to_ubfx_lower_bound
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: apply_lshr_shl_to_ubfx_lower_bound
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C]](s32), [[C]]
|
||||
; CHECK: $w0 = COPY [[UBFX]](s32)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = G_CONSTANT i32 30
|
||||
%2:_(s32) = G_CONSTANT i32 31
|
||||
%3:_(s32) = G_SHL %0, %1
|
||||
%4:_(s32) = G_LSHR %3, %2
|
||||
$w0 = COPY %4(s32)
|
||||
...
|
||||
|
||||
# s32: shl by 1 followed by lshr by 31 becomes G_UBFX with pos = 31 - 1 = 30
# and width = 32 - 31 = 1.
---
|
||||
name: apply_lshr_shl_to_ubfx_upper_bound
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: apply_lshr_shl_to_ubfx_upper_bound
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
|
||||
; CHECK: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
|
||||
; CHECK: $w0 = COPY [[UBFX]](s32)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = G_CONSTANT i32 1
|
||||
%2:_(s32) = G_CONSTANT i32 31
|
||||
%3:_(s32) = G_SHL %0, %1
|
||||
%4:_(s32) = G_LSHR %3, %2
|
||||
$w0 = COPY %4(s32)
|
||||
...
|
||||
|
||||
# Negative case: the left shift (5) is larger than the right shift (2), so the
# extract position 2 - 5 would be negative. The G_SHL/G_ASHR pair must be left
# unchanged.
---
|
||||
name: dont_apply_pos_out_of_bounds
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: dont_apply_pos_out_of_bounds
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
|
||||
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
|
||||
; CHECK: $w0 = COPY [[ASHR]](s32)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = G_CONSTANT i32 5
|
||||
%2:_(s32) = G_CONSTANT i32 2
|
||||
%3:_(s32) = G_SHL %0, %1
|
||||
%4:_(s32) = G_ASHR %3, %2
|
||||
$w0 = COPY %4(s32)
|
||||
...
|
||||
|
||||
# Negative case: the G_SHL amount comes from a register copy ($w1) rather than
# a G_CONSTANT, so the G_SHL/G_ASHR pair must be left unchanged.
---
|
||||
name: dont_apply_no_constant
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: dont_apply_no_constant
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
|
||||
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
|
||||
; CHECK: $w0 = COPY [[ASHR]](s32)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = COPY $w1
|
||||
%2:_(s32) = G_CONSTANT i32 2
|
||||
%3:_(s32) = G_SHL %0, %1
|
||||
%4:_(s32) = G_ASHR %3, %2
|
||||
$w0 = COPY %4(s32)
|
||||
...
|
||||
|
||||
# Negative case: the G_SHL result feeds both the G_ASHR and the G_MUL, so it
# has more than one use and the G_SHL/G_ASHR pair must be left unchanged.
---
|
||||
name: dont_apply_more_than_one_use
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: dont_apply_more_than_one_use
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
|
||||
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
|
||||
; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SHL]], [[ASHR]]
|
||||
; CHECK: $w0 = COPY [[MUL]](s32)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = G_CONSTANT i32 5
|
||||
%2:_(s32) = G_CONSTANT i32 16
|
||||
%3:_(s32) = G_SHL %0, %1
|
||||
%4:_(s32) = G_ASHR %3, %2
|
||||
%5:_(s32) = G_MUL %3, %4
|
||||
$w0 = COPY %5(s32)
|
||||
...
|
|
@ -156,8 +156,7 @@ define i64 @test_rev_x_srl32_shift(i64 %a) {
|
|||
;
|
||||
; GISEL-LABEL: test_rev_x_srl32_shift:
|
||||
; GISEL: // %bb.0: // %entry
|
||||
; GISEL-NEXT: lsl x8, x0, #33
|
||||
; GISEL-NEXT: lsr x8, x8, #35
|
||||
; GISEL-NEXT: ubfx x8, x0, #2, #29
|
||||
; GISEL-NEXT: rev x8, x8
|
||||
; GISEL-NEXT: lsr x0, x8, #32
|
||||
; GISEL-NEXT: ret
|
||||
|
|
|
@ -399,8 +399,7 @@ define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(
|
|||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_lshl_b32 s0, s0, 31
|
||||
; GFX6-NEXT: s_lshr_b32 s0, s0, 31
|
||||
; GFX6-NEXT: s_bfe_u32 s0, s0, 0x10000
|
||||
; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1001f
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
|
@ -1059,8 +1058,7 @@ define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
|
|||
; GFX6-NEXT: s_mov_b32 s6, -1
|
||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_lshl_b32 s0, s0, 9
|
||||
; GFX6-NEXT: s_lshr_b32 s0, s0, 11
|
||||
; GFX6-NEXT: s_bfe_u32 s0, s0, 0x150002
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
|
|
Loading…
Reference in New Issue