[GISel] Eliminate redundant bitmasking

This was a GISel vs SDAG regression that showed up at -Os on arm64 in:
  SingleSource/Benchmarks/Adobe-C++/simple_types_constant_folding.test

https://llvm.godbolt.org/z/aecjodsjG

Differential revision: https://reviews.llvm.org/D103334

commit a2ab765029 (parent 7b06120882)
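For context, the combine added by this patch rewrites and(and(x, C1), C2) into a single and when the two masks share bits, and into the constant 0 when they are disjoint. Below is a minimal standalone C++ sketch of that identity (illustrative only; the function names are invented for this example, and the constants mirror the MIR tests further down):

#include <cassert>
#include <cstdint>

// Nested masking as GlobalISel sees it before the combine:
// ((x & C1) & C2). With C1 = -128 (0xFFFFFF80) and C2 = 255 (0xFF),
// the masks overlap only in bit 7, so the pair folds to (x & 0x80).
uint32_t nestedMasks(uint32_t x) { return (x & 0xFFFFFF80u) & 0xFFu; }
uint32_t foldedMask(uint32_t x) { return x & 0x80u; } // 0xFFFFFF80 & 0xFF

// With disjoint masks (e.g. C1 = 1, C2 = 2) the result is always 0,
// which is the second arm of the combine.
uint32_t disjointMasks(uint32_t x) { return (x & 1u) & 2u; }

int main() {
  for (uint32_t x : {0u, 1u, 0x80u, 0xFFu, 0xDEADBEEFu}) {
    assert(nestedMasks(x) == foldedMask(x));
    assert(disjointMasks(x) == 0u);
  }
  return 0;
}

SelectionDAG already folded this pattern; teaching GlobalISel to do the same closes the -Os code-size gap noted above.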
@@ -435,6 +435,11 @@ public:
                               std::tuple<Register, int64_t> &MatchInfo);
   bool applyAshShlToSextInreg(MachineInstr &MI,
                               std::tuple<Register, int64_t> &MatchInfo);
+
+  /// Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
+  bool matchOverlappingAnd(MachineInstr &MI,
+                           std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// \return true if \p MI is a G_AND instruction whose operands are x and y
   /// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.)
   ///
@@ -402,6 +402,15 @@ def shl_ashr_to_sext_inreg : GICombineRule<
          [{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]),
   (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}])
 >;
+
+// Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
+def overlapping_and: GICombineRule <
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_AND):$root,
+         [{ return Helper.matchOverlappingAnd(*${root}, ${info}); }]),
+  (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])
+>;
+
 // Fold (x & y) -> x or (x & y) -> y when (x & y) is known to equal x or equal y.
 def redundant_and: GICombineRule <
   (defs root:$root, register_matchinfo:$matchinfo),
@@ -643,7 +652,8 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         i2p_to_p2i, anyext_trunc_fold,
                                         fneg_fneg_fold, right_identity_one]>;
 
-def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p]>;
+def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
+                                     overlapping_and]>;
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
@@ -2997,6 +2997,33 @@ bool CombinerHelper::applyAshShlToSextInreg(
   return true;
 }
 
+/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
+bool CombinerHelper::matchOverlappingAnd(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+  Register Dst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(Dst);
+
+  Register R;
+  int64_t C1;
+  int64_t C2;
+  if (!mi_match(
+          Dst, MRI,
+          m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (C1 & C2) {
+      B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
+      return;
+    }
+    auto Zero = B.buildConstant(Ty, 0);
+    replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
+  };
+  return true;
+}
+
 bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
                                        Register &Replacement) {
   // Given
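The match side only records a build callback in MatchInfo; the rule's apply side hands it to the pre-existing CombinerHelper::applyBuildFn. The body below is a from-memory sketch of that helper, not part of this diff, so treat the exact code as an assumption; the point is that the callback runs at the matched instruction's position and the original G_AND is then erased:

// Sketch (assumption, not in this diff): the generic apply half used by
// build_fn_matchinfo rules such as overlapping_and.
void CombinerHelper::applyBuildFn(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI); // emit replacements where MI sits
  MatchInfo(Builder);              // e.g. build G_AND x, (C1 & C2), or 0
  MI.eraseFromParent();            // the matched G_AND is now dead
}

The new MIR test below covers both arms of the fold, the commuted operand orders, and a multi-use case where the inner G_AND must be kept.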
@@ -0,0 +1,121 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -debugify-and-strip-all-safe -mtriple arm64-apple-ios -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="overlapping_and" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+# REQUIRES: asserts
+---
+name:            bitmask_overlap1
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: bitmask_overlap1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK: $w0 = COPY [[AND]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 -128
+    %3:_(s32) = G_CONSTANT i32 255
+    %2:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_AND %2, %3
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            bitmask_overlap2
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: bitmask_overlap2
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK: $w0 = COPY [[AND]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 255
+    %3:_(s32) = G_CONSTANT i32 -128
+    %2:_(s32) = G_AND %1, %0
+    %4:_(s32) = G_AND %2, %3
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            bitmask_overlap3
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: bitmask_overlap3
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK: $w0 = COPY [[AND]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 255
+    %3:_(s32) = G_CONSTANT i32 -128
+    %2:_(s32) = G_AND %1, %0
+    %4:_(s32) = G_AND %3, %2
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            bitmask_overlap4
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: bitmask_overlap4
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK: $w0 = COPY [[AND]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 255
+    %3:_(s32) = G_CONSTANT i32 -128
+    %2:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_AND %3, %2
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            bitmask_no_overlap
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: bitmask_no_overlap
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: $w0 = COPY [[C]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 1
+    %3:_(s32) = G_CONSTANT i32 2
+    %2:_(s32) = G_AND %0, %1
+    %4:_(s32) = G_AND %2, %3
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            bitmask_overlap_extrause
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: bitmask_overlap_extrause
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+    ; CHECK: G_STORE [[AND]](s32), [[COPY1]](p0) :: (store 4)
+    ; CHECK: $w0 = COPY [[AND1]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(p0) = COPY $x1
+    %2:_(s32) = G_CONSTANT i32 255
+    %4:_(s32) = G_CONSTANT i32 -128
+    %3:_(s32) = G_AND %0, %2
+    %5:_(s32) = G_AND %3, %4
+    G_STORE %3(s32), %1(p0) :: (store 4)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
@@ -1151,7 +1151,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
 ; SI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
 ; SI-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
-; SI-NEXT:    v_and_b32_e32 v3, s6, v1
+; SI-NEXT:    v_and_b32_e32 v3, s6, v3
 ; SI-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
 ; SI-NEXT:    v_lshlrev_b32_e32 v0, 23, v0
 ; SI-NEXT:    s_mov_b32 s4, 0
@@ -1183,7 +1183,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
 ; VI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
 ; VI-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
-; VI-NEXT:    v_and_b32_e32 v3, s6, v1
+; VI-NEXT:    v_and_b32_e32 v3, s6, v3
 ; VI-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
 ; VI-NEXT:    v_lshlrev_b32_e32 v0, 23, v0
 ; VI-NEXT:    s_mov_b32 s4, 0
@@ -1218,7 +1218,7 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
 ; SI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
 ; SI-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
-; SI-NEXT:    v_and_b32_e32 v3, s4, v1
+; SI-NEXT:    v_and_b32_e32 v3, s4, v3
 ; SI-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
 ; SI-NEXT:    v_lshlrev_b32_e32 v0, 23, v0
 ; SI-NEXT:    s_mov_b32 s4, 0
@@ -1248,7 +1248,7 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
 ; VI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
 ; VI-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
-; VI-NEXT:    v_and_b32_e32 v3, s4, v1
+; VI-NEXT:    v_and_b32_e32 v3, s4, v3
 ; VI-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
 ; VI-NEXT:    v_lshlrev_b32_e32 v0, 23, v0
 ; VI-NEXT:    s_mov_b32 s4, 0
@@ -547,21 +547,18 @@ define amdgpu_ps i32 @s_shl_i32_zext_i16(i16 inreg %x) {
 ;
 ; GFX8-LABEL: s_shl_i32_zext_i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
 ; GFX8-NEXT:    s_and_b32 s0, s0, 0x3fff
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_shl_i32_zext_i16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
 ; GFX9-NEXT:    s_and_b32 s0, s0, 0x3fff
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX9-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: s_shl_i32_zext_i16:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
 ; GFX10-NEXT:    s_and_b32 s0, s0, 0x3fff
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX10-NEXT:    ; return to shader part epilog