[GlobalISel] Fold away G_BUILD_VECTOR with all elements extracted.

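If every element of a G_BUILD_VECTOR is extracted by a constant-index
G_EXTRACT_VECTOR_ELT and the vector has no other users, replace each extract
with the corresponding source register and delete the G_BUILD_VECTOR. This
differs from the existing extract(build_vector) combine because this one
tolerates multiple users of the vector, as long as together they extract every
element.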

Differential Revision: https://reviews.llvm.org/D97890
This commit is contained in:
Amara Emerson 2021-03-03 14:55:03 -08:00
parent e85d798b5b
commit 55e760769b
4 changed files with 229 additions and 1 deletions

View File

@ -495,6 +495,13 @@ public:
/// Matcher half of the extract_vec_elt_build_vec combine rule. \p Reg is the
/// match data that the rule hands on to applyExtractVecEltBuildVec.
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
/// Apply half of the extract_vec_elt_build_vec combine rule: replaces the
/// single def of \p MI with \p Reg, the register chosen by the matcher.
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
/// Match a G_BUILD_VECTOR \p MI whose non-debug users are all
/// G_EXTRACT_VECTOR_ELT instructions with constant, in-range indices that
/// together cover every element of the vector. One (source register,
/// extract instruction) pair is appended to \p MatchInfo per extract visited;
/// pairs may already have been appended when the match fails.
bool matchExtractAllEltsFromBuildVector(
MachineInstr &MI,
SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
/// For each (source register, extract instruction) pair in \p MatchInfo,
/// replace the extract's result with the source register and erase the
/// extract; finally erase the G_BUILD_VECTOR \p MI itself.
void applyExtractAllEltsFromBuildVector(
MachineInstr &MI,
SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
/// Try to transform \p MI by applying the combine functions declared above.
/// Returns true if changed.
bool tryCombine(MachineInstr &MI);

View File

@ -576,7 +576,18 @@ def extract_vec_elt_build_vec : GICombineRule<
[{ return Helper.matchExtractVecEltBuildVec(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyExtractVecEltBuildVec(*${root}, ${matchinfo}); }])>;
def extract_vec_elt_combines : GICombineGroup<[extract_vec_elt_build_vec]>;
// Fold away full elt extracts from a build_vector.
// Match data for this fold: a list of (Register, MachineInstr*) pairs that the
// C++ matcher fills in and the apply step consumes.
def extract_all_elts_from_build_vector_matchinfo :
GIDefMatchData<"SmallVector<std::pair<Register, MachineInstr*>>">;
// Rooted at a G_BUILD_VECTOR. Matching and rewriting are delegated to
// CombinerHelper::{match,apply}ExtractAllEltsFromBuildVector, with $matchinfo
// carrying the matcher's results to the apply step.
def extract_all_elts_from_build_vector : GICombineRule<
(defs root:$root, extract_all_elts_from_build_vector_matchinfo:$matchinfo),
(match (wip_match_opcode G_BUILD_VECTOR):$root,
[{ return Helper.matchExtractAllEltsFromBuildVector(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyExtractAllEltsFromBuildVector(*${root}, ${matchinfo}); }])>;
// Group of the vector-element-extract combines: the single-extract rule and
// the all-elements-extracted rule.
def extract_vec_elt_combines : GICombineGroup<[
extract_vec_elt_build_vec,
extract_all_elts_from_build_vector]>;
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@ -3719,6 +3720,61 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
replaceSingleDefInstWithReg(MI, Reg);
}
/// Match a G_BUILD_VECTOR whose every element is read back through a
/// G_EXTRACT_VECTOR_ELT with a constant index.
///
/// \param MI          The G_BUILD_VECTOR being inspected.
/// \param SrcDstPairs Receives one (build_vector source register, extract
///                    instruction) pair per extract visited. Entries may have
///                    been appended even if the function returns false.
/// \returns true iff all non-debug users of the vector are constant-index,
///          in-range extracts and, between them, every element is extracted.
bool CombinerHelper::matchExtractAllEltsFromBuildVector(
    MachineInstr &MI,
    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Look for build_vectors that have each of their source elements extracted
  // again with G_EXTRACT_VECTOR_ELT. This can happen when transforms such as
  // masked load scalarization run late in the pipeline. A similar combine
  // already exists that starts from the extract, but it gives up when the
  // build_vector has multiple uses, which is exactly the situation here.
  // Rooting the combine at the build_vector is more natural than hunting for
  // sibling extracts.
  //
  // Example:
  //   %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
  //   %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
  //   %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
  //   %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
  //   %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
  // ==>
  //   replace ext{1,2,3,4} with %s{1,2,3,4}
  const Register VecReg = MI.getOperand(0).getReg();
  const unsigned NumElts = MRI.getType(VecReg).getNumElements();

  // One bit per vector lane; set once some extract reads that lane.
  SmallBitVector SeenLanes(NumElts);

  for (auto UseIt = MRI.use_instr_nodbg_begin(VecReg),
            UseEnd = MRI.use_instr_nodbg_end();
       UseIt != UseEnd; ++UseIt) {
    MachineInstr &UseMI = *UseIt;

    // Any user other than an extract keeps the vector alive.
    if (UseMI.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
      return false;

    // The lane index has to be a known constant.
    auto LaneCst = getConstantVRegVal(UseMI.getOperand(2).getReg(), MRI);
    if (!LaneCst)
      return false;

    const unsigned Lane = LaneCst->getZExtValue();
    if (Lane >= NumElts)
      return false; // Out of range.

    SeenLanes.set(Lane);
    // Operand 0 of the build_vector is its def, so lane N is operand N + 1.
    SrcDstPairs.emplace_back(MI.getOperand(Lane + 1).getReg(), &UseMI);
  }

  // Only a match when no lane was left unextracted.
  return SeenLanes.all();
}
/// Rewrite step for the all-elements-extracted combine.
///
/// \param MI          The matched G_BUILD_VECTOR; erased at the end.
/// \param SrcDstPairs (source register, extract instruction) pairs recorded by
///                    the matcher. Each extract's result is replaced by its
///                    paired source register and the extract is erased.
void CombinerHelper::applyExtractAllEltsFromBuildVector(
    MachineInstr &MI,
    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  for (const std::pair<Register, MachineInstr *> &SrcAndExtract : SrcDstPairs) {
    MachineInstr *Extract = SrcAndExtract.second;
    // Forward the build_vector source to every user of the extract's result
    // before removing the extract itself.
    replaceRegWith(MRI, Extract->getOperand(0).getReg(), SrcAndExtract.first);
    Extract->eraseFromParent();
  }
  // With all extracts erased, the build_vector is no longer needed.
  MI.eraseFromParent();
}
bool CombinerHelper::applyLoadOrCombine(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
Builder.setInstrAndDebugLoc(MI);

View File

@ -0,0 +1,154 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -o - -march=aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
---
# Positive test: both lanes of %bv are extracted with constant indices and %bv
# has no other user, so the extracts fold to %arg1/%arg2 and %bv disappears.
name: full_extracts_from_build_vector
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$x1' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1
; CHECK-LABEL: name: full_extracts_from_build_vector
; CHECK: liveins: $x0, $x1
; CHECK: %arg1:_(s64) = COPY $x0
; CHECK: %arg2:_(s64) = COPY $x1
; CHECK: $x0 = COPY %arg1(s64)
; CHECK: $x1 = COPY %arg2(s64)
; CHECK: RET_ReallyLR implicit $x0
%arg1:_(s64) = COPY $x0
%arg2:_(s64) = COPY $x1
%zero:_(s32) = G_CONSTANT i32 0
%one:_(s32) = G_CONSTANT i32 1
%bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
%extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
%extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
$x0 = COPY %extract(s64)
$x1 = COPY %extract2(s64)
RET_ReallyLR implicit $x0
...
---
# Negative test: every lane is extracted, but %bv is also copied to $q0. The
# non-extract user means the output must be unchanged.
name: full_extracts_from_build_vector_other_use
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$x1' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1
; CHECK-LABEL: name: full_extracts_from_build_vector_other_use
; CHECK: liveins: $x0, $x1
; CHECK: %arg1:_(s64) = COPY $x0
; CHECK: %arg2:_(s64) = COPY $x1
; CHECK: %zero:_(s32) = G_CONSTANT i32 0
; CHECK: %one:_(s32) = G_CONSTANT i32 1
; CHECK: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
; CHECK: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
; CHECK: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
; CHECK: $x0 = COPY %extract(s64)
; CHECK: $x1 = COPY %extract2(s64)
; CHECK: $q0 = COPY %bv(<2 x s64>)
; CHECK: RET_ReallyLR implicit $x0
%arg1:_(s64) = COPY $x0
%arg2:_(s64) = COPY $x1
%zero:_(s32) = G_CONSTANT i32 0
%one:_(s32) = G_CONSTANT i32 1
%bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
%extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
%extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
$x0 = COPY %extract(s64)
$x1 = COPY %extract2(s64)
$q0 = COPY %bv(<2 x s64>)
RET_ReallyLR implicit $x0
...
---
# Positive test: lane 1 is extracted twice (%extract2 and %extract3). Every
# lane is still covered, so all three extracts fold to the source registers.
# NOTE(review): despite "partial" in the name, the extracts here are
# exhaustive; the test really covers multiple extracts of the same element.
name: partial_extracts_from_build_vector_multiple_per_elt
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$x1' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1
; CHECK-LABEL: name: partial_extracts_from_build_vector_multiple_per_elt
; CHECK: liveins: $x0, $x1
; CHECK: %arg1:_(s64) = COPY $x0
; CHECK: %arg2:_(s64) = COPY $x1
; CHECK: $x0 = COPY %arg1(s64)
; CHECK: $x1 = COPY %arg2(s64)
; CHECK: $x2 = COPY %arg2(s64)
; CHECK: RET_ReallyLR implicit $x0
%arg1:_(s64) = COPY $x0
%arg2:_(s64) = COPY $x1
%zero:_(s32) = G_CONSTANT i32 0
%one:_(s32) = G_CONSTANT i32 1
%bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
%extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
%extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
%extract3:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
$x0 = COPY %extract(s64)
$x1 = COPY %extract2(s64)
$x2 = COPY %extract3(s64)
RET_ReallyLR implicit $x0
...
---
# Negative test: %extract2 reads index 2 of a <2 x s64> vector, which is out of
# range, so the build_vector and both extracts must be left in place. %one is
# unused in the input and does not appear in the CHECK lines.
name: full_extracts_from_build_vector_idx_out_of_range
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$x1' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1
; CHECK-LABEL: name: full_extracts_from_build_vector_idx_out_of_range
; CHECK: liveins: $x0, $x1
; CHECK: %arg1:_(s64) = COPY $x0
; CHECK: %arg2:_(s64) = COPY $x1
; CHECK: %zero:_(s32) = G_CONSTANT i32 0
; CHECK: %two:_(s32) = G_CONSTANT i32 2
; CHECK: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
; CHECK: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
; CHECK: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %two(s32)
; CHECK: $x0 = COPY %extract(s64)
; CHECK: $x1 = COPY %extract2(s64)
; CHECK: RET_ReallyLR implicit $x0
%arg1:_(s64) = COPY $x0
%arg2:_(s64) = COPY $x1
%zero:_(s32) = G_CONSTANT i32 0
%one:_(s32) = G_CONSTANT i32 1
%two:_(s32) = G_CONSTANT i32 2
%bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
%extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
%extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %two(s32)
$x0 = COPY %extract(s64)
$x1 = COPY %extract2(s64)
RET_ReallyLR implicit $x0
...