forked from OSchip/llvm-project
[GlobalISel] Fold away G_BUILD_VECTOR with all elements extracted.
If every element is extracted from a G_BUILD_VECTOR, pass through the source registers. This differs from the extract(build_vector) combine in that it tolerates multiple users of the G_BUILD_VECTOR, as long as those users are all extracts that together cover every element. Differential Revision: https://reviews.llvm.org/D97890
This commit is contained in:
parent
e85d798b5b
commit
55e760769b
|
@ -495,6 +495,13 @@ public:
|
|||
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
|
||||
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
|
||||
|
||||
/// Match a G_BUILD_VECTOR \p MI whose non-debug users are all
/// G_EXTRACT_VECTOR_ELT instructions with constant, in-range indices that
/// together extract every element of the vector.
///
/// \p MatchInfo receives one (build_vector source register, extract
/// instruction) pair per user. Pairs may already have been appended when the
/// match fails, so the contents are only meaningful if this returns true.
bool matchExtractAllEltsFromBuildVector(
|
||||
MachineInstr &MI,
|
||||
SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
|
||||
/// Apply the fold matched by matchExtractAllEltsFromBuildVector: for each
/// (source register, extract instruction) pair in \p MatchInfo, replace the
/// extract's result with the source register and erase the extract, then
/// erase the G_BUILD_VECTOR \p MI itself.
void applyExtractAllEltsFromBuildVector(
|
||||
MachineInstr &MI,
|
||||
SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
|
||||
|
||||
/// Try to transform \p MI by using all of the above
|
||||
/// combine functions. Returns true if changed.
|
||||
bool tryCombine(MachineInstr &MI);
|
||||
|
|
|
@ -576,7 +576,18 @@ def extract_vec_elt_build_vec : GICombineRule<
|
|||
[{ return Helper.matchExtractVecEltBuildVec(*${root}, ${matchinfo}); }]),
|
||||
(apply [{ Helper.applyExtractVecEltBuildVec(*${root}, ${matchinfo}); }])>;
|
||||
|
||||
def extract_vec_elt_combines : GICombineGroup<[extract_vec_elt_build_vec]>;
|
||||
// Fold away a G_BUILD_VECTOR when every one of its elements is extracted.
// The rule is rooted at the G_BUILD_VECTOR; its match data carries
// (source register, extract instruction) pairs from the matcher to the
// apply step.
|
||||
def extract_all_elts_from_build_vector_matchinfo :
|
||||
GIDefMatchData<"SmallVector<std::pair<Register, MachineInstr*>>">;
|
||||
def extract_all_elts_from_build_vector : GICombineRule<
|
||||
(defs root:$root, extract_all_elts_from_build_vector_matchinfo:$matchinfo),
|
||||
(match (wip_match_opcode G_BUILD_VECTOR):$root,
|
||||
[{ return Helper.matchExtractAllEltsFromBuildVector(*${root}, ${matchinfo}); }]),
|
||||
(apply [{ Helper.applyExtractAllEltsFromBuildVector(*${root}, ${matchinfo}); }])>;
|
||||
|
||||
// Group of the combines that fold vector element extracts of a build_vector.
def extract_vec_elt_combines : GICombineGroup<[
|
||||
extract_vec_elt_build_vec,
|
||||
extract_all_elts_from_build_vector]>;
|
||||
|
||||
// FIXME: These should use the custom predicate feature once it lands.
|
||||
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallBitVector.h"
|
||||
#include "llvm/CodeGen/GlobalISel/Combiner.h"
|
||||
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
|
||||
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
|
||||
|
@ -3719,6 +3720,61 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
|
|||
replaceSingleDefInstWithReg(MI, Reg);
|
||||
}
|
||||
|
||||
/// Match a G_BUILD_VECTOR whose non-debug users are all G_EXTRACT_VECTOR_ELTs
/// with constant, in-range indices that together read every element.
///
/// \p SrcDstPairs receives one (build_vector source register, extract
/// instruction) pair per user. Pairs may already have been appended when the
/// match fails, so the contents are only meaningful if this returns true.
bool CombinerHelper::matchExtractAllEltsFromBuildVector(
    MachineInstr &MI,
    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // This combine tries to find build_vector's which have every source element
  // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
  // the masked load scalarization is run late in the pipeline. There's already
  // a combine for a similar pattern starting from the extract, but that
  // doesn't attempt to do it if there are multiple uses of the build_vector,
  // which in this case is true. Starting the combine from the build_vector
  // feels more natural than trying to find sibling nodes of extracts.
  // E.g.
  //  %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
  //  %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
  //  %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
  //  %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
  //  %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
  //  ==>
  //  replace ext{1,2,3,4} with %s{1,2,3,4}

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  unsigned NumElts = DstTy.getNumElements();

  // One bit per vector element; a bit is set once some user extracts it.
  SmallBitVector ExtractedElts(NumElts);
  for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
    // Any user that is not an extract still needs the whole vector.
    if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
      return false;
    auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
    if (!Cst)
      return false;
    // Range-check on the full-width constant before narrowing it. Converting
    // to unsigned first would wrap large indices (e.g. 1 << 32 becomes 0) and
    // accept them as in range.
    if (Cst->uge(NumElts))
      return false; // Out of range.
    unsigned Idx = Cst->getZExtValue();
    ExtractedElts.set(Idx);
    // Operand 0 is the def, so element Idx is source operand Idx + 1.
    SrcDstPairs.emplace_back(MI.getOperand(Idx + 1).getReg(), &II);
  }
  // Match if every element was extracted.
  return ExtractedElts.all();
}
|
||||
|
||||
/// Rewrite every matched extract to use the build_vector's scalar source
/// directly, then delete the extracts and the build_vector.
///
/// \p SrcDstPairs holds (source register, extract instruction) pairs as
/// produced by matchExtractAllEltsFromBuildVector.
void CombinerHelper::applyExtractAllEltsFromBuildVector(
    MachineInstr &MI,
    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  for (const auto &Entry : SrcDstPairs) {
    Register SrcReg = Entry.first;
    MachineInstr *Extract = Entry.second;
    // Forward the scalar source to all users of the extract's result before
    // removing the extract itself.
    Register ExtractDef = Extract->getOperand(0).getReg();
    replaceRegWith(MRI, ExtractDef, SrcReg);
    Extract->eraseFromParent();
  }
  // All extracts are gone, so the build_vector itself can be removed.
  MI.eraseFromParent();
}
|
||||
|
||||
bool CombinerHelper::applyLoadOrCombine(
|
||||
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
|
||||
Builder.setInstrAndDebugLoc(MI);
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -o - -march=aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
|
||||
---
|
||||
name: full_extracts_from_build_vector
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$x0' }
|
||||
- { reg: '$x1' }
|
||||
frameInfo:
|
||||
maxAlignment: 1
|
||||
maxCallFrameSize: 0
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0, $x1
|
||||
|
||||
; CHECK-LABEL: name: full_extracts_from_build_vector
|
||||
; CHECK: liveins: $x0, $x1
|
||||
; CHECK: %arg1:_(s64) = COPY $x0
|
||||
; CHECK: %arg2:_(s64) = COPY $x1
|
||||
; CHECK: $x0 = COPY %arg1(s64)
|
||||
; CHECK: $x1 = COPY %arg2(s64)
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
%arg1:_(s64) = COPY $x0
|
||||
%arg2:_(s64) = COPY $x1
|
||||
%zero:_(s32) = G_CONSTANT i32 0
|
||||
%one:_(s32) = G_CONSTANT i32 1
|
||||
%bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
|
||||
%extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
|
||||
%extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
|
||||
$x0 = COPY %extract(s64)
|
||||
$x1 = COPY %extract2(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: full_extracts_from_build_vector_other_use
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$x0' }
|
||||
- { reg: '$x1' }
|
||||
frameInfo:
|
||||
maxAlignment: 1
|
||||
maxCallFrameSize: 0
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0, $x1
|
||||
|
||||
; CHECK-LABEL: name: full_extracts_from_build_vector_other_use
|
||||
; CHECK: liveins: $x0, $x1
|
||||
; CHECK: %arg1:_(s64) = COPY $x0
|
||||
; CHECK: %arg2:_(s64) = COPY $x1
|
||||
; CHECK: %zero:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK: %one:_(s32) = G_CONSTANT i32 1
|
||||
; CHECK: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
|
||||
; CHECK: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
|
||||
; CHECK: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
|
||||
; CHECK: $x0 = COPY %extract(s64)
|
||||
; CHECK: $x1 = COPY %extract2(s64)
|
||||
; CHECK: $q0 = COPY %bv(<2 x s64>)
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
%arg1:_(s64) = COPY $x0
|
||||
%arg2:_(s64) = COPY $x1
|
||||
%zero:_(s32) = G_CONSTANT i32 0
|
||||
%one:_(s32) = G_CONSTANT i32 1
|
||||
%bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
|
||||
%extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
|
||||
%extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
|
||||
$x0 = COPY %extract(s64)
|
||||
$x1 = COPY %extract2(s64)
|
||||
$q0 = COPY %bv(<2 x s64>)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: partial_extracts_from_build_vector_multiple_per_elt
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$x0' }
|
||||
- { reg: '$x1' }
|
||||
frameInfo:
|
||||
maxAlignment: 1
|
||||
maxCallFrameSize: 0
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0, $x1
|
||||
|
||||
; CHECK-LABEL: name: partial_extracts_from_build_vector_multiple_per_elt
|
||||
; CHECK: liveins: $x0, $x1
|
||||
; CHECK: %arg1:_(s64) = COPY $x0
|
||||
; CHECK: %arg2:_(s64) = COPY $x1
|
||||
; CHECK: $x0 = COPY %arg1(s64)
|
||||
; CHECK: $x1 = COPY %arg2(s64)
|
||||
; CHECK: $x2 = COPY %arg2(s64)
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
%arg1:_(s64) = COPY $x0
|
||||
%arg2:_(s64) = COPY $x1
|
||||
%zero:_(s32) = G_CONSTANT i32 0
|
||||
%one:_(s32) = G_CONSTANT i32 1
|
||||
%bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
|
||||
%extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
|
||||
%extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
|
||||
%extract3:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
|
||||
$x0 = COPY %extract(s64)
|
||||
$x1 = COPY %extract2(s64)
|
||||
$x2 = COPY %extract3(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: full_extracts_from_build_vector_idx_out_of_range
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$x0' }
|
||||
- { reg: '$x1' }
|
||||
frameInfo:
|
||||
maxAlignment: 1
|
||||
maxCallFrameSize: 0
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0, $x1
|
||||
|
||||
; CHECK-LABEL: name: full_extracts_from_build_vector_idx_out_of_range
|
||||
; CHECK: liveins: $x0, $x1
|
||||
; CHECK: %arg1:_(s64) = COPY $x0
|
||||
; CHECK: %arg2:_(s64) = COPY $x1
|
||||
; CHECK: %zero:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK: %two:_(s32) = G_CONSTANT i32 2
|
||||
; CHECK: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
|
||||
; CHECK: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
|
||||
; CHECK: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %two(s32)
|
||||
; CHECK: $x0 = COPY %extract(s64)
|
||||
; CHECK: $x1 = COPY %extract2(s64)
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
%arg1:_(s64) = COPY $x0
|
||||
%arg2:_(s64) = COPY $x1
|
||||
%zero:_(s32) = G_CONSTANT i32 0
|
||||
%one:_(s32) = G_CONSTANT i32 1
|
||||
%two:_(s32) = G_CONSTANT i32 2
|
||||
%bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
|
||||
%extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
|
||||
%extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %two(s32)
|
||||
$x0 = COPY %extract(s64)
|
||||
$x1 = COPY %extract2(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
Loading…
Reference in New Issue