forked from OSchip/llvm-project
LoadStoreVectorizer: Check TTI for vec reg bit width
llvm-svn: 274322
This commit is contained in:
parent
42ad17059a
commit
370e8226c7
|
@ -183,7 +183,7 @@ namespace {
|
||||||
(void) llvm::createInstructionSimplifierPass();
|
(void) llvm::createInstructionSimplifierPass();
|
||||||
(void) llvm::createLoopVectorizePass();
|
(void) llvm::createLoopVectorizePass();
|
||||||
(void) llvm::createSLPVectorizerPass();
|
(void) llvm::createSLPVectorizerPass();
|
||||||
(void) llvm::createLoadStoreVectorizerPass(128);
|
(void) llvm::createLoadStoreVectorizerPass();
|
||||||
(void) llvm::createBBVectorizePass();
|
(void) llvm::createBBVectorizePass();
|
||||||
(void) llvm::createPartiallyInlineLibCallsPass();
|
(void) llvm::createPartiallyInlineLibCallsPass();
|
||||||
(void) llvm::createScalarizerPass();
|
(void) llvm::createScalarizerPass();
|
||||||
|
|
|
@ -144,7 +144,7 @@ bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
|
||||||
// LoadStoreVectorizer - Create vector loads and stores, but leave scalar
|
// LoadStoreVectorizer - Create vector loads and stores, but leave scalar
|
||||||
// operations.
|
// operations.
|
||||||
//
|
//
|
||||||
Pass *createLoadStoreVectorizerPass(unsigned VecRegSize = 128);
|
Pass *createLoadStoreVectorizerPass();
|
||||||
|
|
||||||
} // End llvm namespace
|
} // End llvm namespace
|
||||||
|
|
||||||
|
|
|
@ -51,17 +51,18 @@ class Vectorizer {
|
||||||
AliasAnalysis &AA;
|
AliasAnalysis &AA;
|
||||||
DominatorTree &DT;
|
DominatorTree &DT;
|
||||||
ScalarEvolution &SE;
|
ScalarEvolution &SE;
|
||||||
|
TargetTransformInfo &TTI;
|
||||||
const DataLayout &DL;
|
const DataLayout &DL;
|
||||||
IRBuilder<> Builder;
|
IRBuilder<> Builder;
|
||||||
ValueListMap StoreRefs;
|
ValueListMap StoreRefs;
|
||||||
ValueListMap LoadRefs;
|
ValueListMap LoadRefs;
|
||||||
unsigned VecRegSize;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
|
Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
|
||||||
ScalarEvolution &SE, unsigned VecRegSize)
|
ScalarEvolution &SE, TargetTransformInfo &TTI)
|
||||||
: F(F), AA(AA), DT(DT), SE(SE), DL(F.getParent()->getDataLayout()),
|
: F(F), AA(AA), DT(DT), SE(SE), TTI(TTI),
|
||||||
Builder(SE.getContext()), VecRegSize(VecRegSize) {}
|
DL(F.getParent()->getDataLayout()),
|
||||||
|
Builder(SE.getContext()) {}
|
||||||
|
|
||||||
bool run();
|
bool run();
|
||||||
|
|
||||||
|
@ -116,10 +117,8 @@ private:
|
||||||
class LoadStoreVectorizer : public FunctionPass {
|
class LoadStoreVectorizer : public FunctionPass {
|
||||||
public:
|
public:
|
||||||
static char ID;
|
static char ID;
|
||||||
unsigned VecRegSize;
|
|
||||||
|
|
||||||
LoadStoreVectorizer(unsigned VecRegSize = 128) : FunctionPass(ID),
|
LoadStoreVectorizer() : FunctionPass(ID) {
|
||||||
VecRegSize(VecRegSize) {
|
|
||||||
initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
|
initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,6 +132,7 @@ public:
|
||||||
AU.addRequired<AAResultsWrapperPass>();
|
AU.addRequired<AAResultsWrapperPass>();
|
||||||
AU.addRequired<ScalarEvolutionWrapperPass>();
|
AU.addRequired<ScalarEvolutionWrapperPass>();
|
||||||
AU.addRequired<DominatorTreeWrapperPass>();
|
AU.addRequired<DominatorTreeWrapperPass>();
|
||||||
|
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||||
AU.setPreservesCFG();
|
AU.setPreservesCFG();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -144,13 +144,14 @@ INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
|
||||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||||
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
|
||||||
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
|
||||||
|
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||||
INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
|
INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
|
||||||
"Vectorize load and store instructions", false, false);
|
"Vectorize load and store instructions", false, false);
|
||||||
|
|
||||||
char LoadStoreVectorizer::ID = 0;
|
char LoadStoreVectorizer::ID = 0;
|
||||||
|
|
||||||
Pass *llvm::createLoadStoreVectorizerPass(unsigned VecRegSize) {
|
Pass *llvm::createLoadStoreVectorizerPass() {
|
||||||
return new LoadStoreVectorizer(VecRegSize);
|
return new LoadStoreVectorizer();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoadStoreVectorizer::runOnFunction(Function &F) {
|
bool LoadStoreVectorizer::runOnFunction(Function &F) {
|
||||||
|
@ -161,8 +162,10 @@ bool LoadStoreVectorizer::runOnFunction(Function &F) {
|
||||||
AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
|
AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
|
||||||
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||||
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
|
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
|
||||||
|
TargetTransformInfo &TTI
|
||||||
|
= getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
||||||
|
|
||||||
Vectorizer V(F, AA, DT, SE, VecRegSize);
|
Vectorizer V(F, AA, DT, SE, TTI);
|
||||||
return V.run();
|
return V.run();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -440,6 +443,10 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
|
||||||
if (TySize < 8)
|
if (TySize < 8)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
Value *Ptr = LI->getPointerOperand();
|
||||||
|
unsigned AS = Ptr->getType()->getPointerAddressSpace();
|
||||||
|
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
|
||||||
|
|
||||||
// No point in looking at these if they're too big to vectorize.
|
// No point in looking at these if they're too big to vectorize.
|
||||||
if (TySize > VecRegSize / 2)
|
if (TySize > VecRegSize / 2)
|
||||||
continue;
|
continue;
|
||||||
|
@ -456,8 +463,8 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
|
||||||
// TODO: Target hook to filter types.
|
// TODO: Target hook to filter types.
|
||||||
|
|
||||||
// Save the load locations.
|
// Save the load locations.
|
||||||
Value *Ptr = GetUnderlyingObject(LI->getPointerOperand(), DL);
|
Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
|
||||||
LoadRefs[Ptr].push_back(LI);
|
LoadRefs[ObjPtr].push_back(LI);
|
||||||
|
|
||||||
} else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
|
} else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
|
||||||
if (!SI->isSimple())
|
if (!SI->isSimple())
|
||||||
|
@ -473,6 +480,9 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
|
||||||
if (TySize < 8)
|
if (TySize < 8)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
Value *Ptr = SI->getPointerOperand();
|
||||||
|
unsigned AS = Ptr->getType()->getPointerAddressSpace();
|
||||||
|
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
|
||||||
if (TySize > VecRegSize / 2)
|
if (TySize > VecRegSize / 2)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -485,8 +495,8 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Save store location.
|
// Save store location.
|
||||||
Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL);
|
Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
|
||||||
StoreRefs[Ptr].push_back(SI);
|
StoreRefs[ObjPtr].push_back(SI);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -592,6 +602,8 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned Sz = DL.getTypeSizeInBits(StoreTy);
|
unsigned Sz = DL.getTypeSizeInBits(StoreTy);
|
||||||
|
unsigned AS = S0->getPointerAddressSpace();
|
||||||
|
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
|
||||||
unsigned VF = VecRegSize / Sz;
|
unsigned VF = VecRegSize / Sz;
|
||||||
unsigned ChainSize = Chain.size();
|
unsigned ChainSize = Chain.size();
|
||||||
|
|
||||||
|
@ -664,7 +676,6 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
|
||||||
|
|
||||||
// Set insert point.
|
// Set insert point.
|
||||||
Builder.SetInsertPoint(&*Last);
|
Builder.SetInsertPoint(&*Last);
|
||||||
unsigned AS = S0->getPointerAddressSpace();
|
|
||||||
|
|
||||||
Value *Vec = UndefValue::get(VecTy);
|
Value *Vec = UndefValue::get(VecTy);
|
||||||
|
|
||||||
|
@ -728,6 +739,8 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned Sz = DL.getTypeSizeInBits(LoadTy);
|
unsigned Sz = DL.getTypeSizeInBits(LoadTy);
|
||||||
|
unsigned AS = L0->getPointerAddressSpace();
|
||||||
|
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
|
||||||
unsigned VF = VecRegSize / Sz;
|
unsigned VF = VecRegSize / Sz;
|
||||||
unsigned ChainSize = Chain.size();
|
unsigned ChainSize = Chain.size();
|
||||||
|
|
||||||
|
@ -798,7 +811,6 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
|
||||||
// Set insert point.
|
// Set insert point.
|
||||||
Builder.SetInsertPoint(&*Last);
|
Builder.SetInsertPoint(&*Last);
|
||||||
|
|
||||||
unsigned AS = L0->getPointerAddressSpace();
|
|
||||||
Value *Bitcast =
|
Value *Bitcast =
|
||||||
Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
|
Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-4 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT4 -check-prefix=ALL %s
|
||||||
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-8 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT8 -check-prefix=ALL %s
|
||||||
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-16 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT16 -check-prefix=ALL %s
|
||||||
|
|
||||||
|
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32
|
||||||
|
; ELT4: store i32
|
||||||
|
; ELT4: store i32
|
||||||
|
; ELT4: store i32
|
||||||
|
; ELT4: store i32
|
||||||
|
|
||||||
|
; ELT8: store <2 x i32>
|
||||||
|
; ELT8: store <2 x i32>
|
||||||
|
|
||||||
|
; ELT16: store <4 x i32>
|
||||||
|
define void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 {
|
||||||
|
%out.gep.1 = getelementptr i32, i32* %out, i32 1
|
||||||
|
%out.gep.2 = getelementptr i32, i32* %out, i32 2
|
||||||
|
%out.gep.3 = getelementptr i32, i32* %out, i32 3
|
||||||
|
|
||||||
|
store i32 9, i32* %out
|
||||||
|
store i32 1, i32* %out.gep.1
|
||||||
|
store i32 23, i32* %out.gep.2
|
||||||
|
store i32 19, i32* %out.gep.3
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i8(
|
||||||
|
; ALL: store <4 x i8>
|
||||||
|
define void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 {
|
||||||
|
%out.gep.1 = getelementptr i8, i8* %out, i32 1
|
||||||
|
%out.gep.2 = getelementptr i8, i8* %out, i32 2
|
||||||
|
%out.gep.3 = getelementptr i8, i8* %out, i32 3
|
||||||
|
|
||||||
|
store i8 9, i8* %out
|
||||||
|
store i8 1, i8* %out.gep.1
|
||||||
|
store i8 23, i8* %out.gep.2
|
||||||
|
store i8 19, i8* %out.gep.3
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16(
|
||||||
|
; ALL: store <2 x i16>
|
||||||
|
define void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 {
|
||||||
|
%out.gep.1 = getelementptr i16, i16* %out, i32 1
|
||||||
|
|
||||||
|
store i16 9, i16* %out
|
||||||
|
store i16 12, i16* %out.gep.1
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
|
@ -502,7 +502,8 @@ define void @merge_local_store_2_constants_i32_align_2(i32 addrspace(3)* %out) #
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: @merge_local_store_4_constants_i32
|
; CHECK-LABEL: @merge_local_store_4_constants_i32
|
||||||
; CHECK: store <4 x i32> <i32 1234, i32 123, i32 456, i32 333>, <4 x i32> addrspace(3)*
|
; CHECK: store <2 x i32> <i32 456, i32 333>, <2 x i32> addrspace(3)*
|
||||||
|
; CHECK: store <2 x i32> <i32 1234, i32 123>, <2 x i32> addrspace(3)*
|
||||||
define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
|
define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
|
||||||
%out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
|
%out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
|
||||||
%out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
|
%out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
|
||||||
|
|
Loading…
Reference in New Issue