forked from OSchip/llvm-project
[AMDGPU] Fixed function traversal in attribute propagation
The AMDGPUPropagateAttributes pass was skipping some of the functions when cloning. Functions were added to the root set and then skipped on the next iteration because they were already in the root set, even though they were meant to be processed with different features. Differential Revision: https://reviews.llvm.org/D76815
This commit is contained in:
parent
93f7743851
commit
e06d707aa2
|
@ -192,12 +192,13 @@ bool AMDGPUPropagateAttributes::process() {
|
|||
NewRoots.clear();
|
||||
|
||||
for (auto &F : M.functions()) {
|
||||
if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F))
|
||||
if (F.isDeclaration())
|
||||
continue;
|
||||
|
||||
const FeatureBitset &CalleeBits =
|
||||
TM->getSubtargetImpl(F)->getFeatureBits();
|
||||
SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
|
||||
SmallSet<CallBase *, 32> Visited;
|
||||
|
||||
for (User *U : F.users()) {
|
||||
Instruction *I = dyn_cast<Instruction>(U);
|
||||
|
@ -207,16 +208,17 @@ bool AMDGPUPropagateAttributes::process() {
|
|||
if (!CI)
|
||||
continue;
|
||||
Function *Caller = CI->getCaller();
|
||||
if (!Caller)
|
||||
if (!Caller || !Visited.insert(CI).second)
|
||||
continue;
|
||||
if (!Roots.count(Caller))
|
||||
if (!Roots.count(Caller) && !NewRoots.count(Caller))
|
||||
continue;
|
||||
|
||||
const FeatureBitset &CallerBits =
|
||||
TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
|
||||
|
||||
if (CallerBits == (CalleeBits & TargetFeatures)) {
|
||||
NewRoots.insert(&F);
|
||||
if (!Roots.count(&F))
|
||||
NewRoots.insert(&F);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -258,6 +260,9 @@ bool AMDGPUPropagateAttributes::process() {
|
|||
F->eraseFromParent();
|
||||
}
|
||||
|
||||
Roots.clear();
|
||||
Clones.clear();
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
|
|
|
@ -7,17 +7,54 @@
|
|||
; OPT-EXT: define void @foo3() local_unnamed_addr #1
|
||||
; OPT-INT: define internal fastcc void @foo3.2() unnamed_addr #1
|
||||
; OPT-EXT: define void @foo2() local_unnamed_addr #1
|
||||
; OPT-INT: define internal fastcc void @foo2() unnamed_addr #1
|
||||
; OPT-INT: define internal fastcc void @foo2.3() unnamed_addr #1
|
||||
; OPT-EXT: define void @foo1() local_unnamed_addr #1
|
||||
; OPT-EXT: tail call void @foo4()
|
||||
; OPT-EXT: tail call void @foo3()
|
||||
; OPT-EXT: tail call void @foo2()
|
||||
; OPT-EXT: tail call void @foo2()
|
||||
; OPT-EXT: tail call void @foo1()
|
||||
; OPT-EXT: tail call fastcc void @0()
|
||||
; OPT-INT: define internal fastcc void @foo1.1() unnamed_addr #1
|
||||
; OPT-INT: tail call void @foo4()
|
||||
; OPT-INT: tail call fastcc void @foo3.2()
|
||||
; OPT-INT: tail call fastcc void @foo2.3()
|
||||
; OPT-INT: tail call fastcc void @foo2.3()
|
||||
; OPT-INT: tail call fastcc void @foo1.1()
|
||||
; OPT-INT: tail call fastcc void @0()
|
||||
; OPT: ret void
|
||||
; OPT: define amdgpu_kernel void @kernel1() local_unnamed_addr #2
|
||||
; OPT-EXT: tail call fastcc void @foo1.1()
|
||||
; OPT-INT: tail call fastcc void @foo1()
|
||||
; OPT: ret void
|
||||
; OPT: define amdgpu_kernel void @kernel2() local_unnamed_addr #3
|
||||
; OPT-EXT: tail call void @foo2()
|
||||
; OPT-INT: tail call fastcc void @foo2.3()
|
||||
; OPT: ret void
|
||||
; OPT: define amdgpu_kernel void @kernel3() local_unnamed_addr #3
|
||||
; OPT-EXT: tail call void @foo1()
|
||||
; OPT-INT: tail call fastcc void @foo1.1()
|
||||
; OPT: ret void
|
||||
; OPT-EXT: define internal fastcc void @foo1.1() unnamed_addr #4
|
||||
; OPT-EXT: tail call void @foo4()
|
||||
; OPT-EXT: tail call fastcc void @foo3.2()
|
||||
; OPT-EXT: tail call fastcc void @foo2.3()
|
||||
; OPT-EXT: tail call fastcc void @foo2.3()
|
||||
; OPT-EXT: tail call fastcc void @foo1.1()
|
||||
; OPT-EXT: tail call fastcc void @1()
|
||||
; OPT-INT: define internal fastcc void @foo1() unnamed_addr #4
|
||||
; OPT-INT: tail call void @foo4()
|
||||
; OPT-INT: tail call fastcc void @foo3()
|
||||
; OPT-INT: tail call fastcc void @foo2()
|
||||
; OPT-INT: tail call fastcc void @foo2()
|
||||
; OPT-INT: tail call fastcc void @foo1()
|
||||
; OPT-INT: tail call fastcc void @1()
|
||||
; OPT: ret void
|
||||
; OPT: define internal fastcc void @1() unnamed_addr #4
|
||||
; OPT-EXT: define internal fastcc void @foo3.2() unnamed_addr #4
|
||||
; OPT-INT: define internal fastcc void @foo3() unnamed_addr #4
|
||||
; OPT-EXT: define internal fastcc void @foo2.3() unnamed_addr #4
|
||||
; OPT-INT: define internal fastcc void @foo2() unnamed_addr #4
|
||||
; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" }
|
||||
; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" }
|
||||
; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" }
|
||||
|
|
Loading…
Reference in New Issue