[AMDGPU] Fixed function traversal in attribute propagation

The AMDGPUPropagateAttributes pass was skipping some of the functions
when cloning. Functions were added to the root set and then skipped
on the next iteration because they were already in the root set,
even though they were meant to be processed with different features.

Differential Revision: https://reviews.llvm.org/D76815
This commit is contained in:
Stanislav Mekhanoshin 2020-03-25 17:34:49 -07:00
parent 93f7743851
commit e06d707aa2
2 changed files with 47 additions and 5 deletions

View File

@ -192,12 +192,13 @@ bool AMDGPUPropagateAttributes::process() {
NewRoots.clear();
for (auto &F : M.functions()) {
if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F))
if (F.isDeclaration())
continue;
const FeatureBitset &CalleeBits =
TM->getSubtargetImpl(F)->getFeatureBits();
SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
SmallSet<CallBase *, 32> Visited;
for (User *U : F.users()) {
Instruction *I = dyn_cast<Instruction>(U);
@ -207,16 +208,17 @@ bool AMDGPUPropagateAttributes::process() {
if (!CI)
continue;
Function *Caller = CI->getCaller();
if (!Caller)
if (!Caller || !Visited.insert(CI).second)
continue;
if (!Roots.count(Caller))
if (!Roots.count(Caller) && !NewRoots.count(Caller))
continue;
const FeatureBitset &CallerBits =
TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
if (CallerBits == (CalleeBits & TargetFeatures)) {
NewRoots.insert(&F);
if (!Roots.count(&F))
NewRoots.insert(&F);
continue;
}
@ -258,6 +260,9 @@ bool AMDGPUPropagateAttributes::process() {
F->eraseFromParent();
}
Roots.clear();
Clones.clear();
return Changed;
}

View File

@ -7,17 +7,54 @@
; OPT-EXT: define void @foo3() local_unnamed_addr #1
; OPT-INT: define internal fastcc void @foo3.2() unnamed_addr #1
; OPT-EXT: define void @foo2() local_unnamed_addr #1
; OPT-INT: define internal fastcc void @foo2() unnamed_addr #1
; OPT-INT: define internal fastcc void @foo2.3() unnamed_addr #1
; OPT-EXT: define void @foo1() local_unnamed_addr #1
; OPT-EXT: tail call void @foo4()
; OPT-EXT: tail call void @foo3()
; OPT-EXT: tail call void @foo2()
; OPT-EXT: tail call void @foo2()
; OPT-EXT: tail call void @foo1()
; OPT-EXT: tail call fastcc void @0()
; OPT-INT: define internal fastcc void @foo1.1() unnamed_addr #1
; OPT-INT: tail call void @foo4()
; OPT-INT: tail call fastcc void @foo3.2()
; OPT-INT: tail call fastcc void @foo2.3()
; OPT-INT: tail call fastcc void @foo2.3()
; OPT-INT: tail call fastcc void @foo1.1()
; OPT-INT: tail call fastcc void @0()
; OPT: ret void
; OPT: define amdgpu_kernel void @kernel1() local_unnamed_addr #2
; OPT-EXT: tail call fastcc void @foo1.1()
; OPT-INT: tail call fastcc void @foo1()
; OPT: ret void
; OPT: define amdgpu_kernel void @kernel2() local_unnamed_addr #3
; OPT-EXT: tail call void @foo2()
; OPT-INT: tail call fastcc void @foo2.3()
; OPT: ret void
; OPT: define amdgpu_kernel void @kernel3() local_unnamed_addr #3
; OPT-EXT: tail call void @foo1()
; OPT-INT: tail call fastcc void @foo1.1()
; OPT: ret void
; OPT-EXT: define internal fastcc void @foo1.1() unnamed_addr #4
; OPT-EXT: tail call void @foo4()
; OPT-EXT: tail call fastcc void @foo3.2()
; OPT-EXT: tail call fastcc void @foo2.3()
; OPT-EXT: tail call fastcc void @foo2.3()
; OPT-EXT: tail call fastcc void @foo1.1()
; OPT-EXT: tail call fastcc void @1()
; OPT-INT: define internal fastcc void @foo1() unnamed_addr #4
; OPT-INT: tail call void @foo4()
; OPT-INT: tail call fastcc void @foo3()
; OPT-INT: tail call fastcc void @foo2()
; OPT-INT: tail call fastcc void @foo2()
; OPT-INT: tail call fastcc void @foo1()
; OPT-INT: tail call fastcc void @1()
; OPT: ret void
; OPT: define internal fastcc void @1() unnamed_addr #4
; OPT-EXT: define internal fastcc void @foo3.2() unnamed_addr #4
; OPT-INT: define internal fastcc void @foo3() unnamed_addr #4
; OPT-EXT: define internal fastcc void @foo2.3() unnamed_addr #4
; OPT-INT: define internal fastcc void @foo2() unnamed_addr #4
; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" }
; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" }
; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" }