From e06d707aa2ae4582b26f8b4839f07574cc67deee Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 25 Mar 2020 17:34:49 -0700 Subject: [PATCH] [AMDGPU] Fixed function traversal in attribute propagation The AMDGPUPropagateAttributes pass was skipping some of the functions when cloning. Functions were added to the root set and then skipped on the next iteration because they were already in the root set, although they were meant to be processed with different features. Differential Revision: https://reviews.llvm.org/D76815 --- .../AMDGPU/AMDGPUPropagateAttributes.cpp | 13 +++++-- .../AMDGPU/propagate-attributes-clone.ll | 39 ++++++++++++++++++- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp index 154a4eae8ce2..0ad4eebcf3f9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -192,12 +192,13 @@ bool AMDGPUPropagateAttributes::process() { NewRoots.clear(); for (auto &F : M.functions()) { - if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F)) + if (F.isDeclaration()) continue; const FeatureBitset &CalleeBits = TM->getSubtargetImpl(F)->getFeatureBits(); SmallVector, 32> ToReplace; + SmallSet Visited; for (User *U : F.users()) { Instruction *I = dyn_cast(U); @@ -207,16 +208,17 @@ bool AMDGPUPropagateAttributes::process() { if (!CI) continue; Function *Caller = CI->getCaller(); - if (!Caller) + if (!Caller || !Visited.insert(CI).second) continue; - if (!Roots.count(Caller)) + if (!Roots.count(Caller) && !NewRoots.count(Caller)) continue; const FeatureBitset &CallerBits = TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures; if (CallerBits == (CalleeBits & TargetFeatures)) { - NewRoots.insert(&F); + if (!Roots.count(&F)) + NewRoots.insert(&F); continue; } @@ -258,6 +260,9 @@ bool AMDGPUPropagateAttributes::process() { F->eraseFromParent(); } + Roots.clear(); + 
Clones.clear(); + return Changed; } diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll index b55a87de4be2..580fb31bc202 100644 --- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll +++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll @@ -7,17 +7,54 @@ ; OPT-EXT: define void @foo3() local_unnamed_addr #1 ; OPT-INT: define internal fastcc void @foo3.2() unnamed_addr #1 ; OPT-EXT: define void @foo2() local_unnamed_addr #1 -; OPT-INT: define internal fastcc void @foo2() unnamed_addr #1 +; OPT-INT: define internal fastcc void @foo2.3() unnamed_addr #1 ; OPT-EXT: define void @foo1() local_unnamed_addr #1 +; OPT-EXT: tail call void @foo4() +; OPT-EXT: tail call void @foo3() +; OPT-EXT: tail call void @foo2() +; OPT-EXT: tail call void @foo2() +; OPT-EXT: tail call void @foo1() +; OPT-EXT: tail call fastcc void @0() ; OPT-INT: define internal fastcc void @foo1.1() unnamed_addr #1 +; OPT-INT: tail call void @foo4() +; OPT-INT: tail call fastcc void @foo3.2() +; OPT-INT: tail call fastcc void @foo2.3() +; OPT-INT: tail call fastcc void @foo2.3() +; OPT-INT: tail call fastcc void @foo1.1() +; OPT-INT: tail call fastcc void @0() +; OPT: ret void ; OPT: define amdgpu_kernel void @kernel1() local_unnamed_addr #2 +; OPT-EXT: tail call fastcc void @foo1.1() +; OPT-INT: tail call fastcc void @foo1() +; OPT: ret void ; OPT: define amdgpu_kernel void @kernel2() local_unnamed_addr #3 +; OPT-EXT: tail call void @foo2() +; OPT-INT: tail call fastcc void @foo2.3() +; OPT: ret void ; OPT: define amdgpu_kernel void @kernel3() local_unnamed_addr #3 +; OPT-EXT: tail call void @foo1() +; OPT-INT: tail call fastcc void @foo1.1() +; OPT: ret void ; OPT-EXT: define internal fastcc void @foo1.1() unnamed_addr #4 +; OPT-EXT: tail call void @foo4() +; OPT-EXT: tail call fastcc void @foo3.2() +; OPT-EXT: tail call fastcc void @foo2.3() +; OPT-EXT: tail call fastcc void @foo2.3() +; OPT-EXT: tail call fastcc void 
@foo1.1() +; OPT-EXT: tail call fastcc void @1() ; OPT-INT: define internal fastcc void @foo1() unnamed_addr #4 +; OPT-INT: tail call void @foo4() +; OPT-INT: tail call fastcc void @foo3() +; OPT-INT: tail call fastcc void @foo2() +; OPT-INT: tail call fastcc void @foo2() +; OPT-INT: tail call fastcc void @foo1() +; OPT-INT: tail call fastcc void @1() +; OPT: ret void ; OPT: define internal fastcc void @1() unnamed_addr #4 ; OPT-EXT: define internal fastcc void @foo3.2() unnamed_addr #4 ; OPT-INT: define internal fastcc void @foo3() unnamed_addr #4 +; OPT-EXT: define internal fastcc void @foo2.3() unnamed_addr #4 +; OPT-INT: define internal fastcc void @foo2() unnamed_addr #4 ; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" } ; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" } ; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" }