diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 514842b50647..b15ef767f0ed 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -710,6 +710,12 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const { } } + if (const CallInst *CI = dyn_cast<CallInst>(V)) { + if (isa<InlineAsm>(CI->getCalledValue())) + return !isInlineAsmSourceOfDivergence(CI); + return false; + } + const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V); if (!ExtValue) return false; diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll index b5e8e49c6876..2720028e5af4 100644 --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll @@ -1,6 +1,6 @@ ; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s -; CHECK: for function 'readfirstlane': +; CHECK-LABEL: for function 'readfirstlane': define amdgpu_kernel void @readfirstlane() { %id.x = call i32 @llvm.amdgcn.workitem.id.x() ; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x() @@ -9,20 +9,42 @@ define amdgpu_kernel void @readfirstlane() { ret void } -; CHECK: for function 'icmp': +; CHECK-LABEL: for function 'icmp': define amdgpu_kernel void @icmp(i32 inreg %x) { ; CHECK-NOT: DIVERGENT: %icmp = call i64 @llvm.amdgcn.icmp.i32 %icmp = call i64 @llvm.amdgcn.icmp.i32(i32 %x, i32 0, i32 33) ret void } -; CHECK: for function 'fcmp': +; CHECK-LABEL: for function 'fcmp': define amdgpu_kernel void @fcmp(float inreg %x, float inreg %y) { ; CHECK-NOT: DIVERGENT: %fcmp = call i64 @llvm.amdgcn.fcmp.i32 %fcmp = call i64 @llvm.amdgcn.fcmp.i32(float %x, float %y, i32 33) ret void } +; SGPR asm outputs are uniform regardless of the input operands. 
+; CHECK-LABEL: for function 'asm_sgpr': +; CHECK: DIVERGENT: i32 %divergent +; CHECK-NOT: DIVERGENT +define i32 @asm_sgpr(i32 %divergent) { + %sgpr = call i32 asm "; def $0, $1","=s,v"(i32 %divergent) + ret i32 %sgpr +} + +; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'asm_mixed_sgpr_vgpr': +; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1, $2", "=s,=v,v"(i32 %divergent) +; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0 +; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1 +define void @asm_mixed_sgpr_vgpr(i32 %divergent) { + %asm = call { i32, i32 } asm "; def $0, $1, $2","=s,=v,v"(i32 %divergent) + %sgpr = extractvalue { i32, i32 } %asm, 0 + %vgpr = extractvalue { i32, i32 } %asm, 1 + store i32 %sgpr, i32 addrspace(1)* undef + store i32 %vgpr, i32 addrspace(1)* undef + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.readfirstlane(i32) #0 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #1