From aafff87ddaa174a3981302407865977c338a9bce Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 5 Oct 2017 00:13:17 +0000
Subject: [PATCH] AMDGPU: Do not fold clamp instructions when sources are different

Patch by hakzsam (Samuel Pitoiset)

llvm-svn: 314951
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp |  1 +
 llvm/test/CodeGen/AMDGPU/clamp.ll         | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9fc38aeefaa2..a527afb25da0 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -737,6 +737,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
     const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
     const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
     if (!Src0->isReg() || !Src1->isReg() ||
+        Src0->getReg() != Src1->getReg() ||
         Src0->getSubReg() != Src1->getSubReg() ||
         Src0->getSubReg() != AMDGPU::NoSubRegister)
       return nullptr;
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index 2d6b4f3c0c57..216ecf763456 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -663,6 +663,28 @@ define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out,
   ret void
 }
 
+; GCN-LABEL: {{^}}v_clamp_diff_source_f32:
+; GCN: v_add_f32_e32 [[A:v[0-9]+]]
+; GCN: v_add_f32_e32 [[B:v[0-9]+]]
+; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[B]] clamp{{$}}
+define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0
+{
+  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 0
+  %gep1 = getelementptr float, float addrspace(1)* %aptr, i32 1
+  %gep2 = getelementptr float, float addrspace(1)* %aptr, i32 2
+  %l0 = load float, float addrspace(1)* %gep0
+  %l1 = load float, float addrspace(1)* %gep1
+  %l2 = load float, float addrspace(1)* %gep2
+  %a = fadd nsz float %l0, %l1
+  %b = fadd nsz float %l0, %l2
+  %res = call nsz float @llvm.maxnum.f32(float %a, float %b)
+  %max = call nsz float @llvm.maxnum.f32(float %res, float 0.0)
+  %min = call nsz float @llvm.minnum.f32(float %max, float 1.0)
+  %out.gep = getelementptr float, float addrspace(1)* %out, i32 3
+  store float %min, float addrspace(1)* %out.gep
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare float @llvm.fabs.f32(float) #1
 declare float @llvm.minnum.f32(float, float) #1