forked from OSchip/llvm-project
AMDGPU: Do not fold clamp instructions when sources are different
Patch by hakzsam (Samuel Pitoiset).
llvm-svn: 314951
This commit is contained in:
parent
89937534b2
commit
aafff87dda
|
@@ -737,6 +737,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
|
|||
const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
|
||||
const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
|
||||
if (!Src0->isReg() || !Src1->isReg() ||
|
||||
Src0->getReg() != Src1->getReg() ||
|
||||
Src0->getSubReg() != Src1->getSubReg() ||
|
||||
Src0->getSubReg() != AMDGPU::NoSubRegister)
|
||||
return nullptr;
|
||||
|
|
|
@@ -663,6 +663,28 @@ define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out,
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_clamp_diff_source_f32:
|
||||
; GCN: v_add_f32_e32 [[A:v[0-9]+]]
|
||||
; GCN: v_add_f32_e32 [[B:v[0-9]+]]
|
||||
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[B]] clamp{{$}}
|
||||
define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0
|
||||
{
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 0
|
||||
%gep1 = getelementptr float, float addrspace(1)* %aptr, i32 1
|
||||
%gep2 = getelementptr float, float addrspace(1)* %aptr, i32 2
|
||||
%l0 = load float, float addrspace(1)* %gep0
|
||||
%l1 = load float, float addrspace(1)* %gep1
|
||||
%l2 = load float, float addrspace(1)* %gep2
|
||||
%a = fadd nsz float %l0, %l1
|
||||
%b = fadd nsz float %l0, %l2
|
||||
%res = call nsz float @llvm.maxnum.f32(float %a, float %b)
|
||||
%max = call nsz float @llvm.maxnum.f32(float %res, float 0.0)
|
||||
%min = call nsz float @llvm.minnum.f32(float %max, float 1.0)
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 3
|
||||
store float %min, float addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare float @llvm.minnum.f32(float, float) #1
|
||||
|
|
Loading…
Reference in New Issue