diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1ed6dd963fa5..d98c2a272c90 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6755,7 +6755,7 @@ SDValue SITargetLowering::performAddCombine(SDNode *N, return SDValue(); } - if (VT != MVT::i32) + if (VT != MVT::i32 || !DCI.isAfterLegalizeDAG()) return SDValue(); // add x, zext (setcc) => addcarry x, 0, setcc diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll index f94b2fa3aa33..5b3ad15fb1a9 100644 --- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll @@ -1,10 +1,13 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}add1: ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]] ; GCN-NOT: v_cndmask +; GFX9-LABEL: {{^}}add1: +; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc define amdgpu_kernel void @add1(i32 addrspace(1)* nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -18,11 +21,33 @@ bb: ret void } +; GCN-LABEL: {{^}}add1_i16: +; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]] +; GCN-NOT: v_cndmask + +; GFX9-LABEL: {{^}}add1_i16: +; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc +define i16 @add1_i16(i32 addrspace(1)* nocapture %arg, i16 addrspace(1)* nocapture %dst) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = zext i1 %cmp to i32 + %add = add i32 %v, %ext + %trunc = trunc i32 %add to i16 + ret i16 %trunc +} + ; GCN-LABEL: {{^}}sub1: ; GCN: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; GCN-NOT: v_cndmask +; GFX9-LABEL: {{^}}sub1: +; GFX9: v_subbrev_co_u32_e{{32|64}} v{{[0-9]+}}, vcc define amdgpu_kernel void @sub1(i32 addrspace(1)* nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -42,6 +67,8 @@ bb: ; GCN-NOT: v_cndmask ; GCN-NOT: v_add +; GFX9-LABEL: {{^}}add_adde: +; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc define amdgpu_kernel void @add_adde(i32 addrspace(1)* nocapture %arg, i32 %a) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -62,6 +89,8 @@ bb: ; GCN-NOT: v_cndmask ; GCN-NOT: v_add +; GFX9-LABEL: {{^}}adde_add: +; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc define amdgpu_kernel void @adde_add(i32 addrspace(1)* nocapture %arg, i32 %a) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -82,6 +111,8 @@ bb: ; GCN-NOT: v_cndmask ; GCN-NOT: v_sub +; GFX9-LABEL: {{^}}sub_sube: +; GFX9: v_subb_co_u32_e{{32|64}} v{{[0-9]+}}, vcc define amdgpu_kernel void @sub_sube(i32 addrspace(1)* nocapture %arg, i32 %a) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -102,6 +133,8 @@ bb: ; GCN-NOT: v_cndmask ; GCN-NOT: v_sub +; GFX9-LABEL: {{^}}sube_sub: +; GFX9: v_subb_co_u32_e{{32|64}} v{{[0-9]+}}, vcc define amdgpu_kernel void @sube_sub(i32 addrspace(1)* nocapture %arg, i32 %a) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -121,6 +154,8 @@ bb: ; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]] ; GCN-NOT: v_cndmask +; GFX9-LABEL: {{^}}zext_flclass: +; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc define amdgpu_kernel void @zext_flclass(i32 addrspace(1)* nocapture %arg, float %x) { bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -138,6 +173,8 @@ bb: ; GCN: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; GCN-NOT: v_cndmask +; GFX9-LABEL: {{^}}sext_flclass: +; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc define amdgpu_kernel void @sext_flclass(i32 addrspace(1)* nocapture %arg, float %x) { bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -155,6 +192,8 @@ bb: ; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]] ; GCN-NOT: v_cndmask +; GFX9-LABEL: {{^}}add_and: +; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc define amdgpu_kernel void @add_and(i32 addrspace(1)* nocapture %arg) { bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x()