// Patch overview (reviewer notes; the patch text below is unchanged).
//
// 1. llvm/include/llvm/Support/KnownBits.h: adds isStrictlyPositive(), which
//    returns true when the sign bit is known zero (Zero.isSignBitSet()) and
//    at least one bit is known one (!One.isNullValue()).
// 2. llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp, MUL_I24 branch of
//    computeKnownBitsForTargetNode: drops the `Negative` flag and the early
//    `break` that was taken when either operand's sign was unknown. The high
//    (32 - MaxValBits) bits are now marked known-zero when both operands are
//    non-negative or both are negative, known-one when one operand is
//    negative and the other is strictly positive, and left untouched
//    otherwise. Before this change a merely non-negative operand counted as
//    "positive", so negative * non-negative marked the high bits known-one
//    even though the non-negative operand could be zero.
// 3. llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll: new llc test that
//    multiplies (workitem.id.x & 3) by -5 and masks the product with -32; the
//    CHECK lines expect the v_and_b32 with 0xffffffe0 to still be emitted
//    after the v_mul_i32_i24.
//
// NOTE(review): the added `if(` / `else if(` lack the space used by the
// neighbouring `if (` lines in the same hunk.
// NOTE(review): the isStrictlyPositive() one-liner looks longer than 80
// columns once indented - confirm against the project's line-width limit.
// NOTE(review): MaxValBits is computed outside these hunks. If it is the sum
// of LHSValBits and RHSValBits, the both-negative case may be one bit short
// (presumably -1 * -1 = 1 while each operand contributes zero value bits) -
// TODO confirm against the full function.
diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index df0b02c1335d..ff25b6fc572c 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -97,6 +97,9 @@ public: /// Returns true if this value is known to be non-negative. bool isNonNegative() const { return Zero.isSignBitSet(); } + /// Returns true if this value is known to be positive. + bool isStrictlyPositive() const { return Zero.isSignBitSet() && !One.isNullValue(); } + /// Make this value negative. void makeNegative() { One.setSignBit(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index b2500875e239..8a7b4be809ba 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4445,7 +4445,6 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( LHSKnown = LHSKnown.trunc(24); RHSKnown = RHSKnown.trunc(24); - bool Negative = false; if (Opc == AMDGPUISD::MUL_I24) { unsigned LHSValBits = 24 - LHSKnown.countMinSignBits(); unsigned RHSValBits = 24 - RHSKnown.countMinSignBits(); @@ -4453,16 +4452,16 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( if (MaxValBits >= 32) break; bool LHSNegative = LHSKnown.isNegative(); - bool LHSPositive = LHSKnown.isNonNegative(); + bool LHSNonNegative = LHSKnown.isNonNegative(); + bool LHSPositive = LHSKnown.isStrictlyPositive(); bool RHSNegative = RHSKnown.isNegative(); - bool RHSPositive = RHSKnown.isNonNegative(); - if ((!LHSNegative && !LHSPositive) || (!RHSNegative && !RHSPositive)) - break; - Negative = (LHSNegative && RHSPositive) || (LHSPositive && RHSNegative); - if (Negative) - Known.One.setHighBits(32 - MaxValBits); - else + bool RHSNonNegative = RHSKnown.isNonNegative(); + bool RHSPositive = RHSKnown.isStrictlyPositive(); + + if((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative)) Known.Zero.setHighBits(32 - MaxValBits); + else if((LHSNegative 
&& RHSPositive) || (LHSPositive && RHSNegative)) + Known.One.setHighBits(32 - MaxValBits); } else { unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros(); unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros(); diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll new file mode 100644 index 000000000000..1f660df598e4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GCN %s +define weak_odr amdgpu_kernel void @test_mul24_knownbits_kernel(float addrspace(1)* %p) #4 { +; GCN-LABEL: test_mul24_knownbits_kernel: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: v_and_b32_e32 v0, 3, v0 +; GCN-NEXT: v_mul_i32_i24_e32 v0, 0xfffffb, v0 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GCN-NEXT: v_and_b32_e32 v0, 0xffffffe0, v0 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, s1 +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: global_store_dword v[0:1], v2, off +; GCN-NEXT: s_endpgm +entry: + %0 = tail call i32 @llvm.amdgcn.workitem.id.x() #28, !range !4 + %tid = and i32 %0, 3 + %1 = mul nsw i32 %tid, -5 + %v1 = and i32 %1, -32 + %v2 = sext i32 %v1 to i64 + %v3 = getelementptr inbounds float, float addrspace(1)* %p, i64 %v2 + store float 0.000, float addrspace(1)* %v3, align 4 + ret void +} + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workitem.id.x() #20 + +!4 = !{i32 0, i32 1024}