AMDGPU/GlobalISel: Implement computeKnownBits for groupstaticsize

This commit is contained in:
Matt Arsenault 2020-08-27 17:21:41 -04:00
parent 9d3dc276a6
commit af1c1e20f4
3 changed files with 93 additions and 3 deletions

View File

@ -78,7 +78,7 @@ protected:
bool EnablePromoteAlloca; bool EnablePromoteAlloca;
bool HasTrigReducedRange; bool HasTrigReducedRange;
unsigned MaxWavesPerEU; unsigned MaxWavesPerEU;
int LocalMemorySize; unsigned LocalMemorySize;
char WavefrontSizeLog2; char WavefrontSizeLog2;
public: public:
@ -202,7 +202,7 @@ public:
return WavefrontSizeLog2; return WavefrontSizeLog2;
} }
int getLocalMemorySize() const { unsigned getLocalMemorySize() const {
return LocalMemorySize; return LocalMemorySize;
} }

View File

@ -11425,6 +11425,13 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
Known.Zero.setHighBits(Size - getSubtarget()->getWavefrontSizeLog2()); Known.Zero.setHighBits(Size - getSubtarget()->getWavefrontSizeLog2());
break; break;
} }
case Intrinsic::amdgcn_groupstaticsize: {
// We can report everything over the maximum size as 0. We can't report
// based on the actual size because we don't know if it's accurate or not
// at any given point.
Known.Zero.setHighBits(countLeadingZeros(getSubtarget()->getLocalMemorySize()));
break;
}
default: default:
break; break;
} }

View File

@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
--- ---
name: remove_and_255_zextload name: remove_and_255_zextload
@ -182,3 +182,86 @@ body: |
$vgpr0 = COPY %and $vgpr0 = COPY %and
... ...
# Test known bits for groupstaticsize is the maximum LDS size.
---
# Masks the result of llvm.amdgcn.groupstaticsize with 65535 (0xFFFF, the low
# 16 bits) and runs the post-legalizer combiner over it.
# NOTE: despite the "remove_and" prefix in the test name, the checks below
# expect the G_AND and its mask constant to be kept.
# NOTE(review): presumably the maximum LDS size on the tested subtarget is
# 65536, so bit 16 of the intrinsic result may be set and this mask is not
# redundant -- confirm against the subtarget's local memory size.
name: remove_and_65535_groupstaticsize
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: remove_and_65535_groupstaticsize
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
; CHECK: %mask:_(s32) = G_CONSTANT i32 65535
; CHECK: %and:_(s32) = G_AND %lds_size, %mask
; CHECK: $vgpr0 = COPY %and(s32)
; %ptr has no later use in this body and does not appear in the expected output.
%ptr:_(p1) = COPY $vgpr0_vgpr1
%lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
%mask:_(s32) = G_CONSTANT i32 65535
%and:_(s32) = G_AND %lds_size, %mask
$vgpr0 = COPY %and
...
---
# Masks the result of llvm.amdgcn.groupstaticsize with 131071 (0x1FFFF, the
# low 17 bits). The checks below expect the G_AND to be folded away, so the
# final COPY reads %lds_size directly.
# NOTE(review): this relies on every bit above bit 16 of the intrinsic result
# being known zero, presumably because the maximum LDS size on the tested
# subtarget is 65536 -- confirm against the subtarget's local memory size.
name: remove_and_131071_groupstaticsize
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: remove_and_131071_groupstaticsize
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
; CHECK: $vgpr0 = COPY %lds_size(s32)
; %ptr has no later use in this body and does not appear in the expected output.
%ptr:_(p1) = COPY $vgpr0_vgpr1
%lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
%mask:_(s32) = G_CONSTANT i32 131071
%and:_(s32) = G_AND %lds_size, %mask
$vgpr0 = COPY %and
...
---
# Masks the result of llvm.amdgcn.groupstaticsize with 65536 (0x10000, only
# bit 16 set). The checks below expect the G_AND and its mask constant to be
# kept: this mask clears the low 16 bits, which are not known to be zero.
name: no_remove_and_65536_groupstaticsize
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: no_remove_and_65536_groupstaticsize
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
; CHECK: %mask:_(s32) = G_CONSTANT i32 65536
; CHECK: %and:_(s32) = G_AND %lds_size, %mask
; CHECK: $vgpr0 = COPY %and(s32)
; %ptr has no later use in this body and does not appear in the expected output.
%ptr:_(p1) = COPY $vgpr0_vgpr1
%lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
%mask:_(s32) = G_CONSTANT i32 65536
%and:_(s32) = G_AND %lds_size, %mask
$vgpr0 = COPY %and
...
---
# Masks the result of llvm.amdgcn.groupstaticsize with 32767 (0x7FFF, the low
# 15 bits). The checks below expect the G_AND and its mask constant to be
# kept: the mask clears bit 15 and above, and those bits are not all known to
# be zero.
name: no_remove_and_32767_groupstaticsize
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: no_remove_and_32767_groupstaticsize
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
; CHECK: %mask:_(s32) = G_CONSTANT i32 32767
; CHECK: %and:_(s32) = G_AND %lds_size, %mask
; CHECK: $vgpr0 = COPY %and(s32)
; %ptr has no later use in this body and does not appear in the expected output.
%ptr:_(p1) = COPY $vgpr0_vgpr1
%lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
%mask:_(s32) = G_CONSTANT i32 32767
%and:_(s32) = G_AND %lds_size, %mask
$vgpr0 = COPY %and
...