forked from OSchip/llvm-project
[AMDGPU] Fix for negative offsets in buffer/tbuffer intrinsics
Summary: The new buffer/tbuffer intrinsics handle an out-of-range immediate offset by moving/adding offset&-4096 to a vgpr, leaving an in-range immediate offset, with a chance of the move/add being CSEd for similar loads/stores. However, it turns out that a negative offset in a vgpr is illegal, even if adding the immediate offset makes it legal again. Therefore, this commit disables the offset&-4096 split when the rounded-down value would be negative; in that case the whole offset is placed in the vgpr and the immediate offset is set to 0. Differential Revision: https://reviews.llvm.org/D52683 Change-Id: Ie02f0a74f240a138dc2a29d17cfbd9e350e4ed13 llvm-svn: 343672
This commit is contained in:
parent
c68cc4efbe
commit
a37679d67b
|
@ -5983,11 +5983,18 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
|
|||
if (C1) {
|
||||
unsigned ImmOffset = C1->getZExtValue();
|
||||
// If the immediate value is too big for the immoffset field, put the value
|
||||
// mod 4096 into the immoffset field so that the value that is copied/added
|
||||
// and -4096 into the immoffset field so that the value that is copied/added
|
||||
// for the voffset field is a multiple of 4096, and it stands more chance
|
||||
// of being CSEd with the copy/add for another similar load/store.
|
||||
// However, do not do that rounding down to a multiple of 4096 if that is a
|
||||
// negative number, as it appears to be illegal to have a negative offset
|
||||
// in the vgpr, even if adding the immediate offset makes it positive.
|
||||
unsigned Overflow = ImmOffset & ~MaxImm;
|
||||
ImmOffset -= Overflow;
|
||||
if ((int32_t)Overflow < 0) {
|
||||
Overflow += ImmOffset;
|
||||
ImmOffset = 0;
|
||||
}
|
||||
C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
|
||||
if (Overflow) {
|
||||
auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
|
||||
|
|
|
@ -74,8 +74,8 @@ main_body:
|
|||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_load_negative_offset:
|
||||
;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, 0xfffff000, v0
|
||||
;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen offset:4080
|
||||
;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0
|
||||
;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen
|
||||
define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) {
|
||||
main_body:
|
||||
%ofs.1 = add i32 %ofs, -16
|
||||
|
|
|
@ -102,8 +102,8 @@ main_body:
|
|||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_load_negative_offset:
|
||||
;CHECK: v_add_{{[iu]}}32_e32 {{v[0-9]+}}, vcc, 0xfffff000, v0
|
||||
;CHECK: buffer_load_dwordx4 v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen offset:4080
|
||||
;CHECK: v_add_{{[iu]}}32_e32 {{v[0-9]+}}, vcc, -16, v0
|
||||
;CHECK: buffer_load_dwordx4 v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen
|
||||
define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) {
|
||||
main_body:
|
||||
%ofs.1 = add i32 %ofs, -16
|
||||
|
|
Loading…
Reference in New Issue