forked from OSchip/llvm-project
R600/SI: Reimplement isLegalAddressingMode
Now that we sometimes know the address space, this can theoretically do a better job. This needs better test coverage, but this mostly depends on first updating the loop optimizations to provide the address space. llvm-svn: 239053
This commit is contained in:
parent
f72b49bc17
commit
73e06fa262
|
@ -137,7 +137,10 @@ enum AddressSpaces {
|
||||||
CONSTANT_BUFFER_14 = 22,
|
CONSTANT_BUFFER_14 = 22,
|
||||||
CONSTANT_BUFFER_15 = 23,
|
CONSTANT_BUFFER_15 = 23,
|
||||||
ADDRESS_NONE = 24, ///< Address space for unknown memory.
|
ADDRESS_NONE = 24, ///< Address space for unknown memory.
|
||||||
LAST_ADDRESS = ADDRESS_NONE
|
LAST_ADDRESS = ADDRESS_NONE,
|
||||||
|
|
||||||
|
// Some places use this if the address space can't be determined.
|
||||||
|
UNKNOWN_ADDRESS_SPACE = ~0u
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace AMDGPUAS
|
} // namespace AMDGPUAS
|
||||||
|
|
|
@ -250,47 +250,83 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: This really needs an address space argument. The immediate offset
|
|
||||||
// size is different for different sets of memory instruction sets.
|
|
||||||
|
|
||||||
// The single offset DS instructions have a 16-bit unsigned byte offset.
|
|
||||||
//
|
|
||||||
// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can do r +
|
|
||||||
// r + i with addr64. 32-bit has more addressing mode options. Depending on the
|
|
||||||
// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i).
|
|
||||||
//
|
|
||||||
// SMRD instructions have an 8-bit, dword offset.
|
|
||||||
//
|
|
||||||
bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
|
bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
|
||||||
Type *Ty, unsigned AS) const {
|
Type *Ty, unsigned AS) const {
|
||||||
// No global is ever allowed as a base.
|
// No global is ever allowed as a base.
|
||||||
if (AM.BaseGV)
|
if (AM.BaseGV)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Allow a 16-bit unsigned immediate field, since this is what DS instructions
|
switch (AS) {
|
||||||
// use.
|
case AMDGPUAS::GLOBAL_ADDRESS:
|
||||||
if (!isUInt<16>(AM.BaseOffs))
|
case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
|
||||||
return false;
|
case AMDGPUAS::PRIVATE_ADDRESS:
|
||||||
|
case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
|
||||||
|
// MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
|
||||||
|
// additionally can do r + r + i with addr64. 32-bit has more addressing
|
||||||
|
// mode options. Depending on the resource constant, it can also do
|
||||||
|
// (i64 r0) + (i32 r1) * (i14 i).
|
||||||
|
//
|
||||||
|
// SMRD instructions have an 8-bit, dword offset.
|
||||||
|
//
|
||||||
|
// Assume nonuniform access, since the address space isn't enough to know
|
||||||
|
// what instruction we will use, and since we don't know if this is a load
|
||||||
|
// or store and scalar stores are only available on VI.
|
||||||
|
//
|
||||||
|
// We also know if we are doing an extload, we can't do a scalar load.
|
||||||
|
//
|
||||||
|
// Private arrays end up using a scratch buffer most of the time, so also
|
||||||
|
// assume those use MUBUF instructions. Scratch loads / stores are currently
|
||||||
|
// implemented as mubuf instructions with offen bit set, so slightly
|
||||||
|
// different than the normal addr64.
|
||||||
|
if (!isUInt<12>(AM.BaseOffs))
|
||||||
|
return false;
|
||||||
|
|
||||||
// Only support r+r,
|
// FIXME: Since we can split immediate into soffset and immediate offset,
|
||||||
switch (AM.Scale) {
|
// would it make sense to allow any immediate?
|
||||||
case 0: // "r+i" or just "i", depending on HasBaseReg.
|
|
||||||
break;
|
switch (AM.Scale) {
|
||||||
case 1:
|
case 0: // r + i or just i, depending on HasBaseReg.
|
||||||
if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
|
return true;
|
||||||
|
case 1:
|
||||||
|
return true; // We have r + r or r + i.
|
||||||
|
case 2:
|
||||||
|
if (AM.HasBaseReg) {
|
||||||
|
// Reject 2 * r + r.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allow 2 * r as r + r
|
||||||
|
// Or 2 * r + i is allowed as r + r + i.
|
||||||
|
return true;
|
||||||
|
default: // Don't allow n * r
|
||||||
return false;
|
return false;
|
||||||
// Otherwise we have r+r or r+i.
|
}
|
||||||
break;
|
}
|
||||||
case 2:
|
case AMDGPUAS::LOCAL_ADDRESS:
|
||||||
if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
|
case AMDGPUAS::REGION_ADDRESS: {
|
||||||
|
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
|
||||||
|
// field.
|
||||||
|
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
|
||||||
|
// an 8-bit dword offset but we don't know the alignment here.
|
||||||
|
if (!isUInt<16>(AM.BaseOffs))
|
||||||
return false;
|
return false;
|
||||||
// Allow 2*r as r+r.
|
|
||||||
break;
|
if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
|
||||||
default: // Don't allow n * r
|
return true;
|
||||||
|
|
||||||
|
if (AM.Scale == 1 && AM.HasBaseReg)
|
||||||
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
case AMDGPUAS::FLAT_ADDRESS: {
|
||||||
return true;
|
// Flat instructions do not have offsets, and only have the register
|
||||||
|
// address.
|
||||||
|
return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
llvm_unreachable("unhandled address space");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
||||||
|
|
|
@ -0,0 +1,242 @@
|
||||||
|
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=OPT %s
|
||||||
|
; RUN: llc -march=amdgcn -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
|
||||||
|
|
||||||
|
declare i32 @llvm.r600.read.tidig.x() #0
|
||||||
|
|
||||||
|
; OPT-LABEL: @test_sink_global_small_offset_i32(
|
||||||
|
; OPT-NOT: getelementptr i32, i32 addrspace(1)* %in
|
||||||
|
; OPT: br i1
|
||||||
|
; OPT: ptrtoint
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
|
||||||
|
; GCN: {{^}}BB0_2:
|
||||||
|
define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
|
||||||
|
entry:
|
||||||
|
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||||
|
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
|
||||||
|
%tmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %tmp0, label %endif, label %if
|
||||||
|
|
||||||
|
if:
|
||||||
|
%tmp1 = load i32, i32 addrspace(1)* %in.gep
|
||||||
|
br label %endif
|
||||||
|
|
||||||
|
endif:
|
||||||
|
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
|
||||||
|
store i32 %x, i32 addrspace(1)* %out.gep
|
||||||
|
br label %done
|
||||||
|
|
||||||
|
done:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
|
||||||
|
; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
|
||||||
|
; OPT: br i1
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
|
||||||
|
; GCN: s_and_saveexec_b64
|
||||||
|
; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
|
||||||
|
; GCN: {{^}}BB1_2:
|
||||||
|
; GCN: s_or_b64 exec
|
||||||
|
define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
|
||||||
|
entry:
|
||||||
|
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
|
||||||
|
%in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
|
||||||
|
%tmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %tmp0, label %endif, label %if
|
||||||
|
|
||||||
|
if:
|
||||||
|
%tmp1 = load i8, i8 addrspace(1)* %in.gep
|
||||||
|
%tmp2 = sext i8 %tmp1 to i32
|
||||||
|
br label %endif
|
||||||
|
|
||||||
|
endif:
|
||||||
|
%x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
|
||||||
|
store i32 %x, i32 addrspace(1)* %out.gep
|
||||||
|
br label %done
|
||||||
|
|
||||||
|
done:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
|
||||||
|
; GCN: s_and_saveexec_b64
|
||||||
|
; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
|
||||||
|
; GCN: {{^}}BB2_2:
|
||||||
|
; GCN: s_or_b64 exec
|
||||||
|
define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
|
||||||
|
entry:
|
||||||
|
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
|
||||||
|
%in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
|
||||||
|
%tmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %tmp0, label %endif, label %if
|
||||||
|
|
||||||
|
if:
|
||||||
|
%tmp1 = load i8, i8 addrspace(1)* %in.gep
|
||||||
|
%tmp2 = sext i8 %tmp1 to i32
|
||||||
|
br label %endif
|
||||||
|
|
||||||
|
endif:
|
||||||
|
%x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
|
||||||
|
store i32 %x, i32 addrspace(1)* %out.gep
|
||||||
|
br label %done
|
||||||
|
|
||||||
|
done:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
|
||||||
|
; GCN: s_and_saveexec_b64
|
||||||
|
; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
|
||||||
|
; GCN: {{^}}BB3_2:
|
||||||
|
; GCN: s_or_b64 exec
|
||||||
|
define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
|
||||||
|
entry:
|
||||||
|
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
|
||||||
|
%in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
|
||||||
|
%tmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %tmp0, label %endif, label %if
|
||||||
|
|
||||||
|
if:
|
||||||
|
%tmp1 = load i8, i8 addrspace(1)* %in.gep
|
||||||
|
%tmp2 = sext i8 %tmp1 to i32
|
||||||
|
br label %endif
|
||||||
|
|
||||||
|
endif:
|
||||||
|
%x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
|
||||||
|
store i32 %x, i32 addrspace(1)* %out.gep
|
||||||
|
br label %done
|
||||||
|
|
||||||
|
done:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
|
||||||
|
; OPT: getelementptr i32, i32 addrspace(4)* %in
|
||||||
|
; OPT: br i1
|
||||||
|
; OPT-NOT: ptrtoint
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
|
||||||
|
; GCN: flat_load_dword
|
||||||
|
; GCN: {{^}}BB4_2:
|
||||||
|
|
||||||
|
define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
|
||||||
|
entry:
|
||||||
|
%out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
|
||||||
|
%in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
|
||||||
|
%tmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %tmp0, label %endif, label %if
|
||||||
|
|
||||||
|
if:
|
||||||
|
%tmp1 = load i32, i32 addrspace(4)* %in.gep
|
||||||
|
br label %endif
|
||||||
|
|
||||||
|
endif:
|
||||||
|
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
|
||||||
|
store i32 %x, i32 addrspace(4)* %out.gep
|
||||||
|
br label %done
|
||||||
|
|
||||||
|
done:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; OPT-LABEL: @test_sink_scratch_small_offset_i32(
|
||||||
|
; OPT-NOT: getelementptr [512 x i32]
|
||||||
|
; OPT: br i1
|
||||||
|
; OPT: ptrtoint
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
|
||||||
|
; GCN: s_and_saveexec_b64
|
||||||
|
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
|
||||||
|
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
|
||||||
|
; GCN: {{^}}BB5_2:
|
||||||
|
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
|
||||||
|
entry:
|
||||||
|
%alloca = alloca [512 x i32], align 4
|
||||||
|
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
|
||||||
|
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||||
|
%add.arg = add i32 %arg, 8
|
||||||
|
%alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
|
||||||
|
%tmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %tmp0, label %endif, label %if
|
||||||
|
|
||||||
|
if:
|
||||||
|
store volatile i32 123, i32* %alloca.gep
|
||||||
|
%tmp1 = load volatile i32, i32* %alloca.gep
|
||||||
|
br label %endif
|
||||||
|
|
||||||
|
endif:
|
||||||
|
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
|
||||||
|
store i32 %x, i32 addrspace(1)* %out.gep.0
|
||||||
|
%load = load volatile i32, i32* %alloca.gep
|
||||||
|
store i32 %load, i32 addrspace(1)* %out.gep.1
|
||||||
|
br label %done
|
||||||
|
|
||||||
|
done:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
|
||||||
|
; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
|
||||||
|
; OPT: br i1
|
||||||
|
; OPT-NOT: ptrtoint
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
|
||||||
|
; GCN: s_and_saveexec_b64
|
||||||
|
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||||
|
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||||
|
; GCN: {{^}}BB6_2:
|
||||||
|
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
|
||||||
|
entry:
|
||||||
|
%alloca = alloca [512 x i32], align 4
|
||||||
|
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
|
||||||
|
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||||
|
%add.arg = add i32 %arg, 8
|
||||||
|
%alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
|
||||||
|
%tmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %tmp0, label %endif, label %if
|
||||||
|
|
||||||
|
if:
|
||||||
|
store volatile i32 123, i32* %alloca.gep
|
||||||
|
%tmp1 = load volatile i32, i32* %alloca.gep
|
||||||
|
br label %endif
|
||||||
|
|
||||||
|
endif:
|
||||||
|
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
|
||||||
|
store i32 %x, i32 addrspace(1)* %out.gep.0
|
||||||
|
%load = load volatile i32, i32* %alloca.gep
|
||||||
|
store i32 %load, i32 addrspace(1)* %out.gep.1
|
||||||
|
br label %done
|
||||||
|
|
||||||
|
done:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
|
||||||
|
; GCN: s_and_saveexec_b64
|
||||||
|
; GCN: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||||
|
; GCN: {{^}}BB7_2:
|
||||||
|
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
|
||||||
|
entry:
|
||||||
|
%offset.ext = zext i32 %offset to i64
|
||||||
|
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||||
|
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
|
||||||
|
%tmp0 = icmp eq i32 %cond, 0
|
||||||
|
br i1 %tmp0, label %endif, label %if
|
||||||
|
|
||||||
|
if:
|
||||||
|
%tmp1 = load i32, i32 addrspace(1)* %in.gep
|
||||||
|
br label %endif
|
||||||
|
|
||||||
|
endif:
|
||||||
|
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
|
||||||
|
store i32 %x, i32 addrspace(1)* %out.gep
|
||||||
|
br label %done
|
||||||
|
|
||||||
|
done:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind readnone }
|
||||||
|
attributes #1 = { nounwind }
|
Loading…
Reference in New Issue