AMDGPU: Fix selection error on constant loads with < 4 byte alignment

llvm-svn: 328818
This commit is contained in:
Matt Arsenault 2018-03-29 19:59:28 +00:00
parent 5706161806
commit 6c041a3cab
3 changed files with 28 additions and 6 deletions

View File

@ -3464,10 +3464,6 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false; return false;
} }
/// Reports whether \p Alignment is an exact multiple of 4.
static bool isDwordAligned(unsigned Alignment) {
  // An unsigned value is divisible by 4 exactly when its two lowest bits are
  // clear; note that an alignment of 0 therefore also qualifies.
  const unsigned LowBitsMask = 3u;
  return (Alignment & LowBitsMask) == 0;
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations // Custom DAG Lowering Operations
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -5385,21 +5381,23 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS; AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements(); unsigned NumElements = MemVT.getVectorNumElements();
if (AS == AMDGPUASI.CONSTANT_ADDRESS || if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) { AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
if (!Op->isDivergent()) if (!Op->isDivergent() && Alignment >= 4)
return SDValue(); return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they // Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private // have the same legalization requirements as global and private
// loads. // loads.
// //
} }
if (AS == AMDGPUASI.CONSTANT_ADDRESS || if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT || AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUASI.GLOBAL_ADDRESS) { AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() && if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) && !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
isDwordAligned(Alignment)) Alignment >= 4)
return SDValue(); return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they // Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private // have the same legalization requirements as global and private

View File

@ -72,6 +72,18 @@ entry:
ret void ret void
} }
; Regression test for under-aligned vector loads: reads a <16 x i16> through an
; addrspace(4) pointer with only 2-byte alignment and writes it to an undef
; addrspace(1) pointer. The HSA checks below expect the access to come out as
; two dwordx4 flat loads followed by two dwordx4 flat stores.
; FUNC-LABEL: {{^}}constant_load_v16i16_align2:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 {
entry:
; The load is deliberately align 2 (below 4 bytes); the store keeps align 32.
%ld = load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2
store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32
ret void
}
; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32: ; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32:
; GCN-NOHSA: buffer_load_ushort ; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_store_dword ; GCN-NOHSA: buffer_store_dword

View File

@ -83,6 +83,18 @@ entry:
ret void ret void
} }
; Regression test for under-aligned vector loads: reads a <16 x i16> from the
; addrspace(1) pointer %in with only 2-byte alignment and writes it to %out.
; The HSA checks below expect two dwordx4 flat loads followed by two dwordx4
; flat stores.
; NOTE(review): the label line here uses the GCN prefix while the adjacent
; tests use FUNC; confirm against the RUN lines that this prefix is enabled.
; GCN-LABEL: {{^}}global_load_v16i16_align2:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
define amdgpu_kernel void @global_load_v16i16_align2(<16 x i16> addrspace(1)* %in, <16 x i16> addrspace(1)* %out) #0 {
entry:
; The load is deliberately align 2 (below 4 bytes); the store keeps align 32.
%ld = load <16 x i16>, <16 x i16> addrspace(1)* %in, align 2
store <16 x i16> %ld, <16 x i16> addrspace(1)* %out, align 32
ret void
}
; FUNC-LABEL: {{^}}global_zextload_i16_to_i32: ; FUNC-LABEL: {{^}}global_zextload_i16_to_i32:
; GCN-NOHSA: buffer_load_ushort ; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_store_dword ; GCN-NOHSA: buffer_store_dword