forked from OSchip/llvm-project
AMDGPU: Fix selection error on constant loads with < 4 byte alignment
llvm-svn: 328818
This commit is contained in:
parent
5706161806
commit
6c041a3cab
|
@ -3464,10 +3464,6 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool isDwordAligned(unsigned Alignment) {
|
|
||||||
return Alignment % 4 == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Custom DAG Lowering Operations
|
// Custom DAG Lowering Operations
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -5385,21 +5381,23 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||||
AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
|
AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
|
||||||
|
|
||||||
unsigned NumElements = MemVT.getVectorNumElements();
|
unsigned NumElements = MemVT.getVectorNumElements();
|
||||||
|
|
||||||
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
|
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
|
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
|
||||||
if (!Op->isDivergent())
|
if (!Op->isDivergent() && Alignment >= 4)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
// Non-uniform loads will be selected to MUBUF instructions, so they
|
// Non-uniform loads will be selected to MUBUF instructions, so they
|
||||||
// have the same legalization requirements as global and private
|
// have the same legalization requirements as global and private
|
||||||
// loads.
|
// loads.
|
||||||
//
|
//
|
||||||
}
|
}
|
||||||
|
|
||||||
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
|
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
|
||||||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
|
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
|
||||||
AS == AMDGPUASI.GLOBAL_ADDRESS) {
|
AS == AMDGPUASI.GLOBAL_ADDRESS) {
|
||||||
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
|
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
|
||||||
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
|
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
|
||||||
isDwordAligned(Alignment))
|
Alignment >= 4)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
// Non-uniform loads will be selected to MUBUF instructions, so they
|
// Non-uniform loads will be selected to MUBUF instructions, so they
|
||||||
// have the same legalization requirements as global and private
|
// have the same legalization requirements as global and private
|
||||||
|
|
|
@ -72,6 +72,18 @@ entry:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; FUNC-LABEL: {{^}}constant_load_v16i16_align2:
|
||||||
|
; GCN-HSA: flat_load_dwordx4
|
||||||
|
; GCN-HSA: flat_load_dwordx4
|
||||||
|
; GCN-HSA: flat_store_dwordx4
|
||||||
|
; GCN-HSA: flat_store_dwordx4
|
||||||
|
define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 {
|
||||||
|
entry:
|
||||||
|
%ld = load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2
|
||||||
|
store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32:
|
; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32:
|
||||||
; GCN-NOHSA: buffer_load_ushort
|
; GCN-NOHSA: buffer_load_ushort
|
||||||
; GCN-NOHSA: buffer_store_dword
|
; GCN-NOHSA: buffer_store_dword
|
||||||
|
|
|
@ -83,6 +83,18 @@ entry:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}global_load_v16i16_align2:
|
||||||
|
; GCN-HSA: flat_load_dwordx4
|
||||||
|
; GCN-HSA: flat_load_dwordx4
|
||||||
|
; GCN-HSA: flat_store_dwordx4
|
||||||
|
; GCN-HSA: flat_store_dwordx4
|
||||||
|
define amdgpu_kernel void @global_load_v16i16_align2(<16 x i16> addrspace(1)* %in, <16 x i16> addrspace(1)* %out) #0 {
|
||||||
|
entry:
|
||||||
|
%ld = load <16 x i16>, <16 x i16> addrspace(1)* %in, align 2
|
||||||
|
store <16 x i16> %ld, <16 x i16> addrspace(1)* %out, align 32
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; FUNC-LABEL: {{^}}global_zextload_i16_to_i32:
|
; FUNC-LABEL: {{^}}global_zextload_i16_to_i32:
|
||||||
; GCN-NOHSA: buffer_load_ushort
|
; GCN-NOHSA: buffer_load_ushort
|
||||||
; GCN-NOHSA: buffer_store_dword
|
; GCN-NOHSA: buffer_store_dword
|
||||||
|
|
Loading…
Reference in New Issue