forked from OSchip/llvm-project
AMDGPU: Expand unaligned accesses early
Due to visitation-order problems during legalization, in the case of an unaligned copy the legalized DAG fails to eliminate the extra pack/unpack instructions introduced by the expansion of both unaligned halves.

llvm-svn: 274397
This commit is contained in:
parent
53547d95ca
commit
8af47a09e5
|
@ -2182,14 +2182,11 @@ static bool hasVolatileUser(SDNode *Val) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUTargetLowering::shouldCombineMemoryType(const MemSDNode *M) const {
|
||||
EVT VT = M->getMemoryVT();
|
||||
|
||||
bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const {
|
||||
// i32 vectors are the canonical memory type.
|
||||
if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT))
|
||||
return false;
|
||||
|
||||
|
||||
if (!VT.isByteSized())
|
||||
return false;
|
||||
|
||||
|
@ -2201,15 +2198,6 @@ bool AMDGPUTargetLowering::shouldCombineMemoryType(const MemSDNode *M) const {
|
|||
if (Size == 3 || (Size > 4 && (Size % 4 != 0)))
|
||||
return false;
|
||||
|
||||
unsigned Align = M->getAlignment();
|
||||
if (Align < Size) {
|
||||
bool IsFast;
|
||||
if (!allowsMisalignedMemoryAccesses(VT, M->getAddressSpace(), Align, &IsFast) ||
|
||||
!IsFast) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2224,12 +2212,32 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
|
|||
if (LN->isVolatile() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN))
|
||||
return SDValue();
|
||||
|
||||
if (!shouldCombineMemoryType(LN))
|
||||
return SDValue();
|
||||
|
||||
SDLoc SL(N);
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
EVT VT = LN->getMemoryVT();
|
||||
|
||||
unsigned Size = VT.getStoreSize();
|
||||
unsigned Align = LN->getAlignment();
|
||||
if (Align < Size && isTypeLegal(VT)) {
|
||||
bool IsFast;
|
||||
unsigned AS = LN->getAddressSpace();
|
||||
|
||||
// Expand unaligned loads earlier than legalization. Due to visitation order
|
||||
// problems during legalization, the emitted instructions to pack and unpack
|
||||
// the bytes again are not eliminated in the case of an unaligned copy.
|
||||
if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
|
||||
SDValue Ops[2];
|
||||
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
|
||||
return DAG.getMergeValues(Ops, SDLoc(N));
|
||||
}
|
||||
|
||||
if (!IsFast)
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
if (!shouldCombineMemoryType(VT))
|
||||
return SDValue();
|
||||
|
||||
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
|
||||
|
||||
SDValue NewLoad
|
||||
|
@ -2252,15 +2260,34 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
|
|||
if (SN->isVolatile() || !ISD::isNormalStore(SN))
|
||||
return SDValue();
|
||||
|
||||
if (!shouldCombineMemoryType(SN))
|
||||
return SDValue();
|
||||
|
||||
SDValue Val = SN->getValue();
|
||||
EVT VT = SN->getMemoryVT();
|
||||
unsigned Size = VT.getStoreSize();
|
||||
|
||||
SDLoc SL(N);
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
unsigned Align = SN->getAlignment();
|
||||
if (Align < Size && isTypeLegal(VT)) {
|
||||
bool IsFast;
|
||||
unsigned AS = SN->getAddressSpace();
|
||||
|
||||
// Expand unaligned stores earlier than legalization. Due to visitation
|
||||
// order problems during legalization, the emitted instructions to pack and
|
||||
// unpack the bytes again are not eliminated in the case of an unaligned
|
||||
// copy.
|
||||
if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast))
|
||||
return expandUnalignedStore(SN, DAG);
|
||||
|
||||
if (!IsFast)
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
if (!shouldCombineMemoryType(VT))
|
||||
return SDValue();
|
||||
|
||||
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
|
||||
SDValue Val = SN->getValue();
|
||||
|
||||
//DCI.AddToWorklist(Val.getNode());
|
||||
|
||||
bool OtherUses = !Val.hasOneUse();
|
||||
SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val);
|
||||
|
|
|
@ -66,7 +66,7 @@ protected:
|
|||
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
protected:
|
||||
bool shouldCombineMemoryType(const MemSDNode *M) const;
|
||||
bool shouldCombineMemoryType(EVT VT) const;
|
||||
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
|
|
|
@ -59,20 +59,20 @@ define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8>
|
|||
; This should not be adding instructions to shift into the correct
|
||||
; position in the word for the component.
|
||||
|
||||
; FIXME: Packing bytes
|
||||
; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
|
||||
; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
|
||||
; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
|
||||
; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
|
||||
; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
|
||||
; SI-NOT: v_lshlrev_b32
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-DAG: v_lshlrev_b32
|
||||
; SI-DAG: v_or_b32
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]],
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}},
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}},
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]]
|
||||
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG0]]
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG3]]
|
||||
|
||||
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
|
||||
; SI: buffer_store_dwordx4
|
||||
define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
|
||||
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
|
||||
%cvt = uitofp <4 x i8> %load to <4 x float>
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
; XFAIL: *
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s
|
||||
; XUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
|
||||
;
|
||||
; EG-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
|
||||
; EG-NOT: BFE
|
||||
; EG: ADD_INT
|
||||
; EG: LSHL
|
||||
; EG: ASHR [[RES]]
|
||||
; EG: LSHL
|
||||
; EG: ASHR [[RES]]
|
||||
; EG: LSHR {{\*?}} [[ADDR]]
|
||||
|
||||
; Works with the align 2 removed
|
||||
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
|
||||
%c = add <2 x i32> %a, %b
|
||||
%x = shl <2 x i32> %c, <i32 6, i32 6>
|
||||
%y = ashr <2 x i32> %x, <i32 7, i32 7>
|
||||
store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
|
||||
ret void
|
||||
}
|
|
@ -268,7 +268,7 @@ define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out
|
|||
%c = add <2 x i32> %a, %b
|
||||
%x = shl <2 x i32> %c, <i32 6, i32 6>
|
||||
%y = ashr <2 x i32> %x, <i32 7, i32 7>
|
||||
store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
|
||||
store <2 x i32> %y, <2 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}unaligned_load_store_i16_global:
|
||||
; FUNC-LABEL: {{^}}global_unaligned_load_store_i16:
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
|
@ -25,22 +25,25 @@ define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(
|
|||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
|
||||
define void @global_unaligned_load_store_i16(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
|
||||
%v = load i16, i16 addrspace(1)* %p, align 1
|
||||
store i16 %v, i16 addrspace(1)* %r, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}local_unaligned_load_store_i32:
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_write_b8
|
||||
; GCN: ds_write_b8
|
||||
; GCN: ds_write_b8
|
||||
; GCN: ds_write_b8
|
||||
; GCN: s_endpgm
|
||||
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI-NOT: v_or
|
||||
; SI-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: s_endpgm
|
||||
define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
|
||||
%v = load i32, i32 addrspace(3)* %p, align 1
|
||||
store i32 %v, i32 addrspace(3)* %r, align 1
|
||||
|
@ -98,141 +101,149 @@ define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)*
|
|||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Unnecessary packing and unpacking of bytes.
|
||||
; FUNC-LABEL: {{^}}local_unaligned_load_store_i64:
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; GCN: ds_write_b8
|
||||
; GCN: s_endpgm
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI: s_endpgm
|
||||
define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
|
||||
%v = load i64, i64 addrspace(3)* %p, align 1
|
||||
store i64 %v, i64 addrspace(3)* %r, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}local_unaligned_load_store_v2i32:
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; GCN: ds_read_u8
|
||||
; SI-LABEL: {{^}}local_unaligned_load_store_v2i32:
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN: ds_write_b8
|
||||
; XGCN-NOT: v_or_b32
|
||||
; XGCN-NOT: v_lshl
|
||||
; GCN: ds_write_b8
|
||||
; GCN: s_endpgm
|
||||
; SI: ds_write_b8
|
||||
; SI-NOT: v_or_b32
|
||||
; SI-NOT: v_lshl
|
||||
; SI: ds_write_b8
|
||||
; SI: s_endpgm
|
||||
define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
|
||||
%v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
|
||||
store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}unaligned_load_store_i64_global:
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; SI-LABEL: {{^}}global_align2_load_store_i64:
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; SI-NOT: v_or_
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; XGCN-NOT: v_or_
|
||||
; XGCN-NOT: v_lshl
|
||||
; SI: buffer_load_ushort
|
||||
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; SI-NOT: v_or_
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
|
||||
; SI: buffer_load_ushort
|
||||
|
||||
; SI-NOT: v_or_
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; SI: buffer_store_short
|
||||
; SI: buffer_store_short
|
||||
; SI: buffer_store_short
|
||||
; SI: buffer_store_short
|
||||
define void @global_align2_load_store_i64(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
|
||||
%v = load i64, i64 addrspace(1)* %p, align 2
|
||||
store i64 %v, i64 addrspace(1)* %r, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}unaligned_load_store_i64_global:
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
|
||||
; SI-NOT: v_or_
|
||||
; SI-NOT: v_lshl
|
||||
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
|
||||
%v = load i64, i64 addrspace(1)* %p, align 1
|
||||
store i64 %v, i64 addrspace(1)* %r, align 1
|
||||
ret void
|
||||
|
@ -285,76 +296,41 @@ define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}global_unaligned_load_store_v4i32:
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; GCN-NOHSA: buffer_load_ubyte
|
||||
; SI-LABEL: {{^}}global_unaligned_load_store_v4i32
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
; GCN-NOHSA: buffer_store_byte
|
||||
|
||||
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
; GCN-HSA: flat_load_ubyte
|
||||
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
; GCN-HSA: flat_store_byte
|
||||
define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) #0 {
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
|
||||
%v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
|
||||
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
|
||||
ret void
|
||||
|
|
Loading…
Reference in New Issue