Add alignment value to allowsUnalignedMemoryAccesses

Rename to allowsMisalignedMemoryAccesses. On R600, 8- and 16-byte accesses are mostly OK with 4-byte alignment and don't need to be split into multiple accesses. Vector loads with an alignment of the element type are not uncommon in OpenCL code.

llvm-svn: 214055
Parent: b3cd5a1037
Commit: 6f2a526101
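As a hedged illustration of the renamed hook (not part of this commit), a backend override might use the added Align parameter roughly as in the sketch below. MyTargetLowering is a hypothetical target class, and the 4-byte threshold simply mirrors the R600 behaviour described in the message above.

// Sketch only: the class name and the alignment policy are illustrative assumptions.
bool MyTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      unsigned Align,
                                                      bool *Fast) const {
  // Accept 8- and 16-byte accesses as long as they are at least 4-byte
  // aligned; anything else is left to the legalizer, which expands the
  // access into smaller, naturally aligned operations.
  if (VT.isSimple() && VT.getStoreSize() >= 8 && Align % 4 == 0) {
    if (Fast)
      *Fast = true;
    return true;
  }
  return false;
}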
@@ -782,9 +782,10 @@ public:
   /// copy/move/set is converted to a sequence of store operations. Its use
   /// helps to ensure that such replacements don't generate code that causes an
   /// alignment error (trap) on the target machine.
-  virtual bool allowsUnalignedMemoryAccesses(EVT,
-                                             unsigned AddrSpace = 0,
-                                             bool * /*Fast*/ = nullptr) const {
+  virtual bool allowsMisalignedMemoryAccesses(EVT,
+                                              unsigned AddrSpace = 0,
+                                              unsigned Align = 1,
+                                              bool * /*Fast*/ = nullptr) const {
     return false;
   }
@@ -724,10 +724,11 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
       // If this is an unaligned store and the target doesn't support it,
       // expand it.
       unsigned AS = ST->getAddressSpace();
-      if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) {
+      unsigned Align = ST->getAlignment();
+      if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
         Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
         unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
-        if (ST->getAlignment() < ABIAlignment)
+        if (Align < ABIAlignment)
           ExpandUnalignedStore(cast<StoreSDNode>(Node),
                                DAG, TLI, this);
       }
@@ -835,12 +836,13 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
     default: llvm_unreachable("This action is not supported yet!");
     case TargetLowering::Legal: {
       unsigned AS = ST->getAddressSpace();
+      unsigned Align = ST->getAlignment();
       // If this is an unaligned store and the target doesn't support it,
       // expand it.
-      if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) {
+      if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
         Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
         unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
-        if (ST->getAlignment() < ABIAlignment)
+        if (Align < ABIAlignment)
           ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
       }
       break;
@@ -886,13 +888,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
     default: llvm_unreachable("This action is not supported yet!");
     case TargetLowering::Legal: {
       unsigned AS = LD->getAddressSpace();
+      unsigned Align = LD->getAlignment();
       // If this is an unaligned load and the target doesn't support it,
       // expand it.
-      if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) {
+      if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) {
        Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
        unsigned ABIAlignment =
          TLI.getDataLayout()->getABITypeAlignment(Ty);
-       if (LD->getAlignment() < ABIAlignment){
+       if (Align < ABIAlignment){
          ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
        }
      }
@@ -1077,12 +1080,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
         // it, expand it.
         EVT MemVT = LD->getMemoryVT();
         unsigned AS = LD->getAddressSpace();
-        if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) {
+        unsigned Align = LD->getAlignment();
+        if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) {
           Type *Ty =
             LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
           unsigned ABIAlignment =
             TLI.getDataLayout()->getABITypeAlignment(Ty);
-          if (LD->getAlignment() < ABIAlignment){
+          if (Align < ABIAlignment){
            ExpandUnalignedLoad(cast<LoadSDNode>(Node),
                                DAG, TLI, Value, Chain);
          }
@@ -3810,7 +3810,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
   if (VT == MVT::Other) {
     unsigned AS = 0;
     if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) ||
-        TLI.allowsUnalignedMemoryAccesses(VT, AS)) {
+        TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) {
       VT = TLI.getPointerTy();
     } else {
       switch (DstAlign & 7) {
@@ -3870,7 +3870,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
       unsigned AS = 0;
       if (NumMemOps && AllowOverlap &&
           VTSize >= 8 && NewVTSize < Size &&
-          TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast)
+          TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign, &Fast) && Fast)
         VTSize = Size;
       else {
         VT = NewVT;
@@ -5725,9 +5725,10 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
     unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
     // TODO: Handle 5 byte compare as 4-byte + 1 byte.
     // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
     // TODO: Check alignment of src and dest ptrs.
     if (!TLI->isTypeLegal(LoadVT) ||
-        !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) ||
-        !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS))
+        !TLI->allowsMisalignedMemoryAccesses(LoadVT, SrcAS) ||
+        !TLI->allowsMisalignedMemoryAccesses(LoadVT, DstAS))
       ActuallyDoIt = false;
   }
@@ -6229,7 +6229,7 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
       !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                        Attribute::NoImplicitFloat) &&
       (memOpAlign(SrcAlign, DstAlign, 16) ||
-       (allowsUnalignedMemoryAccesses(MVT::f128, 0, &Fast) && Fast)))
+       (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
     return MVT::f128;

   return Size >= 8 ? MVT::i64 : MVT::i32;
@@ -212,10 +212,11 @@ public:

   MVT getScalarShiftAmountTy(EVT LHSTy) const override;

-  /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+  /// allowsMisalignedMemoryAccesses - Returns true if the target allows
   /// unaligned memory accesses. of the specified type.
-  bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
-                                     bool *Fast = nullptr) const override {
+  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
+                                      unsigned Align = 1,
+                                      bool *Fast = nullptr) const override {
     if (RequireStrictAlign)
       return false;
     // FIXME: True for Cyclone, but not necessary others.
|
@ -9696,8 +9696,10 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
|
|||
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
|
||||
}
|
||||
|
||||
bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned,
|
||||
bool *Fast) const {
|
||||
bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
||||
unsigned,
|
||||
unsigned,
|
||||
bool *Fast) const {
|
||||
// The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus
|
||||
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
|
||||
|
||||
|
@@ -9751,11 +9753,12 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
     bool Fast;
     if (Size >= 16 &&
         (memOpAlign(SrcAlign, DstAlign, 16) ||
-         (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) {
+         (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
       return MVT::v2f64;
     } else if (Size >= 8 &&
                (memOpAlign(SrcAlign, DstAlign, 8) ||
-                (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) {
+                (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
+                 Fast))) {
       return MVT::f64;
     }
   }
@@ -266,11 +266,12 @@ namespace llvm {

     bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;

-    /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+    /// allowsMisalignedMemoryAccesses - Returns true if the target allows
     /// unaligned memory accesses of the specified type. Returns whether it
     /// is "fast" by reference in the second argument.
-    bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
-                                       bool *Fast) const override;
+    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
+                                        unsigned Align,
+                                        bool *Fast) const override;

     EVT getOptimalMemOpType(uint64_t Size,
                             unsigned DstAlign, unsigned SrcAlign,
@@ -188,7 +188,7 @@ protected:

   /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
   /// accesses for some types. For details, see
-  /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+  /// ARMTargetLowering::allowsMisalignedMemoryAccesses().
   bool AllowsUnalignedMem;

   /// RestrictIT - If true, the subtarget disallows generation of deprecated IT
@@ -157,9 +157,10 @@ llvm::createMips16TargetLowering(MipsTargetMachine &TM,
 }

 bool
-Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
-                                                    unsigned,
-                                                    bool *Fast) const {
+Mips16TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+                                                     unsigned,
+                                                     unsigned,
+                                                     bool *Fast) const {
   return false;
 }
@@ -22,8 +22,9 @@ namespace llvm {
     explicit Mips16TargetLowering(MipsTargetMachine &TM,
                                   const MipsSubtarget &STI);

-    bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
-                                       bool *Fast) const override;
+    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
+                                        unsigned Align,
+                                        bool *Fast) const override;

     MachineBasicBlock *
     EmitInstrWithCustomInserter(MachineInstr *MI,
@@ -329,9 +329,10 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
 }

 bool
-MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
-                                                    unsigned,
-                                                    bool *Fast) const {
+MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+                                                     unsigned,
+                                                     unsigned,
+                                                     bool *Fast) const {
   MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

   if (Subtarget.systemSupportsUnalignedAccess()) {
@@ -31,8 +31,9 @@ namespace llvm {
     void addMSAFloatType(MVT::SimpleValueType Ty,
                          const TargetRegisterClass *RC);

-    bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS = 0,
-                                       bool *Fast = nullptr) const override;
+    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS = 0,
+                                        unsigned Align = 1,
+                                        bool *Fast = nullptr) const override;

     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -9214,9 +9214,10 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
   return isInt<16>(Imm) || isUInt<16>(Imm);
 }

-bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
-                                                      unsigned,
-                                                      bool *Fast) const {
+bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+                                                       unsigned,
+                                                       unsigned,
+                                                       bool *Fast) const {
   if (DisablePPCUnaligned)
     return false;
@@ -494,9 +494,10 @@ namespace llvm {

     /// Is unaligned memory access allowed for the given type, and is it fast
     /// relative to software emulation.
-    bool allowsUnalignedMemoryAccesses(EVT VT,
-                                       unsigned AddrSpace,
-                                       bool *Fast = nullptr) const override;
+    bool allowsMisalignedMemoryAccesses(EVT VT,
+                                        unsigned AddrSpace,
+                                        unsigned Align = 1,
+                                        bool *Fast = nullptr) const override;

     /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
     /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -240,15 +240,13 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
 // TargetLowering queries
 //===----------------------------------------------------------------------===//

-bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
-                                                      unsigned AddrSpace,
-                                                      bool *IsFast) const {
+bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+                                                       unsigned AddrSpace,
+                                                       unsigned Align,
+                                                       bool *IsFast) const {
   if (IsFast)
     *IsFast = false;

-  // XXX: This depends on the address space and also we may want to revist
-  // the alignment values we specify in the DataLayout.
-
   // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
   // which isn't a simple VT.
   if (!VT.isSimple() || VT == MVT::Other)
@@ -261,8 +259,12 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
   // XXX - The only mention I see of this in the ISA manual is for LDS direct
   // reads the "byte address and must be dword aligned". Is it also true for the
   // normal loads and stores?
-  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS)
-    return false;
+  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) {
+    // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
+    // aligned, 8 byte access in a single operation using ds_read2/write2_b32
+    // with adjacent offsets.
+    return Align % 4 == 0;
+  }

   // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
   // byte-address are ignored, thus forcing Dword alignment.
@@ -59,8 +59,9 @@ class SITargetLowering : public AMDGPUTargetLowering {

 public:
   SITargetLowering(TargetMachine &tm);
-  bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
-                                     bool *IsFast) const override;
+  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
+                                      unsigned Align,
+                                      bool *IsFast) const override;

   TargetLoweringBase::LegalizeTypeAction
   getPreferredVectorAction(EVT VT) const override;
@@ -339,9 +339,10 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
   return Imm.isZero() || Imm.isNegZero();
 }

-bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
-                                                          unsigned,
-                                                          bool *Fast) const {
+bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+                                                           unsigned,
+                                                           unsigned,
+                                                           bool *Fast) const {
   // Unaligned accesses should never be slower than the expanded version.
   // We check specifically for aligned accesses in the few cases where
   // they are required.
@@ -208,8 +208,9 @@ public:
   bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
   bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
   bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
-  bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
-                                     bool *Fast) const override;
+  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
+                                      unsigned Align,
+                                      bool *Fast) const override;
   bool isTruncateFree(Type *, Type *) const override;
   bool isTruncateFree(EVT, EVT) const override;
   const char *getTargetNodeName(unsigned Opcode) const override;
@@ -1775,9 +1775,10 @@ bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
 }

 bool
-X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
-                                                 unsigned,
-                                                 bool *Fast) const {
+X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+                                                  unsigned,
+                                                  unsigned,
+                                                  bool *Fast) const {
   if (Fast)
     *Fast = Subtarget->isUnalignedMemAccessFast();
   return true;
@@ -565,10 +565,10 @@ namespace llvm {
     /// legal as the hook is used before type legalization.
     bool isSafeMemOpType(MVT VT) const override;

-    /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+    /// allowsMisalignedMemoryAccesses - Returns true if the target allows
     /// unaligned memory accesses. of the specified type. Returns whether it
     /// is "fast" by reference in the second argument.
-    bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
+    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                        bool *Fast) const override;

     /// LowerOperation - Provide custom lowering hooks for some operations.
@@ -426,7 +426,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
          "Unexpected extension type");
   assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
-  if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
+  if (allowsMisalignedMemoryAccesses(LD->getMemoryVT(),
+                                     LD->getAddressSpace(),
+                                     LD->getAlignment()))
     return SDValue();

   unsigned ABIAlignment = getDataLayout()->
@@ -504,7 +506,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
   StoreSDNode *ST = cast<StoreSDNode>(Op);
   assert(!ST->isTruncatingStore() && "Unexpected store type");
   assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
-  if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+  if (allowsMisalignedMemoryAccesses(ST->getMemoryVT(),
+                                     ST->getAddressSpace(),
+                                     ST->getAlignment())) {
     return SDValue();
   }
   unsigned ABIAlignment = getDataLayout()->
@@ -1803,7 +1807,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
     // Replace unaligned store of unaligned load with memmove.
     StoreSDNode *ST = cast<StoreSDNode>(N);
     if (!DCI.isBeforeLegalize() ||
-        allowsUnalignedMemoryAccesses(ST->getMemoryVT()) ||
+        allowsMisalignedMemoryAccesses(ST->getMemoryVT(),
+                                       ST->getAddressSpace(),
+                                       ST->getAlignment()) ||
         ST->isVolatile() || ST->isIndexed()) {
       break;
     }
@@ -31,3 +31,20 @@ define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> ad
   store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
   ret void
 }
+
+; FIXME: This should use ds_read2_b32
+; SI-LABEL: @load_lds_i64_align_4
+; SI: DS_READ_B64
+; SI: S_ENDPGM
+define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+  %val = load i64 addrspace(3)* %in, align 4
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FIXME: Need to fix this case.
+; define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+;   %val = load i64 addrspace(3)* %in, align 1
+;   store i64 %val, i64 addrspace(1)* %out, align 8
+;   ret void
+; }