From fa67bdbde06f330a7d0c1ced7023951f6a238b8e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 22 Feb 2016 21:04:16 +0000
Subject: [PATCH] AMDGPU/R600: Implement allowsMisalignedMemoryAccess

This avoids some test regressions in a future commit when unaligned
operations are expanded when they have custom lowering.

llvm-svn: 261570
---
 llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 20 ++++++++++++++++++++
 llvm/lib/Target/AMDGPU/R600ISelLowering.h   |  4 ++++
 llvm/test/CodeGen/AMDGPU/store.ll           | 17 ++++++-----------
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index f87ddeefe0ab..be5b30d28c8b 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1784,6 +1784,26 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
   return VT.changeVectorElementTypeToInteger();
 }
 
+bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+                                                        unsigned AddrSpace,
+                                                        unsigned Align,
+                                                        bool *IsFast) const {
+  if (IsFast)
+    *IsFast = false;
+
+  if (!VT.isSimple() || VT == MVT::Other)
+    return false;
+
+  if (VT.bitsLT(MVT::i32))
+    return false;
+
+  // TODO: This is a rough estimate.
+  if (IsFast)
+    *IsFast = true;
+
+  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
+}
+
 static SDValue CompactSwizzlableVector(
   SelectionDAG &DAG, SDValue VectorEntry,
   DenseMap<unsigned, unsigned> &RemapSwizzle) {
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index d0ac11b56257..fc68f9045c03 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -41,6 +41,10 @@ public:
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                          EVT VT) const override;
 
+  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
+                                      unsigned Align,
+                                      bool *IsFast) const override;
+
 private:
   unsigned Gen;
   /// Each OpenCL kernel has nine implicit parameters that are stored in the
diff --git a/llvm/test/CodeGen/AMDGPU/store.ll b/llvm/test/CodeGen/AMDGPU/store.ll
index d22f43fa05ef..e4034093f898 100644
--- a/llvm/test/CodeGen/AMDGPU/store.ll
+++ b/llvm/test/CodeGen/AMDGPU/store.ll
@@ -358,20 +358,13 @@ entry:
   ret void
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
 ; When i128 was a legal type this program generated cannot select errors:
 
 ; FUNC-LABEL: {{^}}"i128-const-store":
-; FIXME: We should be able to to this with one store instruction
-; EG: STORE_RAW
-; EG: STORE_RAW
-; EG: STORE_RAW
-; EG: STORE_RAW
-; CM: STORE_DWORD
-; CM: STORE_DWORD
-; CM: STORE_DWORD
-; CM: STORE_DWORD
+; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 1
+
+; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}, T{{[0-9]+}}.X
+
 ; SI: buffer_store_dwordx4
 define void @i128-const-store(i32 addrspace(1)* %out) {
 entry:
@@ -384,3 +377,5 @@ entry:
   store i32 2, i32 addrspace(1)* %arrayidx6, align 4
   ret void
 }
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
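
Editor's note (not part of the patch): the sketch below is a hypothetical caller showing how lowering code generally consults a target's allowsMisalignedMemoryAccesses hook before deciding to expand an unaligned access. It assumes only the signature added in this patch and the 2016-era TargetLowering header path; the helper name shouldExpandUnalignedAccess is illustrative, not an LLVM API.

#include "llvm/Target/TargetLowering.h" // header location in this 2016-era tree

// Hypothetical helper: returns true when an unaligned access of type VT in
// AddrSpace with the given alignment should be expanded into smaller,
// aligned operations rather than emitted directly.
static bool shouldExpandUnalignedAccess(const llvm::TargetLowering &TLI,
                                        llvm::EVT VT, unsigned AddrSpace,
                                        unsigned Align) {
  bool Fast = false;
  // Keep the misaligned access only if the target reports it as both
  // supported and fast; otherwise expand it.
  return !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Align, &Fast) ||
         !Fast;
}

With the R600 implementation above, an access wider than 32 bits at 4-byte alignment is reported as allowed and fast, while anything narrower than 32 bits is always rejected.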