forked from OSchip/llvm-project
[AMDGPU] Add address space based alias analysis pass
This is direct port of HSAILAliasAnalysis pass, just cleaned for style and renamed. Differential Revision: https://reviews.llvm.org/D31103 llvm-svn: 298172
This commit is contained in:
parent
0f5063c754
commit
8e45acfc38
|
@ -119,6 +119,9 @@ extern char &SIDebuggerInsertNopsID;
|
||||||
void initializeSIInsertWaitsPass(PassRegistry&);
|
void initializeSIInsertWaitsPass(PassRegistry&);
|
||||||
extern char &SIInsertWaitsID;
|
extern char &SIInsertWaitsID;
|
||||||
|
|
||||||
|
ImmutablePass *createAMDGPUAAWrapperPass();
|
||||||
|
void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
|
||||||
|
|
||||||
Target &getTheAMDGPUTarget();
|
Target &getTheAMDGPUTarget();
|
||||||
Target &getTheGCNTarget();
|
Target &getTheGCNTarget();
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,117 @@
|
||||||
|
//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
/// \file
|
||||||
|
/// This is the AMDGPU address space based alias analysis pass.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPU.h"
|
||||||
|
#include "AMDGPUAliasAnalysis.h"
|
||||||
|
#include "llvm/Analysis/AliasAnalysis.h"
|
||||||
|
#include "llvm/Analysis/ValueTracking.h"
|
||||||
|
#include "llvm/Analysis/Passes.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
#include "llvm/IR/Function.h"
|
||||||
|
#include "llvm/IR/Module.h"
|
||||||
|
#include "llvm/Pass.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
#define DEBUG_TYPE "amdgpu-aa"
|
||||||
|
|
||||||
|
// Register the legacy wrapper pass under the name "amdgpu-aa".
|
||||||
|
char AMDGPUAAWrapperPass::ID = 0;
|
||||||
|
INITIALIZE_PASS(AMDGPUAAWrapperPass, "amdgpu-aa",
|
||||||
|
"AMDGPU Address space based Alias Analysis", false, true)
|
||||||
|
|
||||||
|
/// Factory used to add the address space based alias analysis to a legacy
/// pass pipeline: allocates a new AMDGPUAAWrapperPass and hands it back as
/// an ImmutablePass.
ImmutablePass *llvm::createAMDGPUAAWrapperPass() {
  ImmutablePass *Wrapper = new AMDGPUAAWrapperPass();
  return Wrapper;
}
|
||||||
|
|
||||||
|
void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||||
|
AU.setPreservesAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Answer an alias query from the address spaces of the two pointers.
///
/// \param LocA first memory location of the query.
/// \param LocB second memory location of the query.
/// \returns NoAlias when the address space table (or the argument type check
/// below) rules out aliasing; otherwise whatever the next alias analysis in
/// the chain answers.
AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
                                  const MemoryLocation &LocB) {
  // Rows and columns are indexed by the AMDGPUAS::AddressSpaces enumerators
  // PRIVATE_ADDRESS ... FLAT_ADDRESS, in the order shown in the header row.
  static const AliasResult ASAliasRules[5][5] = {
    /*             Private    Global    Constant  Group     Flat */
    /* Private  */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias},
    /* Global   */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias},
    /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
    /* Group    */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias},
    /* Flat     */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}
  };

  // The range check below is only a valid bounds check while the table has a
  // row for every address space up to and including FLAT_ADDRESS.
  static_assert(AMDGPUAS::AddressSpaces::FLAT_ADDRESS <
                    sizeof(ASAliasRules) / sizeof(ASAliasRules[0]),
                "ASAliasRules must cover every address space up to FLAT");

  const unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
  const unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();

  // Address spaces above FLAT_ADDRESS have no entry in the table. This
  // analysis has nothing to say about them, so defer to the next alias
  // analysis rather than aborting the whole compilation.
  if (asA > AMDGPUAS::AddressSpaces::FLAT_ADDRESS ||
      asB > AMDGPUAS::AddressSpaces::FLAT_ADDRESS)
    return AAResultBase::alias(LocA, LocB);

  if (ASAliasRules[asA][asB] == NoAlias)
    return NoAlias;

  // Two function arguments of which exactly one points to a vector type are
  // reported as not aliasing.
  // NOTE(review): heuristic carried over from the original port; confirm it
  // is sound for every front end that targets this backend.
  if (isa<Argument>(LocA.Ptr) && isa<Argument>(LocB.Ptr)) {
    Type *T1 = cast<PointerType>(LocA.Ptr->getType())->getElementType();
    Type *T2 = cast<PointerType>(LocB.Ptr->getType())->getElementType();

    if (T1->isVectorTy() != T2->isVectorTy())
      return NoAlias;
  }

  // Forward the query to the next alias analysis.
  return AAResultBase::alias(LocA, LocB);
}
|
||||||
|
|
||||||
|
bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
|
||||||
|
bool OrLocal) {
|
||||||
|
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
|
||||||
|
|
||||||
|
if (Base->getType()->getPointerAddressSpace() ==
|
||||||
|
AMDGPUAS::AddressSpaces::CONSTANT_ADDRESS) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
|
||||||
|
if (GV->isConstant())
|
||||||
|
return true;
|
||||||
|
} else if (const Argument *Arg = dyn_cast<Argument>(Base)) {
|
||||||
|
const Function *F = Arg->getParent();
|
||||||
|
|
||||||
|
// Only assume constant memory for arguments on kernels.
|
||||||
|
switch (F->getCallingConv()) {
|
||||||
|
default:
|
||||||
|
return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
|
||||||
|
case CallingConv::AMDGPU_VS:
|
||||||
|
case CallingConv::AMDGPU_GS:
|
||||||
|
case CallingConv::AMDGPU_PS:
|
||||||
|
case CallingConv::AMDGPU_CS:
|
||||||
|
case CallingConv::AMDGPU_KERNEL:
|
||||||
|
case CallingConv::SPIR_KERNEL:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned ArgNo = Arg->getArgNo();
|
||||||
|
/* On an argument, ReadOnly attribute indicates that the function does
|
||||||
|
not write through this pointer argument, even though it may write
|
||||||
|
to the memory that the pointer points to.
|
||||||
|
On an argument, ReadNone attribute indicates that the function does
|
||||||
|
not dereference that pointer argument, even though it may read or write
|
||||||
|
the memory that the pointer points to if accessed through other pointers.
|
||||||
|
*/
|
||||||
|
if (F->getAttributes().hasAttribute(ArgNo + 1, Attribute::NoAlias) &&
|
||||||
|
(F->getAttributes().hasAttribute(ArgNo + 1, Attribute::ReadNone) ||
|
||||||
|
F->getAttributes().hasAttribute(ArgNo + 1, Attribute::ReadOnly))) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
|
||||||
|
}
|
|
@ -0,0 +1,86 @@
|
||||||
|
//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
/// \file
|
||||||
|
/// This is the AMDGPU address space based alias analysis pass.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
|
||||||
|
#define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
|
||||||
|
|
||||||
|
#include "llvm/Analysis/AliasAnalysis.h"
|
||||||
|
#include "llvm/IR/Function.h"
|
||||||
|
#include "llvm/IR/Module.h"
|
||||||
|
#include "llvm/Pass.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
/// A simple AA result that uses TBAA metadata to answer queries.
|
||||||
|
class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
|
||||||
|
friend AAResultBase<AMDGPUAAResult>;
|
||||||
|
|
||||||
|
const DataLayout &DL;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {}
|
||||||
|
AMDGPUAAResult(AMDGPUAAResult &&Arg)
|
||||||
|
: AAResultBase(std::move(Arg)), DL(Arg.DL){}
|
||||||
|
|
||||||
|
/// Handle invalidation events from the new pass manager.
|
||||||
|
///
|
||||||
|
/// By definition, this result is stateless and so remains valid.
|
||||||
|
bool invalidate(Function &, const PreservedAnalyses &) { return false; }
|
||||||
|
|
||||||
|
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
|
||||||
|
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool Aliases(const MDNode *A, const MDNode *B) const;
|
||||||
|
bool PathAliases(const MDNode *A, const MDNode *B) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Analysis pass providing a never-invalidated alias analysis result.
|
||||||
|
class AMDGPUAA : public AnalysisInfoMixin<AMDGPUAA> {
|
||||||
|
friend AnalysisInfoMixin<AMDGPUAA>;
|
||||||
|
static char PassID;
|
||||||
|
|
||||||
|
public:
|
||||||
|
typedef AMDGPUAAResult Result;
|
||||||
|
|
||||||
|
AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
|
||||||
|
return AMDGPUAAResult(F.getParent()->getDataLayout());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Legacy wrapper pass to provide the AMDGPUAAResult object.
|
||||||
|
class AMDGPUAAWrapperPass : public ImmutablePass {
|
||||||
|
std::unique_ptr<AMDGPUAAResult> Result;
|
||||||
|
|
||||||
|
public:
|
||||||
|
static char ID;
|
||||||
|
|
||||||
|
AMDGPUAAWrapperPass() : ImmutablePass(ID) {
|
||||||
|
initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
|
||||||
|
}
|
||||||
|
|
||||||
|
AMDGPUAAResult &getResult() { return *Result; }
|
||||||
|
const AMDGPUAAResult &getResult() const { return *Result; }
|
||||||
|
|
||||||
|
bool doInitialization(Module &M) override {
|
||||||
|
Result.reset(new AMDGPUAAResult(M.getDataLayout()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool doFinalization(Module &M) override {
|
||||||
|
Result.reset();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif // LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
|
|
@ -15,6 +15,7 @@
|
||||||
|
|
||||||
#include "AMDGPUTargetMachine.h"
|
#include "AMDGPUTargetMachine.h"
|
||||||
#include "AMDGPU.h"
|
#include "AMDGPU.h"
|
||||||
|
#include "AMDGPUAliasAnalysis.h"
|
||||||
#include "AMDGPUCallLowering.h"
|
#include "AMDGPUCallLowering.h"
|
||||||
#include "AMDGPUInstructionSelector.h"
|
#include "AMDGPUInstructionSelector.h"
|
||||||
#include "AMDGPULegalizerInfo.h"
|
#include "AMDGPULegalizerInfo.h"
|
||||||
|
@ -93,6 +94,11 @@ static cl::opt<bool> InternalizeSymbols(
|
||||||
cl::init(false),
|
cl::init(false),
|
||||||
cl::Hidden);
|
cl::Hidden);
|
||||||
|
|
||||||
|
// Enable address space based alias analysis
|
||||||
|
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
|
||||||
|
cl::desc("Enable AMDGPU Alias Analysis"),
|
||||||
|
cl::init(true));
|
||||||
|
|
||||||
extern "C" void LLVMInitializeAMDGPUTarget() {
|
extern "C" void LLVMInitializeAMDGPUTarget() {
|
||||||
// Register the target
|
// Register the target
|
||||||
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
|
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
|
||||||
|
@ -119,6 +125,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
|
||||||
initializeSIInsertSkipsPass(*PR);
|
initializeSIInsertSkipsPass(*PR);
|
||||||
initializeSIDebuggerInsertNopsPass(*PR);
|
initializeSIDebuggerInsertNopsPass(*PR);
|
||||||
initializeSIOptimizeExecMaskingPass(*PR);
|
initializeSIOptimizeExecMaskingPass(*PR);
|
||||||
|
initializeAMDGPUAAWrapperPassPass(*PR);
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
|
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
|
||||||
|
@ -507,6 +514,15 @@ void AMDGPUPassConfig::addIRPasses() {
|
||||||
addPass(createSROAPass());
|
addPass(createSROAPass());
|
||||||
|
|
||||||
addStraightLineScalarOptimizationPasses();
|
addStraightLineScalarOptimizationPasses();
|
||||||
|
|
||||||
|
if (EnableAMDGPUAliasAnalysis) {
|
||||||
|
addPass(createAMDGPUAAWrapperPass());
|
||||||
|
addPass(createExternalAAWrapperPass([](Pass &P, Function &,
|
||||||
|
AAResults &AAR) {
|
||||||
|
if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
|
||||||
|
AAR.addAAResult(WrapperPass->getResult());
|
||||||
|
}));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPassConfig::addIRPasses();
|
TargetPassConfig::addIRPasses();
|
||||||
|
|
|
@ -36,6 +36,7 @@ endif()
|
||||||
|
|
||||||
add_llvm_target(AMDGPUCodeGen
|
add_llvm_target(AMDGPUCodeGen
|
||||||
AMDILCFGStructurizer.cpp
|
AMDILCFGStructurizer.cpp
|
||||||
|
AMDGPUAliasAnalysis.cpp
|
||||||
AMDGPUAlwaysInlinePass.cpp
|
AMDGPUAlwaysInlinePass.cpp
|
||||||
AMDGPUAnnotateKernelFeatures.cpp
|
AMDGPUAnnotateKernelFeatures.cpp
|
||||||
AMDGPUAnnotateUniformValues.cpp
|
AMDGPUAnnotateUniformValues.cpp
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
|
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
|
||||||
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE
|
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE
|
||||||
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
|
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
|
||||||
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA
|
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA
|
||||||
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
|
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
|
||||||
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
|
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
|
||||||
|
|
||||||
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -check-prefix=HSAOPT -check-prefix=OPT %s
|
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -check-prefix=HSAOPT -check-prefix=OPT %s
|
||||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -check-prefix=NOHSAOPT -check-prefix=OPT %s
|
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -check-prefix=NOHSAOPT -check-prefix=OPT %s
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=gfx901 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=gfx901 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=fiji -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=VI -check-prefix=GFX89 %s
|
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=fiji -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=VI -check-prefix=GFX89 %s
|
||||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=CI %s
|
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=CI %s
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}s_insertelement_v2i16_0:
|
; GCN-LABEL: {{^}}s_insertelement_v2i16_0:
|
||||||
; GCN: s_load_dword [[VEC:s[0-9]+]]
|
; GCN: s_load_dword [[VEC:s[0-9]+]]
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s
|
; RUN: llc -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s
|
||||||
|
|
||||||
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||||
declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt < %s | FileCheck -check-prefix=GCN %s
|
; RUN: llc -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt < %s | FileCheck -check-prefix=GCN %s
|
||||||
|
|
||||||
@sPrivateStorage = internal addrspace(3) global [256 x [8 x <4 x i64>]] undef
|
@sPrivateStorage = internal addrspace(3) global [256 x [8 x <4 x i64>]] undef
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,80 @@
|
||||||
|
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
|
||||||
|
; CHECK-DAG: flat_load_dwordx4
|
||||||
|
; CHECK-DAG: flat_load_dwordx4
|
||||||
|
; CHECK-DAG: flat_load_dwordx4
|
||||||
|
; CHECK-DAG: flat_load_dwordx4
|
||||||
|
; CHECK-DAG: ds_write2_b32
|
||||||
|
; CHECK-DAG: ds_write2_b32
|
||||||
|
; CHECK-DAG: ds_write2_b32
|
||||||
|
; CHECK-DAG: ds_write2_b32
|
||||||
|
; CHECK-DAG: ds_write2_b32
|
||||||
|
; CHECK-DAG: ds_write2_b32
|
||||||
|
; CHECK-DAG: ds_write2_b32
|
||||||
|
; CHECK-DAG: ds_write2_b32
|
||||||
|
|
||||||
|
define void @vectorize_global_local(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(3)* nocapture %arg1) {
|
||||||
|
bb:
|
||||||
|
%tmp = load i32, i32 addrspace(1)* %arg, align 4
|
||||||
|
store i32 %tmp, i32 addrspace(3)* %arg1, align 4
|
||||||
|
%tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
|
||||||
|
%tmp3 = load i32, i32 addrspace(1)* %tmp2, align 4
|
||||||
|
%tmp4 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 1
|
||||||
|
store i32 %tmp3, i32 addrspace(3)* %tmp4, align 4
|
||||||
|
%tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
|
||||||
|
%tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
|
||||||
|
%tmp7 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 2
|
||||||
|
store i32 %tmp6, i32 addrspace(3)* %tmp7, align 4
|
||||||
|
%tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
|
||||||
|
%tmp9 = load i32, i32 addrspace(1)* %tmp8, align 4
|
||||||
|
%tmp10 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 3
|
||||||
|
store i32 %tmp9, i32 addrspace(3)* %tmp10, align 4
|
||||||
|
%tmp11 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4
|
||||||
|
%tmp12 = load i32, i32 addrspace(1)* %tmp11, align 4
|
||||||
|
%tmp13 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 4
|
||||||
|
store i32 %tmp12, i32 addrspace(3)* %tmp13, align 4
|
||||||
|
%tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 5
|
||||||
|
%tmp15 = load i32, i32 addrspace(1)* %tmp14, align 4
|
||||||
|
%tmp16 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 5
|
||||||
|
store i32 %tmp15, i32 addrspace(3)* %tmp16, align 4
|
||||||
|
%tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 6
|
||||||
|
%tmp18 = load i32, i32 addrspace(1)* %tmp17, align 4
|
||||||
|
%tmp19 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 6
|
||||||
|
store i32 %tmp18, i32 addrspace(3)* %tmp19, align 4
|
||||||
|
%tmp20 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 7
|
||||||
|
%tmp21 = load i32, i32 addrspace(1)* %tmp20, align 4
|
||||||
|
%tmp22 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 7
|
||||||
|
store i32 %tmp21, i32 addrspace(3)* %tmp22, align 4
|
||||||
|
%tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8
|
||||||
|
%tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4
|
||||||
|
%tmp25 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 8
|
||||||
|
store i32 %tmp24, i32 addrspace(3)* %tmp25, align 4
|
||||||
|
%tmp26 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 9
|
||||||
|
%tmp27 = load i32, i32 addrspace(1)* %tmp26, align 4
|
||||||
|
%tmp28 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 9
|
||||||
|
store i32 %tmp27, i32 addrspace(3)* %tmp28, align 4
|
||||||
|
%tmp29 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 10
|
||||||
|
%tmp30 = load i32, i32 addrspace(1)* %tmp29, align 4
|
||||||
|
%tmp31 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 10
|
||||||
|
store i32 %tmp30, i32 addrspace(3)* %tmp31, align 4
|
||||||
|
%tmp32 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 11
|
||||||
|
%tmp33 = load i32, i32 addrspace(1)* %tmp32, align 4
|
||||||
|
%tmp34 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 11
|
||||||
|
store i32 %tmp33, i32 addrspace(3)* %tmp34, align 4
|
||||||
|
%tmp35 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 12
|
||||||
|
%tmp36 = load i32, i32 addrspace(1)* %tmp35, align 4
|
||||||
|
%tmp37 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 12
|
||||||
|
store i32 %tmp36, i32 addrspace(3)* %tmp37, align 4
|
||||||
|
%tmp38 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 13
|
||||||
|
%tmp39 = load i32, i32 addrspace(1)* %tmp38, align 4
|
||||||
|
%tmp40 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 13
|
||||||
|
store i32 %tmp39, i32 addrspace(3)* %tmp40, align 4
|
||||||
|
%tmp41 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 14
|
||||||
|
%tmp42 = load i32, i32 addrspace(1)* %tmp41, align 4
|
||||||
|
%tmp43 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 14
|
||||||
|
store i32 %tmp42, i32 addrspace(3)* %tmp43, align 4
|
||||||
|
%tmp44 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 15
|
||||||
|
%tmp45 = load i32, i32 addrspace(1)* %tmp44, align 4
|
||||||
|
%tmp46 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 15
|
||||||
|
store i32 %tmp45, i32 addrspace(3)* %tmp46, align 4
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue