[NVPTX] allow address space inference for volatile loads/stores.
If a particular target supports volatile memory access operations, we can avoid casting such accesses to the generic address space. Currently this is only enabled in NVPTX, for loads and stores that access the global and shared address spaces.

Differential Revision: https://reviews.llvm.org/D39026

llvm-svn: 316495
commit cb8f6328dc
parent d20442d383
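A minimal sketch (not part of this patch) of how the new TargetTransformInfo hook is consulted when deciding whether a volatile access may be rewritten out of the generic AS. `canRewriteVolatileAccess` is a hypothetical helper name; it condenses, for loads and stores only, the check that isSimplePointerUseValidToReplace performs further down in this diff.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: may this load/store be rewritten to address space
// NewAS directly, even though it might be volatile?
static bool canRewriteVolatileAccess(const TargetTransformInfo &TTI,
                                     Instruction *MemInst, unsigned NewAS) {
  if (auto *LI = dyn_cast<LoadInst>(MemInst))
    return !LI->isVolatile() || TTI.hasVolatileVariant(MemInst, NewAS);
  if (auto *SI = dyn_cast<StoreInst>(MemInst))
    return !SI->isVolatile() || TTI.hasVolatileVariant(MemInst, NewAS);
  return false; // Other memory instructions are out of scope for this sketch.
}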
@@ -489,6 +489,13 @@ public:
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// addrspacecast to generic AS for volatile loads/stores. Default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

@@ -967,6 +974,7 @@ public:
  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
  virtual bool isLegalMaskedGather(Type *DataType) = 0;
  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  virtual bool prefersVectorizedAddressing() = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,

@@ -1192,6 +1200,9 @@ public:
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }

@@ -255,6 +255,8 @@ public:

  bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }

  bool prefersVectorizedAddressing() { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,

@@ -180,6 +180,11 @@ bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                             unsigned AddrSpace) const {
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

@@ -63,6 +63,22 @@ public:

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
    // Volatile loads/stores are only supported for shared and global address
    // spaces, or for generic AS that maps to them.
    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
      return false;

    switch(I->getOpcode()){
    default:
      return false;
    case Instruction::Load:
    case Instruction::Store:
      return true;
    }
  }
};

} // end namespace llvm

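Other targets could opt in by overriding the same hook in their own TTI implementation. A hypothetical sketch follows, shown as a free function for brevity rather than as a member of a <Target>TTIImpl class like NVPTXTTIImpl::hasVolatileVariant above; the address-space number is made up for illustration.

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical target hook: assume volatile loads/stores are only lowered
// natively in this target's address space 1.
static bool myTargetHasVolatileVariant(Instruction *I, unsigned AddrSpace) {
  if (AddrSpace != 1)
    return false;
  return isa<LoadInst>(I) || isa<StoreInst>(I);
}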
@@ -148,10 +148,9 @@ private:
  // Changes the flat address expressions in function F to point to specific
  // address spaces if InferredAddrSpace says so. Postorder is the postorder of
  // all flat expressions in the use-def graph of function F.
  bool
  rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                              const ValueToAddrSpaceMapTy &InferredAddrSpace,
                              Function *F) const;
  bool rewriteWithNewAddressSpaces(
      const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
      const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;

  void appendsFlatAddressExpressionToPostorderStack(
      Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,

@@ -602,7 +601,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) {

  // Changes the address spaces of the flat address expressions who are inferred
  // to point to a specific address space.
  return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
  return rewriteWithNewAddressSpaces(TTI, Postorder, InferredAddrSpace, &F);
}

// Constants need to be tracked through RAUW to handle cases with nested

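Not visible in this hunk: the TargetTransformInfo passed down here has to come from somewhere. A hypothetical legacy-pass skeleton below shows the usual way a function pass obtains it; only the TTI lookup is the point, the pass itself is made up for illustration.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
struct AddrSpaceDemoPass : public FunctionPass {
  static char ID;
  AddrSpaceDemoPass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // TargetTransformInfoWrapperPass provides per-function TTI.
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override {
    const TargetTransformInfo &TTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    // TTI would then be handed to the rewriting logic, much as this patch
    // does with rewriteWithNewAddressSpaces.
    (void)TTI;
    return false; // This demo makes no changes.
  }
};
} // end anonymous namespace

char AddrSpaceDemoPass::ID = 0;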
@@ -710,23 +709,32 @@ Optional<unsigned> InferAddressSpaces::updateAddressSpace(

/// \p returns true if \p U is the pointer operand of a memory instruction with
/// a single pointer operand that can have its address space changed by simply
/// mutating the use to a new value.
static bool isSimplePointerUseValidToReplace(Use &U) {
/// mutating the use to a new value. If the memory instruction is volatile,
/// return true only if the target allows the memory instruction to be volatile
/// in the new address space.
static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
                                             Use &U, unsigned AddrSpace) {
  User *Inst = U.getUser();
  unsigned OpNo = U.getOperandNo();
  bool VolatileIsAllowed = false;
  if (auto *I = dyn_cast<Instruction>(Inst))
    VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace);

  if (auto *LI = dyn_cast<LoadInst>(Inst))
    return OpNo == LoadInst::getPointerOperandIndex() && !LI->isVolatile();
    return OpNo == LoadInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !LI->isVolatile());

  if (auto *SI = dyn_cast<StoreInst>(Inst))
    return OpNo == StoreInst::getPointerOperandIndex() && !SI->isVolatile();
    return OpNo == StoreInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !SI->isVolatile());

  if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
    return OpNo == AtomicRMWInst::getPointerOperandIndex() && !RMW->isVolatile();
    return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !RMW->isVolatile());

  if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
           !CmpX->isVolatile();
           (VolatileIsAllowed || !CmpX->isVolatile());
  }

  return false;

@@ -820,7 +828,7 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
}

bool InferAddressSpaces::rewriteWithNewAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
  // For each address expression to be modified, creates a clone of it with its
  // pointer operands converted to the new address space. Since the pointer

@@ -880,7 +888,8 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
    // to the next instruction.
    I = skipToNextUser(I, E);

    if (isSimplePointerUseValidToReplace(U)) {
    if (isSimplePointerUseValidToReplace(
            TTI, U, V->getType()->getPointerAddressSpace())) {
      // If V is used as the pointer operand of a compatible memory operation,
      // sets the pointer operand to NewV. This replacement does not change
      // the element type, so the resultant load/store is still valid.

@@ -0,0 +1,97 @@
# This test generates all variants of load/store instructions and verifies that
# LLVM generates correct PTX for them.

# RUN: python %s > %t.ll
# RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P64 %t.ll
# RUN: llc < %t.ll -march=nvptx -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P32 %t.ll

from itertools import product
from string import Template

llvm_type_to_ptx_type = {
    "i8": "u8",
    "i16": "u16",
    "i32": "u32",
    "i64": "u64",
    "half": "b16",
    "<2 x half>": "b32",
    "float": "f32",
    "double": "f64"
}

llvm_type_to_ptx_reg = {
    "i8": "r",
    "i16": "r",
    "i32": "r",
    "i64": "rd",
    "half": "h",
    "<2 x half>": "hh",
    "float": "f",
    "double": "fd"
}

addrspace_id = {
    "": 0,
    ".global": 1,
    ".shared": 3,
    ".const": 4,
    ".local": 5,
    ".param": 101
}


def gen_load_tests():
  load_template = """
define ${type} @ld${_volatile}${_space}.${ptx_type}(${type} addrspace(${asid})* %ptr) {
; CHECK_P32: ld${_volatile}${_volatile_as}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%r{{[0-9]+}}]
; CHECK_P64: ld${_volatile}${_volatile_as}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%rd{{[0-9]+}}]
; CHECK: ret
  %p = ${generic_ptr}
  %a = load ${volatile} ${type}, ${type}* %p
  ret ${type} %a
}
"""
  for op_type, volatile, space in product(
      ["i8", "i16", "i32", "i64", "half", "float", "double", "<2 x half>"],
      [True, False],  # volatile
      ["", ".shared", ".global", ".const", ".local", ".param"]):

    # Volatile is only supported for global, shared and generic.
    if volatile and not space in ["", ".global", ".shared"]:
      continue

    # Volatile is only supported for global, shared and generic.
    # All other volatile accesses are done in generic AS.
    if volatile and not space in ["", ".global", ".shared"]:
      volatile_as = ""
    else:
      volatile_as = space

    params = {
        "type": op_type,
        "volatile": "volatile" if volatile else "",
        "_volatile": ".volatile" if volatile else "",
        "_volatile_as": volatile_as,
        "_space": space,
        "ptx_reg": llvm_type_to_ptx_reg[op_type],
        "ptx_type": llvm_type_to_ptx_type[op_type],
        "asid": addrspace_id[space],
    }

    # LLVM does not accept "addrspacecast Type* addrspace(0) to Type*", so we
    # need to avoid it for generic pointer tests.
    if space:
      generic_ptr_template = ("addrspacecast ${type} addrspace(${asid})* %ptr "
                              "to ${type}*")
    else:
      generic_ptr_template = "select i1 true, ${type}* %ptr, ${type}* %ptr"
    params["generic_ptr"] = Template(generic_ptr_template).substitute(params)

    print(Template(load_template).substitute(params))


def main():
  gen_load_tests()


main()