[NVPTX] allow address space inference for volatile loads/stores.

If a particular target supports volatile memory access operations, we can
avoid casting such accesses to the generic AS. Currently this is only enabled
in NVPTX for loads and stores that access the global & shared AS.

Differential Revision: https://reviews.llvm.org/D39026

llvm-svn: 316495
Artem Belevich 2017-10-24 20:31:44 +00:00
parent d20442d383
commit cb8f6328dc
6 changed files with 153 additions and 13 deletions
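To make the intent concrete, consider a hypothetical NVPTX input (not taken from the patch) where a volatile load goes through a generic pointer that is known to come from the global AS, addrspace(1):

; Hypothetical example. Before this change InferAddressSpaces had to leave the
; volatile load on the generic pointer, so it lowered to a generic
; ld.volatile.u32. Now that the target reports a volatile variant for global
; and shared accesses, the pass may rewrite the load to use %ptr directly,
; which lowers to ld.volatile.global.u32 (see the test added below).
define i32 @volatile_load_from_global(i32 addrspace(1)* %ptr) {
  %p = addrspacecast i32 addrspace(1)* %ptr to i32*
  %v = load volatile i32, i32* %p
  ret i32 %v
}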

View File

@@ -489,6 +489,13 @@ public:
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// addrspacecast to generic AS for volatile loads/stores. Default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;
@@ -967,6 +974,7 @@ public:
  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
  virtual bool isLegalMaskedGather(Type *DataType) = 0;
  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  virtual bool prefersVectorizedAddressing() = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
@@ -1192,6 +1200,9 @@ public:
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }

View File

@@ -255,6 +255,8 @@ public:
  bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }

  bool prefersVectorizedAddressing() { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,

View File

@@ -180,6 +180,11 @@ bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                             unsigned AddrSpace) const {
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

View File

@@ -63,6 +63,22 @@ public:
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
    // Volatile loads/stores are only supported for shared and global address
    // spaces, or for generic AS that maps to them.
    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
      return false;

    switch (I->getOpcode()) {
    default:
      return false;
    case Instruction::Load:
    case Instruction::Store:
      return true;
    }
  }
};

} // end namespace llvm
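As an illustration of what this hook enables, here is a hypothetical store through a generic pointer that is known to point into shared memory, addrspace(3). With hasVolatileVariant returning true, InferAddressSpaces may rewrite the store to use %p directly, and it should then lower to st.volatile.shared.u32 rather than a generic st.volatile.u32:

; Hypothetical example (function name and shape chosen for illustration only).
define void @volatile_store_to_shared(i32 addrspace(3)* %p, i32 %v) {
  %g = addrspacecast i32 addrspace(3)* %p to i32*
  store volatile i32 %v, i32* %g
  ret void
}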

View File

@@ -148,10 +148,9 @@ private:
  // Changes the flat address expressions in function F to point to specific
  // address spaces if InferredAddrSpace says so. Postorder is the postorder of
  // all flat expressions in the use-def graph of function F.
  bool
  rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                              const ValueToAddrSpaceMapTy &InferredAddrSpace,
                              Function *F) const;
  bool rewriteWithNewAddressSpaces(
      const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
      const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;

  void appendsFlatAddressExpressionToPostorderStack(
      Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
@@ -602,7 +601,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
  // Changes the address spaces of the flat address expressions who are inferred
  // to point to a specific address space.
  return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
  return rewriteWithNewAddressSpaces(TTI, Postorder, InferredAddrSpace, &F);
}

// Constants need to be tracked through RAUW to handle cases with nested
@@ -710,23 +709,32 @@ Optional<unsigned> InferAddressSpaces::updateAddressSpace(
/// \p returns true if \p U is the pointer operand of a memory instruction with
/// a single pointer operand that can have its address space changed by simply
/// mutating the use to a new value.
/// mutating the use to a new value. If the memory instruction is volatile,
/// return true only if the target allows the memory instruction to be volatile
/// in the new address space.
static bool isSimplePointerUseValidToReplace(Use &U) {
static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
                                             Use &U, unsigned AddrSpace) {
  User *Inst = U.getUser();
  unsigned OpNo = U.getOperandNo();
  bool VolatileIsAllowed = false;
  if (auto *I = dyn_cast<Instruction>(Inst))
    VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace);

  if (auto *LI = dyn_cast<LoadInst>(Inst))
    return OpNo == LoadInst::getPointerOperandIndex() && !LI->isVolatile();
    return OpNo == LoadInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !LI->isVolatile());

  if (auto *SI = dyn_cast<StoreInst>(Inst))
    return OpNo == StoreInst::getPointerOperandIndex() && !SI->isVolatile();
    return OpNo == StoreInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !SI->isVolatile());

  if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
    return OpNo == AtomicRMWInst::getPointerOperandIndex() && !RMW->isVolatile();
    return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !RMW->isVolatile());

  if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
           !CmpX->isVolatile();
           (VolatileIsAllowed || !CmpX->isVolatile());
  }

  return false;
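Atomics go through the same check, but because the NVPTX hook above only returns true for plain loads and stores, a volatile atomic operation still blocks the rewrite. A hypothetical example:

; Hypothetical example: the volatile atomicrmw keeps its generic pointer
; operand, since hasVolatileVariant() returns false for the AtomicRMW opcode.
define i32 @volatile_atomic_rmw(i32 addrspace(1)* %p) {
  %g = addrspacecast i32 addrspace(1)* %p to i32*
  %old = atomicrmw volatile add i32* %g, i32 1 seq_cst
  ret i32 %old
}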
@@ -820,7 +828,7 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
}

bool InferAddressSpaces::rewriteWithNewAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
  // For each address expression to be modified, creates a clone of it with its
  // pointer operands converted to the new address space. Since the pointer
@@ -880,7 +888,8 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
      // to the next instruction.
      I = skipToNextUser(I, E);

      if (isSimplePointerUseValidToReplace(U)) {
      if (isSimplePointerUseValidToReplace(
              TTI, U, V->getType()->getPointerAddressSpace())) {
        // If V is used as the pointer operand of a compatible memory operation,
        // sets the pointer operand to NewV. This replacement does not change
        // the element type, so the resultant load/store is still valid.

View File

@@ -0,0 +1,97 @@
# This test generates all variants of load/store instructions and verifies that
# LLVM generates correct PTX for them.
# RUN: python %s > %t.ll
# RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P64 %t.ll
# RUN: llc < %t.ll -march=nvptx -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P32 %t.ll
from itertools import product
from string import Template

llvm_type_to_ptx_type = {
    "i8": "u8",
    "i16": "u16",
    "i32": "u32",
    "i64": "u64",
    "half": "b16",
    "<2 x half>": "b32",
    "float": "f32",
    "double": "f64"
}

llvm_type_to_ptx_reg = {
    "i8": "r",
    "i16": "r",
    "i32": "r",
    "i64": "rd",
    "half": "h",
    "<2 x half>": "hh",
    "float": "f",
    "double": "fd"
}

addrspace_id = {
    "": 0,
    ".global": 1,
    ".shared": 3,
    ".const": 4,
    ".local": 5,
    ".param": 101
}
def gen_load_tests():
  load_template = """
define ${type} @ld${_volatile}${_space}.${ptx_type}(${type} addrspace(${asid})* %ptr) {
; CHECK_P32: ld${_volatile}${_volatile_as}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%r{{[0-9]+}}]
; CHECK_P64: ld${_volatile}${_volatile_as}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%rd{{[0-9]+}}]
; CHECK: ret
  %p = ${generic_ptr}
  %a = load ${volatile} ${type}, ${type}* %p
  ret ${type} %a
}
"""
  for op_type, volatile, space in product(
      ["i8", "i16", "i32", "i64", "half", "float", "double", "<2 x half>"],
      [True, False],  # volatile
      ["", ".shared", ".global", ".const", ".local", ".param"]):
    # Volatile is only supported for global, shared and generic.
    if volatile and not space in ["", ".global", ".shared"]:
      continue

    # Volatile is only supported for global, shared and generic.
    # All other volatile accesses are done in generic AS.
    if volatile and not space in ["", ".global", ".shared"]:
      volatile_as = ""
    else:
      volatile_as = space

    params = {
        "type": op_type,
        "volatile": "volatile" if volatile else "",
        "_volatile": ".volatile" if volatile else "",
        "_volatile_as": volatile_as,
        "_space": space,
        "ptx_reg": llvm_type_to_ptx_reg[op_type],
        "ptx_type": llvm_type_to_ptx_type[op_type],
        "asid": addrspace_id[space],
    }

    # LLVM does not accept "addrspacecast Type* addrspace(0) to Type*", so we
    # need to avoid it for generic pointer tests.
    if space:
      generic_ptr_template = ("addrspacecast ${type} addrspace(${asid})* %ptr "
                              "to ${type}*")
    else:
      generic_ptr_template = "select i1 true, ${type}* %ptr, ${type}* %ptr"
    params["generic_ptr"] = Template(generic_ptr_template).substitute(params)

    print(Template(load_template).substitute(params))


def main():
  gen_load_tests()


main()
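For reference, this is what the script prints for one combination (float, volatile, .shared), reconstructed by hand from the template above, so treat it as illustrative rather than verbatim output:

define float @ld.volatile.shared.f32(float addrspace(3)* %ptr) {
; CHECK_P32: ld.volatile.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; CHECK_P64: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; CHECK: ret
  %p = addrspacecast float addrspace(3)* %ptr to float*
  %a = load volatile float, float* %p
  ret float %a
}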