[SystemZ, LoopStrengthReduce]
This patch makes LSR generate better code for SystemZ in the cases of memory
intrinsics, Load->Store pairs or comparison of immediate with memory.

In order to achieve this, the following common code changes were made:

* New TTI hook: LSRWithInstrQueries(), which defaults to false. Controls if
  LSR should do instruction-based addressing evaluations by calling
  isLegalAddressingMode() with the Instruction pointers.
* In LoopStrengthReduce: handle address operands of memset, memmove and
  memcpy as address uses, and call isFoldableMemAccessOffset() for any
  LSRUse::Address, not just loads or stores.

SystemZ changes:

* isLSRCostLess() implemented with Insns first, and without ImmCost.
* New function supportedAddressingMode() that is a helper for TTI methods
  looking at Instructions passed via pointers.

Review: Ulrich Weigand, Quentin Colombet
https://reviews.llvm.org/D35262
https://reviews.llvm.org/D35049

llvm-svn: 308729
commit 024e319489 (parent 7d2b15a7ab)
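For illustration only (not part of this commit): a target that wants these
instruction-based LSR queries overrides the two hooks roughly as sketched
below. The class names MyTargetTTIImpl and MyTargetLowering are placeholders,
and the 12-bit displacement check is just an example constraint; the
authoritative SystemZ implementation is in the SystemZ hunks further down.

    // TTI side: ask LSR to pass the fixup instruction to the addressing-mode query.
    bool MyTargetTTIImpl::LSRWithInstrQueries() { return true; }

    // Lowering side: I may now be inspected (it can still be null) to refine the
    // answer, e.g. disallowing an index register or a long displacement when the
    // address feeds a memory intrinsic such as memcpy.
    bool MyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                 const AddrMode &AM, Type *Ty,
                                                 unsigned AS, Instruction *I) const {
      if (I && isa<MemIntrinsic>(I))
        return AM.BaseGV == nullptr && AM.BaseOffs >= 0 &&
               isUInt<12>(AM.BaseOffs) && AM.Scale == 0;
      // Default: base register plus (optionally) one index register.
      return AM.Scale == 0 || AM.Scale == 1;
    }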
@@ -420,10 +420,12 @@ public:
   /// this target, for a load/store of the specified type.
   /// The type may be VoidTy, in which case only return true if the addressing
   /// mode is legal for a load/store of any legal type.
+  /// If target returns true in LSRWithInstrQueries(), I may be valid.
   /// TODO: Handle pre/postinc as well.
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace = 0) const;
+                             unsigned AddrSpace = 0,
+                             Instruction *I = nullptr) const;
 
   /// \brief Return true if LSR cost of C1 is lower than C1.
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
@@ -453,6 +455,12 @@ public:
                            bool HasBaseReg, int64_t Scale,
                            unsigned AddrSpace = 0) const;
 
+  /// \brief Return true if the loop strength reduce pass should make
+  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
+  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
+  /// immediate offset and no index register.
+  bool LSRWithInstrQueries() const;
+
   /// \brief Return true if target supports the load / store
   /// instruction with the given Offset on the form reg + Offset. It
   /// may be that Offset is too big for a certain type (register
@@ -882,7 +890,8 @@ public:
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                      int64_t BaseOffset, bool HasBaseReg,
                                      int64_t Scale,
-                                     unsigned AddrSpace) = 0;
+                                     unsigned AddrSpace,
+                                     Instruction *I) = 0;
   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                              TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
@@ -893,6 +902,7 @@ public:
   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                    int64_t BaseOffset, bool HasBaseReg,
                                    int64_t Scale, unsigned AddrSpace) = 0;
+  virtual bool LSRWithInstrQueries() = 0;
   virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) = 0;
   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
   virtual bool isProfitableToHoist(Instruction *I) = 0;
@@ -1085,9 +1095,10 @@ public:
   }
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace) override {
+                             unsigned AddrSpace,
+                             Instruction *I) override {
     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
-                                      Scale, AddrSpace);
+                                      Scale, AddrSpace, I);
   }
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                      TargetTransformInfo::LSRCost &C2) override {
@@ -1114,6 +1125,9 @@ public:
     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace);
   }
+  bool LSRWithInstrQueries() override {
+    return Impl.LSRWithInstrQueries();
+  }
   bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) override {
     return Impl.isFoldableMemAccessOffset(I, Offset);
   }
@@ -230,7 +230,7 @@ public:
 
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace) {
+                             unsigned AddrSpace, Instruction *I = nullptr) {
     // Guess that only reg and reg+reg addressing is allowed. This heuristic is
     // taken from the implementation of LSR.
     return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
@@ -262,6 +262,8 @@ public:
     return -1;
   }
 
+  bool LSRWithInstrQueries() { return false; }
+
   bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) { return true; }
 
   bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }
@@ -110,13 +110,13 @@ public:
 
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace) {
+                             unsigned AddrSpace, Instruction *I = nullptr) {
     TargetLoweringBase::AddrMode AM;
     AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
-    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace);
+    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
   }
 
   bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
@@ -1887,7 +1887,8 @@ public:
   ///
   /// TODO: Remove default argument
   virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                                     Type *Ty, unsigned AddrSpace) const;
+                                     Type *Ty, unsigned AddrSpace,
+                                     Instruction *I = nullptr) const;
 
   /// \brief Return the cost of the scaling factor used in the addressing mode
   /// represented by AM for this target, for a load/store of the specified type.
@@ -144,9 +144,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                 int64_t BaseOffset,
                                                 bool HasBaseReg,
                                                 int64_t Scale,
-                                                unsigned AddrSpace) const {
+                                                unsigned AddrSpace,
+                                                Instruction *I) const {
   return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
-                                        Scale, AddrSpace);
+                                        Scale, AddrSpace, I);
 }
 
 bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
@@ -184,6 +185,10 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
   return Cost;
 }
 
+bool TargetTransformInfo::LSRWithInstrQueries() const {
+  return TTIImpl->LSRWithInstrQueries();
+}
+
 bool TargetTransformInfo::isFoldableMemAccessOffset(Instruction *I,
                                                     int64_t Offset) const {
   return TTIImpl->isFoldableMemAccessOffset(I, Offset);
@@ -1481,7 +1481,7 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
 /// by AM is legal for this target, for a load/store of the specified type.
 bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
-                                               unsigned AS) const {
+                                               unsigned AS, Instruction *I) const {
   // The default implementation of this implements a conservative RISCy, r+r and
   // r+i addr mode.
 
@@ -7818,7 +7818,7 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
 /// by AM is legal for this target, for a load/store of the specified type.
 bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                   const AddrMode &AM, Type *Ty,
-                                                  unsigned AS) const {
+                                                  unsigned AS, Instruction *I) const {
   // AArch64 has five basic addressing modes:
   //  reg
   //  reg + 9-bit signed offset
@@ -338,7 +338,8 @@ public:
   /// Return true if the addressing mode represented by AM is legal for this
   /// target, for a load/store of the specified type.
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   /// \brief Return the cost of the scaling factor used in the addressing
   /// mode represented by AM for this target, for a load/store
@@ -624,7 +624,7 @@ bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
 
 bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
-                                             unsigned AS) const {
+                                             unsigned AS, Instruction *I) const {
   // No global is ever allowed as a base.
   if (AM.BaseGV)
     return false;
@@ -151,7 +151,8 @@ public:
                              Type *&/*AccessTy*/) const override;
 
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   bool canMergeStoresTo(unsigned AS, EVT MemVT,
                         const SelectionDAG &DAG) const override;
@@ -12380,7 +12380,7 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
 /// by AM is legal for this target, for a load/store of the specified type.
 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS, Instruction *I) const {
   EVT VT = getValueType(DL, Ty, true);
   if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
     return false;
@@ -317,7 +317,8 @@ class InstrItineraryData;
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// getScalingFactorCost - Return the cost of the scaling used in
     /// addressing mode represented by AM.
@@ -724,7 +724,7 @@ void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
 /// by AM is legal for this target, for a load/store of the specified type.
 bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS, Instruction *I) const {
   int64_t Offs = AM.BaseOffs;
 
   // Allow absolute addresses.
@@ -83,7 +83,8 @@ public:
                       SelectionDAG &DAG) const override;
 
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                  ISD::MemIndexedMode &AM,
@@ -2993,7 +2993,7 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
 /// AM is legal for this target, for a load/store of the specified type.
 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                   const AddrMode &AM, Type *Ty,
-                                                  unsigned AS) const {
+                                                  unsigned AS, Instruction *I) const {
   if (Ty->isSized()) {
     // When LSR detects uses of the same base address to access different
     // types (e.g. unions), it will assume a conservative type for these
@@ -231,7 +231,8 @@ namespace HexagonISD {
     /// mode is legal for a load/store of any legal type.
     /// TODO: Handle pre/postinc as well.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
     /// Return true if folding a constant offset with the given GlobalAddress
     /// is legal. It is frequently not legal in PIC relocation models.
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
@@ -3991,7 +3991,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 
 bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
-                                               unsigned AS) const {
+                                               unsigned AS, Instruction *I) const {
   // No global is ever allowed as a base.
   if (AM.BaseGV)
     return false;
@@ -625,7 +625,8 @@ namespace llvm {
     }
 
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
@@ -3805,7 +3805,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
 /// (CodeGenPrepare.cpp)
 bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                 const AddrMode &AM, Type *Ty,
-                                                unsigned AS) const {
+                                                unsigned AS, Instruction *I) const {
   // AddrMode - This represents an addressing mode of:
   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
   //
@@ -456,7 +456,8 @@ public:
   /// reduction (LoopStrengthReduce.cpp) and memory optimization for
   /// address mode (CodeGenPrepare.cpp)
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
     // Truncating 64-bit to 32-bit is free in SASS.
@@ -12810,7 +12810,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 // by AM is legal for this target, for a load/store of the specified type.
 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS, Instruction *I) const {
   // PPC does not allow r+i addressing modes for vectors!
   if (Ty->isVectorTy() && AM.BaseOffs != 0)
     return false;
@@ -727,7 +727,8 @@ namespace llvm {
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// isLegalICmpImmediate - Return true if the specified immediate is legal
     /// icmp immediate, that is the target has icmp instructions which can
@@ -586,9 +586,107 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   return true;
 }
 
+// Information about the addressing mode for a memory access.
+struct AddressingMode {
+  // True if a long displacement is supported.
+  bool LongDisplacement;
+
+  // True if use of index register is supported.
+  bool IndexReg;
+
+  AddressingMode(bool LongDispl, bool IdxReg) :
+    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
+};
+
+// Return the desired addressing mode for a Load which has only one use (in
+// the same block) which is a Store.
+static AddressingMode getLoadStoreAddrMode(bool HasVector,
+                                           Type *Ty) {
+  // With vector support a Load->Store combination may be combined to either
+  // an MVC or vector operations and it seems to work best to allow the
+  // vector addressing mode.
+  if (HasVector)
+    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
+
+  // Otherwise only the MVC case is special.
+  bool MVC = Ty->isIntegerTy(8);
+  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
+}
+
+// Return the addressing mode which seems most desirable given an LLVM
+// Instruction pointer.
+static AddressingMode
+supportedAddressingMode(Instruction *I, bool HasVector) {
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default: break;
+    case Intrinsic::memset:
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
+    }
+  }
+
+  if (isa<LoadInst>(I) && I->hasOneUse()) {
+    auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
+    if (SingleUser->getParent() == I->getParent()) {
+      if (isa<ICmpInst>(SingleUser)) {
+        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
+          if (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))
+            // Comparison of memory with 16 bit signed / unsigned immediate
+            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
+      } else if (isa<StoreInst>(SingleUser))
+        // Load->Store
+        return getLoadStoreAddrMode(HasVector, I->getType());
+    }
+  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
+    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
+      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
+        // Load->Store
+        return getLoadStoreAddrMode(HasVector, LoadI->getType());
+  }
+
+  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
+
+    // * Use LDE instead of LE/LEY for z13 to avoid partial register
+    //   dependencies (LDE only supports small offsets).
+    // * Utilize the vector registers to hold floating point
+    //   values (vector load / store instructions only support small
+    //   offsets).
+
+    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
+                         I->getOperand(0)->getType());
+    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
+    bool IsVectorAccess = MemAccessTy->isVectorTy();
+
+    // A store of an extracted vector element will be combined into a VSTE type
+    // instruction.
+    if (!IsVectorAccess && isa<StoreInst>(I)) {
+      Value *DataOp = I->getOperand(0);
+      if (isa<ExtractElementInst>(DataOp))
+        IsVectorAccess = true;
+    }
+
+    // A load which gets inserted into a vector element will be combined into a
+    // VLE type instruction.
+    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
+      User *LoadUser = *I->user_begin();
+      if (isa<InsertElementInst>(LoadUser))
+        IsVectorAccess = true;
+    }
+
+    if (IsFPAccess || IsVectorAccess)
+      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
+  }
+
+  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
+}
+
+// TODO: This method should also check for the displacement when *I is
+// passed. It may also be possible to merge with isFoldableMemAccessOffset()
+// now that both methods get the *I.
 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
-                                                  const AddrMode &AM, Type *Ty,
-                                                  unsigned AS) const {
+       const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
   // Punt on globals for now, although they can be used in limited
   // RELATIVE LONG cases.
   if (AM.BaseGV)
@@ -598,46 +696,20 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
   if (!isInt<20>(AM.BaseOffs))
     return false;
 
-  // Indexing is OK but no scale factor can be applied.
-  return AM.Scale == 0 || AM.Scale == 1;
+  if (I != nullptr &&
+      !supportedAddressingMode(I, Subtarget.hasVector()).IndexReg)
+    // No indexing allowed.
+    return AM.Scale == 0;
+  else
+    // Indexing is OK but no scale factor can be applied.
+    return AM.Scale == 0 || AM.Scale == 1;
 }
 
+// TODO: Should we check for isInt<20> also?
 bool SystemZTargetLowering::isFoldableMemAccessOffset(Instruction *I,
                                                       int64_t Offset) const {
-  // This only applies to z13.
-  if (!Subtarget.hasVector())
-    return true;
-
-  // * Use LDE instead of LE/LEY to avoid partial register
-  //   dependencies (LDE only supports small offsets).
-  // * Utilize the vector registers to hold floating point
-  //   values (vector load / store instructions only support small
-  //   offsets).
-
-  assert (isa<LoadInst>(I) || isa<StoreInst>(I));
-  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
-                       I->getOperand(0)->getType());
-  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
-  bool IsVectorAccess = MemAccessTy->isVectorTy();
-
-  // A store of an extracted vector element will be combined into a VSTE type
-  // instruction.
-  if (!IsVectorAccess && isa<StoreInst>(I)) {
-    Value *DataOp = I->getOperand(0);
-    if (isa<ExtractElementInst>(DataOp))
-      IsVectorAccess = true;
-  }
-
-  // A load which gets inserted into a vector element will be combined into a
-  // VLE type instruction.
-  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
-    User *LoadUser = *I->user_begin();
-    if (isa<InsertElementInst>(LoadUser))
-      IsVectorAccess = true;
-  }
-
-  if (!isUInt<12>(Offset) && (IsFPAccess || IsVectorAccess))
-    return false;
+  if (!supportedAddressingMode(I, Subtarget.hasVector()).LongDisplacement)
+    return (isUInt<12>(Offset));
 
   return true;
 }
@@ -384,7 +384,8 @@ public:
   bool isLegalICmpImmediate(int64_t Imm) const override;
   bool isLegalAddImmediate(int64_t Imm) const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
   bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const override;
   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
                                       unsigned Align,
@@ -292,6 +292,19 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
     UP.Force = true;
 }
 
+
+bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                                   TargetTransformInfo::LSRCost &C2) {
+  // SystemZ specific: check instruction count (first), and don't care about
+  // ImmCost, since offsets are checked explicitly.
+  return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
+                  C1.NumIVMuls, C1.NumBaseAdds,
+                  C1.ScaleCost, C1.SetupCost) <
+         std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
+                  C2.NumIVMuls, C2.NumBaseAdds,
+                  C2.ScaleCost, C2.SetupCost);
+}
+
 unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
   if (!Vector)
     // Discount the stack pointer. Also leave out %r0, since it can't
@@ -48,6 +48,8 @@ public:
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
 
+  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                     TargetTransformInfo::LSRCost &C2);
   /// @}
 
   /// \name Vector TTI Implementations
@@ -61,6 +63,7 @@ public:
   unsigned getMinPrefetchStride() { return 2048; }
 
   bool prefersVectorizedAddressing() { return false; }
+  bool LSRWithInstrQueries() { return true; }
   bool supportsEfficientVectorElementLoadStore() { return true; }
   bool enableInterleavedAccessVectorization() { return true; }
 
@@ -233,7 +233,8 @@ bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                       const AddrMode &AM,
                                                       Type *Ty,
-                                                      unsigned AS) const {
+                                                      unsigned AS,
+                                                      Instruction *I) const {
   // WebAssembly offsets are added as unsigned without wrapping. The
   // isLegalAddressingMode gives us no way to determine if wrapping could be
   // happening, so we approximate this by accepting only non-negative offsets.
@@ -55,7 +55,8 @@ class WebAssemblyTargetLowering final : public TargetLowering {
   bool isCheapToSpeculateCttz() const override;
   bool isCheapToSpeculateCtlz() const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
   bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align,
                                       bool *Fast) const override;
   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
@@ -24757,7 +24757,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
 /// target, for a load/store of the specified type.
 bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS,
+                                              Instruction *I) const {
   // X86 supports extremely general addressing modes.
   CodeModel::Model M = getTargetMachine().getCodeModel();
 
@@ -903,7 +903,8 @@ namespace llvm {
     /// Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// Return true if the specified immediate is legal
     /// icmp immediate, that is the target has icmp instructions which can
@@ -1889,7 +1889,8 @@ static inline bool isImmUs4(int64_t val)
 /// by AM is legal for this target, for a load/store of the specified type.
 bool XCoreTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                 const AddrMode &AM, Type *Ty,
-                                                unsigned AS) const {
+                                                unsigned AS,
+                                                Instruction *I) const {
   if (Ty->getTypeID() == Type::VoidTyID)
     return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
 
@@ -123,7 +123,8 @@ namespace llvm {
                           MachineBasicBlock *MBB) const override;
 
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// If a physical register, this returns the register that receives the
     /// exception address on entry to an EH pad.
@@ -783,10 +783,17 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
     // of intrinsics.
     switch (II->getIntrinsicID()) {
       default: break;
+      case Intrinsic::memset:
       case Intrinsic::prefetch:
         if (II->getArgOperand(0) == OperandVal)
           isAddress = true;
         break;
+      case Intrinsic::memmove:
+      case Intrinsic::memcpy:
+        if (II->getArgOperand(0) == OperandVal ||
+            II->getArgOperand(1) == OperandVal)
+          isAddress = true;
+        break;
     }
   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
     if (RMW->getPointerOperand() == OperandVal)
@@ -1280,7 +1287,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
 
       // Check with target if this offset with this instruction is
       // specifically not supported.
-      if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
+      if (LU.Kind == LSRUse::Address && Offset != 0 &&
           !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
         C.NumBaseAdds++;
     }
@@ -1535,11 +1542,12 @@ LLVM_DUMP_METHOD void LSRUse::dump() const {
 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
                                  GlobalValue *BaseGV, int64_t BaseOffset,
-                                 bool HasBaseReg, int64_t Scale) {
+                                 bool HasBaseReg, int64_t Scale,
+                                 Instruction *Fixup = nullptr) {
   switch (Kind) {
   case LSRUse::Address:
     return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
-                                     HasBaseReg, Scale, AccessTy.AddrSpace);
+                                     HasBaseReg, Scale, AccessTy.AddrSpace, Fixup);
 
   case LSRUse::ICmpZero:
     // There's not even a target hook for querying whether it would be legal to
@@ -1645,6 +1653,16 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
 
 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  const LSRUse &LU, const Formula &F) {
+  // Target may want to look at the user instructions.
+  if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
+    for (const LSRFixup &Fixup : LU.Fixups)
+      if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+                                F.BaseOffset, F.HasBaseReg, F.Scale,
+                                Fixup.UserInst))
+        return false;
+    return true;
+  }
+
   return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
                               LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
                               F.Scale);
@@ -40,7 +40,7 @@ for.body.3.lr.ph.i:              ; preds = %for.body.3.lr.ph.i.
 for.body.3.i:                    ; preds = %for.body.3.i, %for.body.3.lr.ph.i
 ; CHECK-LABEL: .LBB0_5:
 ; CHECK-NOT: stfh %r{{.*}}, 0(%r{{.*}})
-; CHECK: lg %r{{.*}}, -4(%r{{.*}})
+; CHECK: lg %r{{.*}}, 8(%r{{.*}})
 ; Overlapping load should go before the store
   %indvars.iv.i = phi i64 [ 0, %for.body.3.lr.ph.i ], [ %indvars.iv.next.i, %for.body.3.i ]
   %3 = shl nsw i64 %indvars.iv.i, 6
@@ -9,7 +9,7 @@
 define void @f1(i32 *%dest, i32 %a) {
 ; CHECK-LABEL: f1:
 ; CHECK-NOT: sllg
-; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}})
+; CHECK: st %r3, 400({{%r[1-5],%r[1-5]}})
 ; CHECK: br %r14
 entry:
   br label %loop
@@ -239,3 +239,84 @@ for.body:                         ; preds = %for.body.preheader, %for.body
   %exitcond = icmp eq i32 %lftr.wideiv, %S
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
 }
+
+; Test that a memcpy loop does not get a lot of lays before each mvc (D12 and no index-reg).
+%0 = type { %1, %2* }
+%1 = type { %2*, %2* }
+%2 = type <{ %3, i32, [4 x i8] }>
+%3 = type { i16*, i16*, i16* }
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0
+
+define void @f8() {
+; CHECK-Z13-LABEL: f8:
+; CHECK-Z13: mvc
+; CHECK-Z13-NEXT: mvc
+; CHECK-Z13-NEXT: mvc
+; CHECK-Z13-NEXT: mvc
+
+bb:
+  %tmp = load %0*, %0** undef, align 8
+  br i1 undef, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp3 = phi %0* [ %tmp, %bb ], [ undef, %bb1 ]
+  %tmp4 = phi %0* [ undef, %bb ], [ undef, %bb1 ]
+  br label %bb5
+
+bb5:                                              ; preds = %bb5, %bb2
+  %tmp6 = phi %0* [ %tmp21, %bb5 ], [ %tmp3, %bb2 ]
+  %tmp7 = phi %0* [ %tmp20, %bb5 ], [ %tmp4, %bb2 ]
+  %tmp8 = getelementptr inbounds %0, %0* %tmp7, i64 -1
+  %tmp9 = getelementptr inbounds %0, %0* %tmp6, i64 -1
+  %tmp10 = bitcast %0* %tmp9 to i8*
+  %tmp11 = bitcast %0* %tmp8 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp10, i8* %tmp11, i64 24, i32 8, i1 false)
+  %tmp12 = getelementptr inbounds %0, %0* %tmp7, i64 -2
+  %tmp13 = getelementptr inbounds %0, %0* %tmp6, i64 -2
+  %tmp14 = bitcast %0* %tmp13 to i8*
+  %tmp15 = bitcast %0* %tmp12 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 8, i1 false)
+  %tmp16 = getelementptr inbounds %0, %0* %tmp7, i64 -3
+  %tmp17 = getelementptr inbounds %0, %0* %tmp6, i64 -3
+  %tmp18 = bitcast %0* %tmp17 to i8*
+  %tmp19 = bitcast %0* %tmp16 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp18, i8* %tmp19, i64 24, i32 8, i1 false)
+  %tmp20 = getelementptr inbounds %0, %0* %tmp7, i64 -4
+  %tmp21 = getelementptr inbounds %0, %0* %tmp6, i64 -4
+  %tmp22 = bitcast %0* %tmp21 to i8*
+  %tmp23 = bitcast %0* %tmp20 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp22, i8* %tmp23, i64 24, i32 8, i1 false)
+  br label %bb5
+}
+
+; Test that a chsi does not need an aghik inside the loop (no index reg)
+define void @f9() {
+; CHECK-Z13-LABEL: f9:
+; CHECK-Z13: # =>This Inner Loop Header: Depth=1
+; CHECK-Z13-NOT: aghik
+; CHECK-Z13: chsi
+
+entry:
+  br label %for.body.i63
+
+for.body.i63:                                     ; preds = %for.inc.i, %entry
+  %indvars.iv155.i = phi i64 [ 0, %entry ], [ %indvars.iv.next156.i.3, %for.inc.i ]
+  %arrayidx.i62 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv155.i
+  %tmp = load i32, i32* %arrayidx.i62, align 4
+  %cmp9.i = icmp eq i32 %tmp, 0
+  br i1 %cmp9.i, label %for.inc.i, label %if.then10.i
+
+if.then10.i:                                      ; preds = %for.body.i63
+  unreachable
+
+for.inc.i:                                        ; preds = %for.body.i63
+  %indvars.iv.next156.i = or i64 %indvars.iv155.i, 1
+  %arrayidx.i62.1 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv.next156.i
+  %tmp1 = load i32, i32* %arrayidx.i62.1, align 4
+  %indvars.iv.next156.i.3 = add nsw i64 %indvars.iv155.i, 4
+  br label %for.body.i63
+}