[SystemZ, LoopStrengthReduce]

This patch makes LSR generate better code for SystemZ in the case of memory
intrinsics, Load->Store pairs, and comparisons of an immediate with memory.

To achieve this, the following common-code (target-independent) changes were made:

 * New TTI hook: LSRWithInstrQueries(), which defaults to false. It controls
 whether LSR should do instruction-based addressing-mode evaluations by calling
 isLegalAddressingMode() with the Instruction pointers (a standalone sketch of
 the idea follows after this list).
 * In LoopStrengthReduce: handle the address operands of memset, memmove and
 memcpy as address uses, and call isFoldableMemAccessOffset() for any
 LSRUse::Address, not just loads or stores.
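
The following is a minimal standalone sketch (not the LLVM interface itself;
the enum, function name and bit widths below are illustrative only) of the kind
of per-instruction restriction the new hook enables: when the legality query
knows that the user is a memset/memmove/memcpy, it can reject index registers
and displacements that the SystemZ MVC-style instructions cannot encode.

    #include <cstdint>

    // Hypothetical, self-contained model of an instruction-aware
    // addressing-mode check; the real hook is isLegalAddressingMode()
    // with an extra Instruction* parameter.
    enum class UserKind { Generic, MemIntrinsic };  // memset/memmove/memcpy

    bool isLegalAddrModeSketch(int64_t Displacement, bool HasIndexReg,
                               UserKind User) {
      if (User == UserKind::MemIntrinsic)
        // MVC-like form: 12-bit unsigned displacement, no index register.
        return !HasIndexReg && Displacement >= 0 && Displacement < (1 << 12);
      // Otherwise allow a 20-bit signed displacement plus an index register.
      return Displacement >= -(1 << 19) && Displacement < (1 << 19);
    }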

SystemZ changes:

 * isLSRCostLess() implemented so that the instruction count (Insns) is
 compared first, and ImmCost is left out (see the sketch after this list).
 * New helper function supportedAddressingMode(), used by the TTI methods that
 inspect the Instruction passed via pointer.
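
As a self-contained sketch (the struct and function names here are
hypothetical), the comparison order used by the SystemZ isLSRCostLess()
override shown further down in the diff can be summarized as a lexicographic
std::tie comparison that starts with the instruction count and omits ImmCost
entirely:

    #include <tuple>

    // Mirror of the LSRCost fields that participate in the comparison.
    struct LSRCostSketch {
      unsigned Insns, NumRegs, AddRecCost, NumIVMuls, NumBaseAdds,
               ScaleCost, SetupCost;   // ImmCost intentionally omitted
    };

    bool isLSRCostLessSystemZStyle(const LSRCostSketch &C1,
                                   const LSRCostSketch &C2) {
      // Fewer instructions wins first; remaining fields break ties in order.
      return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
                      C1.NumBaseAdds, C1.ScaleCost, C1.SetupCost) <
             std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
                      C2.NumBaseAdds, C2.ScaleCost, C2.SetupCost);
    }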

Review: Ulrich Weigand, Quentin Colombet
https://reviews.llvm.org/D35262
https://reviews.llvm.org/D35049

llvm-svn: 308729
Jonas Paulsson 2017-07-21 11:59:37 +00:00
parent 7d2b15a7ab
commit 024e319489
35 changed files with 301 additions and 77 deletions

@ -420,10 +420,12 @@ public:
/// this target, for a load/store of the specified type.
/// The type may be VoidTy, in which case only return true if the addressing
/// mode is legal for a load/store of any legal type.
/// If target returns true in LSRWithInstrQueries(), I may be valid.
/// TODO: Handle pre/postinc as well.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace = 0) const;
unsigned AddrSpace = 0,
Instruction *I = nullptr) const;
/// \brief Return true if LSR cost of C1 is lower than C2.
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
@ -453,6 +455,12 @@ public:
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace = 0) const;
/// \brief Return true if the loop strength reduce pass should make
/// Instruction* based TTI queries to isLegalAddressingMode(). This is
/// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
/// immediate offset and no index register.
bool LSRWithInstrQueries() const;
/// \brief Return true if target supports the load / store
/// instruction with the given Offset of the form reg + Offset. It
/// may be that Offset is too big for a certain type (register
@ -882,7 +890,8 @@ public:
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale,
unsigned AddrSpace) = 0;
unsigned AddrSpace,
Instruction *I) = 0;
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) = 0;
virtual bool isLegalMaskedStore(Type *DataType) = 0;
@ -893,6 +902,7 @@ public:
virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace) = 0;
virtual bool LSRWithInstrQueries() = 0;
virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) = 0;
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
virtual bool isProfitableToHoist(Instruction *I) = 0;
@ -1085,9 +1095,10 @@ public:
}
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) override {
unsigned AddrSpace,
Instruction *I) override {
return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale, AddrSpace);
Scale, AddrSpace, I);
}
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) override {
@ -1114,6 +1125,9 @@ public:
return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale, AddrSpace);
}
bool LSRWithInstrQueries() override {
return Impl.LSRWithInstrQueries();
}
bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) override {
return Impl.isFoldableMemAccessOffset(I, Offset);
}

@ -230,7 +230,7 @@ public:
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) {
unsigned AddrSpace, Instruction *I = nullptr) {
// Guess that only reg and reg+reg addressing is allowed. This heuristic is
// taken from the implementation of LSR.
return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
@ -262,6 +262,8 @@ public:
return -1;
}
bool LSRWithInstrQueries() { return false; }
bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) { return true; }
bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }

@ -110,13 +110,13 @@ public:
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) {
unsigned AddrSpace, Instruction *I = nullptr) {
TargetLoweringBase::AddrMode AM;
AM.BaseGV = BaseGV;
AM.BaseOffs = BaseOffset;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Scale;
return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace);
return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
}
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {

@ -1887,7 +1887,8 @@ public:
///
/// TODO: Remove default argument
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AddrSpace) const;
Type *Ty, unsigned AddrSpace,
Instruction *I = nullptr) const;
/// \brief Return the cost of the scaling factor used in the addressing mode
/// represented by AM for this target, for a load/store of the specified type.

@ -144,9 +144,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
int64_t Scale,
unsigned AddrSpace) const {
unsigned AddrSpace,
Instruction *I) const {
return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale, AddrSpace);
Scale, AddrSpace, I);
}
bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
@ -184,6 +185,10 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
return Cost;
}
bool TargetTransformInfo::LSRWithInstrQueries() const {
return TTIImpl->LSRWithInstrQueries();
}
bool TargetTransformInfo::isFoldableMemAccessOffset(Instruction *I,
int64_t Offset) const {
return TTIImpl->isFoldableMemAccessOffset(I, Offset);

@ -1481,7 +1481,7 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
/// by AM is legal for this target, for a load/store of the specified type.
bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
unsigned AS, Instruction *I) const {
// The default implementation of this implements a conservative RISCy, r+r and
// r+i addr mode.

@ -7818,7 +7818,7 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
/// by AM is legal for this target, for a load/store of the specified type.
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
unsigned AS, Instruction *I) const {
// AArch64 has five basic addressing modes:
// reg
// reg + 9-bit signed offset

@ -338,7 +338,8 @@ public:
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
unsigned AS,
Instruction *I = nullptr) const override;
/// \brief Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store

@ -624,7 +624,7 @@ bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
unsigned AS, Instruction *I) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;

@ -151,7 +151,8 @@ public:
Type *&/*AccessTy*/) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
unsigned AS,
Instruction *I = nullptr) const override;
bool canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const override;

@ -12380,7 +12380,7 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
unsigned AS, Instruction *I) const {
EVT VT = getValueType(DL, Ty, true);
if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
return false;

@ -317,7 +317,8 @@ class InstrItineraryData;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
Type *Ty, unsigned AS,
Instruction *I = nullptr) const override;
/// getScalingFactorCost - Return the cost of the scaling used in
/// addressing mode represented by AM.

@ -724,7 +724,7 @@ void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
/// by AM is legal for this target, for a load/store of the specified type.
bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
unsigned AS, Instruction *I) const {
int64_t Offs = AM.BaseOffs;
// Allow absolute addresses.

@ -83,7 +83,8 @@ public:
SelectionDAG &DAG) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
unsigned AS,
Instruction *I = nullptr) const override;
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM,

@ -2993,7 +2993,7 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
/// AM is legal for this target, for a load/store of the specified type.
bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
unsigned AS, Instruction *I) const {
if (Ty->isSized()) {
// When LSR detects uses of the same base address to access different
// types (e.g. unions), it will assume a conservative type for these

@ -231,7 +231,8 @@ namespace HexagonISD {
/// mode is legal for a load/store of any legal type.
/// TODO: Handle pre/postinc as well.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
Type *Ty, unsigned AS,
Instruction *I = nullptr) const override;
/// Return true if folding a constant offset with the given GlobalAddress
/// is legal. It is frequently not legal in PIC relocation models.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

@ -3991,7 +3991,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
unsigned AS, Instruction *I) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;

@ -625,7 +625,8 @@ namespace llvm {
}
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
Type *Ty, unsigned AS,
Instruction *I = nullptr) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

@ -3805,7 +3805,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
/// (CodeGenPrepare.cpp)
bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
unsigned AS, Instruction *I) const {
// AddrMode - This represents an addressing mode of:
// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
//

@ -456,7 +456,8 @@ public:
/// reduction (LoopStrengthReduce.cpp) and memory optimization for
/// address mode (CodeGenPrepare.cpp)
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
unsigned AS,
Instruction *I = nullptr) const override;
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
// Truncating 64-bit to 32-bit is free in SASS.

@ -12810,7 +12810,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
unsigned AS, Instruction *I) const {
// PPC does not allow r+i addressing modes for vectors!
if (Ty->isVectorTy() && AM.BaseOffs != 0)
return false;

@ -727,7 +727,8 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
Type *Ty, unsigned AS,
Instruction *I = nullptr) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can

@ -586,9 +586,107 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return true;
}
// Information about the addressing mode for a memory access.
struct AddressingMode {
// True if a long displacement is supported.
bool LongDisplacement;
// True if use of index register is supported.
bool IndexReg;
AddressingMode(bool LongDispl, bool IdxReg) :
LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};
// Return the desired addressing mode for a Load whose only use (in the
// same block) is a Store.
static AddressingMode getLoadStoreAddrMode(bool HasVector,
Type *Ty) {
// With vector support a Load->Store combination may be combined to either
// an MVC or vector operations and it seems to work best to allow the
// vector addressing mode.
if (HasVector)
return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
// Otherwise only the MVC case is special.
bool MVC = Ty->isIntegerTy(8);
return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
}
// Return the addressing mode which seems most desirable given an LLVM
// Instruction pointer.
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::memset:
case Intrinsic::memmove:
case Intrinsic::memcpy:
return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
}
}
if (isa<LoadInst>(I) && I->hasOneUse()) {
auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
if (SingleUser->getParent() == I->getParent()) {
if (isa<ICmpInst>(SingleUser)) {
if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
if (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))
// Comparison of memory with 16 bit signed / unsigned immediate
return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
} else if (isa<StoreInst>(SingleUser))
// Load->Store
return getLoadStoreAddrMode(HasVector, I->getType());
}
} else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
// Load->Store
return getLoadStoreAddrMode(HasVector, LoadI->getType());
}
if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
// * Use LDE instead of LE/LEY for z13 to avoid partial register
// dependencies (LDE only supports small offsets).
// * Utilize the vector registers to hold floating point
// values (vector load / store instructions only support small
// offsets).
Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
I->getOperand(0)->getType());
bool IsFPAccess = MemAccessTy->isFloatingPointTy();
bool IsVectorAccess = MemAccessTy->isVectorTy();
// A store of an extracted vector element will be combined into a VSTE type
// instruction.
if (!IsVectorAccess && isa<StoreInst>(I)) {
Value *DataOp = I->getOperand(0);
if (isa<ExtractElementInst>(DataOp))
IsVectorAccess = true;
}
// A load which gets inserted into a vector element will be combined into a
// VLE type instruction.
if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
User *LoadUser = *I->user_begin();
if (isa<InsertElementInst>(LoadUser))
IsVectorAccess = true;
}
if (IsFPAccess || IsVectorAccess)
return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
}
return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
}
// TODO: This method should also check for the displacement when *I is
// passed. It may also be possible to merge with isFoldableMemAccessOffset()
// now that both methods get the *I.
bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
// Punt on globals for now, although they can be used in limited
// RELATIVE LONG cases.
if (AM.BaseGV)
@ -598,46 +696,20 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (!isInt<20>(AM.BaseOffs))
return false;
// Indexing is OK but no scale factor can be applied.
return AM.Scale == 0 || AM.Scale == 1;
if (I != nullptr &&
!supportedAddressingMode(I, Subtarget.hasVector()).IndexReg)
// No indexing allowed.
return AM.Scale == 0;
else
// Indexing is OK but no scale factor can be applied.
return AM.Scale == 0 || AM.Scale == 1;
}
// TODO: Should we check for isInt<20> also?
bool SystemZTargetLowering::isFoldableMemAccessOffset(Instruction *I,
int64_t Offset) const {
// This only applies to z13.
if (!Subtarget.hasVector())
return true;
// * Use LDE instead of LE/LEY to avoid partial register
// dependencies (LDE only supports small offsets).
// * Utilize the vector registers to hold floating point
// values (vector load / store instructions only support small
// offsets).
assert (isa<LoadInst>(I) || isa<StoreInst>(I));
Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
I->getOperand(0)->getType());
bool IsFPAccess = MemAccessTy->isFloatingPointTy();
bool IsVectorAccess = MemAccessTy->isVectorTy();
// A store of an extracted vector element will be combined into a VSTE type
// instruction.
if (!IsVectorAccess && isa<StoreInst>(I)) {
Value *DataOp = I->getOperand(0);
if (isa<ExtractElementInst>(DataOp))
IsVectorAccess = true;
}
// A load which gets inserted into a vector element will be combined into a
// VLE type instruction.
if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
User *LoadUser = *I->user_begin();
if (isa<InsertElementInst>(LoadUser))
IsVectorAccess = true;
}
if (!isUInt<12>(Offset) && (IsFPAccess || IsVectorAccess))
return false;
if (!supportedAddressingMode(I, Subtarget.hasVector()).LongDisplacement)
return (isUInt<12>(Offset));
return true;
}

@ -384,7 +384,8 @@ public:
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
unsigned AS,
Instruction *I = nullptr) const override;
bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,

@ -292,6 +292,19 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Force = true;
}
bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) {
// SystemZ specific: check instruction count (first), and don't care about
// ImmCost, since offsets are checked explicitly.
return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
C1.NumIVMuls, C1.NumBaseAdds,
C1.ScaleCost, C1.SetupCost) <
std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
C2.NumIVMuls, C2.NumBaseAdds,
C2.ScaleCost, C2.SetupCost);
}
unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
if (!Vector)
// Discount the stack pointer. Also leave out %r0, since it can't

@ -48,6 +48,8 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);
/// @}
/// \name Vector TTI Implementations
@ -61,6 +63,7 @@ public:
unsigned getMinPrefetchStride() { return 2048; }
bool prefersVectorizedAddressing() { return false; }
bool LSRWithInstrQueries() { return true; }
bool supportsEfficientVectorElementLoadStore() { return true; }
bool enableInterleavedAccessVectorization() { return true; }

@ -233,7 +233,8 @@ bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM,
Type *Ty,
unsigned AS) const {
unsigned AS,
Instruction *I) const {
// WebAssembly offsets are added as unsigned without wrapping. The
// isLegalAddressingMode gives us no way to determine if wrapping could be
// happening, so we approximate this by accepting only non-negative offsets.

@ -55,7 +55,8 @@ class WebAssemblyTargetLowering final : public TargetLowering {
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
unsigned AS,
Instruction *I = nullptr) const override;
bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align,
bool *Fast) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

@ -24757,7 +24757,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
/// target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
unsigned AS,
Instruction *I) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();

@ -903,7 +903,8 @@ namespace llvm {
/// Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
Type *Ty, unsigned AS,
Instruction *I = nullptr) const override;
/// Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can

@ -1889,7 +1889,8 @@ static inline bool isImmUs4(int64_t val)
/// by AM is legal for this target, for a load/store of the specified type.
bool XCoreTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
unsigned AS,
Instruction *I) const {
if (Ty->getTypeID() == Type::VoidTyID)
return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);

@ -123,7 +123,8 @@ namespace llvm {
MachineBasicBlock *MBB) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
Type *Ty, unsigned AS,
Instruction *I = nullptr) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.

@ -783,10 +783,17 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
// of intrinsics.
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::memset:
case Intrinsic::prefetch:
if (II->getArgOperand(0) == OperandVal)
isAddress = true;
break;
case Intrinsic::memmove:
case Intrinsic::memcpy:
if (II->getArgOperand(0) == OperandVal ||
II->getArgOperand(1) == OperandVal)
isAddress = true;
break;
}
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
if (RMW->getPointerOperand() == OperandVal)
@ -1280,7 +1287,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
// Check with target if this offset with this instruction is
// specifically not supported.
if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
if (LU.Kind == LSRUse::Address && Offset != 0 &&
!TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
C.NumBaseAdds++;
}
@ -1535,11 +1542,12 @@ LLVM_DUMP_METHOD void LSRUse::dump() const {
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
bool HasBaseReg, int64_t Scale,
Instruction *Fixup = nullptr) {
switch (Kind) {
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
HasBaseReg, Scale, AccessTy.AddrSpace);
HasBaseReg, Scale, AccessTy.AddrSpace, Fixup);
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
@ -1645,6 +1653,16 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
// Target may want to look at the user instructions.
if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
for (const LSRFixup &Fixup : LU.Fixups)
if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
F.BaseOffset, F.HasBaseReg, F.Scale,
Fixup.UserInst))
return false;
return true;
}
return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
F.Scale);

@ -40,7 +40,7 @@ for.body.3.lr.ph.i: ; preds = %for.body.3.lr.ph.i.
for.body.3.i: ; preds = %for.body.3.i, %for.body.3.lr.ph.i
; CHECK-LABEL: .LBB0_5:
; CHECK-NOT: stfh %r{{.*}}, 0(%r{{.*}})
; CHECK: lg %r{{.*}}, -4(%r{{.*}})
; CHECK: lg %r{{.*}}, 8(%r{{.*}})
; Overlapping load should go before the store
%indvars.iv.i = phi i64 [ 0, %for.body.3.lr.ph.i ], [ %indvars.iv.next.i, %for.body.3.i ]
%3 = shl nsw i64 %indvars.iv.i, 6

@ -9,7 +9,7 @@
define void @f1(i32 *%dest, i32 %a) {
; CHECK-LABEL: f1:
; CHECK-NOT: sllg
; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}})
; CHECK: st %r3, 400({{%r[1-5],%r[1-5]}})
; CHECK: br %r14
entry:
br label %loop
@ -239,3 +239,84 @@ for.body: ; preds = %for.body.preheader, %for.body
%exitcond = icmp eq i32 %lftr.wideiv, %S
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}
; Test that a memcpy loop does not get a lot of lays before each mvc (D12 and no index-reg).
%0 = type { %1, %2* }
%1 = type { %2*, %2* }
%2 = type <{ %3, i32, [4 x i8] }>
%3 = type { i16*, i16*, i16* }
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0
define void @f8() {
; CHECK-Z13-LABEL: f8:
; CHECK-Z13: mvc
; CHECK-Z13-NEXT: mvc
; CHECK-Z13-NEXT: mvc
; CHECK-Z13-NEXT: mvc
bb:
%tmp = load %0*, %0** undef, align 8
br i1 undef, label %bb2, label %bb1
bb1: ; preds = %bb
br label %bb2
bb2: ; preds = %bb1, %bb
%tmp3 = phi %0* [ %tmp, %bb ], [ undef, %bb1 ]
%tmp4 = phi %0* [ undef, %bb ], [ undef, %bb1 ]
br label %bb5
bb5: ; preds = %bb5, %bb2
%tmp6 = phi %0* [ %tmp21, %bb5 ], [ %tmp3, %bb2 ]
%tmp7 = phi %0* [ %tmp20, %bb5 ], [ %tmp4, %bb2 ]
%tmp8 = getelementptr inbounds %0, %0* %tmp7, i64 -1
%tmp9 = getelementptr inbounds %0, %0* %tmp6, i64 -1
%tmp10 = bitcast %0* %tmp9 to i8*
%tmp11 = bitcast %0* %tmp8 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp10, i8* %tmp11, i64 24, i32 8, i1 false)
%tmp12 = getelementptr inbounds %0, %0* %tmp7, i64 -2
%tmp13 = getelementptr inbounds %0, %0* %tmp6, i64 -2
%tmp14 = bitcast %0* %tmp13 to i8*
%tmp15 = bitcast %0* %tmp12 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 8, i1 false)
%tmp16 = getelementptr inbounds %0, %0* %tmp7, i64 -3
%tmp17 = getelementptr inbounds %0, %0* %tmp6, i64 -3
%tmp18 = bitcast %0* %tmp17 to i8*
%tmp19 = bitcast %0* %tmp16 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp18, i8* %tmp19, i64 24, i32 8, i1 false)
%tmp20 = getelementptr inbounds %0, %0* %tmp7, i64 -4
%tmp21 = getelementptr inbounds %0, %0* %tmp6, i64 -4
%tmp22 = bitcast %0* %tmp21 to i8*
%tmp23 = bitcast %0* %tmp20 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp22, i8* %tmp23, i64 24, i32 8, i1 false)
br label %bb5
}
; Test that a chsi does not need an aghik inside the loop (no index reg)
define void @f9() {
; CHECK-Z13-LABEL: f9:
; CHECK-Z13: # =>This Inner Loop Header: Depth=1
; CHECK-Z13-NOT: aghik
; CHECK-Z13: chsi
entry:
br label %for.body.i63
for.body.i63: ; preds = %for.inc.i, %entry
%indvars.iv155.i = phi i64 [ 0, %entry ], [ %indvars.iv.next156.i.3, %for.inc.i ]
%arrayidx.i62 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv155.i
%tmp = load i32, i32* %arrayidx.i62, align 4
%cmp9.i = icmp eq i32 %tmp, 0
br i1 %cmp9.i, label %for.inc.i, label %if.then10.i
if.then10.i: ; preds = %for.body.i63
unreachable
for.inc.i: ; preds = %for.body.i63
%indvars.iv.next156.i = or i64 %indvars.iv155.i, 1
%arrayidx.i62.1 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv.next156.i
%tmp1 = load i32, i32* %arrayidx.i62.1, align 4
%indvars.iv.next156.i.3 = add nsw i64 %indvars.iv155.i, 4
br label %for.body.i63
}