forked from OSchip/llvm-project
Revert "Merge memtag instructions with adjacent stack slots."
*** Bad machine code: Tied use must be a register ***
- function: stg_alloca17
- basic block: %bb.0 entry (0x20076710580)
- instruction: early-clobber %0:gpr64common, early-clobber %1:gpr64sp = STGloop 272, %stack.0.a :: (store 272 into %ir.a, align 16)
- operand 3: %stack.0.a
http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/21481/steps/test-check-all/logs/stdio
This reverts commit b675a7628c.
This commit is contained in:
parent
28b9cdd260
commit
58deb20dd2
|
@ -309,13 +309,6 @@ public:
|
||||||
RegScavenger *RS = nullptr) const {
|
RegScavenger *RS = nullptr) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// processFunctionBeforeFrameIndicesReplaced - This method is called
|
|
||||||
/// immediately before MO_FrameIndex operands are eliminated, but after the
|
|
||||||
/// frame is finalized. This method is optional.
|
|
||||||
virtual void
|
|
||||||
processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF,
|
|
||||||
RegScavenger *RS = nullptr) const {}
|
|
||||||
|
|
||||||
virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const {
|
virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const {
|
||||||
report_fatal_error("WinEH not implemented for this target");
|
report_fatal_error("WinEH not implemented for this target");
|
||||||
}
|
}
|
||||||
|
|
|
@ -259,10 +259,6 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
|
||||||
for (auto &I : EntryDbgValues)
|
for (auto &I : EntryDbgValues)
|
||||||
I.first->insert(I.first->begin(), I.second.begin(), I.second.end());
|
I.first->insert(I.first->begin(), I.second.begin(), I.second.end());
|
||||||
|
|
||||||
// Allow the target machine to make final modifications to the function
|
|
||||||
// before frame indices are replaced (the frame layout is already finalized).
|
|
||||||
TFI->processFunctionBeforeFrameIndicesReplaced(MF, RS);
|
|
||||||
|
|
||||||
// Replace all MO_FrameIndex operands with physical register references
|
// Replace all MO_FrameIndex operands with physical register references
|
||||||
// and actual offsets.
|
// and actual offsets.
|
||||||
//
|
//
|
||||||
|
|
|
@ -349,38 +349,22 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
|
||||||
MachineBasicBlock::iterator &NextMBBI) {
|
MachineBasicBlock::iterator &NextMBBI) {
|
||||||
MachineInstr &MI = *MBBI;
|
MachineInstr &MI = *MBBI;
|
||||||
DebugLoc DL = MI.getDebugLoc();
|
DebugLoc DL = MI.getDebugLoc();
|
||||||
Register SizeReg = MI.getOperand(0).getReg();
|
Register SizeReg = MI.getOperand(2).getReg();
|
||||||
Register AddressReg = MI.getOperand(1).getReg();
|
Register AddressReg = MI.getOperand(3).getReg();
|
||||||
|
|
||||||
MachineFunction *MF = MBB.getParent();
|
MachineFunction *MF = MBB.getParent();
|
||||||
|
|
||||||
bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
|
bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
|
||||||
const unsigned OpCode1 =
|
const unsigned OpCode =
|
||||||
ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
|
|
||||||
const unsigned OpCode2 =
|
|
||||||
ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
|
ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
|
||||||
|
|
||||||
unsigned Size = MI.getOperand(2).getImm();
|
|
||||||
assert(Size > 0 && Size % 16 == 0);
|
|
||||||
if (Size % (16 * 2) != 0) {
|
|
||||||
BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
|
|
||||||
.addReg(AddressReg)
|
|
||||||
.addReg(AddressReg)
|
|
||||||
.addImm(1);
|
|
||||||
Size -= 16;
|
|
||||||
}
|
|
||||||
MachineBasicBlock::iterator I =
|
|
||||||
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
|
|
||||||
.addImm(Size);
|
|
||||||
expandMOVImm(MBB, I, 64);
|
|
||||||
|
|
||||||
auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
||||||
auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
||||||
|
|
||||||
MF->insert(++MBB.getIterator(), LoopBB);
|
MF->insert(++MBB.getIterator(), LoopBB);
|
||||||
MF->insert(++LoopBB->getIterator(), DoneBB);
|
MF->insert(++LoopBB->getIterator(), DoneBB);
|
||||||
|
|
||||||
BuildMI(LoopBB, DL, TII->get(OpCode2))
|
BuildMI(LoopBB, DL, TII->get(OpCode))
|
||||||
.addDef(AddressReg)
|
.addDef(AddressReg)
|
||||||
.addReg(AddressReg)
|
.addReg(AddressReg)
|
||||||
.addReg(AddressReg)
|
.addReg(AddressReg)
|
||||||
|
|
|
@ -170,11 +170,6 @@ static cl::opt<bool>
|
||||||
cl::desc("reverse the CSR restore sequence"),
|
cl::desc("reverse the CSR restore sequence"),
|
||||||
cl::init(false), cl::Hidden);
|
cl::init(false), cl::Hidden);
|
||||||
|
|
||||||
static cl::opt<bool> StackTaggingMergeSetTag(
|
|
||||||
"stack-tagging-merge-settag",
|
|
||||||
cl::desc("merge settag instruction in function epilog"), cl::init(true),
|
|
||||||
cl::Hidden);
|
|
||||||
|
|
||||||
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
|
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
|
||||||
|
|
||||||
/// This is the biggest offset to the stack pointer we can encode in aarch64
|
/// This is the biggest offset to the stack pointer we can encode in aarch64
|
||||||
|
@ -485,39 +480,6 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
|
|
||||||
MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
|
|
||||||
if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (MBB.empty())
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// Disable combined SP bump if the last instruction is an MTE tag store. It
|
|
||||||
// is almost always better to merge SP adjustment into those instructions.
|
|
||||||
MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
|
|
||||||
MachineBasicBlock::iterator Begin = MBB.begin();
|
|
||||||
while (LastI != Begin) {
|
|
||||||
--LastI;
|
|
||||||
if (LastI->isTransient())
|
|
||||||
continue;
|
|
||||||
if (!LastI->getFlag(MachineInstr::FrameDestroy))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
switch (LastI->getOpcode()) {
|
|
||||||
case AArch64::STGloop:
|
|
||||||
case AArch64::STZGloop:
|
|
||||||
case AArch64::STGOffset:
|
|
||||||
case AArch64::STZGOffset:
|
|
||||||
case AArch64::ST2GOffset:
|
|
||||||
case AArch64::STZ2GOffset:
|
|
||||||
return false;
|
|
||||||
default:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
llvm_unreachable("unreachable");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Given a load or a store instruction, generate an appropriate unwinding SEH
|
// Given a load or a store instruction, generate an appropriate unwinding SEH
|
||||||
// code on Windows.
|
// code on Windows.
|
||||||
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
|
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
|
||||||
|
@ -1497,7 +1459,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
|
||||||
// function.
|
// function.
|
||||||
if (MF.hasEHFunclets())
|
if (MF.hasEHFunclets())
|
||||||
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
|
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
|
||||||
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
|
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
|
||||||
// Assume we can't combine the last pop with the sp restore.
|
// Assume we can't combine the last pop with the sp restore.
|
||||||
|
|
||||||
if (!CombineSPBump && PrologueSaveSize != 0) {
|
if (!CombineSPBump && PrologueSaveSize != 0) {
|
||||||
|
@ -2675,399 +2637,9 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
|
||||||
.addImm(0);
|
.addImm(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
|
||||||
struct TagStoreInstr {
|
/// the update. This is easily retrieved as it is exactly the offset that is set
|
||||||
MachineInstr *MI;
|
/// in processFunctionBeforeFrameFinalized.
|
||||||
int64_t Offset, Size;
|
|
||||||
explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
|
|
||||||
: MI(MI), Offset(Offset), Size(Size) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
class TagStoreEdit {
|
|
||||||
MachineFunction *MF;
|
|
||||||
MachineBasicBlock *MBB;
|
|
||||||
MachineRegisterInfo *MRI;
|
|
||||||
// Tag store instructions that are being replaced.
|
|
||||||
SmallVector<TagStoreInstr, 8> TagStores;
|
|
||||||
// Combined memref arguments of the above instructions.
|
|
||||||
SmallVector<MachineMemOperand *, 8> CombinedMemRefs;
|
|
||||||
|
|
||||||
// Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
|
|
||||||
// FrameRegOffset + Size) with the address tag of SP.
|
|
||||||
Register FrameReg;
|
|
||||||
StackOffset FrameRegOffset;
|
|
||||||
int64_t Size;
|
|
||||||
// If not None, move FrameReg to (FrameReg + FrameRegUpdate) at the end.
|
|
||||||
Optional<int64_t> FrameRegUpdate;
|
|
||||||
// MIFlags for any FrameReg updating instructions.
|
|
||||||
unsigned FrameRegUpdateFlags;
|
|
||||||
|
|
||||||
// Use zeroing instruction variants.
|
|
||||||
bool ZeroData;
|
|
||||||
DebugLoc DL;
|
|
||||||
|
|
||||||
void emitUnrolled(MachineBasicBlock::iterator InsertI);
|
|
||||||
void emitLoop(MachineBasicBlock::iterator InsertI);
|
|
||||||
|
|
||||||
public:
|
|
||||||
TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
|
|
||||||
: MBB(MBB), ZeroData(ZeroData) {
|
|
||||||
MF = MBB->getParent();
|
|
||||||
MRI = &MF->getRegInfo();
|
|
||||||
}
|
|
||||||
// Add an instruction to be replaced. Instructions must be added in the
|
|
||||||
// ascending order of Offset, and have to be adjacent.
|
|
||||||
void addInstruction(TagStoreInstr I) {
|
|
||||||
assert((TagStores.empty() ||
|
|
||||||
TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
|
|
||||||
"Non-adjacent tag store instructions.");
|
|
||||||
TagStores.push_back(I);
|
|
||||||
}
|
|
||||||
void clear() { TagStores.clear(); }
|
|
||||||
// Emit equivalent code at the given location, and erase the current set of
|
|
||||||
// instructions. May skip if the replacement is not profitable. May invalidate
|
|
||||||
// the input iterator and replace it with a valid one.
|
|
||||||
void emitCode(MachineBasicBlock::iterator &InsertI,
|
|
||||||
const AArch64FrameLowering *TFI, bool IsLast);
|
|
||||||
};
|
|
||||||
|
|
||||||
void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
|
|
||||||
const AArch64InstrInfo *TII =
|
|
||||||
MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
|
|
||||||
|
|
||||||
const int64_t kMinOffset = -256 * 16;
|
|
||||||
const int64_t kMaxOffset = 255 * 16;
|
|
||||||
|
|
||||||
Register BaseReg = FrameReg;
|
|
||||||
int64_t BaseRegOffsetBytes = FrameRegOffset.getBytes();
|
|
||||||
if (BaseRegOffsetBytes < kMinOffset ||
|
|
||||||
BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {
|
|
||||||
Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
|
|
||||||
emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
|
|
||||||
{BaseRegOffsetBytes, MVT::i8}, TII);
|
|
||||||
BaseReg = ScratchReg;
|
|
||||||
BaseRegOffsetBytes = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
MachineInstr *LastI = nullptr;
|
|
||||||
while (Size) {
|
|
||||||
int64_t InstrSize = (Size > 16) ? 32 : 16;
|
|
||||||
unsigned Opcode =
|
|
||||||
InstrSize == 16
|
|
||||||
? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset)
|
|
||||||
: (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset);
|
|
||||||
MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
|
|
||||||
.addReg(AArch64::SP)
|
|
||||||
.addReg(BaseReg)
|
|
||||||
.addImm(BaseRegOffsetBytes / 16)
|
|
||||||
.setMemRefs(CombinedMemRefs);
|
|
||||||
// A store to [BaseReg, #0] should go last for an opportunity to fold the
|
|
||||||
// final SP adjustment in the epilogue.
|
|
||||||
if (BaseRegOffsetBytes == 0)
|
|
||||||
LastI = I;
|
|
||||||
BaseRegOffsetBytes += InstrSize;
|
|
||||||
Size -= InstrSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (LastI)
|
|
||||||
MBB->splice(InsertI, MBB, LastI);
|
|
||||||
}
|
|
||||||
|
|
||||||
void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
|
|
||||||
const AArch64InstrInfo *TII =
|
|
||||||
MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
|
|
||||||
|
|
||||||
Register BaseReg = FrameRegUpdate
|
|
||||||
? FrameReg
|
|
||||||
: MRI->createVirtualRegister(&AArch64::GPR64RegClass);
|
|
||||||
Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
|
|
||||||
|
|
||||||
emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);
|
|
||||||
|
|
||||||
int64_t LoopSize = Size;
|
|
||||||
// If the loop size is not a multiple of 32, split off one 16-byte store at
|
|
||||||
// the end to fold BaseReg update into.
|
|
||||||
if (FrameRegUpdate && *FrameRegUpdate)
|
|
||||||
LoopSize -= LoopSize % 32;
|
|
||||||
MachineInstr *LoopI =
|
|
||||||
BuildMI(*MBB, InsertI, DL,
|
|
||||||
TII->get(ZeroData ? AArch64::STZGloop : AArch64::STGloop))
|
|
||||||
.addDef(SizeReg)
|
|
||||||
.addDef(BaseReg)
|
|
||||||
.addImm(LoopSize)
|
|
||||||
.addReg(BaseReg)
|
|
||||||
.setMemRefs(CombinedMemRefs);
|
|
||||||
if (FrameRegUpdate)
|
|
||||||
LoopI->setFlags(FrameRegUpdateFlags);
|
|
||||||
|
|
||||||
int64_t ExtraBaseRegUpdate =
|
|
||||||
FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getBytes() - Size) : 0;
|
|
||||||
if (LoopSize < Size) {
|
|
||||||
assert(FrameRegUpdate);
|
|
||||||
assert(Size - LoopSize == 16);
|
|
||||||
// Tag 16 more bytes at BaseReg and update BaseReg.
|
|
||||||
BuildMI(*MBB, InsertI, DL,
|
|
||||||
TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
|
|
||||||
.addDef(BaseReg)
|
|
||||||
.addReg(BaseReg)
|
|
||||||
.addReg(BaseReg)
|
|
||||||
.addImm(1 + ExtraBaseRegUpdate / 16)
|
|
||||||
.setMemRefs(CombinedMemRefs)
|
|
||||||
.setMIFlags(FrameRegUpdateFlags);
|
|
||||||
} else if (ExtraBaseRegUpdate) {
|
|
||||||
// Update BaseReg.
|
|
||||||
BuildMI(
|
|
||||||
*MBB, InsertI, DL,
|
|
||||||
TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
|
|
||||||
.addDef(BaseReg)
|
|
||||||
.addReg(BaseReg)
|
|
||||||
.addImm(std::abs(ExtraBaseRegUpdate))
|
|
||||||
.addImm(0)
|
|
||||||
.setMIFlags(FrameRegUpdateFlags);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if *II is a register update that can be merged into STGloop that ends
|
|
||||||
// at (Reg + Size). On success, *TotalOffset receives the signed adjustment that
|
|
||||||
// the update instruction applies to Reg.
|
|
||||||
bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
|
|
||||||
int64_t Size, int64_t *TotalOffset) {
|
|
||||||
MachineInstr &MI = *II;
|
|
||||||
if ((MI.getOpcode() == AArch64::ADDXri ||
|
|
||||||
MI.getOpcode() == AArch64::SUBXri) &&
|
|
||||||
MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
|
|
||||||
unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
|
|
||||||
int64_t Offset = MI.getOperand(2).getImm() << Shift;
|
|
||||||
if (MI.getOpcode() == AArch64::SUBXri)
|
|
||||||
Offset = -Offset;
|
|
||||||
int64_t AbsPostOffset = std::abs(Offset - Size);
|
|
||||||
const int64_t kMaxOffset =
|
|
||||||
0xFFF; // Max encoding for unshifted ADDXri / SUBXri
|
|
||||||
if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
|
|
||||||
*TotalOffset = Offset;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
|
|
||||||
SmallVectorImpl<MachineMemOperand *> &MemRefs) {
|
|
||||||
MemRefs.clear();
|
|
||||||
for (auto &TS : TSE) {
|
|
||||||
MachineInstr *MI = TS.MI;
|
|
||||||
// An instruction without memory operands may access anything. Be
|
|
||||||
// conservative and return an empty list.
|
|
||||||
if (MI->memoperands_empty()) {
|
|
||||||
MemRefs.clear();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
|
|
||||||
const AArch64FrameLowering *TFI, bool IsLast) {
|
|
||||||
if (TagStores.empty())
|
|
||||||
return;
|
|
||||||
TagStoreInstr &FirstTagStore = TagStores[0];
|
|
||||||
TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
|
|
||||||
Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
|
|
||||||
DL = TagStores[0].MI->getDebugLoc();
|
|
||||||
|
|
||||||
unsigned Reg;
|
|
||||||
FrameRegOffset = TFI->resolveFrameOffsetReference(
|
|
||||||
*MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
|
|
||||||
/*PreferFP=*/false, /*ForSimm=*/true);
|
|
||||||
FrameReg = Reg;
|
|
||||||
FrameRegUpdate = None;
|
|
||||||
|
|
||||||
mergeMemRefs(TagStores, CombinedMemRefs);
|
|
||||||
|
|
||||||
LLVM_DEBUG(dbgs() << "Replacing adjacent STG instructions:\n";
|
|
||||||
for (const auto &Instr
|
|
||||||
: TagStores) { dbgs() << " " << *Instr.MI; });
|
|
||||||
|
|
||||||
// Size threshold where a loop becomes shorter than a linear sequence of
|
|
||||||
// tagging instructions.
|
|
||||||
const int kSetTagLoopThreshold = 176;
|
|
||||||
if (Size < kSetTagLoopThreshold) {
|
|
||||||
if (TagStores.size() < 2)
|
|
||||||
return;
|
|
||||||
emitUnrolled(InsertI);
|
|
||||||
} else {
|
|
||||||
MachineInstr *UpdateInstr = nullptr;
|
|
||||||
int64_t TotalOffset;
|
|
||||||
if (IsLast) {
|
|
||||||
// See if we can merge base register update into the STGloop.
|
|
||||||
// This is done in AArch64LoadStoreOptimizer for "normal" stores,
|
|
||||||
// but STGloop is way too unusual for that, and also it only
|
|
||||||
// realistically happens in function epilogue. Also, STGloop is expanded
|
|
||||||
// before that pass.
|
|
||||||
if (InsertI != MBB->end() &&
|
|
||||||
canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getBytes() + Size,
|
|
||||||
&TotalOffset)) {
|
|
||||||
UpdateInstr = &*InsertI++;
|
|
||||||
LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
|
|
||||||
<< *UpdateInstr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!UpdateInstr && TagStores.size() < 2)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (UpdateInstr) {
|
|
||||||
FrameRegUpdate = TotalOffset;
|
|
||||||
FrameRegUpdateFlags = UpdateInstr->getFlags();
|
|
||||||
}
|
|
||||||
emitLoop(InsertI);
|
|
||||||
if (UpdateInstr)
|
|
||||||
UpdateInstr->eraseFromParent();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto &TS : TagStores)
|
|
||||||
TS.MI->eraseFromParent();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
|
|
||||||
int64_t &Size, bool &ZeroData) {
|
|
||||||
MachineFunction &MF = *MI.getParent()->getParent();
|
|
||||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
||||||
|
|
||||||
unsigned Opcode = MI.getOpcode();
|
|
||||||
ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGOffset ||
|
|
||||||
Opcode == AArch64::STZ2GOffset);
|
|
||||||
|
|
||||||
if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
|
|
||||||
if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
|
|
||||||
return false;
|
|
||||||
if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
|
|
||||||
return false;
|
|
||||||
Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
|
|
||||||
Size = MI.getOperand(2).getImm();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Opcode == AArch64::STGOffset || Opcode == AArch64::STZGOffset)
|
|
||||||
Size = 16;
|
|
||||||
else if (Opcode == AArch64::ST2GOffset || Opcode == AArch64::STZ2GOffset)
|
|
||||||
Size = 32;
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
|
|
||||||
16 * MI.getOperand(2).getImm();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Detect a run of memory tagging instructions for adjacent stack frame slots,
|
|
||||||
// and replace them with a shorter instruction sequence:
|
|
||||||
// * replace STG + STG with ST2G
|
|
||||||
// * replace STGloop + STGloop with STGloop
|
|
||||||
// This code needs to run when stack slot offsets are already known, but before
|
|
||||||
// FrameIndex operands in STG instructions are eliminated.
|
|
||||||
MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
|
|
||||||
const AArch64FrameLowering *TFI,
|
|
||||||
RegScavenger *RS) {
|
|
||||||
bool FirstZeroData;
|
|
||||||
int64_t Size, Offset;
|
|
||||||
MachineInstr &MI = *II;
|
|
||||||
MachineBasicBlock *MBB = MI.getParent();
|
|
||||||
MachineBasicBlock::iterator NextI = ++II;
|
|
||||||
if (&MI == &MBB->instr_back())
|
|
||||||
return II;
|
|
||||||
if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
|
|
||||||
return II;
|
|
||||||
|
|
||||||
SmallVector<TagStoreInstr, 4> Instrs;
|
|
||||||
Instrs.emplace_back(&MI, Offset, Size);
|
|
||||||
|
|
||||||
constexpr int kScanLimit = 10;
|
|
||||||
int Count = 0;
|
|
||||||
for (MachineBasicBlock::iterator E = MBB->end();
|
|
||||||
NextI != E && Count < kScanLimit; ++NextI) {
|
|
||||||
MachineInstr &MI = *NextI;
|
|
||||||
bool ZeroData;
|
|
||||||
int64_t Size, Offset;
|
|
||||||
// Collect instructions that update memory tags with a FrameIndex operand
|
|
||||||
// and (when applicable) constant size, and whose output registers are dead
|
|
||||||
// (the latter is almost always the case in practice). Since these
|
|
||||||
// instructions effectively have no inputs or outputs, we are free to skip
|
|
||||||
// any non-aliasing instructions in between without tracking used registers.
|
|
||||||
if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
|
|
||||||
if (ZeroData != FirstZeroData)
|
|
||||||
break;
|
|
||||||
Instrs.emplace_back(&MI, Offset, Size);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only count non-transient, non-tagging instructions toward the scan
|
|
||||||
// limit.
|
|
||||||
if (!MI.isTransient())
|
|
||||||
++Count;
|
|
||||||
|
|
||||||
// Just in case, stop before the epilogue code starts.
|
|
||||||
if (MI.getFlag(MachineInstr::FrameSetup) ||
|
|
||||||
MI.getFlag(MachineInstr::FrameDestroy))
|
|
||||||
break;
|
|
||||||
|
|
||||||
// Reject anything that may alias the collected instructions.
|
|
||||||
if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// New code will be inserted after the last tagging instruction we've found.
|
|
||||||
MachineBasicBlock::iterator InsertI = Instrs.back().MI;
|
|
||||||
InsertI++;
|
|
||||||
|
|
||||||
llvm::stable_sort(Instrs,
|
|
||||||
[](const TagStoreInstr &Left, const TagStoreInstr &Right) {
|
|
||||||
return Left.Offset < Right.Offset;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Make sure that we don't have any overlapping stores.
|
|
||||||
int64_t CurOffset = Instrs[0].Offset;
|
|
||||||
for (auto &Instr : Instrs) {
|
|
||||||
if (CurOffset > Instr.Offset)
|
|
||||||
return NextI;
|
|
||||||
CurOffset = Instr.Offset + Instr.Size;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find contiguous runs of tagged memory and emit shorter instruction
|
|
||||||
// sequences for them when possible.
|
|
||||||
TagStoreEdit TSE(MBB, FirstZeroData);
|
|
||||||
Optional<int64_t> EndOffset;
|
|
||||||
for (auto &Instr : Instrs) {
|
|
||||||
if (EndOffset && *EndOffset != Instr.Offset) {
|
|
||||||
// Found a gap.
|
|
||||||
TSE.emitCode(InsertI, TFI, /*IsLast = */ false);
|
|
||||||
TSE.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
TSE.addInstruction(Instr);
|
|
||||||
EndOffset = Instr.Offset + Instr.Size;
|
|
||||||
}
|
|
||||||
|
|
||||||
TSE.emitCode(InsertI, TFI, /*IsLast = */ true);
|
|
||||||
|
|
||||||
return InsertI;
|
|
||||||
}
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
|
|
||||||
MachineFunction &MF, RegScavenger *RS = nullptr) const {
|
|
||||||
if (StackTaggingMergeSetTag)
|
|
||||||
for (auto &BB : MF)
|
|
||||||
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();)
|
|
||||||
II = tryMergeAdjacentSTG(II, this, RS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
|
|
||||||
/// before the update. This is easily retrieved as it is exactly the offset
|
|
||||||
/// that is set in processFunctionBeforeFrameFinalized.
|
|
||||||
int AArch64FrameLowering::getFrameIndexReferencePreferSP(
|
int AArch64FrameLowering::getFrameIndexReferencePreferSP(
|
||||||
const MachineFunction &MF, int FI, unsigned &FrameReg,
|
const MachineFunction &MF, int FI, unsigned &FrameReg,
|
||||||
bool IgnoreSPUpdates) const {
|
bool IgnoreSPUpdates) const {
|
||||||
|
|
|
@ -77,10 +77,6 @@ public:
|
||||||
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
|
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
|
||||||
RegScavenger *RS) const override;
|
RegScavenger *RS) const override;
|
||||||
|
|
||||||
void
|
|
||||||
processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF,
|
|
||||||
RegScavenger *RS) const override;
|
|
||||||
|
|
||||||
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override;
|
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override;
|
||||||
|
|
||||||
unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
|
unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
|
||||||
|
@ -111,8 +107,6 @@ private:
|
||||||
int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
|
int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
|
||||||
int &MinCSFrameIndex,
|
int &MinCSFrameIndex,
|
||||||
int &MaxCSFrameIndex) const;
|
int &MaxCSFrameIndex) const;
|
||||||
bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
|
|
||||||
unsigned StackBumpBytes) const;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // End llvm namespace
|
} // End llvm namespace
|
||||||
|
|
|
@ -3458,8 +3458,6 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
|
||||||
case AArch64::ST1Fourv1d:
|
case AArch64::ST1Fourv1d:
|
||||||
case AArch64::IRG:
|
case AArch64::IRG:
|
||||||
case AArch64::IRGstack:
|
case AArch64::IRGstack:
|
||||||
case AArch64::STGloop:
|
|
||||||
case AArch64::STZGloop:
|
|
||||||
return AArch64FrameOffsetCannotUpdate;
|
return AArch64FrameOffsetCannotUpdate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1514,17 +1514,17 @@ def TAGPstack
|
||||||
// register / expression for the tagged base pointer of the current function.
|
// register / expression for the tagged base pointer of the current function.
|
||||||
def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
|
def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
|
||||||
|
|
||||||
// Large STG to be expanded into a loop. $sz is the size, $Rn is start address.
|
// Large STG to be expanded into a loop. $Rm is the size, $Rn is start address.
|
||||||
// $Rn_wback is one past the end of the range. $Rm is the loop counter.
|
// $Rn_wback is one past the end of the range.
|
||||||
let isCodeGenOnly=1, mayStore=1 in {
|
let isCodeGenOnly=1, mayStore=1 in {
|
||||||
def STGloop
|
def STGloop
|
||||||
: Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
|
: Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
|
||||||
[], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
|
[], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
|
||||||
Sched<[WriteAdr, WriteST]>;
|
Sched<[WriteAdr, WriteST]>;
|
||||||
|
|
||||||
def STZGloop
|
def STZGloop
|
||||||
: Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
|
: Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
|
||||||
[], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
|
[], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
|
||||||
Sched<[WriteAdr, WriteST]>;
|
Sched<[WriteAdr, WriteST]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -390,10 +390,6 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
|
||||||
if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
|
if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// If even offset 0 is illegal, we don't want a virtual base register.
|
|
||||||
if (!isFrameOffsetLegal(MI, AArch64::SP, 0))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// The offset likely isn't legal; we want to allocate a virtual base register.
|
// The offset likely isn't legal; we want to allocate a virtual base register.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -449,17 +445,6 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
|
||||||
(void)Done;
|
(void)Done;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Register getScratchRegisterForInstruction(MachineInstr &MI) {
|
|
||||||
// ST*Gloop can only have #fi in op3, and they have a constraint that
|
|
||||||
// op1==op3. Since op1 is early-clobber, it may (and also must) be used as the
|
|
||||||
// scratch register.
|
|
||||||
if (MI.getOpcode() == AArch64::STGloop || MI.getOpcode() == AArch64::STZGloop)
|
|
||||||
return MI.getOperand(1).getReg();
|
|
||||||
else
|
|
||||||
return MI.getMF()->getRegInfo().createVirtualRegister(
|
|
||||||
&AArch64::GPR64RegClass);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
int SPAdj, unsigned FIOperandNum,
|
int SPAdj, unsigned FIOperandNum,
|
||||||
RegScavenger *RS) const {
|
RegScavenger *RS) const {
|
||||||
|
@ -516,7 +501,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
// in a scratch register.
|
// in a scratch register.
|
||||||
Offset = TFI->resolveFrameIndexReference(
|
Offset = TFI->resolveFrameIndexReference(
|
||||||
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
|
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
|
||||||
Register ScratchReg = getScratchRegisterForInstruction(MI);
|
Register ScratchReg =
|
||||||
|
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
|
||||||
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset,
|
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset,
|
||||||
TII);
|
TII);
|
||||||
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg)
|
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg)
|
||||||
|
@ -545,7 +531,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
// If we get here, the immediate doesn't fit into the instruction. We folded
|
// If we get here, the immediate doesn't fit into the instruction. We folded
|
||||||
// as much as possible above. Handle the rest, providing a register that is
|
// as much as possible above. Handle the rest, providing a register that is
|
||||||
// SP+LargeImm.
|
// SP+LargeImm.
|
||||||
Register ScratchReg = getScratchRegisterForInstruction(MI);
|
Register ScratchReg =
|
||||||
|
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
|
||||||
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
|
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
|
||||||
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
|
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
|
||||||
}
|
}
|
||||||
|
|
|
@ -125,13 +125,19 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
|
||||||
return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
|
return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
|
||||||
ZeroData);
|
ZeroData);
|
||||||
|
|
||||||
const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
|
if (ObjSize % 32 != 0) {
|
||||||
|
SDNode *St1 = DAG.getMachineNode(
|
||||||
if (Addr.getOpcode() == ISD::FrameIndex) {
|
ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex, dl,
|
||||||
int FI = cast<FrameIndexSDNode>(Addr)->getIndex();
|
{MVT::i64, MVT::Other},
|
||||||
Addr = DAG.getTargetFrameIndex(FI, MVT::i64);
|
{Addr, Addr, DAG.getTargetConstant(1, dl, MVT::i64), Chain});
|
||||||
|
DAG.setNodeMemRefs(cast<MachineSDNode>(St1), {BaseMemOperand});
|
||||||
|
ObjSize -= 16;
|
||||||
|
Addr = SDValue(St1, 0);
|
||||||
|
Chain = SDValue(St1, 1);
|
||||||
}
|
}
|
||||||
SDValue Ops[] = {DAG.getTargetConstant(ObjSize, dl, MVT::i64), Addr, Chain};
|
|
||||||
|
const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
|
||||||
|
SDValue Ops[] = {DAG.getConstant(ObjSize, dl, MVT::i64), Addr, Chain};
|
||||||
SDNode *St = DAG.getMachineNode(
|
SDNode *St = DAG.getMachineNode(
|
||||||
ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);
|
ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);
|
||||||
|
|
||||||
|
|
|
@ -1,214 +0,0 @@
|
||||||
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
|
|
||||||
|
|
||||||
declare void @use(i8* %p)
|
|
||||||
declare void @llvm.aarch64.settag(i8* %p, i64 %a)
|
|
||||||
declare void @llvm.aarch64.settag.zero(i8* %p, i64 %a)
|
|
||||||
|
|
||||||
define void @stg16_16() {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: stg16_16:
|
|
||||||
; CHECK: st2g sp, [sp], #32
|
|
||||||
; CHECK: ret
|
|
||||||
%a = alloca i8, i32 16, align 16
|
|
||||||
%b = alloca i8, i32 16, align 16
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 16)
|
|
||||||
call void @llvm.aarch64.settag(i8* %b, i64 16)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define i32 @stg16_16_16_16_ret() {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: stg16_16_16_16_ret:
|
|
||||||
; CHECK: st2g sp, [sp, #32]
|
|
||||||
; CHECK: st2g sp, [sp], #64
|
|
||||||
; CHECK: mov w0, wzr
|
|
||||||
; CHECK: ret
|
|
||||||
%a = alloca i8, i32 16, align 16
|
|
||||||
%b = alloca i8, i32 16, align 16
|
|
||||||
%c = alloca i8, i32 16, align 16
|
|
||||||
%d = alloca i8, i32 16, align 16
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 16)
|
|
||||||
call void @llvm.aarch64.settag(i8* %b, i64 16)
|
|
||||||
call void @llvm.aarch64.settag(i8* %c, i64 16)
|
|
||||||
call void @llvm.aarch64.settag(i8* %d, i64 16)
|
|
||||||
ret i32 0
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @stg16_16_16_16() {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: stg16_16_16_16:
|
|
||||||
; CHECK: st2g sp, [sp, #32]
|
|
||||||
; CHECK: st2g sp, [sp], #64
|
|
||||||
; CHECK: ret
|
|
||||||
%a = alloca i8, i32 16, align 16
|
|
||||||
%b = alloca i8, i32 16, align 16
|
|
||||||
%c = alloca i8, i32 16, align 16
|
|
||||||
%d = alloca i8, i32 16, align 16
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 16)
|
|
||||||
call void @llvm.aarch64.settag(i8* %b, i64 16)
|
|
||||||
call void @llvm.aarch64.settag(i8* %c, i64 16)
|
|
||||||
call void @llvm.aarch64.settag(i8* %d, i64 16)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @stg128_128_128_128() {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: stg128_128_128_128:
|
|
||||||
; CHECK: mov x8, #512
|
|
||||||
; CHECK: st2g sp, [sp], #32
|
|
||||||
; CHECK: sub x8, x8, #32
|
|
||||||
; CHECK: cbnz x8,
|
|
||||||
; CHECK: ret
|
|
||||||
%a = alloca i8, i32 128, align 16
|
|
||||||
%b = alloca i8, i32 128, align 16
|
|
||||||
%c = alloca i8, i32 128, align 16
|
|
||||||
%d = alloca i8, i32 128, align 16
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 128)
|
|
||||||
call void @llvm.aarch64.settag(i8* %b, i64 128)
|
|
||||||
call void @llvm.aarch64.settag(i8* %c, i64 128)
|
|
||||||
call void @llvm.aarch64.settag(i8* %d, i64 128)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @stg16_512_16() {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: stg16_512_16:
|
|
||||||
; CHECK: mov x8, #544
|
|
||||||
; CHECK: st2g sp, [sp], #32
|
|
||||||
; CHECK: sub x8, x8, #32
|
|
||||||
; CHECK: cbnz x8,
|
|
||||||
; CHECK: ret
|
|
||||||
%a = alloca i8, i32 16, align 16
|
|
||||||
%b = alloca i8, i32 512, align 16
|
|
||||||
%c = alloca i8, i32 16, align 16
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 16)
|
|
||||||
call void @llvm.aarch64.settag(i8* %b, i64 512)
|
|
||||||
call void @llvm.aarch64.settag(i8* %c, i64 16)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @stg512_512_512() {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: stg512_512_512:
|
|
||||||
; CHECK: mov x8, #1536
|
|
||||||
; CHECK: st2g sp, [sp], #32
|
|
||||||
; CHECK: sub x8, x8, #32
|
|
||||||
; CHECK: cbnz x8,
|
|
||||||
; CHECK: ret
|
|
||||||
%a = alloca i8, i32 512, align 16
|
|
||||||
%b = alloca i8, i32 512, align 16
|
|
||||||
%c = alloca i8, i32 512, align 16
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 512)
|
|
||||||
call void @llvm.aarch64.settag(i8* %b, i64 512)
|
|
||||||
call void @llvm.aarch64.settag(i8* %c, i64 512)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @early(i1 %flag) {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: early:
|
|
||||||
; CHECK: tbz w0, #0, [[LABEL:.LBB.*]]
|
|
||||||
; CHECK: st2g sp, [sp, #
|
|
||||||
; CHECK: st2g sp, [sp, #
|
|
||||||
; CHECK: st2g sp, [sp, #
|
|
||||||
; CHECK: [[LABEL]]:
|
|
||||||
; CHECK: stg sp, [sp, #
|
|
||||||
; CHECK: st2g sp, [sp], #
|
|
||||||
; CHECK: ret
|
|
||||||
%a = alloca i8, i32 48, align 16
|
|
||||||
%b = alloca i8, i32 48, align 16
|
|
||||||
%c = alloca i8, i32 48, align 16
|
|
||||||
br i1 %flag, label %if.then, label %if.end
|
|
||||||
|
|
||||||
if.then:
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 48)
|
|
||||||
call void @llvm.aarch64.settag(i8* %b, i64 48)
|
|
||||||
br label %if.end
|
|
||||||
|
|
||||||
if.end:
|
|
||||||
call void @llvm.aarch64.settag(i8* %c, i64 48)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @early_128_128(i1 %flag) {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: early_128_128:
|
|
||||||
; CHECK: tbz w0, #0, [[LABEL:.LBB.*]]
|
|
||||||
; CHECK: add x9, sp, #
|
|
||||||
; CHECK: mov x8, #256
|
|
||||||
; CHECK: st2g x9, [x9], #32
|
|
||||||
; CHECK: sub x8, x8, #32
|
|
||||||
; CHECK: cbnz x8,
|
|
||||||
; CHECK: [[LABEL]]:
|
|
||||||
; CHECK: stg sp, [sp, #
|
|
||||||
; CHECK: st2g sp, [sp], #
|
|
||||||
; CHECK: ret
|
|
||||||
%a = alloca i8, i32 128, align 16
|
|
||||||
%b = alloca i8, i32 128, align 16
|
|
||||||
%c = alloca i8, i32 48, align 16
|
|
||||||
br i1 %flag, label %if.then, label %if.end
|
|
||||||
|
|
||||||
if.then:
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 128)
|
|
||||||
call void @llvm.aarch64.settag(i8* %b, i64 128)
|
|
||||||
br label %if.end
|
|
||||||
|
|
||||||
if.end:
|
|
||||||
call void @llvm.aarch64.settag(i8* %c, i64 48)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @early_512_512(i1 %flag) {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: early_512_512:
|
|
||||||
; CHECK: tbz w0, #0, [[LABEL:.LBB.*]]
|
|
||||||
; CHECK: add x9, sp, #
|
|
||||||
; CHECK: mov x8, #1024
|
|
||||||
; CHECK: st2g x9, [x9], #32
|
|
||||||
; CHECK: sub x8, x8, #32
|
|
||||||
; CHECK: cbnz x8,
|
|
||||||
; CHECK: [[LABEL]]:
|
|
||||||
; CHECK: stg sp, [sp, #
|
|
||||||
; CHECK: st2g sp, [sp], #
|
|
||||||
; CHECK: ret
|
|
||||||
%a = alloca i8, i32 512, align 16
|
|
||||||
%b = alloca i8, i32 512, align 16
|
|
||||||
%c = alloca i8, i32 48, align 16
|
|
||||||
br i1 %flag, label %if.then, label %if.end
|
|
||||||
|
|
||||||
if.then:
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 512)
|
|
||||||
call void @llvm.aarch64.settag(i8* %b, i64 512)
|
|
||||||
br label %if.end
|
|
||||||
|
|
||||||
if.end:
|
|
||||||
call void @llvm.aarch64.settag(i8* %c, i64 48)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; Two loops of size 256; the second loop updates SP.
|
|
||||||
define void @stg128_128_gap_128_128() {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: stg128_128_gap_128_128:
|
|
||||||
; CHECK: mov x9, sp
|
|
||||||
; CHECK: mov x8, #256
|
|
||||||
; CHECK: st2g x9, [x9], #32
|
|
||||||
; CHECK: sub x8, x8, #32
|
|
||||||
; CHECK: cbnz x8,
|
|
||||||
; CHECK: mov x8, #256
|
|
||||||
; CHECK: st2g sp, [sp], #32
|
|
||||||
; CHECK: sub x8, x8, #32
|
|
||||||
; CHECK: cbnz x8,
|
|
||||||
; CHECK: ret
|
|
||||||
%a = alloca i8, i32 128, align 16
|
|
||||||
%a2 = alloca i8, i32 128, align 16
|
|
||||||
%b = alloca i8, i32 32, align 16
|
|
||||||
%c = alloca i8, i32 128, align 16
|
|
||||||
%c2 = alloca i8, i32 128, align 16
|
|
||||||
call void @use(i8* %b)
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 128)
|
|
||||||
call void @llvm.aarch64.settag(i8* %a2, i64 128)
|
|
||||||
call void @llvm.aarch64.settag(i8* %c, i64 128)
|
|
||||||
call void @llvm.aarch64.settag(i8* %c2, i64 128)
|
|
||||||
ret void
|
|
||||||
}
|
|
|
@ -1,83 +0,0 @@
|
||||||
# RUN: llc -mtriple=aarch64 -mattr=+mte -run-pass=prologepilog %s -o - | FileCheck %s
|
|
||||||
|
|
||||||
--- |
|
|
||||||
declare void @llvm.aarch64.settag(i8* nocapture writeonly, i64) argmemonly nounwind writeonly "target-features"="+mte"
|
|
||||||
define i32 @stg16_16_16_16_ret() "target-features"="+mte" {
|
|
||||||
entry:
|
|
||||||
%a = alloca i8, i32 16, align 16
|
|
||||||
%b = alloca i8, i32 16, align 16
|
|
||||||
%c = alloca i8, i32 16, align 16
|
|
||||||
%d = alloca i8, i32 16, align 16
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 16)
|
|
||||||
call void @llvm.aarch64.settag(i8* %b, i64 16)
|
|
||||||
call void @llvm.aarch64.settag(i8* %c, i64 16)
|
|
||||||
call void @llvm.aarch64.settag(i8* %d, i64 16)
|
|
||||||
ret i32 0
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @stg16_store_128() "target-features"="+mte" {
|
|
||||||
entry:
|
|
||||||
%a = alloca i8, i32 16, align 16
|
|
||||||
%b = alloca i8, i32 128, align 16
|
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 16)
|
|
||||||
store i8 42, i8* %a
|
|
||||||
call void @llvm.aarch64.settag(i8* %b, i64 128)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
...
|
|
||||||
---
|
|
||||||
# A sequence of STG with a register copy in the middle.
|
|
||||||
# Can be merged into ST2G + ST2G.
|
|
||||||
# CHECK-LABEL: name:{{.*}}stg16_16_16_16_ret
|
|
||||||
# CHECK-DAG: ST2GOffset $sp, $sp, 2
|
|
||||||
# CHECK-DAG: ST2GOffset $sp, $sp, 0
|
|
||||||
# CHECK-DAG: $w0 = COPY $wzr
|
|
||||||
# CHECK-DAG: RET_ReallyLR implicit killed $w0
|
|
||||||
|
|
||||||
name: stg16_16_16_16_ret
|
|
||||||
tracksRegLiveness: true
|
|
||||||
stack:
|
|
||||||
- { id: 0, name: a, size: 16, alignment: 16 }
|
|
||||||
- { id: 1, name: b, size: 16, alignment: 16 }
|
|
||||||
- { id: 2, name: c, size: 16, alignment: 16 }
|
|
||||||
- { id: 3, name: d, size: 16, alignment: 16 }
|
|
||||||
body: |
|
|
||||||
bb.0.entry:
|
|
||||||
STGOffset $sp, %stack.0.a, 0 :: (store 16 into %ir.a)
|
|
||||||
STGOffset $sp, %stack.1.b, 0 :: (store 16 into %ir.b)
|
|
||||||
STGOffset $sp, %stack.2.c, 0 :: (store 16 into %ir.c)
|
|
||||||
$w0 = COPY $wzr
|
|
||||||
STGOffset $sp, %stack.3.d, 0 :: (store 16 into %ir.d)
|
|
||||||
RET_ReallyLR implicit killed $w0
|
|
||||||
|
|
||||||
...
|
|
||||||
|
|
||||||
---
|
|
||||||
# A store in the middle prevents merging.
|
|
||||||
# CHECK-LABEL: name:{{.*}}stg16_store_128
|
|
||||||
# CHECK: ST2GOffset $sp, $sp, 2
|
|
||||||
# CHECK: ST2GOffset $sp, $sp, 4
|
|
||||||
# CHECK: ST2GOffset $sp, $sp, 6
|
|
||||||
# CHECK: STGOffset $sp, $sp, 8
|
|
||||||
# CHECK: STRBBui
|
|
||||||
# CHECK: ST2GOffset $sp, $sp, 0
|
|
||||||
# CHECK: RET_ReallyLR
|
|
||||||
|
|
||||||
name: stg16_store_128
|
|
||||||
tracksRegLiveness: true
|
|
||||||
stack:
|
|
||||||
- { id: 0, name: a, size: 16, alignment: 16 }
|
|
||||||
- { id: 1, name: b, size: 128, alignment: 16 }
|
|
||||||
body: |
|
|
||||||
bb.0.entry:
|
|
||||||
STGOffset $sp, %stack.0.a, 0 :: (store 16 into %ir.a)
|
|
||||||
renamable $w8 = MOVi32imm 42
|
|
||||||
ST2GOffset $sp, %stack.1.b, 6 :: (store 32 into %ir.b + 96, align 16)
|
|
||||||
ST2GOffset $sp, %stack.1.b, 4 :: (store 32 into %ir.b + 64, align 16)
|
|
||||||
ST2GOffset $sp, %stack.1.b, 2 :: (store 32 into %ir.b + 32, align 16)
|
|
||||||
STRBBui killed renamable $w8, %stack.0.a, 0 :: (store 1 into %ir.a, align 16)
|
|
||||||
ST2GOffset $sp, %stack.1.b, 0 :: (store 32 into %ir.b, align 16)
|
|
||||||
RET_ReallyLR
|
|
||||||
|
|
||||||
...
|
|
|
@ -64,8 +64,8 @@ entry:
|
||||||
define void @stg17(i8* %p) {
|
define void @stg17(i8* %p) {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: stg17:
|
; CHECK-LABEL: stg17:
|
||||||
; CHECK: stg x0, [x0], #16
|
|
||||||
; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256
|
; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256
|
||||||
|
; CHECK: stg x0, [x0], #16
|
||||||
; CHECK: st2g x0, [x0], #32
|
; CHECK: st2g x0, [x0], #32
|
||||||
; CHECK: sub x[[R]], x[[R]], #32
|
; CHECK: sub x[[R]], x[[R]], #32
|
||||||
; CHECK: cbnz x[[R]],
|
; CHECK: cbnz x[[R]],
|
||||||
|
@ -87,8 +87,8 @@ entry:
|
||||||
define void @stzg17(i8* %p) {
|
define void @stzg17(i8* %p) {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: stzg17:
|
; CHECK-LABEL: stzg17:
|
||||||
; CHECK: stzg x0, [x0], #16
|
|
||||||
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
|
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
|
||||||
|
; CHECK: stzg x0, [x0], #16
|
||||||
; CHECK: stz2g x0, [x0], #32
|
; CHECK: stz2g x0, [x0], #32
|
||||||
; CHECK: sub x[[R]], x[[R]], #32
|
; CHECK: sub x[[R]], x[[R]], #32
|
||||||
; CHECK: cbnz x[[R]],
|
; CHECK: cbnz x[[R]],
|
||||||
|
@ -110,10 +110,10 @@ entry:
|
||||||
define void @stg_alloca5() {
|
define void @stg_alloca5() {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: stg_alloca5:
|
; CHECK-LABEL: stg_alloca5:
|
||||||
|
; CHECK: stg sp, [sp, #64]
|
||||||
; CHECK: st2g sp, [sp, #32]
|
; CHECK: st2g sp, [sp, #32]
|
||||||
; CHECK-NEXT: stg sp, [sp, #64]
|
; CHECK: st2g sp, [sp]
|
||||||
; CHECK-NEXT: st2g sp, [sp], #80
|
; CHECK: ret
|
||||||
; CHECK-NEXT: ret
|
|
||||||
%a = alloca i8, i32 80, align 16
|
%a = alloca i8, i32 80, align 16
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 80)
|
call void @llvm.aarch64.settag(i8* %a, i64 80)
|
||||||
ret void
|
ret void
|
||||||
|
@ -122,11 +122,12 @@ entry:
|
||||||
define void @stg_alloca17() {
|
define void @stg_alloca17() {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: stg_alloca17:
|
; CHECK-LABEL: stg_alloca17:
|
||||||
|
; CHECK: mov [[P:x[0-9]+]], sp
|
||||||
|
; CHECK: stg [[P]], {{\[}}[[P]]{{\]}}, #16
|
||||||
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
|
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
|
||||||
; CHECK: st2g sp, [sp], #32
|
; CHECK: st2g [[P]], {{\[}}[[P]]{{\]}}, #32
|
||||||
; CHECK: sub x[[R]], x[[R]], #32
|
; CHECK: sub x[[R]], x[[R]], #32
|
||||||
; CHECK: cbnz x[[R]],
|
; CHECK: cbnz x[[R]],
|
||||||
; CHECK: stg sp, [sp], #16
|
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
%a = alloca i8, i32 272, align 16
|
%a = alloca i8, i32 272, align 16
|
||||||
call void @llvm.aarch64.settag(i8* %a, i64 272)
|
call void @llvm.aarch64.settag(i8* %a, i64 272)
|
||||||
|
|
|
@ -210,10 +210,11 @@ entry:
|
||||||
; DEFAULT: ldrb [[A:w.*]], [x{{.*}}]
|
; DEFAULT: ldrb [[A:w.*]], [x{{.*}}]
|
||||||
; DEFAULT: ldrb [[B:w.*]], [x{{.*}}]
|
; DEFAULT: ldrb [[B:w.*]], [x{{.*}}]
|
||||||
|
|
||||||
; ALWAYS-DAG: ldg [[PA:x.*]], [x{{.*}}]
|
; ALWAYS: ldg [[PA:x.*]], [x{{.*}}]
|
||||||
; ALWAYS-DAG: ldrb [[B:w.*]], [sp]
|
; ALWAYS: ldrb [[B:w.*]], [sp]
|
||||||
; ALWAYS-DAG: ldrb [[A:w.*]], {{\[}}[[PA]]{{\]}}
|
; ALWAYS: ldrb [[A:w.*]], {{\[}}[[PA]]{{\]}}
|
||||||
|
|
||||||
|
; COMMON: add w0, [[B]], [[A]]
|
||||||
; COMMON: ret
|
; COMMON: ret
|
||||||
|
|
||||||
; One of these allocas is closer to FP than to SP, and within 256 bytes
|
; One of these allocas is closer to FP than to SP, and within 256 bytes
|
||||||
|
|
Loading…
Reference in New Issue