forked from OSchip/llvm-project
[MTE] Handle MTE instructions in AArch64LoadStoreOptimizer.
Summary: Generate pre- and post-indexed forms of ST*G and STGP when possible. Reviewers: ostannard, vitalybuka Subscribers: kristof.beyls, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D67741 llvm-svn: 372412
This commit is contained in:
parent
330014843c
commit
c2bda3e422
|
@ -201,8 +201,22 @@ static bool isNarrowStore(unsigned Opc) {
|
|||
}
|
||||
}
|
||||
|
||||
// These instructions set the memory tag and either keep memory contents unchanged or
|
||||
// set it to zero, ignoring the address part of the source register.
|
||||
// Returns true when MI's opcode is one of the four ST*G "Offset" forms listed
// below, and false for every other opcode. STGPi is not in this list; code that
// needs to special-case it tests for that opcode explicitly.
static bool isTagStore(const MachineInstr &MI) {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case AArch64::STGOffset:
|
||||
case AArch64::STZGOffset:
|
||||
case AArch64::ST2GOffset:
|
||||
case AArch64::STZ2GOffset:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Scaling factor for unscaled load or store.
|
||||
static int getMemScale(MachineInstr &MI) {
|
||||
static int getMemScale(const MachineInstr &MI) {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("Opcode has unknown scale!");
|
||||
|
@ -255,6 +269,11 @@ static int getMemScale(MachineInstr &MI) {
|
|||
case AArch64::STURQi:
|
||||
case AArch64::LDPQi:
|
||||
case AArch64::STPQi:
|
||||
case AArch64::STGOffset:
|
||||
case AArch64::STZGOffset:
|
||||
case AArch64::ST2GOffset:
|
||||
case AArch64::STZ2GOffset:
|
||||
case AArch64::STGPi:
|
||||
return 16;
|
||||
}
|
||||
}
|
||||
|
@ -449,6 +468,16 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
|
|||
return AArch64::STPWpre;
|
||||
case AArch64::STPXi:
|
||||
return AArch64::STPXpre;
|
||||
case AArch64::STGOffset:
|
||||
return AArch64::STGPreIndex;
|
||||
case AArch64::STZGOffset:
|
||||
return AArch64::STZGPreIndex;
|
||||
case AArch64::ST2GOffset:
|
||||
return AArch64::ST2GPreIndex;
|
||||
case AArch64::STZ2GOffset:
|
||||
return AArch64::STZ2GPreIndex;
|
||||
case AArch64::STGPi:
|
||||
return AArch64::STGPpre;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -518,6 +547,16 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
|
|||
return AArch64::STPWpost;
|
||||
case AArch64::STPXi:
|
||||
return AArch64::STPXpost;
|
||||
case AArch64::STGOffset:
|
||||
return AArch64::STGPostIndex;
|
||||
case AArch64::STZGOffset:
|
||||
return AArch64::STZGPostIndex;
|
||||
case AArch64::ST2GOffset:
|
||||
return AArch64::ST2GPostIndex;
|
||||
case AArch64::STZ2GOffset:
|
||||
return AArch64::STZ2GPostIndex;
|
||||
case AArch64::STGPi:
|
||||
return AArch64::STGPpost;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -536,10 +575,30 @@ static bool isPairedLdSt(const MachineInstr &MI) {
|
|||
case AArch64::STPQi:
|
||||
case AArch64::STPWi:
|
||||
case AArch64::STPXi:
|
||||
case AArch64::STGPi:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the scale and offset range of pre/post indexed variants of MI.
// All three reference parameters are outputs and are always written:
//   Scale     - getMemScale(MI) for tag stores and paired instructions,
//               1 for everything else.
//   MinOffset - lower bound of the immediate: -64 if paired, -256 otherwise.
//   MaxOffset - upper bound of the immediate: 63 if paired, 255 otherwise.
// The range is expressed in units of Scale: isMatchingUpdateInsn checks
// UpdateOffset / Scale against [MinOffset, MaxOffset].
|
||||
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
|
||||
int &MinOffset, int &MaxOffset) {
|
||||
bool IsPaired = isPairedLdSt(MI);
|
||||
bool IsTagStore = isTagStore(MI);
|
||||
// ST*G and all paired ldst have the same scale in pre/post-indexed variants
|
||||
// as in the "unsigned offset" variant.
|
||||
// All other pre/post indexed ldst instructions are unscaled.
|
||||
Scale = (IsTagStore || IsPaired) ? getMemScale(MI) : 1;
|
||||
|
||||
// Paired forms get a narrower immediate range than single-register forms
// (tag stores included, since they take the non-paired branch here).
if (IsPaired) {
|
||||
MinOffset = -64;
|
||||
MaxOffset = 63;
|
||||
} else {
|
||||
MinOffset = -256;
|
||||
MaxOffset = 255;
|
||||
}
|
||||
}
|
||||
|
||||
static const MachineOperand &getLdStRegOp(const MachineInstr &MI,
|
||||
unsigned PairedRegOp = 0) {
|
||||
assert(PairedRegOp < 2 && "Unexpected register operand idx.");
|
||||
|
@ -618,6 +677,11 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
|
|||
case AArch64::LDRWui:
|
||||
case AArch64::LDRHHui:
|
||||
case AArch64::LDRBBui:
|
||||
case AArch64::STGOffset:
|
||||
case AArch64::STZGOffset:
|
||||
case AArch64::ST2GOffset:
|
||||
case AArch64::STZ2GOffset:
|
||||
case AArch64::STGPi:
|
||||
// Unscaled instructions.
|
||||
case AArch64::STURSi:
|
||||
case AArch64::STURDi:
|
||||
|
@ -1328,18 +1392,19 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
|
|||
unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
|
||||
: getPostIndexedOpcode(I->getOpcode());
|
||||
MachineInstrBuilder MIB;
|
||||
int Scale, MinOffset, MaxOffset;
|
||||
getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
|
||||
if (!isPairedLdSt(*I)) {
|
||||
// Non-paired instruction.
|
||||
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
|
||||
.add(getLdStRegOp(*Update))
|
||||
.add(getLdStRegOp(*I))
|
||||
.add(getLdStBaseOp(*I))
|
||||
.addImm(Value)
|
||||
.addImm(Value / Scale)
|
||||
.setMemRefs(I->memoperands())
|
||||
.setMIFlags(I->mergeFlagsWith(*Update));
|
||||
} else {
|
||||
// Paired instruction.
|
||||
int Scale = getMemScale(*I);
|
||||
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
|
||||
.add(getLdStRegOp(*Update))
|
||||
.add(getLdStRegOp(*I, 0))
|
||||
|
@ -1395,28 +1460,21 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
|
|||
MI.getOperand(1).getReg() != BaseReg)
|
||||
break;
|
||||
|
||||
bool IsPairedInsn = isPairedLdSt(MemMI);
|
||||
int UpdateOffset = MI.getOperand(2).getImm();
|
||||
if (MI.getOpcode() == AArch64::SUBXri)
|
||||
UpdateOffset = -UpdateOffset;
|
||||
|
||||
// For non-paired load/store instructions, the immediate must fit in a
|
||||
// signed 9-bit integer.
|
||||
if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
|
||||
break;
|
||||
|
||||
// For paired load/store instructions, the immediate must be a multiple of
|
||||
// the scaling factor. The scaled offset must also fit into a signed 7-bit
|
||||
// integer.
|
||||
if (IsPairedInsn) {
|
||||
int Scale = getMemScale(MemMI);
|
||||
// The immediate must be a multiple of the scaling factor of the pre/post
|
||||
// indexed instruction.
|
||||
int Scale, MinOffset, MaxOffset;
|
||||
getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
|
||||
if (UpdateOffset % Scale != 0)
|
||||
break;
|
||||
|
||||
// Scaled offset must fit in the instruction immediate.
|
||||
int ScaledOffset = UpdateOffset / Scale;
|
||||
if (ScaledOffset > 63 || ScaledOffset < -64)
|
||||
if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
|
||||
break;
|
||||
}
|
||||
|
||||
// If we have a non-zero Offset, we check that it matches the amount
|
||||
// we're adding to the register.
|
||||
|
@ -1442,14 +1500,20 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
|
|||
if (MIUnscaledOffset != UnscaledOffset)
|
||||
return E;
|
||||
|
||||
// If the base register overlaps a destination register, we can't
|
||||
// merge the update.
|
||||
// If the base register overlaps a source/destination register, we can't
|
||||
// merge the update. This does not apply to tag store instructions which
|
||||
// ignore the address part of the source register.
|
||||
// This does not apply to STGPi either, which does not have unpredictable
|
||||
// behavior in this case unlike normal stores, and always performs writeback
|
||||
// after reading the source register value.
|
||||
if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
|
||||
bool IsPairedInsn = isPairedLdSt(MemMI);
|
||||
for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
|
||||
Register DestReg = getLdStRegOp(MemMI, i).getReg();
|
||||
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
|
||||
return E;
|
||||
}
|
||||
}
|
||||
|
||||
// Track which register units have been modified and used between the first
|
||||
// insn (inclusive) and the second insn.
|
||||
|
@ -1496,12 +1560,14 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
|
|||
return E;
|
||||
// If the base register overlaps a destination register, we can't
|
||||
// merge the update.
|
||||
if (!isTagStore(MemMI)) {
|
||||
bool IsPairedInsn = isPairedLdSt(MemMI);
|
||||
for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
|
||||
Register DestReg = getLdStRegOp(MemMI, i).getReg();
|
||||
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
|
||||
return E;
|
||||
}
|
||||
}
|
||||
|
||||
// Track which register units have been modified and used between the first
|
||||
// insn (inclusive) and the second insn.
|
||||
|
@ -1659,7 +1725,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
|
|||
// however, is not, so adjust here.
|
||||
int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
|
||||
|
||||
// Look forward to try to find a post-index instruction. For example,
|
||||
// Look forward to try to find a pre-index instruction. For example,
|
||||
// ldr x1, [x0, #64]
|
||||
// add x0, x0, #64
|
||||
// merged into:
|
||||
|
|
|
@ -0,0 +1,285 @@
|
|||
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
|
||||
---
|
||||
|
||||
### STG and its offset limits
|
||||
|
||||
# CHECK-LABEL: name: test_STG_post
|
||||
# CHECK: STGPostIndex $x0, $x0, 7
|
||||
name: test_STG_post
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
STGOffset $x0, $x0, 0
|
||||
$x0 = ADDXri $x0, 112, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STG_post_same_reg
|
||||
# CHECK: STGPostIndex $x1, $x0, 7
|
||||
name: test_STG_post_same_reg
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0, $x1
|
||||
|
||||
STGOffset $x1, $x0, 0
|
||||
$x0 = ADDXri $x0, 112, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STG_post_unaligned
|
||||
# CHECK: STGOffset $x0, $x0, 0
|
||||
# CHECK-NEXT: ADDXri $x0, 8, 0
|
||||
name: test_STG_post_unaligned
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
STGOffset $x0, $x0, 0
|
||||
$x0 = ADDXri $x0, 8, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STG_post2
|
||||
# CHECK: STGPostIndex $x0, $x0, -256
|
||||
name: test_STG_post2
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
STGOffset $x0, $x0, 0
|
||||
$x0 = SUBXri $x0, 4096, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STG_post3
|
||||
# CHECK: STGOffset $x0, $x0, 0
|
||||
# CHECK-NEXT: SUBXri $x0, 4112, 0
|
||||
name: test_STG_post3
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
STGOffset $x0, $x0, 0
|
||||
$x0 = SUBXri $x0, 4112, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STG_post4
|
||||
# CHECK: STGPostIndex $x0, $x0, 255
|
||||
name: test_STG_post4
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
STGOffset $x0, $x0, 0
|
||||
$x0 = ADDXri $x0, 4080, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STG_post5
|
||||
# CHECK: STGOffset $x0, $x0, 0
|
||||
# CHECK-NEXT: ADDXri $x0, 4096, 0
|
||||
name: test_STG_post5
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
STGOffset $x0, $x0, 0
|
||||
$x0 = ADDXri $x0, 4096, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
### The rest of ST*G variants.
|
||||
|
||||
# CHECK-LABEL: name: test_STZG_post
|
||||
# CHECK: STZGPostIndex $x0, $x0, 7
|
||||
name: test_STZG_post
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
STZGOffset $x0, $x0, 0
|
||||
$x0 = ADDXri $x0, 112, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_ST2G_post
|
||||
# CHECK: ST2GPostIndex $x0, $x0, 7
|
||||
name: test_ST2G_post
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
ST2GOffset $x0, $x0, 0
|
||||
$x0 = ADDXri $x0, 112, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STZ2G_post
|
||||
# CHECK: STZ2GPostIndex $x0, $x0, 7
|
||||
name: test_STZ2G_post
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
STZ2GOffset $x0, $x0, 0
|
||||
$x0 = ADDXri $x0, 112, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
### STGP and its offset limits
|
||||
|
||||
# CHECK-LABEL: name: test_STGP_post
|
||||
# CHECK: STGPpost $x1, $x2, $x0, 7
|
||||
name: test_STGP_post
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0, $x1, $x2
|
||||
|
||||
STGPi $x1, $x2, $x0, 0
|
||||
$x0 = ADDXri $x0, 112, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STGP_post2
|
||||
# CHECK: STGPpost $x1, $x2, $x0, -64
|
||||
name: test_STGP_post2
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0, $x1, $x2
|
||||
|
||||
STGPi $x1, $x2, $x0, 0
|
||||
$x0 = SUBXri $x0, 1024, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STGP_post3
|
||||
# CHECK: STGPi $x1, $x2, $x0, 0
|
||||
# CHECK-NEXT: SUBXri $x0, 1040, 0
|
||||
name: test_STGP_post3
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0, $x1, $x2
|
||||
|
||||
STGPi $x1, $x2, $x0, 0
|
||||
$x0 = SUBXri $x0, 1040, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STGP_post4
|
||||
# CHECK: STGPpost $x1, $x2, $x0, 63
|
||||
name: test_STGP_post4
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0, $x1, $x2
|
||||
|
||||
STGPi $x1, $x2, $x0, 0
|
||||
$x0 = ADDXri $x0, 1008, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STGP_post5
|
||||
# CHECK: STGPi $x1, $x2, $x0, 0
|
||||
# CHECK-NEXT: ADDXri $x0, 1024, 0
|
||||
name: test_STGP_post5
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0, $x1, $x2
|
||||
|
||||
STGPi $x1, $x2, $x0, 0
|
||||
$x0 = ADDXri $x0, 1024, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
### Pre-indexed forms
|
||||
|
||||
# CHECK-LABEL: name: test_STG_pre
|
||||
# CHECK: STGPreIndex $x0, $x0, 10
|
||||
name: test_STG_pre
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
STGOffset $x0, $x0, 10
|
||||
$x0 = ADDXri $x0, 160, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STGP_pre
|
||||
# CHECK: STGPpre $x1, $x2, $x0, 10
|
||||
name: test_STGP_pre
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0, $x1, $x2
|
||||
|
||||
STGPi $x1, $x2, $x0, 10
|
||||
$x0 = ADDXri $x0, 160, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
### Pre-indexed forms with add/sub coming before the store.
|
||||
|
||||
# CHECK-LABEL: name: test_STG_pre_back
|
||||
# CHECK: STGPreIndex $x0, $x0, 2
|
||||
name: test_STG_pre_back
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
$x0 = ADDXri $x0, 32, 0
|
||||
STGOffset $x0, $x0, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STGP_pre_back
|
||||
# CHECK: STGPpre $x1, $x2, $x0, -3
|
||||
name: test_STGP_pre_back
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0, $x1, $x2
|
||||
|
||||
$x0 = SUBXri $x0, 48, 0
|
||||
STGPi $x1, $x2, $x0, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
### STGP with source register == address register
|
||||
|
||||
# CHECK-LABEL: name: test_STGP_post_same_reg
|
||||
# CHECK: STGPpost $x0, $x0, $x0, 7
|
||||
name: test_STGP_post_same_reg
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
STGPi $x0, $x0, $x0, 0
|
||||
$x0 = ADDXri $x0, 112, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: test_STGP_pre_same_reg
|
||||
# CHECK: STGPpre $x0, $x0, $x0, 7
|
||||
name: test_STGP_pre_same_reg
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
STGPi $x0, $x0, $x0, 7
|
||||
$x0 = ADDXri $x0, 112, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
||||
|
||||
# This case cannot be merged because the source register is always read before writeback.
|
||||
# CHECK-LABEL: name: test_STGP_pre_back_same_reg
|
||||
# CHECK: SUBXri $x0, 48, 0
|
||||
# CHECK-NEXT: STGPi $x0, $x0, $x0, 0
|
||||
name: test_STGP_pre_back_same_reg
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
$x0 = SUBXri $x0, 48, 0
|
||||
STGPi $x0, $x0, $x0, 0
|
||||
RET_ReallyLR implicit $x0
|
||||
...
|
|
@ -65,7 +65,7 @@ entry:
|
|||
define void @stgp_alloca(i64 %a, i64 %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: stgp_alloca:
|
||||
; CHECK: stgp x0, x1, [sp]
|
||||
; CHECK: stgp x0, x1, [sp, #-32]!
|
||||
; CHECK: stgp x1, x0, [sp, #16]
|
||||
; CHECK: ret
|
||||
%x = alloca i8, i32 32, align 16
|
||||
|
|
Loading…
Reference in New Issue