[AArch64] Add load/store pair instructions to getMemOpBaseRegImmOfsWidth().
This improves AA in the MI scheduler when reasoning about paired instructions.

Phabricator Revision: http://reviews.llvm.org/D17098
PR26358
llvm-svn: 266462
commit 1fbe9bcab4 (parent 74cba6427a)
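As a concrete illustration of the effect (an example for this summary, not part of the patch): previously getMemOpBaseRegImmOfsWidth() rejected any instruction without exactly three explicit operands, so paired loads/stores such as

    ldp x1, x2, [x0, #16]

were opaque to the scheduler's alias analysis. With this change the pair is reported as BaseReg = x0, Offset = 16, Width = 16 (two 8-byte X registers), which is enough to prove it disjoint from, say, stp x3, x4, [x0, #32] and to reorder the two accesses.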
@@ -1466,9 +1466,15 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
     const TargetRegisterInfo *TRI) const {
   assert(LdSt->mayLoadOrStore() && "Expected a memory operation.");
   // Handle only loads/stores with base register followed by immediate offset.
-  if (LdSt->getNumExplicitOperands() != 3)
-    return false;
-  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+  if (LdSt->getNumExplicitOperands() == 3) {
+    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
+    if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+      return false;
+  } else if (LdSt->getNumExplicitOperands() == 4) {
+    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
+    if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isReg() ||
+        !LdSt->getOperand(3).isImm())
+      return false;
+  } else
     return false;
 
   // Offset is calculated as the immediate operand multiplied by the scaling factor.
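The branching above keys entirely off the explicit operand count: non-paired forms carry (dest, base, imm) and paired forms carry (dest, dest, base, imm). A minimal sketch of that mapping, assuming only what the checks above encode (the helper name is hypothetical, not part of the patch):

    // Hypothetical helper mirroring the operand-shape branch above:
    //   ldr x1, [x0, #8]      -> 3 explicit operands: dest, base, imm
    //   ldp x1, x2, [x0, #8]  -> 4 explicit operands: dest, dest, base, imm
    static bool getBaseAndImmIndices(unsigned NumExplicitOperands,
                                     unsigned &BaseIdx, unsigned &ImmIdx) {
      if (NumExplicitOperands == 3) { // Non-paired load/store.
        BaseIdx = 1;
        ImmIdx = 2;
        return true;
      }
      if (NumExplicitOperands == 4) { // Paired load/store.
        BaseIdx = 2;
        ImmIdx = 3;
        return true;
      }
      return false; // Any other addressing form is rejected.
    }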
|
@@ -1515,16 +1521,45 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
     Width = 1;
     Scale = 1;
     break;
+  case AArch64::LDPQi:
+  case AArch64::LDNPQi:
+  case AArch64::STPQi:
+  case AArch64::STNPQi:
+    Scale = 16;
+    Width = 32;
+    break;
   case AArch64::LDRQui:
   case AArch64::STRQui:
     Scale = Width = 16;
     break;
+  case AArch64::LDPXi:
+  case AArch64::LDPDi:
+  case AArch64::LDNPXi:
+  case AArch64::LDNPDi:
+  case AArch64::STPXi:
+  case AArch64::STPDi:
+  case AArch64::STNPXi:
+  case AArch64::STNPDi:
+    Scale = 8;
+    Width = 16;
+    break;
   case AArch64::LDRXui:
   case AArch64::LDRDui:
   case AArch64::STRXui:
   case AArch64::STRDui:
     Scale = Width = 8;
     break;
+  case AArch64::LDPWi:
+  case AArch64::LDPSi:
+  case AArch64::LDNPWi:
+  case AArch64::LDNPSi:
+  case AArch64::STPWi:
+  case AArch64::STPSi:
+  case AArch64::STNPWi:
+  case AArch64::STNPSi:
+    Scale = 4;
+    Width = 8;
+    break;
   case AArch64::LDRWui:
   case AArch64::LDRSui:
   case AArch64::LDRSWui:
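In these tables, Scale converts the encoded immediate into a byte offset, while Width is the total footprint of the access, covering both registers of a pair. A quick worked example for the Q-register pair case (a self-contained sketch, not commit code):

    // "ldp q0, q1, [x0, #32]" is LDPQi: the MI stores the scaled
    // immediate 2 rather than the byte offset 32.
    #include <cassert>
    int main() {
      const int Scale = 16, Width = 32; // LDPQi values from the switch above.
      const long Imm = 2;               // Immediate operand as encoded.
      const long Offset = Imm * Scale;  // Byte offset from the base register.
      assert(Offset == 32);
      assert(Offset + Width == 64);     // The pair covers bytes [32, 64).
      return 0;
    }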
|
@@ -1546,8 +1581,14 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
     break;
   }
 
-  BaseReg = LdSt->getOperand(1).getReg();
-  Offset = LdSt->getOperand(2).getImm() * Scale;
+  if (LdSt->getNumExplicitOperands() == 3) {
+    BaseReg = LdSt->getOperand(1).getReg();
+    Offset = LdSt->getOperand(2).getImm() * Scale;
+  } else {
+    assert(LdSt->getNumExplicitOperands() == 4 && "invalid number of operands");
+    BaseReg = LdSt->getOperand(2).getReg();
+    Offset = LdSt->getOperand(3).getImm() * Scale;
+  }
   return true;
 }
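With BaseReg, Offset, and Width now filled in for paired instructions, the scheduler-side alias check can prove two same-base accesses disjoint. A minimal sketch of that interval test (hypothetical consumer code modeled on the scheduler's overlap reasoning; the function name is illustrative):

    #include <cstdint>

    // True when two accesses off the same base register provably touch
    // disjoint byte ranges (illustrative only, not commit code).
    static bool accessesAreDisjoint(unsigned BaseRegA, int64_t OffsetA,
                                    unsigned WidthA, unsigned BaseRegB,
                                    int64_t OffsetB, unsigned WidthB) {
      if (BaseRegA != BaseRegB)
        return false; // Different bases: no conclusion either way.
      int64_t LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int64_t HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      unsigned LowWidth = OffsetA < OffsetB ? WidthA : WidthB;
      return LowOffset + (int64_t)LowWidth <= HighOffset; // No shared byte.
    }

For example, ldp x1, x2, [x0, #16] (bytes 16-31) and stp x3, x4, [x0, #32] (bytes 32-47) are disjoint, so the scheduler is free to reorder them.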
|
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false -enable-post-misched=false | FileCheck %s
 
 ; rdar://12713765
 ; Make sure we are not creating stack objects that are assumed to be 64-byte
|
@@ -109,3 +109,37 @@ define double @stp_double_aa_after(double %d0, double %a, double %b, double* noc
   store double %b, double* %add.ptr, align 8
   ret double %tmp
 }
+
+; Check that the stores %c and %d are paired after the fadd instruction,
+; and then the stores %a and %b are paired after proving that they do not
+; depend on the (%c, %d) pair.
+;
+; CHECK-LABEL: st1:
+; CHECK: stp q0, q1, [x{{[0-9]+}}]
+; CHECK: fadd
+; CHECK: stp q2, q0, [x{{[0-9]+}}, #32]
+define void @st1(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %base, i64 %index) {
+entry:
+  %a0 = getelementptr inbounds float, float* %base, i64 %index
+  %b0 = getelementptr float, float* %a0, i64 4
+  %c0 = getelementptr float, float* %a0, i64 8
+  %d0 = getelementptr float, float* %a0, i64 12
+
+  %a1 = bitcast float* %a0 to <4 x float>*
+  %b1 = bitcast float* %b0 to <4 x float>*
+  %c1 = bitcast float* %c0 to <4 x float>*
+  %d1 = bitcast float* %d0 to <4 x float>*
+
+  store <4 x float> %c, <4 x float>* %c1, align 4
+  store <4 x float> %a, <4 x float>* %a1, align 4
+
+  ; This fadd forces the compiler to pair %c and %e after the fadd, and to
+  ; leave the stores %a and %b separated by a stp. The dependence analysis
+  ; must then prove that it is safe to move %b past the stp to pair it with %a.
+  %e = fadd fast <4 x float> %d, %a
+
+  store <4 x float> %e, <4 x float>* %d1, align 4
+  store <4 x float> %b, <4 x float>* %b1, align 4
+
+  ret void
+}
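Reading the CHECK lines against the IR: the four vector arguments %a through %d arrive in q0 through q3, and the fadd result %e ends up back in q0, so "stp q0, q1" is the (%a, %b) pair and "stp q2, q0" is the (%c, %e) pair at offset #32. To run just this test, a standard llvm-lit invocation works (the path is the file's usual location in the LLVM tree and is an assumption of this note):

    llvm-lit -v llvm/test/CodeGen/AArch64/arm64-stp-aa.ll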