[AArch64] Refactor the Exynos scheduling predicates

Refactor the scheduling predicates for the Exynos processors so that they
are based on `MCInstPredicate`.

Differential revision: https://reviews.llvm.org/D55345

llvm-svn: 348774
This commit is contained in:
Evandro Menezes 2018-12-10 17:17:26 +00:00
parent 1f6b247717
commit 53f0d41dc4
10 changed files with 297 additions and 380 deletions

View File

@ -39,7 +39,7 @@
// processor scheduling model.
//
// The `MCInstPredicateExample` definition above is equivalent (and therefore
// could replace) the following definition from the ExynosM3 model (see
// could replace) the following definition from a previous ExynosM3 model (see
// AArch64SchedExynosM3.td):
//
// def M3BranchLinkFastPred : SchedPredicate<[{

View File

@ -371,6 +371,7 @@ include "AArch64CallingConvention.td"
include "AArch64Schedule.td"
include "AArch64InstrInfo.td"
include "AArch64SchedPredicates.td"
include "AArch64SchedPredExynos.td"
def AArch64InstrInfo : InstrInfo;

View File

@ -705,7 +705,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
// Secondly, check cases specific to sub-targets.
if (Subtarget.hasExynosCheapAsMoveHandling()) {
if (isExynosResetFast(MI) || isExynosShiftExtFast(MI))
if (isExynosCheapAsMove(MI))
return true;
return MI.isAsCheapAsAMove();
@ -759,213 +759,6 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
/// Returns true if \p MI is one of the constant-setting or register-copying
/// forms enumerated below; every other opcode or operand shape yields false.
bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) {
  // True for either width of the zero register.
  const auto IsZeroReg = [](unsigned R) {
    return R == AArch64::WZR || R == AArch64::XZR;
  };

  switch (MI.getOpcode()) {
  // Literal
  case AArch64::ADR:
  case AArch64::ADRP:
  // MOV Rd, Imm
  case AArch64::MOVNWi:
  case AArch64::MOVNXi:
  case AArch64::MOVZWi:
  case AArch64::MOVZXi:
    return true;

  // MOV Rd, SP
  case AArch64::ADDWri:
  case AArch64::ADDXri: {
    // Operand 2 is not always an immediate here, so both operand kinds are
    // checked before they are read.
    if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
      return false;
    const unsigned Base = MI.getOperand(1).getReg();
    const unsigned Addend = MI.getOperand(2).getImm();
    const bool FromSP = Base == AArch64::WSP || Base == AArch64::SP;
    return FromSP && Addend == 0;
  }

  // MOVI Vd, #0
  case AArch64::MOVID:
  case AArch64::MOVIv8b_ns:
  case AArch64::MOVIv2d_ns:
  case AArch64::MOVIv16b_ns: {
    const unsigned Value = MI.getOperand(1).getImm();
    return Value == 0;
  }

  // MOVI Vd, #0 (forms that also carry a shift operand)
  case AArch64::MOVIv2i32:
  case AArch64::MOVIv4i16:
  case AArch64::MOVIv4i32:
  case AArch64::MOVIv8i16: {
    const unsigned Value = MI.getOperand(1).getImm();
    const unsigned Amount = MI.getOperand(2).getImm();
    return Value == 0 && Amount == 0;
  }

  // MOV Rd, Imm
  case AArch64::ORRWri:
  case AArch64::ORRXri: {
    if (!MI.getOperand(1).isReg())
      return false;
    const unsigned Src = MI.getOperand(1).getReg();
    const unsigned Value = MI.getOperand(2).getImm();
    return IsZeroReg(Src) && Value == 0;
  }

  // MOV Rd, Rm
  case AArch64::ORRWrs:
  case AArch64::ORRXrs: {
    if (!MI.getOperand(1).isReg())
      return false;
    const unsigned Src = MI.getOperand(1).getReg();
    const unsigned Encoded = MI.getOperand(3).getImm();
    const unsigned Amount = AArch64_AM::getShiftValue(Encoded);
    return IsZeroReg(Src) && Amount == 0;
  }

  default:
    break;
  }

  return false;
}
bool AArch64InstrInfo::isExynosLdStExtFast(const MachineInstr &MI) {
unsigned Imm;
AArch64_AM::ShiftExtendType Ext;
switch (MI.getOpcode()) {
default:
return false;
// WriteLD
case AArch64::PRFMroW:
case AArch64::PRFMroX:
// WriteLDIdx
case AArch64::LDRBBroW:
case AArch64::LDRBBroX:
case AArch64::LDRHHroW:
case AArch64::LDRHHroX:
case AArch64::LDRSBWroW:
case AArch64::LDRSBWroX:
case AArch64::LDRSBXroW:
case AArch64::LDRSBXroX:
case AArch64::LDRSHWroW:
case AArch64::LDRSHWroX:
case AArch64::LDRSHXroW:
case AArch64::LDRSHXroX:
case AArch64::LDRSWroW:
case AArch64::LDRSWroX:
case AArch64::LDRWroW:
case AArch64::LDRWroX:
case AArch64::LDRXroW:
case AArch64::LDRXroX:
case AArch64::LDRBroW:
case AArch64::LDRBroX:
case AArch64::LDRDroW:
case AArch64::LDRDroX:
case AArch64::LDRHroW:
case AArch64::LDRHroX:
case AArch64::LDRSroW:
case AArch64::LDRSroX:
// WriteSTIdx
case AArch64::STRBBroW:
case AArch64::STRBBroX:
case AArch64::STRHHroW:
case AArch64::STRHHroX:
case AArch64::STRWroW:
case AArch64::STRWroX:
case AArch64::STRXroW:
case AArch64::STRXroX:
case AArch64::STRBroW:
case AArch64::STRBroX:
case AArch64::STRDroW:
case AArch64::STRDroX:
case AArch64::STRHroW:
case AArch64::STRHroX:
case AArch64::STRSroW:
case AArch64::STRSroX:
Imm = MI.getOperand(3).getImm();
Ext = AArch64_AM::getMemExtendType(Imm);
return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
}
}
bool AArch64InstrInfo::isExynosShiftExtFast(const MachineInstr &MI) {
unsigned Imm, Shift;
AArch64_AM::ShiftExtendType Ext = AArch64_AM::UXTX;
switch (MI.getOpcode()) {
default:
return false;
// WriteI
case AArch64::ADDSWri:
case AArch64::ADDSXri:
case AArch64::ADDWri:
case AArch64::ADDXri:
case AArch64::SUBSWri:
case AArch64::SUBSXri:
case AArch64::SUBWri:
case AArch64::SUBXri:
return true;
// WriteISReg
case AArch64::ADDSWrs:
case AArch64::ADDSXrs:
case AArch64::ADDWrs:
case AArch64::ADDXrs:
case AArch64::ANDSWrs:
case AArch64::ANDSXrs:
case AArch64::ANDWrs:
case AArch64::ANDXrs:
case AArch64::BICSWrs:
case AArch64::BICSXrs:
case AArch64::BICWrs:
case AArch64::BICXrs:
case AArch64::EONWrs:
case AArch64::EONXrs:
case AArch64::EORWrs:
case AArch64::EORXrs:
case AArch64::ORNWrs:
case AArch64::ORNXrs:
case AArch64::ORRWrs:
case AArch64::ORRXrs:
case AArch64::SUBSWrs:
case AArch64::SUBSXrs:
case AArch64::SUBWrs:
case AArch64::SUBXrs:
Imm = MI.getOperand(3).getImm();
Shift = AArch64_AM::getShiftValue(Imm);
Ext = AArch64_AM::getShiftType(Imm);
return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
// WriteIEReg
case AArch64::ADDSWrx:
case AArch64::ADDSXrx:
case AArch64::ADDWrx:
case AArch64::ADDXrx:
case AArch64::SUBSWrx:
case AArch64::SUBSXrx:
case AArch64::SUBWrx:
case AArch64::SUBXrx:
Ext = AArch64_AM::UXTW;
LLVM_FALLTHROUGH;
case AArch64::ADDSXrx64:
case AArch64::ADDXrx64:
case AArch64::SUBSXrx64:
case AArch64::SUBXrx64:
Imm = MI.getOperand(3).getImm();
Shift = AArch64_AM::getArithShiftValue(Imm);
return (Shift == 0 ||
(Shift <= 3 && Ext == AArch64_AM::getArithExtendType(Imm)));
}
}
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:

View File

@ -241,15 +241,6 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
/// Returns true if the instruction sets a constant value that can be
/// executed more efficiently.
static bool isExynosResetFast(const MachineInstr &MI);
/// Returns true if the load or store has an extension that can be executed
/// more efficiently.
static bool isExynosLdStExtFast(const MachineInstr &MI);
/// Returns true if the instruction has a constant shift left or extension
/// that can be executed more efficiently.
static bool isExynosShiftExtFast(const MachineInstr &MI);
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
static bool isFalkorShiftExtFast(const MachineInstr &MI);

View File

@ -61,14 +61,6 @@ def M1UnitALU : ProcResGroup<[M1UnitA,
def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
M1UnitNAL1]>; // All simple vector
//===----------------------------------------------------------------------===//
// Predicates.
def M1BranchLinkPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
MI->getOperand(0).getReg() != AArch64::LR}]>;
def M1LdStExtPred : SchedPredicate<[{TII->isExynosLdStExtFast(*MI)}]>;
def M1ShiftExtPred : SchedPredicate<[{TII->isExynosShiftExtFast(*MI)}]>;
//===----------------------------------------------------------------------===//
// Coarse scheduling model.
@ -86,14 +78,16 @@ def M1WriteAC : SchedWriteRes<[M1UnitALU,
def M1WriteAD : SchedWriteRes<[M1UnitALU,
M1UnitC]> { let Latency = 2;
let NumMicroOps = 2; }
def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftExtPred, [M1WriteA1]>,
SchedVar<NoSchedPred, [M1WriteAA]>]>;
def M1WriteAX : SchedWriteVariant<[SchedVar<ExynosExtPred, [M1WriteA1]>,
SchedVar<NoSchedPred, [M1WriteAA]>]>;
def M1WriteAY : SchedWriteVariant<[SchedVar<ExynosShiftPred, [M1WriteA1]>,
SchedVar<NoSchedPred, [M1WriteAA]>]>;
def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkPred, [M1WriteAB]>,
SchedVar<NoSchedPred, [M1WriteAC]>]>;
def M1WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M1WriteAC]>,
SchedVar<NoSchedPred, [M1WriteAB]>]>;
def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
@ -111,40 +105,27 @@ def M1WriteLD : SchedWriteRes<[M1UnitL,
let ResourceCycles = [2, 1]; }
def M1WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
def M1WriteLX : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteL5]>,
SchedVar<NoSchedPred, [M1WriteLC]>]>;
def M1WriteLY : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteL5]>,
SchedVar<NoSchedPred, [M1WriteLD]>]>;
def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteLC]>,
SchedVar<NoSchedPred, [M1WriteL5]>]>;
def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; }
def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
def M1WriteSA : SchedWriteRes<[M1UnitS,
M1UnitFST,
M1UnitS,
M1UnitFST]> { let Latency = 1;
let NumMicroOps = 2; }
def M1WriteSB : SchedWriteRes<[M1UnitS,
M1UnitFST,
M1UnitA]> { let Latency = 3;
let NumMicroOps = 2; }
def M1WriteSC : SchedWriteRes<[M1UnitS,
def M1WriteSB : SchedWriteRes<[M1UnitS,
M1UnitFST,
M1UnitS,
M1UnitFST,
M1UnitA]> { let Latency = 3;
let NumMicroOps = 3; }
def M1WriteSD : SchedWriteRes<[M1UnitS,
M1UnitFST,
M1UnitA]> { let Latency = 1;
let NumMicroOps = 2; }
def M1WriteSE : SchedWriteRes<[M1UnitS,
def M1WriteSC : SchedWriteRes<[M1UnitS,
M1UnitA]> { let Latency = 2;
let NumMicroOps = 2; }
def M1WriteSX : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteS1]>,
SchedVar<NoSchedPred, [M1WriteSE]>]>;
def M1WriteSY : SchedWriteVariant<[SchedVar<M1LdStExtPred, [M1WriteS1]>,
SchedVar<NoSchedPred, [M1WriteSB]>]>;
def M1WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteSC]>,
SchedVar<NoSchedPred, [M1WriteS1]>]>;
def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
@ -415,9 +396,9 @@ def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
M1UnitFST,
M1UnitFST]> { let Latency = 14;
let NumMicroOps = 4;
let ResourceCycles = [1, 7, 1, 7, 1]; }
M1UnitFST]> { let Latency = 14;
let NumMicroOps = 4;
let ResourceCycles = [1, 7, 1, 7, 1]; }
def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
@ -428,9 +409,17 @@ def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
M1UnitFST,
M1UnitFST]> { let Latency = 17;
let NumMicroOps = 7;
let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
M1UnitFST]> { let Latency = 17;
let NumMicroOps = 7;
let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
// Special cases.
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
def M1WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M1WriteNALU1]>,
SchedVar<NoSchedPred, [M1WriteA1]>]>;
// Fast forwarding.
def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
// Branch instructions
def : InstRW<[M1WriteB1], (instrs Bcc)>;
@ -440,21 +429,34 @@ def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
def : InstRW<[M1WriteA1], (instrs COPY)>;
def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
def : InstRW<[M1WriteAX], (instregex ".+rx(64)?$")>;
def : InstRW<[M1WriteAY], (instregex ".+rs$")>;
// Move instructions.
def : InstRW<[M1WriteCOPY], (instrs COPY)>;
// Divide and multiply instructions.
// Miscellaneous instructions.
// Load instructions.
def : InstRW<[M1WriteLC,
ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roW")>;
def : InstRW<[M1WriteL5,
ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roX")>;
def : InstRW<[M1WriteLB,
WriteLDHi,
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
def : InstRW<[M1WriteLX,
ReadAdrBase], (instregex "^PRFMro[WX]")>;
def : InstRW<[M1WriteLC,
ReadAdrBase], (instrs PRFMroW)>;
def : InstRW<[M1WriteL5,
ReadAdrBase], (instrs PRFMroX)>;
// Store instructions.
def : InstRW<[M1WriteSC,
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
def : InstRW<[WriteST,
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
// FP data instructions.
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
@ -488,8 +490,10 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
def : InstRW<[M1WriteLY,
ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
def : InstRW<[M1WriteLD,
ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
def : InstRW<[WriteVLD,
ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
def : InstRW<[M1WriteLD,
ReadAdrBase], (instregex "^LDRQro[WX]")>;
def : InstRW<[WriteVLD,
@ -508,14 +512,16 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
def : InstRW<[M1WriteSY,
ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
def : InstRW<[M1WriteSB,
def : InstRW<[M1WriteSA,
ReadAdrBase], (instregex "^STR[BDHS]roW")>;
def : InstRW<[WriteVST,
ReadAdrBase], (instregex "^STR[BDHS]roX")>;
def : InstRW<[M1WriteSA,
ReadAdrBase], (instregex "^STRQro[WX]")>;
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STP[DS](post|pre)")>;
def : InstRW<[M1WriteSC,
def : InstRW<[M1WriteSB,
WriteAdr], (instregex "^STPQ(post|pre)")>;
// ASIMD instructions.
@ -609,21 +615,21 @@ def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>;
def : InstRW<[M1WriteVLDE,
WriteAdr], (instregex "LD1i(64)_POST$")>;
def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>;
def : InstRW<[M1WriteL5,
def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s)$")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>;
def : InstRW<[M1WriteL5,
def : InstRW<[WriteVLD], (instregex "LD1Rv(1d)$")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M1WriteL5,
def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[M1WriteL5,
def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[M1WriteL5,
def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[M1WriteVLDA,
@ -831,8 +837,6 @@ def : InstRW<[M1WriteVSTI,
WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
// Cryptography instructions.
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;

View File

@ -103,20 +103,6 @@ def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0,
M3UnitNSHF1,
M3UnitNSHF2]>;
//===----------------------------------------------------------------------===//
// Predicates.
def M3BranchLinkPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
MI->getOperand(0).isReg() &&
MI->getOperand(0).getReg() != AArch64::LR}]>;
def M3ResetPred : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>;
def M3RotatePred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri ||
MI->getOpcode() == AArch64::EXTRXrri) &&
MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>;
def M3LdStExtPred : SchedPredicate<[{TII->isExynosLdStExtFast(*MI)}]>;
def M3ShiftExtPred : SchedPredicate<[{TII->isExynosShiftExtFast(*MI)}]>;
//===----------------------------------------------------------------------===//
// Coarse scheduling model.
@ -138,15 +124,23 @@ def M3WriteAD : SchedWriteRes<[M3UnitALU,
let NumMicroOps = 2; }
def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; }
def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; }
def M3WriteAX : SchedWriteVariant<[SchedVar<M3ResetPred, [M3WriteZ0]>,
SchedVar<M3ShiftExtPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAY : SchedWriteVariant<[SchedVar<M3RotatePred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
SchedVar<ExynosShiftPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAV : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosExtPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAZ : SchedWriteVariant<[SchedVar<ExynosShiftPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; }
def M3WriteBX : SchedWriteVariant<[SchedVar<M3BranchLinkPred, [M3WriteAB]>,
SchedVar<NoSchedPred, [M3WriteAC]>]>;
def M3WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M3WriteAC]>,
SchedVar<NoSchedPred, [M3WriteAB]>]>;
def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; }
def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; }
@ -163,28 +157,24 @@ def M3WriteLC : SchedWriteRes<[M3UnitA,
def M3WriteLD : SchedWriteRes<[M3UnitA,
M3UnitL]> { let Latency = 4;
let NumMicroOps = 2; }
def M3WriteLE : SchedWriteRes<[M3UnitA,
M3UnitL]> { let Latency = 6;
let NumMicroOps = 2; }
def M3WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
def M3WriteLX : SchedWriteVariant<[SchedVar<M3LdStExtPred, [M3WriteL5]>,
SchedVar<NoSchedPred, [M3WriteLB]>]>;
def M3WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M3WriteLB]>,
SchedVar<NoSchedPred, [M3WriteL5]>]>;
def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; }
def M3WriteSA : SchedWriteRes<[M3UnitA,
M3UnitS,
M3UnitFST]> { let Latency = 2;
M3UnitFST]> { let Latency = 3;
let NumMicroOps = 2; }
def M3WriteSB : SchedWriteRes<[M3UnitA,
M3UnitS]> { let Latency = 1;
let NumMicroOps = 2; }
def M3WriteSC : SchedWriteRes<[M3UnitA,
M3UnitS]> { let Latency = 2;
let NumMicroOps = 2; }
def M3WriteSX : SchedWriteVariant<[SchedVar<M3LdStExtPred, [M3WriteS1]>,
SchedVar<NoSchedPred, [M3WriteSB]>]>;
def M3WriteSY : SchedWriteVariant<[SchedVar<M3LdStExtPred, [M3WriteS1]>,
SchedVar<NoSchedPred, [M3WriteSC]>]>;
def M3WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M3WriteSB]>,
SchedVar<NoSchedPred, [M3WriteS1]>]>;
def M3ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
@ -214,9 +204,7 @@ def : WriteRes<WriteIM64, [M3UnitC]> { let Latency = 4;
let ResourceCycles = [2]; }
// Miscellaneous instructions.
def : WriteRes<WriteExtr, [M3UnitALU,
M3UnitALU]> { let Latency = 1;
let NumMicroOps = 2; }
def : SchedAlias<WriteExtr, M3WriteAY>;
// Addressing modes.
def : WriteRes<WriteAdr, []> { let Latency = 1;
@ -479,11 +467,15 @@ def M3WriteVSTI : SchedWriteRes<[M3UnitNALU,
// Special cases.
def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; }
def M3WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M3WriteNALU1]>,
SchedVar<NoSchedPred, [M3WriteZ0]>]>;
def M3WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M3WriteZ0]>,
SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
// Fast forwarding.
def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>;
def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4,
M3WriteFMAC5]>;
def M3WriteMOVI : SchedWriteVariant<[SchedVar<M3ResetPred, [M3WriteZ0]>,
SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>;
// Branch instructions
@ -494,29 +486,40 @@ def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>;
def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
def : InstRW<[M3WriteA1], (instrs COPY)>;
def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>;
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>;
def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>;
def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>;
def : InstRW<[M3WriteAZ], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
def : InstRW<[M3WriteAU], (instrs ORRWrs, ORRXrs)>;
def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
def : InstRW<[M3WriteAZ], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
def : InstRW<[M3WriteAV], (instrs ADDWri, ADDXri)>;
def : InstRW<[M3WriteAW], (instrs ORRWri, ORRXri)>;
// Move instructions.
def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
def : InstRW<[M3WriteCOPY], (instrs COPY)>;
def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
// Divide and multiply instructions.
// Miscellaneous instructions.
def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>;
// Load instructions.
def : InstRW<[M3WriteLB,
ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roW")>;
def : InstRW<[M3WriteL5,
ReadAdrBase], (instregex "^LDR(BB|HH|SBW|SBX|SHW|SWX|SW|W|X)roX")>;
def : InstRW<[M3WriteLD,
WriteLDHi,
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
def : InstRW<[M3WriteLX,
ReadAdrBase], (instregex "^PRFMro[WX]")>;
def : InstRW<[M3WriteLB,
ReadAdrBase], (instrs PRFMroW)>;
def : InstRW<[M3WriteL5,
ReadAdrBase], (instrs PRFMroX)>;
// Store instructions.
def : InstRW<[M3WriteSB,
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
def : InstRW<[WriteST,
ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
// FP data instructions.
def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>;
@ -553,9 +556,11 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
def : InstRW<[M3WriteLX,
ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
def : InstRW<[M3WriteLB,
def : InstRW<[M3WriteLE,
ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
def : InstRW<[WriteVLD,
ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
def : InstRW<[M3WriteLE,
ReadAdrBase], (instregex "^LDRQro[WX]")>;
def : InstRW<[WriteVLD,
M3WriteLH], (instregex "^LDN?P[DS]i")>;
@ -573,8 +578,10 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
def : InstRW<[M3WriteSY,
ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
def : InstRW<[M3WriteSA,
ReadAdrBase], (instregex "^STR[BDHS]roW")>;
def : InstRW<[WriteVST,
ReadAdrBase], (instregex "^STR[BDHS]roX")>;
def : InstRW<[M3WriteSA,
ReadAdrBase], (instregex "^STRQro[WX]")>;
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;

View File

@ -0,0 +1,124 @@
//===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines scheduling predicate definitions that are used by the
// AArch64 Exynos processors.
//
//===----------------------------------------------------------------------===//
// Identify BLR specifying the LR register as the indirect target register.
// Both checks must hold: the opcode is BLR and operand 0 is the LR register.
def ExynosBranchLinkLRPred : MCSchedPredicate<
CheckAll<[CheckOpcode<[BLR]>,
CheckRegOperand<0, LR>]>>;
// Identify arithmetic and logic instructions without or with limited extension.
// Declares the TII predicate named "isExynosExtFast".
//  - Opcodes in IsArithExt32Op: holds when CheckExtBy0 holds, or when
//    CheckExtUXTW holds together with one of CheckExtBy1/2/3.
//  - Opcodes in IsArithExt64Op: same rule with CheckExtUXTX in place of
//    CheckExtUXTW.
//  - Any other opcode: false.
def ExynosExtFn : TIIPredicate<
"isExynosExtFast",
MCOpcodeSwitchStatement<
[MCOpcodeSwitchCase<
IsArithExt32Op.ValidOpcodes,
MCReturnStatement<
CheckAny<[CheckExtBy0,
CheckAll<
[CheckExtUXTW,
CheckAny<
[CheckExtBy1,
CheckExtBy2,
CheckExtBy3]>]>]>>>,
MCOpcodeSwitchCase<
IsArithExt64Op.ValidOpcodes,
MCReturnStatement<
CheckAny<[CheckExtBy0,
CheckAll<
[CheckExtUXTX,
CheckAny<
[CheckExtBy1,
CheckExtBy2,
CheckExtBy3]>]>]>>>],
MCReturnStatement<FalsePred>>>;
// Scheduling-predicate wrapper used by the SchedVar entries of the models.
def ExynosExtPred : MCSchedPredicate<ExynosExtFn>;
// Identify FP instructions.
// Holds when either CheckDForm or CheckQForm holds.
// NOTE(review): both checks are defined outside this file; presumably they
// test for D-register and Q-register forms -- confirm against their
// definitions.
def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
// Identify whether an instruction whose result is a long vector
// operates on the upper half of the input registers.
// Declares the TII predicate named "isExynosLongVectorUpper": true exactly for
// the opcodes listed in IsLongVectorUpperOp, false for every other opcode.
def ExynosLongVectorUpperFn : TIIPredicate<
"isExynosLongVectorUpper",
MCOpcodeSwitchStatement<
[MCOpcodeSwitchCase<
IsLongVectorUpperOp.ValidOpcodes,
MCReturnStatement<TruePred>>],
MCReturnStatement<FalsePred>>>;
// Scheduling-predicate wrapper around the function predicate above.
def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>;
// Identify 128-bit NEON instructions.
// Scheduling-predicate wrapper around CheckQForm (defined outside this file).
def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
// Identify instructions that reset a register efficiently.
// Declares the TII predicate named "isExynosResetFast".
//  - ADR, ADRP, MOVN[WX]i and MOVZ[WX]i: always true.
//  - Any other opcode: true when at least one of IsCopyIdiomFn,
//    IsZeroFPIdiomFn or IsZeroIdiomFn holds (all defined outside this file).
def ExynosResetFn : TIIPredicate<
"isExynosResetFast",
MCOpcodeSwitchStatement<
[MCOpcodeSwitchCase<
[ADR, ADRP,
MOVNWi, MOVNXi,
MOVZWi, MOVZXi],
MCReturnStatement<TruePred>>],
MCReturnStatement<
CheckAny<
[IsCopyIdiomFn,
IsZeroFPIdiomFn,
IsZeroIdiomFn]>>>>;
// Scheduling-predicate wrapper around the function predicate above.
def ExynosResetPred : MCSchedPredicate<ExynosResetFn>;
// Identify EXTR as the alias for ROR (immediate).
// Holds when the opcode is EXTRWrri or EXTRXrri and operands 1 and 2 are the
// same register.
def ExynosRotateRightImmPred : MCSchedPredicate<
CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>,
CheckSameRegOperand<1, 2>]>>;
// Identify arithmetic and logic instructions without or with limited shift.
// Declares the TII predicate named "isExynosShiftFast".
//  - Opcodes in IsArithLogicShiftOp: holds when CheckShiftBy0 holds, or when
//    CheckShiftLSL holds together with one of CheckShiftBy1/2/3.
//  - Any other opcode: false.
def ExynosShiftFn : TIIPredicate<
"isExynosShiftFast",
MCOpcodeSwitchStatement<
[MCOpcodeSwitchCase<
IsArithLogicShiftOp.ValidOpcodes,
MCReturnStatement<
CheckAny<[CheckShiftBy0,
CheckAll<
[CheckShiftLSL,
CheckAny<
[CheckShiftBy1,
CheckShiftBy2,
CheckShiftBy3]>]>]>>>],
MCReturnStatement<FalsePred>>>;
// Scheduling-predicate wrapper around the function predicate above.
def ExynosShiftPred : MCSchedPredicate<ExynosShiftFn>;
// Identify more arithmetic and logic instructions without or with limited
// shift.  Declares the TII predicate named "isExynosShiftExFast".
//  - Opcodes in IsArithLogicShiftOp: holds when ExynosShiftFn holds, or when
//    CheckShiftLSL and CheckShiftBy8 both hold.
//  - Any other opcode: false.
// ExynosShiftFn is combined inside the opcode case rather than used as the
// default statement: the default is only reached for opcodes outside
// IsArithLogicShiftOp, for which ExynosShiftFn is always false, so the
// cases it accepts would otherwise never be accepted here.
def ExynosShiftExFn : TIIPredicate<
                        "isExynosShiftExFast",
                        MCOpcodeSwitchStatement<
                          [MCOpcodeSwitchCase<
                            IsArithLogicShiftOp.ValidOpcodes,
                            MCReturnStatement<
                              CheckAny<
                                [ExynosShiftFn,
                                 CheckAll<
                                   [CheckShiftLSL,
                                    CheckShiftBy8]>]>>>],
                          MCReturnStatement<FalsePred>>>;
// Scheduling-predicate wrapper around the function predicate above.
def ExynosShiftExPred : MCSchedPredicate<ExynosShiftExFn>;
// Identify arithmetic and logic immediate instructions.
// Declares the TII predicate named "isExynosCheapAsMove".
//  - Opcodes in IsArithLogicImmOp: always true.
//  - Any other opcode: true when at least one of ExynosExtFn, ExynosResetFn
//    or ExynosShiftFn holds.
def ExynosCheapFn : TIIPredicate<
"isExynosCheapAsMove",
MCOpcodeSwitchStatement<
[MCOpcodeSwitchCase<
IsArithLogicImmOp.ValidOpcodes,
MCReturnStatement<TruePred>>],
MCReturnStatement<
CheckAny<
[ExynosExtFn, ExynosResetFn, ExynosShiftFn]>>>>;

View File

@ -3,31 +3,31 @@
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
sub w0, w1, w2, sxtb #0
add w3, w4, w5, sxth #1
add x3, x4, w5, sxth #1
subs x6, x7, w8, uxtw #2
adds x9, x10, x11, uxtx #3
sub w12, w13, w14, uxtb #3
add w15, w16, w17, uxth #2
add x15, x16, w17, uxth #2
subs x18, x19, w20, sxtw #1
adds x21, x22, x23, sxtx #0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 800
# EM1-NEXT: Total Cycles: 537
# EM3-NEXT: Total Cycles: 403
# EM1-NEXT: Total Cycles: 403
# EM3-NEXT: Total Cycles: 303
# ALL-NEXT: Total uOps: 800
# EM1: Dispatch Width: 4
# EM1-NEXT: uOps Per Cycle: 1.49
# EM1-NEXT: IPC: 1.49
# EM1-NEXT: Block RThroughput: 5.3
# EM1-NEXT: uOps Per Cycle: 1.99
# EM1-NEXT: IPC: 1.99
# EM1-NEXT: Block RThroughput: 4.0
# EM3: Dispatch Width: 6
# EM3-NEXT: uOps Per Cycle: 1.99
# EM3-NEXT: IPC: 1.99
# EM3-NEXT: Block RThroughput: 4.0
# EM3-NEXT: uOps Per Cycle: 2.64
# EM3-NEXT: IPC: 2.64
# EM3-NEXT: Block RThroughput: 3.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
@ -39,20 +39,20 @@
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# EM1-NEXT: 1 2 0.67 sub w0, w1, w2, sxtb
# EM1-NEXT: 1 2 0.67 add w3, w4, w5, sxth #1
# EM1-NEXT: 1 2 0.67 subs x6, x7, w8, uxtw #2
# EM1-NEXT: 1 2 0.67 adds x9, x10, x11, uxtx #3
# EM1-NEXT: 1 1 0.33 sub w0, w1, w2, sxtb
# EM1-NEXT: 1 2 0.67 add x3, x4, w5, sxth #1
# EM1-NEXT: 1 1 0.33 subs x6, x7, w8, uxtw #2
# EM1-NEXT: 1 1 0.33 adds x9, x10, x11, uxtx #3
# EM1-NEXT: 1 2 0.67 sub w12, w13, w14, uxtb #3
# EM1-NEXT: 1 2 0.67 add w15, w16, w17, uxth #2
# EM1-NEXT: 1 2 0.67 add x15, x16, w17, uxth #2
# EM1-NEXT: 1 2 0.67 subs x18, x19, w20, sxtw #1
# EM1-NEXT: 1 2 0.67 adds x21, x22, x23, sxtx
# EM1-NEXT: 1 1 0.33 adds x21, x22, x23, sxtx
# EM3-NEXT: 1 2 0.50 sub w0, w1, w2, sxtb
# EM3-NEXT: 1 2 0.50 add w3, w4, w5, sxth #1
# EM3-NEXT: 1 2 0.50 subs x6, x7, w8, uxtw #2
# EM3-NEXT: 1 2 0.50 adds x9, x10, x11, uxtx #3
# EM3-NEXT: 1 1 0.25 sub w0, w1, w2, sxtb
# EM3-NEXT: 1 2 0.50 add x3, x4, w5, sxth #1
# EM3-NEXT: 1 1 0.25 subs x6, x7, w8, uxtw #2
# EM3-NEXT: 1 1 0.25 adds x9, x10, x11, uxtx #3
# EM3-NEXT: 1 2 0.50 sub w12, w13, w14, uxtb #3
# EM3-NEXT: 1 2 0.50 add w15, w16, w17, uxth #2
# EM3-NEXT: 1 2 0.50 add x15, x16, w17, uxth #2
# EM3-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #1
# EM3-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx
# EM3-NEXT: 1 1 0.25 adds x21, x22, x23, sxtx

View File

@ -9,20 +9,17 @@
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 400
# EM1-NEXT: Total Cycles: 408
# EM3-NEXT: Total Cycles: 208
# ALL-NEXT: Total uOps: 800
# ALL-NEXT: Total Cycles: 308
# ALL-NEXT: Total uOps: 600
# EM1: Dispatch Width: 4
# EM1-NEXT: uOps Per Cycle: 1.96
# EM1-NEXT: IPC: 0.98
# EM1-NEXT: uOps Per Cycle: 1.95
# EM1-NEXT: IPC: 1.30
# EM1-NEXT: Block RThroughput: 2.0
# EM3: Dispatch Width: 6
# EM3-NEXT: uOps Per Cycle: 3.85
# EM3-NEXT: IPC: 1.92
# EM3-NEXT: uOps Per Cycle: 1.95
# EM3-NEXT: IPC: 1.30
# EM3-NEXT: Block RThroughput: 2.0
# ALL: Instruction Info:
@ -35,12 +32,12 @@
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# EM1-NEXT: 2 5 1.00 * ldr w0, [x1, x2]
# EM1-NEXT: 2 2 1.00 * str x3, [x4, w5, sxtw]
# EM1-NEXT: 2 5 1.00 * ldr x6, [x7, w8, uxtw #3]
# EM1-NEXT: 2 2 1.00 * str x9, [x10, x11, lsl #3]
# EM1-NEXT: 1 5 1.00 * ldr w0, [x1, x2]
# EM3-NEXT: 1 5 0.50 * ldr w0, [x1, x2]
# EM3-NEXT: 2 5 0.50 * ldr w0, [x1, x2]
# EM3-NEXT: 2 1 1.00 * str x3, [x4, w5, sxtw]
# ALL-NEXT: 2 2 1.00 * str x3, [x4, w5, sxtw]
# EM1-NEXT: 2 5 1.00 * ldr x6, [x7, w8, uxtw #3]
# EM3-NEXT: 2 5 0.50 * ldr x6, [x7, w8, uxtw #3]
# EM3-NEXT: 2 1 1.00 * str x9, [x10, x11, lsl #3]
# ALL-NEXT: 1 1 1.00 * str x9, [x10, x11, lsl #3]

View File

@ -10,20 +10,20 @@
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 400
# EM1-NEXT: Total Cycles: 271
# EM3-NEXT: Total Cycles: 203
# EM1-NEXT: Total Cycles: 204
# EM3-NEXT: Total Cycles: 154
# ALL-NEXT: Total uOps: 400
# EM1: Dispatch Width: 4
# EM1-NEXT: uOps Per Cycle: 1.48
# EM1-NEXT: IPC: 1.48
# EM1-NEXT: Block RThroughput: 2.7
# EM1-NEXT: uOps Per Cycle: 1.96
# EM1-NEXT: IPC: 1.96
# EM1-NEXT: Block RThroughput: 2.0
# EM3: Dispatch Width: 6
# EM3-NEXT: uOps Per Cycle: 1.97
# EM3-NEXT: IPC: 1.97
# EM3-NEXT: Block RThroughput: 2.0
# EM3-NEXT: uOps Per Cycle: 2.60
# EM3-NEXT: IPC: 2.60
# EM3-NEXT: Block RThroughput: 1.5
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
@ -35,12 +35,12 @@
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# EM1-NEXT: 1 2 0.67 add w0, w1, w2
# EM1-NEXT: 1 1 0.33 add w0, w1, w2
# EM1-NEXT: 1 2 0.67 sub x3, x4, x5, lsr #1
# EM1-NEXT: 1 2 0.67 adds x6, x7, x8, lsl #2
# EM1-NEXT: 1 1 0.33 adds x6, x7, x8, lsl #2
# EM1-NEXT: 1 2 0.67 subs w9, w10, w11, asr #3
# EM3-NEXT: 1 2 0.50 add w0, w1, w2
# EM3-NEXT: 1 1 0.25 add w0, w1, w2
# EM3-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1
# EM3-NEXT: 1 2 0.50 adds x6, x7, x8, lsl #2
# EM3-NEXT: 1 1 0.25 adds x6, x7, x8, lsl #2
# EM3-NEXT: 1 2 0.50 subs w9, w10, w11, asr #3