forked from OSchip/llvm-project
[PATCH] [AArch64] Add new target feature to fuse conditional select
This feature enables fusing a comparison instruction with the conditional select instruction that follows it. Differential revision: https://reviews.llvm.org/D42392 llvm-svn: 325939
This commit is contained in:
parent
d6ba3dbbbd
commit
1afffac05b
|
@ -128,6 +128,10 @@ def FeatureFuseAES : SubtargetFeature<
|
|||
"fuse-aes", "HasFuseAES", "true",
|
||||
"CPU fuses AES crypto operations">;
|
||||
|
||||
// Subtarget feature selected with "fuse-csel"; the record below names the
// HasFuseCCSelect attribute and the value "true" for it, followed by the
// human-readable description string.
def FeatureFuseCCSelect : SubtargetFeature<
|
||||
"fuse-csel", "HasFuseCCSelect", "true",
|
||||
"CPU fuses conditional select operations">;
|
||||
|
||||
def FeatureFuseLiterals : SubtargetFeature<
|
||||
"fuse-literals", "HasFuseLiterals", "true",
|
||||
"CPU fuses literal generation operations">;
|
||||
|
@ -352,6 +356,7 @@ def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
|
|||
FeatureFPARMv8,
|
||||
FeatureFuseAddress,
|
||||
FeatureFuseAES,
|
||||
FeatureFuseCCSelect,
|
||||
FeatureFuseLiterals,
|
||||
FeatureLSLFast,
|
||||
FeatureNEON,
|
||||
|
|
|
@ -53,8 +53,7 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI,
|
|||
case AArch64::BICSWrs:
|
||||
case AArch64::BICSXrs:
|
||||
// Shift value can be 0 making these behave like the "rr" variant...
|
||||
if (!AArch64InstrInfo::hasShiftedReg(*FirstMI))
|
||||
return true;
|
||||
return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
@ -102,8 +101,7 @@ static bool isArithmeticCbzPair(const MachineInstr *FirstMI,
|
|||
case AArch64::BICWrs:
|
||||
case AArch64::BICXrs:
|
||||
// Shift value can be 0 making these behave like the "rr" variant...
|
||||
if (!AArch64InstrInfo::hasShiftedReg(*FirstMI))
|
||||
return true;
|
||||
return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
@ -125,10 +123,10 @@ static bool isAESPair(const MachineInstr *FirstMI,
|
|||
SecondOpcode == AArch64::AESMCrrTied))
|
||||
return true;
|
||||
// AES decode.
|
||||
if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
|
||||
FirstOpcode == AArch64::AESDrr) &&
|
||||
(SecondOpcode == AArch64::AESIMCrr ||
|
||||
SecondOpcode == AArch64::AESIMCrrTied))
|
||||
else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
|
||||
FirstOpcode == AArch64::AESDrr) &&
|
||||
(SecondOpcode == AArch64::AESIMCrr ||
|
||||
SecondOpcode == AArch64::AESIMCrrTied))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -149,20 +147,23 @@ static bool isLiteralsPair(const MachineInstr *FirstMI,
|
|||
SecondOpcode == AArch64::ADDXri)
|
||||
return true;
|
||||
// 32 bit immediate.
|
||||
if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
|
||||
FirstOpcode == AArch64::MOVZWi) &&
|
||||
SecondOpcode == AArch64::MOVKWi && SecondMI.getOperand(3).getImm() == 16)
|
||||
else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
|
||||
FirstOpcode == AArch64::MOVZWi) &&
|
||||
(SecondOpcode == AArch64::MOVKWi &&
|
||||
SecondMI.getOperand(3).getImm() == 16))
|
||||
return true;
|
||||
// Lower half of 64 bit immediate.
|
||||
if((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
|
||||
FirstOpcode == AArch64::MOVZXi) &&
|
||||
SecondOpcode == AArch64::MOVKXi && SecondMI.getOperand(3).getImm() == 16)
|
||||
else if((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
|
||||
FirstOpcode == AArch64::MOVZXi) &&
|
||||
(SecondOpcode == AArch64::MOVKXi &&
|
||||
SecondMI.getOperand(3).getImm() == 16))
|
||||
return true;
|
||||
// Upper half of 64 bit immediate.
|
||||
if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
|
||||
(FirstOpcode == AArch64::MOVKXi &&
|
||||
FirstMI->getOperand(3).getImm() == 32)) &&
|
||||
SecondOpcode == AArch64::MOVKXi && SecondMI.getOperand(3).getImm() == 48)
|
||||
else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
|
||||
(FirstOpcode == AArch64::MOVKXi &&
|
||||
FirstMI->getOperand(3).getImm() == 32)) &&
|
||||
(SecondOpcode == AArch64::MOVKXi &&
|
||||
SecondMI.getOperand(3).getImm() == 48))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -203,9 +204,7 @@ static bool isAddressLdStPair(const MachineInstr *FirstMI,
|
|||
|
||||
switch (FirstMI->getOpcode()) {
|
||||
case AArch64::ADR:
|
||||
if (SecondMI.getOperand(2).getImm() == 0)
|
||||
return true;
|
||||
return false;
|
||||
return (SecondMI.getOperand(2).getImm() == 0);
|
||||
case AArch64::ADRP:
|
||||
return true;
|
||||
}
|
||||
|
@ -213,6 +212,49 @@ static bool isAddressLdStPair(const MachineInstr *FirstMI,
|
|||
return false;
|
||||
}
|
||||
|
||||
// Fuse compare and conditional select.
//
// Returns true when SecondMI is a CSELWr/CSELXr and FirstMI is either
// unspecified (null) or one of the listed SUBS opcodes that defines the zero
// register of the matching width (WZR for CSELWr, XZR for CSELXr). For the
// "rs"/"rx" SUBS forms the pair is accepted only when hasShiftedReg /
// hasExtendedReg reports false for FirstMI. Every other combination returns
// false.
// NOTE(review): a SUBS whose destination is the zero register is presumably
// the "cmp" alias -- confirm against the ISA reference.
|
||||
static bool isCCSelectPair(const MachineInstr *FirstMI,
|
||||
const MachineInstr &SecondMI) {
|
||||
unsigned SecondOpcode = SecondMI.getOpcode();
|
||||
|
||||
// 32 bits
|
||||
if (SecondOpcode == AArch64::CSELWr) {
|
||||
// Assume the 1st instr to be a wildcard if it is unspecified.
|
||||
if (!FirstMI)
|
||||
return true;
|
||||
|
||||
// Only a first instruction that defines WZR is considered; anything else
// leaves this branch without returning and reaches the final return false.
if (FirstMI->definesRegister(AArch64::WZR))
|
||||
switch (FirstMI->getOpcode()) {
|
||||
case AArch64::SUBSWrs:
|
||||
return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
|
||||
case AArch64::SUBSWrx:
|
||||
return (!AArch64InstrInfo::hasExtendedReg(*FirstMI));
|
||||
case AArch64::SUBSWrr:
|
||||
case AArch64::SUBSWri:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// 64 bits
|
||||
else if (SecondOpcode == AArch64::CSELXr) {
|
||||
// Assume the 1st instr to be a wildcard if it is unspecified.
|
||||
if (!FirstMI)
|
||||
return true;
|
||||
|
||||
// Same check as the 32-bit case, against XZR and the X-register SUBS
// opcodes (both SUBSXrx and SUBSXrx64 go through hasExtendedReg).
if (FirstMI->definesRegister(AArch64::XZR))
|
||||
switch (FirstMI->getOpcode()) {
|
||||
case AArch64::SUBSXrs:
|
||||
return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
|
||||
case AArch64::SUBSXrx:
|
||||
case AArch64::SUBSXrx64:
|
||||
return (!AArch64InstrInfo::hasExtendedReg(*FirstMI));
|
||||
case AArch64::SUBSXrr:
|
||||
case AArch64::SUBSXri:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// SecondMI is not a CSEL, or FirstMI did not match any accepted form.
return false;
|
||||
}
|
||||
|
||||
/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
|
||||
/// together. Given SecondMI, when FirstMI is unspecified, then check if
|
||||
/// SecondMI may be part of a fused pair at all.
|
||||
|
@ -232,6 +274,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
|
|||
return true;
|
||||
if (ST.hasFuseAddress() && isAddressLdStPair(FirstMI, SecondMI))
|
||||
return true;
|
||||
if (ST.hasFuseCCSelect() && isCCSelectPair(FirstMI, SecondMI))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -113,6 +113,7 @@ protected:
|
|||
bool HasArithmeticCbzFusion = false;
|
||||
bool HasFuseAddress = false;
|
||||
bool HasFuseAES = false;
|
||||
bool HasFuseCCSelect = false;
|
||||
bool HasFuseLiterals = false;
|
||||
bool DisableLatencySchedHeuristic = false;
|
||||
bool UseRSqrt = false;
|
||||
|
@ -239,12 +240,13 @@ public:
|
|||
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
|
||||
bool hasFuseAddress() const { return HasFuseAddress; }
|
||||
bool hasFuseAES() const { return HasFuseAES; }
|
||||
bool hasFuseCCSelect() const { return HasFuseCCSelect; }
|
||||
bool hasFuseLiterals() const { return HasFuseLiterals; }
|
||||
|
||||
/// \brief Return true if the CPU supports any kind of instruction fusion.
|
||||
bool hasFusion() const {
|
||||
return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
|
||||
hasFuseAES() || hasFuseLiterals();
|
||||
hasFuseAES() || hasFuseCCSelect() || hasFuseLiterals();
|
||||
}
|
||||
|
||||
bool useRSqrt() const { return UseRSqrt; }
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-csel | FileCheck %s
|
||||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s
|
||||
|
||||
target triple = "aarch64-unknown"
|
||||
|
||||
; 32-bit case: the CHECK / CHECK-NEXT pair below requires the "cmp wN, #13"
; line to be immediately followed by a "csel wN" line in the llc output.
; NOTE(review): the add of 7 is presumably there to give the scheduler an
; unrelated instruction it could otherwise place between the two -- confirm.
define i32 @test_sub_cselw(i32 %a0, i32 %a1, i32 %a2) {
|
||||
entry:
|
||||
%v0 = sub i32 %a0, 13
|
||||
%cond = icmp eq i32 %v0, 0
|
||||
%v1 = add i32 %a1, 7
|
||||
%v2 = select i1 %cond, i32 %a0, i32 %v1
|
||||
ret i32 %v2
|
||||
|
||||
; CHECK-LABEL: test_sub_cselw:
|
||||
; CHECK: cmp {{w[0-9]}}, #13
|
||||
; CHECK-NEXT: csel {{w[0-9]}}
|
||||
}
|
||||
|
||||
; 64-bit case: the CHECK / CHECK-NEXT pair below requires the "cmp xN, #13"
; line to be immediately followed by a "csel xN" line in the llc output.
; NOTE(review): the add of 7 is presumably there to give the scheduler an
; unrelated instruction it could otherwise place between the two -- confirm.
define i64 @test_sub_cselx(i64 %a0, i64 %a1, i64 %a2) {
|
||||
entry:
|
||||
%v0 = sub i64 %a0, 13
|
||||
%cond = icmp eq i64 %v0, 0
|
||||
%v1 = add i64 %a1, 7
|
||||
%v2 = select i1 %cond, i64 %a0, i64 %v1
|
||||
ret i64 %v2
|
||||
|
||||
; CHECK-LABEL: test_sub_cselx:
|
||||
; CHECK: cmp {{x[0-9]}}, #13
|
||||
; CHECK-NEXT: csel {{x[0-9]}}
|
||||
}
|
Loading…
Reference in New Issue