//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                          AArch64::CATCHRET),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be.  This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  //        before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes
    NumBytes = 16;
    break;
  case AArch64::JumpTableDest32:
  case AArch64::JumpTableDest16:
  case AArch64::JumpTableDest8:
    NumBytes = 12;
    break;
  case AArch64::SPACE:
    NumBytes = MI.getOperand(1).getImm();
    break;
  }

  return NumBytes;
}

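// Scan a conditional-branch terminator and record its destination block and
// the operands that analyzeBranch()/insertBranch() use to describe the
// condition: the condition code for Bcc, or -1 followed by the opcode and
// operands for the folded compare-and-branch and test-and-branch forms.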
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

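// Return the number of displacement bits the given branch opcode can encode.
// The TB[N]Z/CB[N]Z/Bcc limits can be narrowed via the cl::opts defined above.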
static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now if the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

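// Emit the single conditional branch described by Cond: either a plain Bcc,
// or one of the folded compare-and-branch / test-and-branch forms recorded by
// parseCondBranch().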
void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       unsigned TrueReg, unsigned FalseReg,
                                       int &CondCycles, int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual register into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  const unsigned Opcode = MI.getOpcode();

  // Firstly, check cases gated by features.

  if (Subtarget.hasZeroCycleZeroingFP()) {
    if (Opcode == AArch64::FMOVH0 ||
        Opcode == AArch64::FMOVS0 ||
        Opcode == AArch64::FMOVD0)
      return true;
  }

  if (Subtarget.hasZeroCycleZeroingGP()) {
    if (Opcode == TargetOpcode::COPY &&
        (MI.getOperand(1).getReg() == AArch64::WZR ||
         MI.getOperand(1).getReg() == AArch64::XZR))
      return true;
  }

  // Secondly, check cases specific to sub-targets.

  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosCheapAsMove(MI))
      return true;

    return MI.isAsCheapAsAMove();
  }

  // Finally, check generic cases.

  switch (Opcode) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  case AArch64::SEH_StackAlloc:
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveFPLR_X:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveReg_X:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveRegP_X:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveFReg_X:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFRegP_X:
  case AArch64::SEH_SetFP:
  case AArch64::SEH_AddFP:
  case AArch64::SEH_Nop:
  case AArch64::SEH_PrologEnd:
  case AArch64::SEH_EpilogStart:
  case AArch64::SEH_EpilogEnd:
    return true;
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base, offset from the base and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // the base operands are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
    return true;
  switch (MI.getOpcode()) {
  case AArch64::HINT:
    // CSDB hints are scheduling barriers.
    if (MI.getOperand(0).getImm() == 0x14)
      return true;
    break;
  case AArch64::DSB:
  case AArch64::ISB:
    // DSB and ISB also are scheduling barriers.
    return true;
  default:;
  }
  return isSEHInstruction(MI);
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: In order to convert CmpValue to 0 or 1
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk
    // CmpValue is only used to compare with zero in OptimizeCompareInstr
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}

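// Check that every register operand of Instr satisfies the register class
// required at that operand position, constraining virtual registers where
// possible. Returns false if some operand cannot be made to match its
// constraint.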
static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible to do the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be treated as a pure
/// compare instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
/// Only comparison with zero is supported.
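/// For example, when the flags are only consumed by a b.eq, a preceding
/// 'add w8, w0, w1' can become 'adds w8, w0, w1' and the 'cmp w8, #0'
/// removed; the exact legality conditions are checked in
/// canInstrSubstituteCmpInstr below.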
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare
  // function.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr
///     or if MI opcode is not the S form there must be neither defs of flags
///        nor uses of flags between MI and CmpInstr.
/// - and, C/V flags are not used after CmpInstr
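/// For instance, an 'adds w8, w0, w1' followed later in the same block by
/// 'cmp w8, #0' and a 'b.ne' satisfies these conditions, as long as no
/// instruction in between writes NZCV and the flags are dead in the block's
/// successors.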
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
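/// For instance, 'and w8, w0, #0xff' followed by 'cmp w8, #0' can become
/// 'ands w8, w0, #0xff' with the compare erased, provided the conditions in
/// canInstrSubstituteCmpInstr hold.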
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
      MI.getOpcode() != AArch64::CATCHRET)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();

  if (MI.getOpcode() == AArch64::CATCHRET) {
    // Skip to the first instruction before the epilog.
    const TargetInstrInfo *TII =
        MBB.getParent()->getSubtarget().getInstrInfo();
    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
    auto MBBI = MachineBasicBlock::iterator(MI);
    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
           FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::prev(FirstEpilogSEH);
    if (FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::next(FirstEpilogSEH);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
        .addReg(AArch64::X0, RegState::Define)
        .addMBB(TargetMBB);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
        .addReg(AArch64::X0, RegState::Define)
        .addReg(AArch64::X0)
        .addMBB(TargetMBB)
        .addImm(0);
    return true;
  }

  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Tiny) {
    BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}
2014-03-29 18:18:08 +08:00
|
|
|
// Return true if this instruction simply sets its single destination register
|
|
|
|
// to zero. This is equivalent to a register rename of the zero-register.
|
2018-02-10 00:14:41 +08:00
|
|
|
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
|
2016-06-30 08:01:54 +08:00
|
|
|
switch (MI.getOpcode()) {
|
2014-03-29 18:18:08 +08:00
|
|
|
default:
|
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::MOVZWi:
|
|
|
|
case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
|
2016-06-30 08:01:54 +08:00
|
|
|
if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
|
|
|
|
assert(MI.getDesc().getNumOperands() == 3 &&
|
|
|
|
MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::ANDWri: // and Rd, Rzr, #imm
|
2016-06-30 08:01:54 +08:00
|
|
|
return MI.getOperand(1).getReg() == AArch64::WZR;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::ANDXri:
|
2016-06-30 08:01:54 +08:00
|
|
|
return MI.getOperand(1).getReg() == AArch64::XZR;
|
2014-03-29 18:18:08 +08:00
|
|
|
case TargetOpcode::COPY:
|
2016-06-30 08:01:54 +08:00
|
|
|
return MI.getOperand(1).getReg() == AArch64::WZR;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return true if this instruction simply renames a general register without
|
|
|
|
// modifying bits.
|
2018-02-10 00:14:41 +08:00
|
|
|
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
|
2016-06-30 08:01:54 +08:00
|
|
|
switch (MI.getOpcode()) {
|
2014-03-29 18:18:08 +08:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case TargetOpcode::COPY: {
|
|
|
|
// GPR32 copies will be lowered to ORRXrs
|
2016-06-30 08:01:54 +08:00
|
|
|
unsigned DstReg = MI.getOperand(0).getReg();
|
2014-05-24 20:50:23 +08:00
|
|
|
return (AArch64::GPR32RegClass.contains(DstReg) ||
|
|
|
|
AArch64::GPR64RegClass.contains(DstReg));
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
|
2016-06-30 08:01:54 +08:00
|
|
|
if (MI.getOperand(1).getReg() == AArch64::XZR) {
|
|
|
|
assert(MI.getDesc().getNumOperands() == 4 &&
|
|
|
|
MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
2014-08-02 01:27:31 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
|
2016-06-30 08:01:54 +08:00
|
|
|
if (MI.getOperand(2).getImm() == 0) {
|
|
|
|
assert(MI.getDesc().getNumOperands() == 4 &&
|
|
|
|
MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
2014-08-02 01:27:31 +08:00
|
|
|
break;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return true if this instruction simply renames a general register without
|
|
|
|
// modifying bits.
|
2018-02-10 00:14:41 +08:00
|
|
|
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
|
2016-06-30 08:01:54 +08:00
|
|
|
switch (MI.getOpcode()) {
|
2014-03-29 18:18:08 +08:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case TargetOpcode::COPY: {
|
|
|
|
// FPR64 copies will be lowered to ORR.16b
|
2016-06-30 08:01:54 +08:00
|
|
|
unsigned DstReg = MI.getOperand(0).getReg();
|
2014-05-24 20:50:23 +08:00
|
|
|
return (AArch64::FPR64RegClass.contains(DstReg) ||
|
|
|
|
AArch64::FPR128RegClass.contains(DstReg));
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::ORRv16i8:
|
2016-06-30 08:01:54 +08:00
|
|
|
if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
|
|
|
|
assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
|
2014-03-29 18:18:08 +08:00
|
|
|
"invalid ORRv16i8 operands");
|
|
|
|
return true;
|
|
|
|
}
|
2014-08-02 01:27:31 +08:00
|
|
|
break;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-06-30 08:01:54 +08:00
|
|
|
unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
|
2014-05-24 20:50:23 +08:00
|
|
|
int &FrameIndex) const {
|
2016-06-30 08:01:54 +08:00
|
|
|
switch (MI.getOpcode()) {
|
2014-03-29 18:18:08 +08:00
|
|
|
default:
|
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRWui:
|
|
|
|
case AArch64::LDRXui:
|
|
|
|
case AArch64::LDRBui:
|
|
|
|
case AArch64::LDRHui:
|
|
|
|
case AArch64::LDRSui:
|
|
|
|
case AArch64::LDRDui:
|
|
|
|
case AArch64::LDRQui:
|
2016-06-30 08:01:54 +08:00
|
|
|
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
|
|
|
|
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
|
|
|
|
FrameIndex = MI.getOperand(1).getIndex();
|
|
|
|
return MI.getOperand(0).getReg();
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-06-30 08:01:54 +08:00
|
|
|
unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
|
2014-05-24 20:50:23 +08:00
|
|
|
int &FrameIndex) const {
|
2016-06-30 08:01:54 +08:00
|
|
|
switch (MI.getOpcode()) {
|
2014-03-29 18:18:08 +08:00
|
|
|
default:
|
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::STRWui:
|
|
|
|
case AArch64::STRXui:
|
|
|
|
case AArch64::STRBui:
|
|
|
|
case AArch64::STRHui:
|
|
|
|
case AArch64::STRSui:
|
|
|
|
case AArch64::STRDui:
|
|
|
|
case AArch64::STRQui:
|
2016-06-30 08:01:54 +08:00
|
|
|
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
|
|
|
|
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
|
|
|
|
FrameIndex = MI.getOperand(1).getIndex();
|
|
|
|
return MI.getOperand(0).getReg();
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check all MachineMemOperands for a hint to suppress pairing.
|
2018-02-10 00:14:41 +08:00
|
|
|
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
|
2017-01-06 08:30:53 +08:00
|
|
|
return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
|
2016-07-15 02:15:20 +08:00
|
|
|
return MMO->getFlags() & MOSuppressPair;
|
|
|
|
});
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Set a flag on the first MachineMemOperand to suppress pairing.
|
2018-02-10 00:14:41 +08:00
|
|
|
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
|
2016-06-30 08:01:54 +08:00
|
|
|
if (MI.memoperands_empty())
|
2014-03-29 18:18:08 +08:00
|
|
|
return;
|
2016-07-15 02:15:20 +08:00
|
|
|
(*MI.memoperands_begin())->setFlags(MOSuppressPair);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2017-07-15 05:44:12 +08:00
|
|
|
/// Check all MachineMemOperands for a hint that the load/store is strided.
|
2018-02-10 00:14:41 +08:00
|
|
|
bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
|
2017-07-15 05:44:12 +08:00
|
|
|
return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
|
|
|
|
return MMO->getFlags() & MOStridedAccess;
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2018-02-10 00:14:41 +08:00
|
|
|
bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
|
2016-03-10 01:29:48 +08:00
|
|
|
switch (Opc) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case AArch64::STURSi:
|
|
|
|
case AArch64::STURDi:
|
|
|
|
case AArch64::STURQi:
|
|
|
|
case AArch64::STURBBi:
|
|
|
|
case AArch64::STURHHi:
|
|
|
|
case AArch64::STURWi:
|
|
|
|
case AArch64::STURXi:
|
|
|
|
case AArch64::LDURSi:
|
|
|
|
case AArch64::LDURDi:
|
|
|
|
case AArch64::LDURQi:
|
|
|
|
case AArch64::LDURWi:
|
|
|
|
case AArch64::LDURXi:
|
|
|
|
case AArch64::LDURSWi:
|
|
|
|
case AArch64::LDURHHi:
|
|
|
|
case AArch64::LDURBBi:
|
|
|
|
case AArch64::LDURSBWi:
|
|
|
|
case AArch64::LDURSHWi:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-10 00:14:41 +08:00
|
|
|
bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
|
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
// Scaled instructions.
|
|
|
|
case AArch64::STRSui:
|
|
|
|
case AArch64::STRDui:
|
|
|
|
case AArch64::STRQui:
|
|
|
|
case AArch64::STRXui:
|
|
|
|
case AArch64::STRWui:
|
|
|
|
case AArch64::LDRSui:
|
|
|
|
case AArch64::LDRDui:
|
|
|
|
case AArch64::LDRQui:
|
|
|
|
case AArch64::LDRXui:
|
|
|
|
case AArch64::LDRWui:
|
|
|
|
case AArch64::LDRSWui:
|
|
|
|
// Unscaled instructions.
|
|
|
|
case AArch64::STURSi:
|
|
|
|
case AArch64::STURDi:
|
|
|
|
case AArch64::STURQi:
|
|
|
|
case AArch64::STURWi:
|
|
|
|
case AArch64::STURXi:
|
|
|
|
case AArch64::LDURSi:
|
|
|
|
case AArch64::LDURDi:
|
|
|
|
case AArch64::LDURQi:
|
|
|
|
case AArch64::LDURWi:
|
|
|
|
case AArch64::LDURXi:
|
|
|
|
case AArch64::LDURSWi:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
|
|
|
|
bool &Is64Bit) {
|
|
|
|
switch (Opc) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Opcode has no flag setting equivalent!");
|
|
|
|
// 32-bit cases:
|
|
|
|
case AArch64::ADDWri:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::ADDSWri;
|
|
|
|
case AArch64::ADDWrr:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::ADDSWrr;
|
|
|
|
case AArch64::ADDWrs:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::ADDSWrs;
|
|
|
|
case AArch64::ADDWrx:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::ADDSWrx;
|
|
|
|
case AArch64::ANDWri:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::ANDSWri;
|
|
|
|
case AArch64::ANDWrr:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::ANDSWrr;
|
|
|
|
case AArch64::ANDWrs:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::ANDSWrs;
|
|
|
|
case AArch64::BICWrr:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::BICSWrr;
|
|
|
|
case AArch64::BICWrs:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::BICSWrs;
|
|
|
|
case AArch64::SUBWri:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::SUBSWri;
|
|
|
|
case AArch64::SUBWrr:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::SUBSWrr;
|
|
|
|
case AArch64::SUBWrs:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::SUBSWrs;
|
|
|
|
case AArch64::SUBWrx:
|
|
|
|
Is64Bit = false;
|
|
|
|
return AArch64::SUBSWrx;
|
|
|
|
// 64-bit cases:
|
|
|
|
case AArch64::ADDXri:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::ADDSXri;
|
|
|
|
case AArch64::ADDXrr:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::ADDSXrr;
|
|
|
|
case AArch64::ADDXrs:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::ADDSXrs;
|
|
|
|
case AArch64::ADDXrx:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::ADDSXrx;
|
|
|
|
case AArch64::ANDXri:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::ANDSXri;
|
|
|
|
case AArch64::ANDXrr:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::ANDSXrr;
|
|
|
|
case AArch64::ANDXrs:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::ANDSXrs;
|
|
|
|
case AArch64::BICXrr:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::BICSXrr;
|
|
|
|
case AArch64::BICXrs:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::BICSXrs;
|
|
|
|
case AArch64::SUBXri:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::SUBSXri;
|
|
|
|
case AArch64::SUBXrr:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::SUBSXrr;
|
|
|
|
case AArch64::SUBXrs:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::SUBSXrs;
|
|
|
|
case AArch64::SUBXrx:
|
|
|
|
Is64Bit = true;
|
|
|
|
return AArch64::SUBSXrx;
|
|
|
|
}
|
2016-03-10 01:29:48 +08:00
|
|
|
}
|
|
|
|
|
2016-03-19 03:21:02 +08:00
|
|
|
// Is this a candidate for ld/st merging or pairing? For example, we don't
|
|
|
|
// touch volatiles or load/stores that have a hint to avoid pair formation.
|
2016-06-30 08:01:54 +08:00
|
|
|
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
|
2016-03-19 03:21:02 +08:00
|
|
|
// If this is a volatile load/store, don't mess with it.
|
2016-06-30 08:01:54 +08:00
|
|
|
if (MI.hasOrderedMemoryRef())
|
2016-03-19 03:21:02 +08:00
|
|
|
return false;
|
|
|
|
|
2018-11-28 20:00:28 +08:00
|
|
|
// Make sure this is a reg/fi+imm (as opposed to an address reloc).
|
|
|
|
assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
|
|
|
|
"Expected a reg or frame index operand.");
|
2016-06-30 08:01:54 +08:00
|
|
|
if (!MI.getOperand(2).isImm())
|
2016-03-19 03:21:02 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Can't merge/pair if the instruction modifies the base register.
|
|
|
|
// e.g., ldr x0, [x0]
|
2018-11-28 20:00:20 +08:00
|
|
|
// This case will never occur with an FI base.
|
|
|
|
if (MI.getOperand(1).isReg()) {
|
|
|
|
unsigned BaseReg = MI.getOperand(1).getReg();
|
|
|
|
const TargetRegisterInfo *TRI = &getRegisterInfo();
|
|
|
|
if (MI.modifiesRegister(BaseReg, TRI))
|
|
|
|
return false;
|
|
|
|
}
|
2016-03-19 03:21:02 +08:00
|
|
|
|
|
|
|
// Check if this load/store has a hint to avoid pair formation.
|
|
|
|
// MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
|
|
|
|
if (isLdStPairSuppressed(MI))
|
|
|
|
return false;
|
|
|
|
|
2016-06-03 02:03:53 +08:00
|
|
|
// On some CPUs quad load/store pairs are slower than two single load/stores.
|
2017-01-25 01:34:31 +08:00
|
|
|
if (Subtarget.isPaired128Slow()) {
|
2016-06-30 08:01:54 +08:00
|
|
|
switch (MI.getOpcode()) {
|
2016-05-28 09:06:51 +08:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case AArch64::LDURQi:
|
|
|
|
case AArch64::STURQi:
|
|
|
|
case AArch64::LDRQui:
|
|
|
|
case AArch64::STRQui:
|
|
|
|
return false;
|
2016-04-14 02:31:45 +08:00
|
|
|
}
|
2016-05-28 09:06:51 +08:00
|
|
|
}
|
2016-04-14 02:31:45 +08:00
|
|
|
|
2016-03-19 03:21:02 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-11-28 20:00:20 +08:00
|
|
|
bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
|
|
|
|
MachineOperand *&BaseOp,
|
|
|
|
int64_t &Offset,
|
|
|
|
const TargetRegisterInfo *TRI) const {
|
2016-08-12 23:26:00 +08:00
|
|
|
unsigned Width;
|
2018-11-28 20:00:20 +08:00
|
|
|
return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2018-11-28 20:00:20 +08:00
|
|
|
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
|
|
|
|
MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
|
|
|
|
unsigned &Width, const TargetRegisterInfo *TRI) const {
|
2016-06-30 08:01:54 +08:00
|
|
|
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
|
2014-09-08 22:43:48 +08:00
|
|
|
// Handle only loads/stores with base register followed by immediate offset.
|
2016-06-30 08:01:54 +08:00
|
|
|
if (LdSt.getNumExplicitOperands() == 3) {
|
2016-04-16 02:09:10 +08:00
|
|
|
// Non-paired instruction (e.g., ldr x1, [x0, #8]).
|
2018-11-28 20:00:28 +08:00
|
|
|
if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
|
|
|
|
!LdSt.getOperand(2).isImm())
|
2016-04-16 02:09:10 +08:00
|
|
|
return false;
|
2016-06-30 08:01:54 +08:00
|
|
|
} else if (LdSt.getNumExplicitOperands() == 4) {
|
2016-04-16 02:09:10 +08:00
|
|
|
// Paired instruction (e.g., ldp x1, x2, [x0, #8]).
|
2018-11-28 20:00:28 +08:00
|
|
|
if (!LdSt.getOperand(1).isReg() ||
|
|
|
|
(!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
|
2016-06-30 08:01:54 +08:00
|
|
|
!LdSt.getOperand(3).isImm())
|
2016-04-16 02:09:10 +08:00
|
|
|
return false;
|
|
|
|
} else
|
2014-09-08 22:43:48 +08:00
|
|
|
return false;
|
|
|
|
|
2017-07-28 11:21:58 +08:00
|
|
|
// Get the scaling factor for the instruction and set the width for the
|
2017-03-18 06:26:55 +08:00
|
|
|
// instruction.
|
2016-03-10 00:46:48 +08:00
|
|
|
unsigned Scale = 0;
|
2017-03-18 06:26:55 +08:00
|
|
|
int64_t Dummy1, Dummy2;
|
|
|
|
|
|
|
|
// If this returns false, then it's an instruction we don't want to handle.
|
|
|
|
if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Compute the offset. Offset is calculated as the immediate operand
|
|
|
|
// multiplied by the scaling factor. Unscaled instructions have scaling factor
|
|
|
|
// set to 1.
|
|
|
|
if (LdSt.getNumExplicitOperands() == 3) {
|
2018-11-28 20:00:20 +08:00
|
|
|
BaseOp = &LdSt.getOperand(1);
|
2017-03-18 06:26:55 +08:00
|
|
|
Offset = LdSt.getOperand(2).getImm() * Scale;
|
|
|
|
} else {
|
|
|
|
assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
|
2018-11-28 20:00:20 +08:00
|
|
|
BaseOp = &LdSt.getOperand(2);
|
2017-03-18 06:26:55 +08:00
|
|
|
Offset = LdSt.getOperand(3).getImm() * Scale;
|
|
|
|
}
|
2018-11-28 20:00:20 +08:00
|
|
|
|
2018-11-28 20:00:28 +08:00
|
|
|
assert((BaseOp->isReg() || BaseOp->isFI()) &&
|
|
|
|
"getMemOperandWithOffset only supports base "
|
|
|
|
"operands of type register or frame index.");
|
2018-11-28 20:00:20 +08:00
|
|
|
|
2017-03-18 06:26:55 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-07-28 11:21:58 +08:00
|
|
|
MachineOperand &
|
2017-03-18 06:26:55 +08:00
|
|
|
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
|
|
|
|
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
|
2017-07-28 11:21:58 +08:00
|
|
|
MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
|
2017-03-18 06:26:55 +08:00
|
|
|
assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
|
|
|
|
return OfsOp;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
|
|
|
|
unsigned &Width, int64_t &MinOffset,
|
|
|
|
int64_t &MaxOffset) const {
|
|
|
|
switch (Opcode) {
|
2017-07-28 11:21:58 +08:00
|
|
|
// Not a memory operation or something we want to handle.
|
2014-09-08 22:43:48 +08:00
|
|
|
default:
|
2017-03-18 06:26:55 +08:00
|
|
|
Scale = Width = 0;
|
|
|
|
MinOffset = MaxOffset = 0;
|
2014-09-08 22:43:48 +08:00
|
|
|
return false;
|
2017-03-18 06:26:55 +08:00
|
|
|
case AArch64::STRWpost:
|
|
|
|
case AArch64::LDRWpost:
|
|
|
|
Width = 32;
|
|
|
|
Scale = 4;
|
|
|
|
MinOffset = -256;
|
|
|
|
MaxOffset = 255;
|
|
|
|
break;
|
2014-09-08 22:43:48 +08:00
|
|
|
case AArch64::LDURQi:
|
|
|
|
case AArch64::STURQi:
|
|
|
|
Width = 16;
|
|
|
|
Scale = 1;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = -256;
|
|
|
|
MaxOffset = 255;
|
2014-09-08 22:43:48 +08:00
|
|
|
break;
|
|
|
|
case AArch64::LDURXi:
|
|
|
|
case AArch64::LDURDi:
|
|
|
|
case AArch64::STURXi:
|
|
|
|
case AArch64::STURDi:
|
|
|
|
Width = 8;
|
|
|
|
Scale = 1;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = -256;
|
|
|
|
MaxOffset = 255;
|
2014-09-08 22:43:48 +08:00
|
|
|
break;
|
|
|
|
case AArch64::LDURWi:
|
|
|
|
case AArch64::LDURSi:
|
|
|
|
case AArch64::LDURSWi:
|
|
|
|
case AArch64::STURWi:
|
|
|
|
case AArch64::STURSi:
|
|
|
|
Width = 4;
|
|
|
|
Scale = 1;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = -256;
|
|
|
|
MaxOffset = 255;
|
2014-09-08 22:43:48 +08:00
|
|
|
break;
|
|
|
|
case AArch64::LDURHi:
|
|
|
|
case AArch64::LDURHHi:
|
|
|
|
case AArch64::LDURSHXi:
|
|
|
|
case AArch64::LDURSHWi:
|
|
|
|
case AArch64::STURHi:
|
|
|
|
case AArch64::STURHHi:
|
|
|
|
Width = 2;
|
|
|
|
Scale = 1;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = -256;
|
|
|
|
MaxOffset = 255;
|
2014-09-08 22:43:48 +08:00
|
|
|
break;
|
|
|
|
case AArch64::LDURBi:
|
|
|
|
case AArch64::LDURBBi:
|
|
|
|
case AArch64::LDURSBXi:
|
|
|
|
case AArch64::LDURSBWi:
|
|
|
|
case AArch64::STURBi:
|
|
|
|
case AArch64::STURBBi:
|
|
|
|
Width = 1;
|
|
|
|
Scale = 1;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = -256;
|
|
|
|
MaxOffset = 255;
|
2014-09-08 22:43:48 +08:00
|
|
|
break;
|
2016-04-16 02:09:10 +08:00
|
|
|
case AArch64::LDPQi:
|
|
|
|
case AArch64::LDNPQi:
|
|
|
|
case AArch64::STPQi:
|
|
|
|
case AArch64::STNPQi:
|
|
|
|
Scale = 16;
|
|
|
|
Width = 32;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = -64;
|
|
|
|
MaxOffset = 63;
|
2016-04-16 02:09:10 +08:00
|
|
|
break;
|
2015-09-18 22:15:19 +08:00
|
|
|
case AArch64::LDRQui:
|
|
|
|
case AArch64::STRQui:
|
|
|
|
Scale = Width = 16;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = 0;
|
|
|
|
MaxOffset = 4095;
|
2015-09-18 22:15:19 +08:00
|
|
|
break;
|
2016-04-16 02:09:10 +08:00
|
|
|
case AArch64::LDPXi:
|
|
|
|
case AArch64::LDPDi:
|
|
|
|
case AArch64::LDNPXi:
|
|
|
|
case AArch64::LDNPDi:
|
|
|
|
case AArch64::STPXi:
|
|
|
|
case AArch64::STPDi:
|
|
|
|
case AArch64::STNPXi:
|
|
|
|
case AArch64::STNPDi:
|
|
|
|
Scale = 8;
|
|
|
|
Width = 16;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = -64;
|
|
|
|
MaxOffset = 63;
|
2016-04-16 02:09:10 +08:00
|
|
|
break;
|
2014-09-08 22:43:48 +08:00
|
|
|
case AArch64::LDRXui:
|
2015-09-18 22:13:18 +08:00
|
|
|
case AArch64::LDRDui:
|
2014-09-08 22:43:48 +08:00
|
|
|
case AArch64::STRXui:
|
2015-09-18 22:13:18 +08:00
|
|
|
case AArch64::STRDui:
|
2014-09-08 22:43:48 +08:00
|
|
|
Scale = Width = 8;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = 0;
|
|
|
|
MaxOffset = 4095;
|
2014-09-08 22:43:48 +08:00
|
|
|
break;
|
2016-04-16 02:09:10 +08:00
|
|
|
case AArch64::LDPWi:
|
|
|
|
case AArch64::LDPSi:
|
|
|
|
case AArch64::LDNPWi:
|
|
|
|
case AArch64::LDNPSi:
|
|
|
|
case AArch64::STPWi:
|
|
|
|
case AArch64::STPSi:
|
|
|
|
case AArch64::STNPWi:
|
|
|
|
case AArch64::STNPSi:
|
|
|
|
Scale = 4;
|
|
|
|
Width = 8;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = -64;
|
|
|
|
MaxOffset = 63;
|
2016-04-16 02:09:10 +08:00
|
|
|
break;
|
2014-09-08 22:43:48 +08:00
|
|
|
case AArch64::LDRWui:
|
2015-09-18 22:13:18 +08:00
|
|
|
case AArch64::LDRSui:
|
2016-03-19 03:21:02 +08:00
|
|
|
case AArch64::LDRSWui:
|
2014-09-08 22:43:48 +08:00
|
|
|
case AArch64::STRWui:
|
2015-09-18 22:13:18 +08:00
|
|
|
case AArch64::STRSui:
|
2014-09-08 22:43:48 +08:00
|
|
|
Scale = Width = 4;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = 0;
|
|
|
|
MaxOffset = 4095;
|
2014-09-08 22:43:48 +08:00
|
|
|
break;
|
|
|
|
case AArch64::LDRHui:
|
2015-09-18 22:13:18 +08:00
|
|
|
case AArch64::LDRHHui:
|
2014-09-08 22:43:48 +08:00
|
|
|
case AArch64::STRHui:
|
2015-09-18 22:13:18 +08:00
|
|
|
case AArch64::STRHHui:
|
2014-09-08 22:43:48 +08:00
|
|
|
Scale = Width = 2;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = 0;
|
|
|
|
MaxOffset = 4095;
|
2014-09-08 22:43:48 +08:00
|
|
|
break;
|
2015-09-18 22:15:19 +08:00
|
|
|
case AArch64::LDRBui:
|
|
|
|
case AArch64::LDRBBui:
|
|
|
|
case AArch64::STRBui:
|
|
|
|
case AArch64::STRBBui:
|
|
|
|
Scale = Width = 1;
|
2017-03-18 06:26:55 +08:00
|
|
|
MinOffset = 0;
|
|
|
|
MaxOffset = 4095;
|
2014-09-08 22:43:48 +08:00
|
|
|
break;
|
2016-02-02 04:54:36 +08:00
|
|
|
}
|
2014-09-08 22:43:48 +08:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-11-28 20:00:28 +08:00
|
|
|
static unsigned getOffsetStride(unsigned Opc) {
|
2016-03-19 03:21:02 +08:00
|
|
|
switch (Opc) {
|
|
|
|
default:
|
2018-11-28 20:00:28 +08:00
|
|
|
return 0;
|
2016-03-19 03:21:02 +08:00
|
|
|
case AArch64::LDURQi:
|
2016-04-15 22:58:38 +08:00
|
|
|
case AArch64::STURQi:
|
2018-11-28 20:00:28 +08:00
|
|
|
return 16;
|
2016-03-19 03:21:02 +08:00
|
|
|
case AArch64::LDURXi:
|
|
|
|
case AArch64::LDURDi:
|
2016-04-15 22:58:38 +08:00
|
|
|
case AArch64::STURXi:
|
|
|
|
case AArch64::STURDi:
|
2018-11-28 20:00:28 +08:00
|
|
|
return 8;
|
2016-03-19 03:21:02 +08:00
|
|
|
case AArch64::LDURWi:
|
|
|
|
case AArch64::LDURSi:
|
|
|
|
case AArch64::LDURSWi:
|
2016-04-15 22:58:38 +08:00
|
|
|
case AArch64::STURWi:
|
|
|
|
case AArch64::STURSi:
|
2018-11-28 20:00:28 +08:00
|
|
|
return 4;
|
2016-03-19 03:21:02 +08:00
|
|
|
}
|
2018-11-28 20:00:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Scale the unscaled offsets. Returns false if the unscaled offset can't be
|
|
|
|
// scaled.
|
|
|
|
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
|
|
|
|
unsigned OffsetStride = getOffsetStride(Opc);
|
|
|
|
if (OffsetStride == 0)
|
|
|
|
return false;
|
2016-03-19 03:21:02 +08:00
|
|
|
// If the byte-offset isn't a multiple of the stride, we can't scale this
|
|
|
|
// offset.
|
|
|
|
if (Offset % OffsetStride != 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Convert the byte-offset used by unscaled into an "element" offset used
|
|
|
|
// by the scaled pair load/store instructions.
|
|
|
|
Offset /= OffsetStride;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-11-28 20:00:28 +08:00
|
|
|
// Unscale the scaled offsets. Returns false if the scaled offset can't be
|
|
|
|
// unscaled.
|
|
|
|
static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
|
|
|
|
unsigned OffsetStride = getOffsetStride(Opc);
|
|
|
|
if (OffsetStride == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Convert the "element" offset used by scaled pair load/store instructions
|
|
|
|
// into the byte-offset used by unscaled.
|
|
|
|
Offset *= OffsetStride;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-03-19 03:21:02 +08:00
|
|
|
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
|
|
|
|
if (FirstOpc == SecondOpc)
|
|
|
|
return true;
|
|
|
|
// We can also pair sign-ext and zero-ext instructions.
|
|
|
|
switch (FirstOpc) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case AArch64::LDRWui:
|
|
|
|
case AArch64::LDURWi:
|
|
|
|
return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
|
|
|
|
case AArch64::LDRSWui:
|
|
|
|
case AArch64::LDURSWi:
|
|
|
|
return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
|
|
|
|
}
|
|
|
|
// These instructions can't be paired based on their opcodes.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-11-28 20:00:28 +08:00
|
|
|
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
|
|
|
|
int64_t Offset1, unsigned Opcode1, int FI2,
|
|
|
|
int64_t Offset2, unsigned Opcode2) {
|
|
|
|
// Accesses through fixed stack object frame indices may access a different
|
|
|
|
// fixed stack slot. Check that the object offsets + offsets match.
|
|
|
|
if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
|
|
|
|
int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
|
|
|
|
int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
|
2018-11-30 04:03:19 +08:00
|
|
|
assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
|
2018-11-28 20:00:28 +08:00
|
|
|
// Get the byte-offset from the object offset.
|
|
|
|
if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
|
|
|
|
return false;
|
|
|
|
ObjectOffset1 += Offset1;
|
|
|
|
ObjectOffset2 += Offset2;
|
|
|
|
// Get the "element" index in the object.
|
|
|
|
if (!scaleOffset(Opcode1, ObjectOffset1) ||
|
|
|
|
!scaleOffset(Opcode2, ObjectOffset2))
|
|
|
|
return false;
|
2018-11-30 04:03:19 +08:00
|
|
|
return ObjectOffset1 + 1 == ObjectOffset2;
|
2018-11-28 20:00:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return FI1 == FI2;
|
|
|
|
}
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
/// Detect opportunities for ldp/stp formation.
|
|
|
|
///
|
2018-11-28 20:00:20 +08:00
|
|
|
/// Only called for LdSt for which getMemOperandWithOffset returns true.
|
|
|
|
bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
|
|
|
|
MachineOperand &BaseOp2,
|
2016-04-15 22:58:38 +08:00
|
|
|
unsigned NumLoads) const {
|
2018-11-28 20:00:20 +08:00
|
|
|
MachineInstr &FirstLdSt = *BaseOp1.getParent();
|
|
|
|
MachineInstr &SecondLdSt = *BaseOp2.getParent();
|
|
|
|
if (BaseOp1.getType() != BaseOp2.getType())
|
|
|
|
return false;
|
|
|
|
|
2018-12-04 20:24:10 +08:00
|
|
|
assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
|
|
|
|
"Only base registers and frame indices are supported.");
|
2018-11-28 20:00:20 +08:00
|
|
|
|
2018-11-28 20:00:28 +08:00
|
|
|
// Check for both base regs and base FI.
|
2018-11-28 20:00:20 +08:00
|
|
|
if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
|
2017-09-14 06:20:47 +08:00
|
|
|
return false;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Only cluster up to a single pair.
|
|
|
|
if (NumLoads > 1)
|
|
|
|
return false;
|
2016-03-19 03:21:02 +08:00
|
|
|
|
2016-08-12 23:26:00 +08:00
|
|
|
if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
|
|
|
|
return false;
|
|
|
|
|
2016-03-19 03:21:02 +08:00
|
|
|
// Can we pair these instructions based on their opcodes?
|
2016-06-30 08:01:54 +08:00
|
|
|
unsigned FirstOpc = FirstLdSt.getOpcode();
|
|
|
|
unsigned SecondOpc = SecondLdSt.getOpcode();
|
2016-03-19 03:21:02 +08:00
|
|
|
if (!canPairLdStOpc(FirstOpc, SecondOpc))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
2016-03-19 03:21:02 +08:00
|
|
|
|
|
|
|
// Can't merge volatiles or load/stores that have a hint to avoid pair
|
|
|
|
// formation, for example.
|
|
|
|
if (!isCandidateToMergeOrPair(FirstLdSt) ||
|
|
|
|
!isCandidateToMergeOrPair(SecondLdSt))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
2016-03-19 03:21:02 +08:00
|
|
|
|
|
|
|
// isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
|
2016-06-30 08:01:54 +08:00
|
|
|
int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
|
2016-03-19 03:21:02 +08:00
|
|
|
if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
|
|
|
|
return false;
|
|
|
|
|
2016-06-30 08:01:54 +08:00
|
|
|
int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
|
2016-03-19 03:21:02 +08:00
|
|
|
if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Pairwise instructions have a 7-bit signed offset field.
|
|
|
|
if (Offset1 > 63 || Offset1 < -64)
|
|
|
|
return false;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// The caller should already have ordered First/SecondLdSt by offset.
|
2018-11-28 20:00:28 +08:00
|
|
|
// Note: except for non-equal frame index bases
|
|
|
|
if (BaseOp1.isFI()) {
|
2018-11-30 04:03:19 +08:00
|
|
|
assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
|
|
|
|
"Caller should have ordered offsets.");
|
|
|
|
|
2018-11-28 20:00:28 +08:00
|
|
|
const MachineFrameInfo &MFI =
|
|
|
|
FirstLdSt.getParent()->getParent()->getFrameInfo();
|
|
|
|
return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
|
|
|
|
BaseOp2.getIndex(), Offset2, SecondOpc);
|
|
|
|
}
|
|
|
|
|
2018-11-30 04:03:19 +08:00
|
|
|
assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
|
|
|
|
"Caller should have ordered offsets.");
|
|
|
|
|
2016-03-19 03:21:02 +08:00
|
|
|
return Offset1 + 1 == Offset2;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
|
|
|
|
unsigned Reg, unsigned SubIdx,
|
|
|
|
unsigned State,
|
|
|
|
const TargetRegisterInfo *TRI) {
|
|
|
|
if (!SubIdx)
|
|
|
|
return MIB.addReg(Reg, State);
|
|
|
|
|
|
|
|
if (TargetRegisterInfo::isPhysicalRegister(Reg))
|
|
|
|
return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
|
|
|
|
return MIB.addReg(Reg, State, SubIdx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
|
|
|
|
unsigned NumRegs) {
|
|
|
|
// We really want the positive remainder mod 32 here, that happens to be
|
|
|
|
// easily obtainable with a mask.
|
|
|
|
return ((DestReg - SrcReg) & 0x1f) < NumRegs;
|
|
|
|
}
|
|
|
|
|
2017-07-28 11:21:58 +08:00
|
|
|
void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I,
                                        const DebugLoc &DL, unsigned DestReg,
                                        unsigned SrcReg, bool KillSrc,
                                        unsigned Opcode,
                                        ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}

void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator I,
                                       DebugLoc DL, unsigned DestReg,
                                       unsigned SrcReg, bool KillSrc,
                                       unsigned Opcode, unsigned ZeroReg,
                                       llvm::ArrayRef<unsigned> Indices) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned NumRegs = Indices.size();

#ifndef NDEBUG
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
         "GPR reg sequences should not be able to overlap");
#endif

  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    MIB.addReg(ZeroReg);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
    MIB.addImm(0);
  }
}

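// Illustrative example (added commentary, not from the original source):
// copying the sequential pair X2_X3 into X0_X1 with the XSeqPairs variant of
// the helper above expands to
//   ORRXrs %x0, %xzr, %x2, 0
//   ORRXrs %x1, %xzr, %x3, 0
// i.e. one ORR-with-zero (a plain register move) per 64-bit lane.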
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
2016-06-12 23:39:02 +08:00
|
|
|
MachineBasicBlock::iterator I,
|
|
|
|
const DebugLoc &DL, unsigned DestReg,
|
|
|
|
unsigned SrcReg, bool KillSrc) const {
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::GPR32spRegClass.contains(DestReg) &&
|
|
|
|
(AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
|
2015-03-19 04:37:30 +08:00
|
|
|
const TargetRegisterInfo *TRI = &getRegisterInfo();
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
|
2014-03-29 18:18:08 +08:00
|
|
|
// If either operand is WSP, expand to ADD #0.
|
|
|
|
if (Subtarget.hasZeroCycleRegMove()) {
|
|
|
|
// Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
|
2014-05-24 20:50:23 +08:00
|
|
|
unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
|
|
|
|
&AArch64::GPR64spRegClass);
|
|
|
|
unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
|
|
|
|
&AArch64::GPR64spRegClass);
|
2014-03-29 18:18:08 +08:00
|
|
|
// This instruction is reading and writing X registers. This may upset
|
|
|
|
// the register scavenger and machine verifier, so we need to indicate
|
|
|
|
// that we are reading an undefined value from SrcRegX, but a proper
|
|
|
|
// value from SrcReg.
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addReg(SrcRegX, RegState::Undef)
|
|
|
|
.addImm(0)
|
2014-05-24 20:50:23 +08:00
|
|
|
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
|
2014-03-29 18:18:08 +08:00
|
|
|
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
|
|
|
|
} else {
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc))
|
|
|
|
.addImm(0)
|
2014-05-24 20:50:23 +08:00
|
|
|
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
2018-09-29 03:05:09 +08:00
|
|
|
} else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
|
2017-07-28 11:21:58 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
|
|
|
|
.addImm(0)
|
|
|
|
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
|
2014-03-29 18:18:08 +08:00
|
|
|
} else {
|
|
|
|
if (Subtarget.hasZeroCycleRegMove()) {
|
|
|
|
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
|
2014-05-24 20:50:23 +08:00
|
|
|
unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
|
|
|
|
&AArch64::GPR64spRegClass);
|
|
|
|
unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
|
|
|
|
&AArch64::GPR64spRegClass);
|
2014-03-29 18:18:08 +08:00
|
|
|
// This instruction is reading and writing X registers. This may upset
|
|
|
|
// the register scavenger and machine verifier, so we need to indicate
|
|
|
|
// that we are reading an undefined value from SrcRegX, but a proper
|
|
|
|
// value from SrcReg.
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
|
|
|
|
.addReg(AArch64::XZR)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addReg(SrcRegX, RegState::Undef)
|
|
|
|
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
|
|
|
|
} else {
|
|
|
|
// Otherwise, expand to ORR WZR.
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
|
|
|
|
.addReg(AArch64::WZR)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::GPR64spRegClass.contains(DestReg) &&
|
|
|
|
(AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
|
|
|
|
if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
|
2014-03-29 18:18:08 +08:00
|
|
|
// If either operand is SP, expand to ADD #0.
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc))
|
|
|
|
.addImm(0)
|
2014-05-24 20:50:23 +08:00
|
|
|
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
|
2018-09-29 03:05:09 +08:00
|
|
|
} else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
|
2017-07-28 11:21:58 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
|
|
|
|
.addImm(0)
|
|
|
|
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
|
2014-03-29 18:18:08 +08:00
|
|
|
} else {
|
|
|
|
// Otherwise, expand to ORR XZR.
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
|
|
|
|
.addReg(AArch64::XZR)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy a DDDD register quad by copying the individual sub-registers.
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::DDDDRegClass.contains(DestReg) &&
|
|
|
|
AArch64::DDDDRegClass.contains(SrcReg)) {
|
2017-07-28 11:21:58 +08:00
|
|
|
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
|
|
|
|
AArch64::dsub2, AArch64::dsub3};
|
2014-05-24 20:50:23 +08:00
|
|
|
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
|
2014-03-29 18:18:08 +08:00
|
|
|
Indices);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy a DDD register triple by copying the individual sub-registers.
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::DDDRegClass.contains(DestReg) &&
|
|
|
|
AArch64::DDDRegClass.contains(SrcReg)) {
|
2017-07-28 11:21:58 +08:00
|
|
|
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
|
|
|
|
AArch64::dsub2};
|
2014-05-24 20:50:23 +08:00
|
|
|
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
|
2014-03-29 18:18:08 +08:00
|
|
|
Indices);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy a DD register pair by copying the individual sub-registers.
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::DDRegClass.contains(DestReg) &&
|
|
|
|
AArch64::DDRegClass.contains(SrcReg)) {
|
2017-07-28 11:21:58 +08:00
|
|
|
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
|
2014-05-24 20:50:23 +08:00
|
|
|
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
|
2014-03-29 18:18:08 +08:00
|
|
|
Indices);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy a QQQQ register quad by copying the individual sub-registers.
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::QQQQRegClass.contains(DestReg) &&
|
|
|
|
AArch64::QQQQRegClass.contains(SrcReg)) {
|
2017-07-28 11:21:58 +08:00
|
|
|
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
|
|
|
|
AArch64::qsub2, AArch64::qsub3};
|
2014-05-24 20:50:23 +08:00
|
|
|
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
|
2014-03-29 18:18:08 +08:00
|
|
|
Indices);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy a QQQ register triple by copying the individual sub-registers.
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::QQQRegClass.contains(DestReg) &&
|
|
|
|
AArch64::QQQRegClass.contains(SrcReg)) {
|
2017-07-28 11:21:58 +08:00
|
|
|
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
|
|
|
|
AArch64::qsub2};
|
2014-05-24 20:50:23 +08:00
|
|
|
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
|
2014-03-29 18:18:08 +08:00
|
|
|
Indices);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy a QQ register pair by copying the individual sub-registers.
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::QQRegClass.contains(DestReg) &&
|
|
|
|
AArch64::QQRegClass.contains(SrcReg)) {
|
2017-07-28 11:21:58 +08:00
|
|
|
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
|
2014-05-24 20:50:23 +08:00
|
|
|
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
|
2014-03-29 18:18:08 +08:00
|
|
|
Indices);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-02-07 18:35:34 +08:00
|
|
|
if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
|
|
|
|
AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
|
|
|
|
static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
|
|
|
|
copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
|
|
|
|
AArch64::XZR, Indices);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
|
|
|
|
AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
|
|
|
|
static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
|
|
|
|
copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
|
|
|
|
AArch64::WZR, Indices);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::FPR128RegClass.contains(DestReg) &&
|
|
|
|
AArch64::FPR128RegClass.contains(SrcReg)) {
|
2017-07-28 11:21:58 +08:00
|
|
|
if (Subtarget.hasNEON()) {
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
|
|
|
|
.addReg(SrcReg)
|
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
2014-04-23 14:22:48 +08:00
|
|
|
} else {
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::STRQpre))
|
2017-07-28 11:21:58 +08:00
|
|
|
.addReg(AArch64::SP, RegState::Define)
|
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc))
|
|
|
|
.addReg(AArch64::SP)
|
|
|
|
.addImm(-16);
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
|
2017-07-28 11:21:58 +08:00
|
|
|
.addReg(AArch64::SP, RegState::Define)
|
|
|
|
.addReg(DestReg, RegState::Define)
|
|
|
|
.addReg(AArch64::SP)
|
|
|
|
.addImm(16);
|
2014-04-23 14:22:48 +08:00
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::FPR64RegClass.contains(DestReg) &&
|
|
|
|
AArch64::FPR64RegClass.contains(SrcReg)) {
|
2017-07-28 11:21:58 +08:00
|
|
|
if (Subtarget.hasNEON()) {
|
2015-03-19 04:37:30 +08:00
|
|
|
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
|
|
|
|
&AArch64::FPR128RegClass);
|
|
|
|
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
|
|
|
|
&AArch64::FPR128RegClass);
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
|
|
|
|
.addReg(SrcReg)
|
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
2014-04-23 14:22:48 +08:00
|
|
|
} else {
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
|
2014-04-23 14:22:48 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::FPR32RegClass.contains(DestReg) &&
|
|
|
|
AArch64::FPR32RegClass.contains(SrcReg)) {
|
2017-07-28 11:21:58 +08:00
|
|
|
if (Subtarget.hasNEON()) {
|
2015-03-19 04:37:30 +08:00
|
|
|
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
|
|
|
|
&AArch64::FPR128RegClass);
|
|
|
|
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
|
|
|
|
&AArch64::FPR128RegClass);
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
|
|
|
|
.addReg(SrcReg)
|
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
2014-04-23 14:22:48 +08:00
|
|
|
} else {
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
|
2014-04-23 14:22:48 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::FPR16RegClass.contains(DestReg) &&
|
|
|
|
AArch64::FPR16RegClass.contains(SrcReg)) {
|
2017-07-28 11:21:58 +08:00
|
|
|
if (Subtarget.hasNEON()) {
|
2015-03-19 04:37:30 +08:00
|
|
|
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
|
|
|
|
&AArch64::FPR128RegClass);
|
|
|
|
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
|
|
|
|
&AArch64::FPR128RegClass);
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
|
|
|
|
.addReg(SrcReg)
|
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
2014-04-23 14:22:48 +08:00
|
|
|
} else {
|
2015-03-19 04:37:30 +08:00
|
|
|
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
|
|
|
|
&AArch64::FPR32RegClass);
|
|
|
|
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
|
|
|
|
&AArch64::FPR32RegClass);
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
|
2014-04-23 14:22:48 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::FPR8RegClass.contains(DestReg) &&
|
|
|
|
AArch64::FPR8RegClass.contains(SrcReg)) {
|
2017-07-28 11:21:58 +08:00
|
|
|
if (Subtarget.hasNEON()) {
|
2015-03-19 04:37:30 +08:00
|
|
|
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
|
2014-05-24 20:50:23 +08:00
|
|
|
&AArch64::FPR128RegClass);
|
2015-03-19 04:37:30 +08:00
|
|
|
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
|
|
|
|
&AArch64::FPR128RegClass);
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
|
|
|
|
.addReg(SrcReg)
|
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
2014-04-23 14:22:48 +08:00
|
|
|
} else {
|
2015-03-19 04:37:30 +08:00
|
|
|
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
|
|
|
|
&AArch64::FPR32RegClass);
|
|
|
|
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
|
|
|
|
&AArch64::FPR32RegClass);
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
|
2014-04-23 14:22:48 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copies between GPR64 and FPR64.
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::FPR64RegClass.contains(DestReg) &&
|
|
|
|
AArch64::GPR64RegClass.contains(SrcReg)) {
|
|
|
|
BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
|
|
|
return;
|
|
|
|
}
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::GPR64RegClass.contains(DestReg) &&
|
|
|
|
AArch64::FPR64RegClass.contains(SrcReg)) {
|
|
|
|
BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// Copies between GPR32 and FPR32.
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::FPR32RegClass.contains(DestReg) &&
|
|
|
|
AArch64::GPR32RegClass.contains(SrcReg)) {
|
|
|
|
BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
|
|
|
return;
|
|
|
|
}
|
2014-05-24 20:50:23 +08:00
|
|
|
if (AArch64::GPR32RegClass.contains(DestReg) &&
|
|
|
|
AArch64::FPR32RegClass.contains(SrcReg)) {
|
|
|
|
BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-05-27 20:16:02 +08:00
|
|
|
if (DestReg == AArch64::NZCV) {
|
|
|
|
assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
|
|
|
|
BuildMI(MBB, I, DL, get(AArch64::MSR))
|
2017-07-28 11:21:58 +08:00
|
|
|
.addImm(AArch64SysReg::NZCV)
|
|
|
|
.addReg(SrcReg, getKillRegState(KillSrc))
|
|
|
|
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
|
2014-05-27 20:16:02 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (SrcReg == AArch64::NZCV) {
|
|
|
|
assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
|
2016-04-23 02:46:17 +08:00
|
|
|
BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
|
2017-07-28 11:21:58 +08:00
|
|
|
.addImm(AArch64SysReg::NZCV)
|
|
|
|
.addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
|
2014-05-27 20:16:02 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm_unreachable("unimplemented reg-to-reg copy");
|
2014-03-29 18:18:08 +08:00
|
|
|
}

static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
                                    MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsertBefore,
                                    const MCInstrDesc &MCID,
                                    unsigned SrcReg, bool IsKill,
                                    unsigned SubIdx0, unsigned SubIdx1, int FI,
                                    MachineMemOperand *MMO) {
  unsigned SrcReg0 = SrcReg;
  unsigned SrcReg1 = SrcReg;
  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
    SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
    SubIdx0 = 0;
    SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
    SubIdx1 = 0;
  }
  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
      .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
      .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
}

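// Illustrative example (added commentary, not from the original source):
// spilling a virtual WSeqPairs register %0 to %stack.3 with the helper above
// emits
//   STPWi %0.sube32, %0.subo32, %stack.3, 0
// while a physical pair such as W0_W1 is first resolved to W0 and W1, so no
// sub-register indices remain on the store operands.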
void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPWi), SrcReg, isKill,
                              AArch64::sube32, AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPXi), SrcReg, isKill,
                              AArch64::sube64, AArch64::subo64, FI, MMO);
      return;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

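// Note (added commentary, not from the original source): the ST1/LD1
// multi-vector opcodes used above for the D- and Q-tuple classes have no
// immediate offset field, which is why Offset is cleared for them; later,
// isAArch64FrameOffsetLegal reports those opcodes as
// AArch64FrameOffsetCannotUpdate so the frame offset is never folded into
// them.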
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertBefore,
                                     const MCInstrDesc &MCID,
                                     unsigned DestReg, unsigned SubIdx0,
                                     unsigned SubIdx1, int FI,
                                     MachineMemOperand *MMO) {
  unsigned DestReg0 = DestReg;
  unsigned DestReg1 = DestReg;
  bool IsUndef = true;
  if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
    DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
    SubIdx0 = 0;
    DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
    SubIdx1 = 0;
    IsUndef = false;
  }
  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
      .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
      .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
}

void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPWi), DestReg, AArch64::sube32,
                               AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPXi), DestReg, AArch64::sube64,
                               AArch64::subo64, FI, MMO);
      return;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg, int Offset,
                           const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV,
                           bool NeedsWinCFI) {
  if (DestReg == SrcReg && Offset == 0)
    return;

  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
         "SP increment/decrement not 16-byte aligned");

  bool isSub = Offset < 0;
  if (isSub)
    Offset = -Offset;

  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  unsigned Opc;
  if (SetNZCV)
    Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
  else
    Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
  const unsigned MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (((unsigned)Offset) >= (1 << ShiftSize)) {
    unsigned ThisVal;
    if (((unsigned)Offset) > MaxEncodableValue) {
      ThisVal = MaxEncodableValue;
    } else {
      ThisVal = Offset & MaxEncodableValue;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");
    BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
        .addReg(SrcReg)
        .addImm(ThisVal >> ShiftSize)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
        .setMIFlag(Flag);

    if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP)
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
          .addImm(ThisVal)
          .setMIFlag(Flag);

    SrcReg = DestReg;
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg)
      .addImm(Offset)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(Flag);

  if (NeedsWinCFI) {
    if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
        (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
      if (Offset == 0)
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
            setMIFlag(Flag);
      else
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
            addImm(Offset).setMIFlag(Flag);
    } else if (DestReg == AArch64::SP) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
          addImm(Offset).setMIFlag(Flag);
    }
  }
}

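// Illustrative example (added commentary, not from the original source):
// emitFrameOffset with Offset = 0x12345 cannot encode the value in a single
// 12-bit immediate, so the loop above splits it into two additions:
//   ADD Xd, Xn, #0x12, LSL #12
//   ADD Xd, Xd, #0x345
// the first consuming the bits above 4095 and the second the remainder.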
2015-06-09 04:09:58 +08:00
|
|
|
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
|
2016-06-30 08:01:54 +08:00
|
|
|
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
|
2016-05-10 16:09:37 +08:00
|
|
|
MachineBasicBlock::iterator InsertPt, int FrameIndex,
|
|
|
|
LiveIntervals *LIS) const {
|
2014-03-29 18:18:08 +08:00
|
|
|
// This is a bit of a hack. Consider this instruction:
|
|
|
|
//
|
2017-12-07 18:40:31 +08:00
|
|
|
// %0 = COPY %sp; GPR64all:%0
|
2014-03-29 18:18:08 +08:00
|
|
|
//
|
|
|
|
// We explicitly chose GPR64all for the virtual register so such a copy might
|
|
|
|
// be eliminated by RegisterCoalescer. However, that may not be possible, and
|
2017-11-30 20:12:19 +08:00
|
|
|
// %0 may even spill. We can't spill %sp, and since it is in the GPR64all
|
2014-03-29 18:18:08 +08:00
|
|
|
// register class, TargetInstrInfo::foldMemoryOperand() is going to try.
|
|
|
|
//
|
2017-11-30 20:12:19 +08:00
|
|
|
// To prevent that, we are going to constrain the %0 register class here.
|
2014-03-29 18:18:08 +08:00
|
|
|
//
|
|
|
|
// <rdar://problem/11522048>
|
|
|
|
//
|
2017-01-06 05:51:42 +08:00
|
|
|
if (MI.isFullCopy()) {
|
2016-06-30 08:01:54 +08:00
|
|
|
unsigned DstReg = MI.getOperand(0).getReg();
|
|
|
|
unsigned SrcReg = MI.getOperand(1).getReg();
|
2014-05-24 20:50:23 +08:00
|
|
|
if (SrcReg == AArch64::SP &&
|
|
|
|
TargetRegisterInfo::isVirtualRegister(DstReg)) {
|
|
|
|
MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
|
2014-04-25 13:30:21 +08:00
|
|
|
return nullptr;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
2014-05-24 20:50:23 +08:00
|
|
|
if (DstReg == AArch64::SP &&
|
|
|
|
TargetRegisterInfo::isVirtualRegister(SrcReg)) {
|
|
|
|
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
|
2014-04-25 13:30:21 +08:00
|
|
|
return nullptr;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-06 05:51:42 +08:00
|
|
|
// Handle the case where a copy is being spilled or filled but the source
|
2016-12-02 07:43:55 +08:00
|
|
|
// and destination register class don't match. For example:
|
2016-11-30 02:28:32 +08:00
|
|
|
//
|
2017-12-07 18:40:31 +08:00
|
|
|
// %0 = COPY %xzr; GPR64common:%0
|
2016-11-30 02:28:32 +08:00
|
|
|
//
|
|
|
|
// In this case we can still safely fold away the COPY and generate the
|
|
|
|
// following spill code:
|
|
|
|
//
|
2017-12-16 00:33:45 +08:00
|
|
|
// STRXui %xzr, %stack.0
|
2016-11-30 02:28:32 +08:00
|
|
|
//
|
2016-12-02 07:43:55 +08:00
|
|
|
// This also eliminates spilled cross register class COPYs (e.g. between x and
|
|
|
|
// d regs) of the same size. For example:
|
|
|
|
//
|
2017-12-07 18:40:31 +08:00
|
|
|
// %0 = COPY %1; GPR64:%0, FPR64:%1
|
2016-12-02 07:43:55 +08:00
|
|
|
//
|
2017-01-06 05:51:42 +08:00
|
|
|
// will be filled as
|
2016-12-02 07:43:55 +08:00
|
|
|
//
|
2017-11-30 20:12:19 +08:00
|
|
|
// LDRDui %0, fi<#0>
|
2016-12-02 07:43:55 +08:00
|
|
|
//
|
|
|
|
// instead of
|
|
|
|
//
|
2017-11-30 20:12:19 +08:00
|
|
|
// LDRXui %Temp, fi<#0>
|
|
|
|
// %0 = FMOV %Temp
|
2016-12-02 07:43:55 +08:00
|
|
|
//
|
2017-01-06 05:51:42 +08:00
|
|
|
if (MI.isCopy() && Ops.size() == 1 &&
|
2016-12-02 07:43:55 +08:00
|
|
|
// Make sure we're only folding the explicit COPY defs/uses.
|
|
|
|
(Ops[0] == 0 || Ops[0] == 1)) {
|
2017-01-06 05:51:42 +08:00
|
|
|
bool IsSpill = Ops[0] == 0;
|
|
|
|
bool IsFill = !IsSpill;
|
2016-12-02 07:43:55 +08:00
|
|
|
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
|
|
|
|
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
2016-11-30 02:28:32 +08:00
|
|
|
MachineBasicBlock &MBB = *MI.getParent();
|
2016-12-02 07:43:55 +08:00
|
|
|
const MachineOperand &DstMO = MI.getOperand(0);
|
2016-11-30 02:28:32 +08:00
|
|
|
const MachineOperand &SrcMO = MI.getOperand(1);
|
2016-12-02 07:43:55 +08:00
|
|
|
unsigned DstReg = DstMO.getReg();
|
2016-11-30 02:28:32 +08:00
|
|
|
unsigned SrcReg = SrcMO.getReg();
|
2017-01-06 05:51:42 +08:00
|
|
|
// This is slightly expensive to compute for physical regs since
|
|
|
|
// getMinimalPhysRegClass is slow.
|
2016-12-02 07:43:55 +08:00
|
|
|
auto getRegClass = [&](unsigned Reg) {
|
|
|
|
return TargetRegisterInfo::isVirtualRegister(Reg)
|
|
|
|
? MRI.getRegClass(Reg)
|
|
|
|
: TRI.getMinimalPhysRegClass(Reg);
|
|
|
|
};
|
2017-01-06 05:51:42 +08:00
|
|
|
|
|
|
|
if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
|
2017-04-25 02:55:33 +08:00
|
|
|
assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
|
2017-07-28 11:21:58 +08:00
|
|
|
TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
|
2017-01-06 05:51:42 +08:00
|
|
|
"Mismatched register size in non subreg COPY");
|
|
|
|
if (IsSpill)
|
2016-12-02 07:43:55 +08:00
|
|
|
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
|
2017-01-06 05:51:42 +08:00
|
|
|
getRegClass(SrcReg), &TRI);
|
2016-12-02 07:43:55 +08:00
|
|
|
else
|
2017-01-06 05:51:42 +08:00
|
|
|
loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
|
|
|
|
getRegClass(DstReg), &TRI);
|
2016-11-30 02:28:32 +08:00
|
|
|
return &*--InsertPt;
|
|
|
|
}
|
2017-01-06 05:51:42 +08:00
|
|
|
|
|
|
|
// Handle cases like spilling def of:
|
|
|
|
//
|
2017-11-30 20:12:19 +08:00
|
|
|
// %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
|
2017-01-06 05:51:42 +08:00
|
|
|
//
|
|
|
|
// where the physical register source can be widened and stored to the full
|
|
|
|
// virtual reg destination stack slot, in this case producing:
|
|
|
|
//
|
2017-12-16 00:33:45 +08:00
|
|
|
// STRXui %xzr, %stack.0
|
2017-01-06 05:51:42 +08:00
|
|
|
//
|
|
|
|
if (IsSpill && DstMO.isUndef() &&
|
|
|
|
TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
|
|
|
|
assert(SrcMO.getSubReg() == 0 &&
|
|
|
|
"Unexpected subreg on physical register");
|
|
|
|
const TargetRegisterClass *SpillRC;
|
|
|
|
unsigned SpillSubreg;
|
|
|
|
switch (DstMO.getSubReg()) {
|
|
|
|
default:
|
|
|
|
SpillRC = nullptr;
|
|
|
|
break;
|
|
|
|
case AArch64::sub_32:
|
|
|
|
case AArch64::ssub:
|
|
|
|
if (AArch64::GPR32RegClass.contains(SrcReg)) {
|
|
|
|
SpillRC = &AArch64::GPR64RegClass;
|
|
|
|
SpillSubreg = AArch64::sub_32;
|
|
|
|
} else if (AArch64::FPR32RegClass.contains(SrcReg)) {
|
|
|
|
SpillRC = &AArch64::FPR64RegClass;
|
|
|
|
SpillSubreg = AArch64::ssub;
|
|
|
|
} else
|
|
|
|
SpillRC = nullptr;
|
|
|
|
break;
|
|
|
|
case AArch64::dsub:
|
|
|
|
if (AArch64::FPR64RegClass.contains(SrcReg)) {
|
|
|
|
SpillRC = &AArch64::FPR128RegClass;
|
|
|
|
SpillSubreg = AArch64::dsub;
|
|
|
|
} else
|
|
|
|
SpillRC = nullptr;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (SpillRC)
|
|
|
|
if (unsigned WidenedSrcReg =
|
|
|
|
TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
|
|
|
|
storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
|
|
|
|
FrameIndex, SpillRC, &TRI);
|
|
|
|
return &*--InsertPt;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle cases like filling use of:
|
|
|
|
//
|
2017-11-30 20:12:19 +08:00
|
|
|
// %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
|
2017-01-06 05:51:42 +08:00
|
|
|
//
|
|
|
|
// where we can load the full virtual reg source stack slot, into the subreg
|
|
|
|
// destination, in this case producing:
|
|
|
|
//
|
2017-12-16 00:33:45 +08:00
|
|
|
// LDRWui %0:sub_32<def,read-undef>, %stack.0
|
2017-01-06 05:51:42 +08:00
|
|
|
//
|
|
|
|
if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
|
|
|
|
const TargetRegisterClass *FillRC;
|
|
|
|
switch (DstMO.getSubReg()) {
|
|
|
|
default:
|
|
|
|
FillRC = nullptr;
|
|
|
|
break;
|
|
|
|
case AArch64::sub_32:
|
|
|
|
FillRC = &AArch64::GPR32RegClass;
|
|
|
|
break;
|
|
|
|
case AArch64::ssub:
|
|
|
|
FillRC = &AArch64::FPR32RegClass;
|
|
|
|
break;
|
|
|
|
case AArch64::dsub:
|
|
|
|
FillRC = &AArch64::FPR64RegClass;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FillRC) {
|
2017-04-25 02:55:33 +08:00
|
|
|
assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
|
|
|
|
TRI.getRegSizeInBits(*FillRC) &&
|
2017-01-06 05:51:42 +08:00
|
|
|
"Mismatched regclass size on folded subreg COPY");
|
|
|
|
loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
|
|
|
|
MachineInstr &LoadMI = *--InsertPt;
|
|
|
|
MachineOperand &LoadDst = LoadMI.getOperand(0);
|
|
|
|
assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
|
|
|
|
LoadDst.setSubReg(DstMO.getSubReg());
|
|
|
|
LoadDst.setIsUndef();
|
|
|
|
return &LoadMI;
|
|
|
|
}
|
|
|
|
}
|
2016-11-30 02:28:32 +08:00
|
|
|
}
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Cannot fold.
|
2014-04-25 13:30:21 +08:00
|
|
|
return nullptr;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
|
|
|
|
bool *OutUseUnscaledOp,
|
|
|
|
unsigned *OutUnscaledOp,
|
|
|
|
int *EmittableOffset) {
|
2014-03-29 18:18:08 +08:00
|
|
|
int Scale = 1;
|
|
|
|
bool IsSigned = false;
|
|
|
|
// The ImmIdx should be changed case by case if it is not 2.
|
|
|
|
unsigned ImmIdx = 2;
|
|
|
|
unsigned UnscaledOp = 0;
|
|
|
|
// Set output values in case of early exit.
|
|
|
|
if (EmittableOffset)
|
|
|
|
*EmittableOffset = 0;
|
|
|
|
if (OutUseUnscaledOp)
|
|
|
|
*OutUseUnscaledOp = false;
|
|
|
|
if (OutUnscaledOp)
|
|
|
|
*OutUnscaledOp = 0;
|
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
default:
|
2014-06-18 13:05:13 +08:00
|
|
|
llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
|
2014-03-29 18:18:08 +08:00
|
|
|
// Vector spills/fills can't take an immediate offset.
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LD1Twov2d:
|
|
|
|
case AArch64::LD1Threev2d:
|
|
|
|
case AArch64::LD1Fourv2d:
|
|
|
|
case AArch64::LD1Twov1d:
|
|
|
|
case AArch64::LD1Threev1d:
|
|
|
|
case AArch64::LD1Fourv1d:
|
|
|
|
case AArch64::ST1Twov2d:
|
|
|
|
case AArch64::ST1Threev2d:
|
|
|
|
case AArch64::ST1Fourv2d:
|
|
|
|
case AArch64::ST1Twov1d:
|
|
|
|
case AArch64::ST1Threev1d:
|
|
|
|
case AArch64::ST1Fourv1d:
|
|
|
|
return AArch64FrameOffsetCannotUpdate;
|
|
|
|
case AArch64::PRFMui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 8;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::PRFUMi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRXui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 8;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURXi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRWui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 4;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURWi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRBui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 1;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURBi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRHui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 2;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURHi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRSui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 4;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURSi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRDui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 8;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURDi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRQui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 16;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURQi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRBBui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 1;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURBBi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRHHui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 2;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURHHi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRSBXui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 1;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURSBXi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRSBWui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 1;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURSBWi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRSHXui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 2;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURSHXi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRSHWui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 2;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURSHWi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDRSWui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 4;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::LDURSWi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::STRXui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 8;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::STURXi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::STRWui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 4;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::STURWi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::STRBui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 1;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::STURBi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::STRHui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 2;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::STURHi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::STRSui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 4;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::STURSi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::STRDui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 8;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::STURDi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::STRQui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 16;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::STURQi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::STRBBui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 1;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::STURBBi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::STRHHui:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 2;
|
2014-05-24 20:50:23 +08:00
|
|
|
UnscaledOp = AArch64::STURHHi;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDPXi:
|
|
|
|
case AArch64::LDPDi:
|
|
|
|
case AArch64::STPXi:
|
|
|
|
case AArch64::STPDi:
|
2015-09-10 09:54:43 +08:00
|
|
|
case AArch64::LDNPXi:
|
|
|
|
case AArch64::LDNPDi:
|
|
|
|
case AArch64::STNPXi:
|
|
|
|
case AArch64::STNPDi:
|
|
|
|
ImmIdx = 3;
|
2014-03-29 18:18:08 +08:00
|
|
|
IsSigned = true;
|
|
|
|
Scale = 8;
|
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDPQi:
|
|
|
|
case AArch64::STPQi:
|
2015-09-10 09:54:43 +08:00
|
|
|
case AArch64::LDNPQi:
|
|
|
|
case AArch64::STNPQi:
|
|
|
|
ImmIdx = 3;
|
2014-03-29 18:18:08 +08:00
|
|
|
IsSigned = true;
|
|
|
|
Scale = 16;
|
|
|
|
break;
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDPWi:
|
|
|
|
case AArch64::LDPSi:
|
|
|
|
case AArch64::STPWi:
|
|
|
|
case AArch64::STPSi:
|
2015-09-10 09:54:43 +08:00
|
|
|
case AArch64::LDNPWi:
|
|
|
|
case AArch64::LDNPSi:
|
|
|
|
case AArch64::STNPWi:
|
|
|
|
case AArch64::STNPSi:
|
|
|
|
ImmIdx = 3;
|
2014-03-29 18:18:08 +08:00
|
|
|
IsSigned = true;
|
|
|
|
Scale = 4;
|
|
|
|
break;
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::LDURXi:
|
|
|
|
case AArch64::LDURWi:
|
|
|
|
case AArch64::LDURBi:
|
|
|
|
case AArch64::LDURHi:
|
|
|
|
case AArch64::LDURSi:
|
|
|
|
case AArch64::LDURDi:
|
|
|
|
case AArch64::LDURQi:
|
|
|
|
case AArch64::LDURHHi:
|
|
|
|
case AArch64::LDURBBi:
|
|
|
|
case AArch64::LDURSBXi:
|
|
|
|
case AArch64::LDURSBWi:
|
|
|
|
case AArch64::LDURSHXi:
|
|
|
|
case AArch64::LDURSHWi:
|
|
|
|
case AArch64::LDURSWi:
|
|
|
|
case AArch64::STURXi:
|
|
|
|
case AArch64::STURWi:
|
|
|
|
case AArch64::STURBi:
|
|
|
|
case AArch64::STURHi:
|
|
|
|
case AArch64::STURSi:
|
|
|
|
case AArch64::STURDi:
|
|
|
|
case AArch64::STURQi:
|
|
|
|
case AArch64::STURBBi:
|
|
|
|
case AArch64::STURHHi:
|
2014-03-29 18:18:08 +08:00
|
|
|
Scale = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
Offset += MI.getOperand(ImmIdx).getImm() * Scale;
|
|
|
|
|
|
|
|
bool useUnscaledOp = false;
|
|
|
|
// If the offset doesn't match the scale, we rewrite the instruction to
|
|
|
|
// use the unscaled instruction instead. Likewise, if we have a negative
|
|
|
|
// offset (and have an unscaled op to use).
|
|
|
|
if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
|
|
|
|
useUnscaledOp = true;
|
|
|
|
|
|
|
|
// Use an unscaled addressing mode if the instruction has a negative offset
|
|
|
|
// (or if the instruction is already using an unscaled addressing mode).
|
|
|
|
unsigned MaskBits;
|
|
|
|
if (IsSigned) {
|
|
|
|
// ldp/stp instructions.
|
|
|
|
MaskBits = 7;
|
|
|
|
Offset /= Scale;
|
|
|
|
} else if (UnscaledOp == 0 || useUnscaledOp) {
|
|
|
|
MaskBits = 9;
|
|
|
|
IsSigned = true;
|
|
|
|
Scale = 1;
|
|
|
|
} else {
|
|
|
|
MaskBits = 12;
|
|
|
|
IsSigned = false;
|
|
|
|
Offset /= Scale;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Attempt to fold address computation.
|
|
|
|
int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
|
|
|
|
int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
|
|
|
|
if (Offset >= MinOff && Offset <= MaxOff) {
|
|
|
|
if (EmittableOffset)
|
|
|
|
*EmittableOffset = Offset;
|
|
|
|
Offset = 0;
|
|
|
|
} else {
|
|
|
|
int NewOff = Offset < 0 ? MinOff : MaxOff;
|
|
|
|
if (EmittableOffset)
|
|
|
|
*EmittableOffset = NewOff;
|
|
|
|
Offset = (Offset - NewOff) * Scale;
|
|
|
|
}
|
|
|
|
if (OutUseUnscaledOp)
|
|
|
|
*OutUseUnscaledOp = useUnscaledOp;
|
|
|
|
if (OutUnscaledOp)
|
|
|
|
*OutUnscaledOp = UnscaledOp;
|
2014-05-24 20:50:23 +08:00
|
|
|
return AArch64FrameOffsetCanUpdate |
|
|
|
|
(Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
|
2014-03-29 18:18:08 +08:00
|
|
|
}

bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}

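// Illustrative example (added commentary, not from the original source): an
// STRXui whose frame index resolves to an offset that is not a multiple of 8
// cannot use the scaled 12-bit immediate form, so isAArch64FrameOffsetLegal
// selects the unscaled STURXi variant and rewriteAArch64FrameIndex above
// swaps the opcode before rewriting the immediate.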
void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}

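// Illustrative sketch (added commentary, not from the original source) of
// what the MachineCombiner hooks below enable: a multiply whose only user is
// an add or subtract, e.g.
//   %2 = MADDWrrr %0, %1, %wzr   ; plain MUL (accumulator is WZR)
//   %3 = ADDWrr %2, %4
// can be recombined into the single fused instruction
//   %3 = MADDWrrr %0, %1, %4
// provided the multiply result has no other (non-debug) users.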
// AArch64 supports MachineCombiner.
|
2017-07-28 11:21:58 +08:00
|
|
|
bool AArch64InstrInfo::useMachineCombiner() const { return true; }
|
2017-01-06 08:30:53 +08:00
|
|
|
|
2014-08-08 05:40:58 +08:00
|
|
|
// True when Opc sets flag
|
|
|
|
static bool isCombineInstrSettingFlag(unsigned Opc) {
|
|
|
|
switch (Opc) {
|
|
|
|
case AArch64::ADDSWrr:
|
|
|
|
case AArch64::ADDSWri:
|
|
|
|
case AArch64::ADDSXrr:
|
|
|
|
case AArch64::ADDSXri:
|
|
|
|
case AArch64::SUBSWrr:
|
|
|
|
case AArch64::SUBSXrr:
|
|
|
|
// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
|
|
|
|
case AArch64::SUBSWri:
|
|
|
|
case AArch64::SUBSXri:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2017-01-06 08:30:53 +08:00
|
|
|
|
2014-08-08 05:40:58 +08:00
|
|
|
// 32b Opcodes that can be combined with a MUL
|
|
|
|
static bool isCombineInstrCandidate32(unsigned Opc) {
|
|
|
|
switch (Opc) {
|
|
|
|
case AArch64::ADDWrr:
|
|
|
|
case AArch64::ADDWri:
|
|
|
|
case AArch64::SUBWrr:
|
|
|
|
case AArch64::ADDSWrr:
|
|
|
|
case AArch64::ADDSWri:
|
|
|
|
case AArch64::SUBSWrr:
|
|
|
|
// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
|
|
|
|
case AArch64::SUBWri:
|
|
|
|
case AArch64::SUBSWri:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2017-01-06 08:30:53 +08:00
|
|
|
|
2014-08-08 05:40:58 +08:00
|
|
|
// 64b Opcodes that can be combined with a MUL
|
|
|
|
static bool isCombineInstrCandidate64(unsigned Opc) {
|
|
|
|
switch (Opc) {
|
|
|
|
case AArch64::ADDXrr:
|
|
|
|
case AArch64::ADDXri:
|
|
|
|
case AArch64::SUBXrr:
|
|
|
|
case AArch64::ADDSXrr:
|
|
|
|
case AArch64::ADDSXri:
|
|
|
|
case AArch64::SUBSXrr:
|
|
|
|
// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
|
|
|
|
case AArch64::SUBXri:
|
|
|
|
case AArch64::SUBSXri:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}

// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
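    // Fusing the multiply into the add/sub removes the intermediate rounding
    // of the product, so only allow it when the user has opted into relaxed
    // FP semantics (unsafe-fp-math or fp-contract=fast), as checked below.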
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    return (Options.UnsafeFPMath ||
            Options.AllowFPOpFusion == FPOpFusion::Fast);
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

//
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // Must only be used by the user we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
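    // Reassociating FP operations changes the rounding of intermediate
    // results, so only report these as associative/commutative when the
    // user has opted into unsafe FP math.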
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}

/// Find instructions that can be turned into madd.
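/// For example (illustrative), a multiply whose only use is an add:
///   mul  w8, w1, w2            // really MADD w8, w1, w2, wzr
///   add  w0, w8, w3
/// matches MULADDW_OP1 and can later be rewritten as
///   madd w0, w1, w2, w3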
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live bail out.
    if (Cmp_NZCV == -1)
      return false;
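    // NZCV is dead here, so the flag-setting form (e.g. ADDSWrr) can be
    // replaced by its non-flag-setting twin (ADDWrr) before combining.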
    unsigned NewOpc = convertToNonFlagSettingOpc(Root);
    // When opcode can't change bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}

/// Floating-Point Support

/// Find instructions that can be turned into an fmadd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {

  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
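    // Every opcode accepted by isCombineInstrCandidateFP is expected to be
    // handled by one of the cases below.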
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}

/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool AArch64InstrInfo::isThroughputPattern(
    MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FNMULSUBS_OP1:
  case MachineCombinerPattern::FNMULSUBD_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}

/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
/// pattern evaluator stops checking as soon as it finds a faster sequence.

bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}

enum class FMAInstKind { Default, Indexed, Accumulator };

/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind of fma instruction (addressing mode) to be generated
/// \param ReplacedAddend is the result register from the instruction
/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default,
                 const unsigned *ReplacedAddend = nullptr) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  unsigned SrcReg2;
  bool Src2IsKill;
  if (ReplacedAddend) {
    // If we just generated a new addend, we must be its only use.
    SrcReg2 = *ReplacedAddend;
    Src2IsKill = true;
  } else {
    SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
    Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
  }

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);
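
  // Operand order differs per kind: the three-source scalar form
  // (e.g. MADD/FMADD) takes the two multiply operands followed by the addend,
  // while the indexed and accumulator vector forms take the accumulator
  // first, then the multiply operands (plus the lane immediate for the
  // indexed form).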
  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
  InsInstrs.push_back(MIB);
  return MUL;
}

/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
/// \param RC Register class of operands
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
                              const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB =
      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
          .addReg(SrcReg0, getKillRegState(Src0IsKill))
          .addReg(SrcReg1, getKillRegState(Src1IsKill))
          .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}

/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(Imm, BitSize);
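    // The addend can only be materialized this way if it is encodable as a
    // logical immediate for ORR; otherwise no replacement is emitted for
    // this pattern.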
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(-Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // FMUL I=A,B,0
    // FADD R,I,C
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // FMUL I=A,B,0
    // FADD R,C,I
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBD_OP1: {
    // FMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMSUB R,A,B,C // = -C + A*B
    // --- Create(FNMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
      Opc = AArch64::FNMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FNMULSUBS_OP1:
  case MachineCombinerPattern::FNMULSUBD_OP1: {
    // FNMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMADD R,A,B,C // = -A*B - C
    // --- Create(FNMADD);
    if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
      Opc = AArch64::FNMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULSUBD_OP2: {
    // FMUL I=A,B,0
    // FSUB R,C,I
    // ==> FMSUB R,A,B,C (computes C - A*B)
    // --- Create(FMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
      Opc = AArch64::FMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLSv2f32_OP1:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
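    // Root is FSUB R, (A*B), C.  Vector FMLS subtracts the product from the
    // accumulator, which is not what is needed here, so instead negate C into
    // a fresh register with FNEG and fold the multiply as an FMLA:
    //   R = (-C) + A*B.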
    RC = &AArch64::FPR64RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv4f32_OP1:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv2f64_OP1:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}

/// Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbnz  w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<inverted condition code>
///    \endcode
///
/// 2. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbz   w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<condition code>
///    \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is power of 2.
///
/// Examples:
///    \code
///   and  w8, w8, #0x400
///   cbnz w8, L1
///    \endcode
/// to
///    \code
llvm-svn: 263136
2016-03-11 01:54:55 +08:00
|
|
|
/// tbnz w8, #10, L1
|
2017-07-06 22:17:36 +08:00
|
|
|
/// \endcode
|
[AArch64] Optimize compare and branch sequence when the compare's constant operand is power of 2
Summary:
Peephole optimization that generates a single TBZ/TBNZ instruction
for test and branch sequences like in the example below. This handles
the cases that miss folding of AND into TBZ/TBNZ during ISelLowering of BR_CC
Examples:
and w8, w8, #0x400
cbnz w8, L1
to
tbnz w8, #10, L1
Reviewers: MatzeB, jmolloy, mcrosier, t.p.northover
Subscribers: aemerson, rengolin, mcrosier, llvm-commits
Differential Revision: http://reviews.llvm.org/D17942
llvm-svn: 263136
2016-03-11 01:54:55 +08:00
|
|
|
///
|
[AAarch64] Optimize CSINC-branch sequence
Peephole optimization that generates a single conditional branch
for csinc-branch sequences like in the examples below. This is
possible when the csinc sets or clears a register based on a condition
code and the branch checks that register. Also the condition
code may not be modified between the csinc and the original branch.
Examples:
1. Convert csinc w9, wzr, wzr, <CC>;tbnz w9, #0, 0x44
to b.<invCC>
2. Convert csinc w9, wzr, wzr, <CC>; tbz w9, #0, 0x44
to b.<CC>
rdar://problem/18506500
llvm-svn: 219742
2014-10-15 07:07:53 +08:00
|
|
|
/// \param MI Conditional Branch
|
|
|
|
/// \return True when the simple conditional branch is generated
|
|
|
|
///
|
2016-06-30 08:01:54 +08:00
|
|
|
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
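  // First, classify the branch: record which operand holds the target block
  // and whether the polarity has to be inverted when the branch is rewritten.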
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI.getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if constant operand is a power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    unsigned NewReg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // Register lives on to the new TBZ/TBNZ now.
    MO.setIsKill(false);

    // For immediates smaller than 32, we need to use the 32-bit
    // variant (W) in all cases. Indeed the 64-bit variant does not
    // allow encoding them.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-part.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
  // Look for CSINC
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}
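
// Split a machine operand's target flags into its MO_FRAGMENT part (page,
// pageoff, g0..g3, hi12) and the remaining bitmask flags; the two halves
// correspond to the direct and bitmask flag tables returned below.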
std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_COFFSTUB, "aarch64-coffstub"},
      {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"},
      {MO_S, "aarch64-s"},     {MO_TLS, "aarch64-tls"},
      {MO_DLLIMPORT, "aarch64-dllimport"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      {{MOSuppressPair, "aarch64-suppress-pair"},
       {MOStridedAccess, "aarch64-strided-access"}};
  return makeArrayRef(TargetFlags);
}

/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1                             OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION     I1
/// RET                            I2
///                                RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3                                I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// BL f                              I2
///                                   B f
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 0
/// * Requires stack fixups? No
///
/// \p MachineOutlinerRegSave implies that the function should be called with a
/// save and restore of LR to an available register. This allows us to avoid
/// stack fixups. Note that this outlining variant is compatible with the
/// NoLRSave case.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? No
enum MachineOutlinerClass {
  MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
  MachineOutlinerTailCall, /// Only emit a branch.
  MachineOutlinerNoLRSave, /// Emit a call and return.
  MachineOutlinerThunk,    /// Emit a call and tail-call.
  MachineOutlinerRegSave   /// Same as default, but save to a register.
};
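
// For example: AArch64 instructions are 4 bytes each, so the
// MachineOutlinerDefault call overhead of 3 instructions corresponds to the
// 12 bytes passed to setCallInfo() below, and the MachineOutlinerTailCall
// overhead of 1 instruction corresponds to 4 bytes.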

enum MachineOutlinerMBBFlags {
  LRUnavailableSomewhere = 0x2,
  HasCalls = 0x4,
  UnsafeRegsDead = 0x8
};
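
// Find a GPR64 the outliner can save LR to across candidate \p C: it must not
// be reserved, must not be LR, X16 or X17, and must be unused both across and
// within the candidate's sequence. Returns 0 if no such register exists.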
unsigned
AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
  assert(C.LRUWasSet && "LRU wasn't set?");
  MachineFunction *MF = C.getMF();
  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());

  // Check if there is an available register across the sequence that we can
  // use.
  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (!ARI->isReservedReg(*MF, Reg) &&
        Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
        Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
        Reg != AArch64::X17 && // Ditto for X17.
        C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
      return Reg;
  }

  // No suitable register. Return 0.
  return 0u;
}
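
// Compute the byte size of the repeated sequence, drop candidates across
// which x16, x17 or NZCV are live, pick the cheapest safe call/frame variant
// (tail call, thunk, register save, LR-dead, or default with an LR spill),
// and hand the resulting costs back to the generic machine outliner.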
outliner::OutlinedFunction
AArch64InstrInfo::getOutliningCandidateInfo(
    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
  unsigned SequenceSize =
      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
                      [this](unsigned Sum, const MachineInstr &MI) {
                        return Sum + getInstSizeInBytes(MI);
                      });
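
  // SequenceSize is the size of one candidate in bytes; for instance, a
  // candidate made of three 4-byte instructions gives SequenceSize == 12,
  // which feeds both the stack-fixup cost accounting and the final
  // OutlinedFunction below.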

  // Properties about candidate MBBs that hold for all of them.
  unsigned FlagsSetInAll = 0xF;

  // Compute liveness information for each candidate, and set FlagsSetInAll.
  const TargetRegisterInfo &TRI = getRegisterInfo();
  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
                [&FlagsSetInAll](outliner::Candidate &C) {
                  FlagsSetInAll &= C.Flags;
                });

  // According to the AArch64 Procedure Call Standard, the following are
  // undefined on entry/exit from a function call:
  //
  // * Registers x16, x17, (and thus w16, w17)
  // * Condition codes (and thus the NZCV register)
  //
  // Because of this, we can't outline any sequence of instructions where one
  // of these registers is live into/across it. Thus, we need to delete those
  // candidates.
  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
    // If the unsafe registers in this block are all dead, then we don't need
    // to compute liveness here.
    if (C.Flags & UnsafeRegsDead)
      return false;
    C.initLRU(TRI);
    LiveRegUnits LRU = C.LRU;
    return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
            !LRU.available(AArch64::NZCV));
  };

  // Are there any candidates where those registers are live?
  if (!(FlagsSetInAll & UnsafeRegsDead)) {
    // Erase every candidate that violates the restrictions above. (It could be
    // true that we have viable candidates, so it's not worth bailing out in
    // the case that, say, 1 out of 20 candidates violate the restrictions.)
    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
                                              RepeatedSequenceLocs.end(),
                                              CantGuaranteeValueAcrossCall),
                               RepeatedSequenceLocs.end());

    // If the sequence doesn't have enough candidates left, then we're done.
    if (RepeatedSequenceLocs.size() < 2)
      return outliner::OutlinedFunction();
  }

  // At this point, we have only "safe" candidates to outline. Figure out
  // frame + call instruction information.

  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();

  // Helper lambda which sets call information for every candidate.
  auto SetCandidateCallInfo =
      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
        for (outliner::Candidate &C : RepeatedSequenceLocs)
          C.setCallInfo(CallID, NumBytesForCall);
      };

  unsigned FrameID = MachineOutlinerDefault;
  unsigned NumBytesToCreateFrame = 4;

  bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
    return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
  });
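
  // HasBTI records whether any candidate lives in a function built with
  // branch-target enforcement; it is consulted below before a trailing BLR is
  // turned into a thunk-style tail call.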

  // Returns true if an instruction is safe to fix up, false otherwise.
  auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
    if (MI.isCall())
      return true;

    if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
        !MI.readsRegister(AArch64::SP, &TRI))
      return true;

    // Any modification of SP will break our code to save/restore LR.
    // FIXME: We could handle some instructions which add a constant
    // offset to SP, with a bit more work.
    if (MI.modifiesRegister(AArch64::SP, &TRI))
      return false;

    // At this point, we have a stack instruction that we might need to
    // fix up. We'll handle it if it's a load or store.
    if (MI.mayLoadOrStore()) {
      MachineOperand *Base; // Filled with the base operand of MI.
      int64_t Offset;       // Filled with the offset of MI.

      // Does it allow us to offset the base operand and is the base the
      // register SP?
      if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
          Base->getReg() != AArch64::SP)
        return false;

      // Find the minimum/maximum offset for this instruction and check
      // if fixing it up would be in range.
      int64_t MinOffset,
          MaxOffset;  // Unscaled offsets for the instruction.
      unsigned Scale; // The scale to multiply the offsets by.
      unsigned DummyWidth;
      getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);

      Offset += 16; // Update the offset to what it would be if we outlined.
      if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
        return false;

      // It's in range, so we can outline it.
      return true;
    }

    // FIXME: Add handling for instructions like "add x0, sp, #8".

    // We can't fix it up, so don't outline it.
    return false;
  };
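
  // For example, an "ldr x0, [sp, #8]" inside a candidate would have to become
  // "ldr x0, [sp, #24]" once the frame spills LR (the lambda models this with
  // the fixed 16-byte adjustment above), and is rejected if the new offset no
  // longer encodes.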

  // True if it's possible to fix up each stack instruction in this sequence.
  // Important for frames/call variants that modify the stack.
  bool AllStackInstrsSafe = std::all_of(
      FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);

  // If the last instruction in any candidate is a terminator, then we should
  // tail call all of the candidates.
  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
    FrameID = MachineOutlinerTailCall;
    NumBytesToCreateFrame = 0;
    SetCandidateCallInfo(MachineOutlinerTailCall, 4);
  }

  else if (LastInstrOpcode == AArch64::BL ||
           (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
    // FIXME: Do we need to check if the code after this uses the value of LR?
    FrameID = MachineOutlinerThunk;
    NumBytesToCreateFrame = 0;
    SetCandidateCallInfo(MachineOutlinerThunk, 4);
  }

  else {
    // We need to decide how to emit calls + frames. We can always emit the same
    // frame if we don't need to save to the stack. If we have to save to the
    // stack, then we need a different frame.
    unsigned NumBytesNoStackCalls = 0;
    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;

    for (outliner::Candidate &C : RepeatedSequenceLocs) {
      C.initLRU(TRI);

      // Is LR available? If so, we don't need a save.
      if (C.LRU.available(AArch64::LR)) {
        NumBytesNoStackCalls += 4;
        C.setCallInfo(MachineOutlinerNoLRSave, 4);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is an unused register available? If so, we won't modify the stack, so
      // we can outline with the same frame type as those that don't save LR.
      else if (findRegisterToSaveLRTo(C)) {
        NumBytesNoStackCalls += 12;
        C.setCallInfo(MachineOutlinerRegSave, 12);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is SP used in the sequence at all? If not, we don't have to modify
      // the stack, so we are guaranteed to get the same frame.
      else if (C.UsedInSequence.available(AArch64::SP)) {
        NumBytesNoStackCalls += 12;
        C.setCallInfo(MachineOutlinerDefault, 12);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // If we outline this, we need to modify the stack. Pretend we don't
      // outline this by saving all of its bytes.
      else {
        NumBytesNoStackCalls += SequenceSize;
      }
    }
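
    // For example, with three candidates where two can stash LR in a spare
    // register (12 bytes of call overhead each) and one has LR free (4 bytes),
    // NumBytesNoStackCalls is 28; since 28 <= 3 * 12, the cheaper
    // no-stack-fixup variants are kept below.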

    // If there are no places where we have to save LR, then note that we
    // don't have to update the stack. Otherwise, give every candidate the
    // default call type, as long as it's safe to do so.
    if (!AllStackInstrsSafe ||
        NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
      FrameID = MachineOutlinerNoLRSave;
    } else {
      SetCandidateCallInfo(MachineOutlinerDefault, 12);
    }

    // If we dropped all of the candidates, bail out here.
    if (RepeatedSequenceLocs.size() < 2) {
      RepeatedSequenceLocs.clear();
      return outliner::OutlinedFunction();
    }
  }

  // Does every candidate's MBB contain a call? If so, then we might have a call
  // in the range.
  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
    // Check if the range contains a call. These require a save + restore of the
    // link register.
    bool ModStackToSaveLR = false;
    if (std::any_of(FirstCand.front(), FirstCand.back(),
                    [](const MachineInstr &MI) { return MI.isCall(); }))
      ModStackToSaveLR = true;

    // Handle the last instruction separately. If this is a tail call, then the
    // last instruction is a call. We don't want to save + restore in this case.
    // However, it could be possible that the last instruction is a call without
    // it being valid to tail call this sequence. We should consider this as
    // well.
    else if (FrameID != MachineOutlinerThunk &&
             FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
      ModStackToSaveLR = true;

    if (ModStackToSaveLR) {
      // We can't fix up the stack. Bail out.
      if (!AllStackInstrsSafe) {
        RepeatedSequenceLocs.clear();
        return outliner::OutlinedFunction();
      }

      // Save + restore LR.
      NumBytesToCreateFrame += 8;
    }
  }
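
  // Everything has been costed at this point; hand the surviving candidates,
  // the sequence size, the frame size, and the frame class back to the
  // generic outliner.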
  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
                                    NumBytesToCreateFrame, FrameID);
}

bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
  const Function &F = MF.getFunction();

  // Can F be deduplicated by the linker? If it can, don't outline from it.
  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
    return false;

  // Don't outline from functions with section markings; the program could
  // expect that all the code is in the named section.
  // FIXME: Allow outlining from multiple functions with the same section
  // marking.
  if (F.hasSection())
    return false;

  // Outlining from functions with redzones is unsafe since the outliner may
  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
  // outline from it.
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  if (!AFI || AFI->hasRedZone().getValueOr(true))
    return false;

  // It's safe to outline from MF.
  return true;
}

bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                              unsigned &Flags) const {
  // Check if LR is available through all of the MBB. If it's not, then set
  // a flag.
  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
         "Suitable Machine Function for outlining must track liveness");
  LiveRegUnits LRU(getRegisterInfo());

  std::for_each(MBB.rbegin(), MBB.rend(),
                [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });

  // Check if each of the unsafe registers are available...
  bool W16AvailableInBlock = LRU.available(AArch64::W16);
  bool W17AvailableInBlock = LRU.available(AArch64::W17);
  bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);

  // If all of these are dead (and not live out), we know we don't have to check
  // them later.
  if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;

  // Now, add the live outs to the set.
  LRU.addLiveOuts(MBB);

  // If any of these registers is available in the MBB, but also a live out of
  // the block, then we know outlining is unsafe.
  if (W16AvailableInBlock && !LRU.available(AArch64::W16))
    return false;
  if (W17AvailableInBlock && !LRU.available(AArch64::W17))
    return false;
  if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
    return false;

  // Check if there's a call inside this MachineBasicBlock. If there is, then
  // set a flag.
  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
    Flags |= MachineOutlinerMBBFlags::HasCalls;

  MachineFunction *MF = MBB.getParent();

  // In the event that we outline, we may have to save LR. If there is an
  // available register in the MBB, then we'll always save LR there. Check if
  // this is true.
  bool CanSaveLR = false;
  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());

  // Check if there is an available register across the sequence that we can
  // use.
  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
        Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
      CanSaveLR = true;
      break;
    }
  }

  // Check if we have a register we can save LR to, and if LR was used
  // somewhere. If both of those things are true, then we need to evaluate the
  // safety of outlining stack instructions later.
  if (!CanSaveLR && !LRU.available(AArch64::LR))
    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;

  return true;
}
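
// Classify one instruction for the outliner: Legal instructions may be
// outlined, Illegal instructions may not, and Invisible instructions (debug
// values, KILLs) are ignored.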

outliner::InstrType
AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
                                   unsigned Flags) const {
  MachineInstr &MI = *MIT;
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();

  // Don't outline LOHs.
  if (FuncInfo->getLOHRelated().count(&MI))
    return outliner::InstrType::Illegal;

  // Don't allow debug values to impact outlining type.
  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
    return outliner::InstrType::Invisible;

  // At this point, KILL instructions don't really tell us much so we can go
  // ahead and skip over them.
  if (MI.isKill())
    return outliner::InstrType::Invisible;

  // Is this a terminator for a basic block?
  if (MI.isTerminator()) {
    // Is this the end of a function?
    if (MI.getParent()->succ_empty())
      return outliner::InstrType::Legal;

    // It's not, so don't outline it.
    return outliner::InstrType::Illegal;
  }

  // Make sure none of the operands are un-outlinable.
  for (const MachineOperand &MOP : MI.operands()) {
    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
        MOP.isTargetIndex())
      return outliner::InstrType::Illegal;

    // If it uses LR or W30 explicitly, then don't touch it.
    if (MOP.isReg() && !MOP.isImplicit() &&
        (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
      return outliner::InstrType::Illegal;
  }

  // Special cases for instructions that can always be outlined, but will fail
  // the later tests. e.g., ADRPs, which are PC-relative and use LR, but can
  // always be outlined because they don't require a *specific* value to be in
  // LR.
  if (MI.getOpcode() == AArch64::ADRP)
    return outliner::InstrType::Legal;

  // If MI is a call we might be able to outline it. We don't want to outline
  // any calls that rely on the position of items on the stack. When we outline
  // something containing a call, we have to emit a save and restore of LR in
  // the outlined function. Currently, this always happens by saving LR to the
  // stack. Thus, if we outline, say, half the parameters for a function call
  // plus the call, then we'll break the callee's expectations for the layout
  // of the stack.
  //
  // FIXME: Allow calls to functions which construct a stack frame, as long
  // as they don't access arguments on the stack.
  // FIXME: Figure out some way to analyze functions defined in other modules.
  // We should be able to compute the memory usage based on the IR calling
  // convention, even if we can't see the definition.
  if (MI.isCall()) {
    // Get the function associated with the call. Look at each operand and find
    // the one that represents the callee and get its name.
    const Function *Callee = nullptr;
    for (const MachineOperand &MOP : MI.operands()) {
      if (MOP.isGlobal()) {
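        // The global may not be a Function (e.g. a GlobalAlias); dyn_cast then
        // leaves Callee null and we fall through to the unknown-callee case.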
        Callee = dyn_cast<Function>(MOP.getGlobal());
        break;
      }
    }

    // Never outline calls to mcount. There isn't any rule that would require
    // this, but the Linux kernel's "ftrace" feature depends on it.
    if (Callee && Callee->getName() == "\01_mcount")
      return outliner::InstrType::Illegal;

    // If we don't know anything about the callee, assume it depends on the
    // stack layout of the caller. In that case, it's only legal to outline
    // as a tail-call. Whitelist the call instructions we know about so we
    // don't get unexpected results with call pseudo-instructions.
    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

    if (!Callee)
      return UnknownCallOutlineType;

    // We have a function we have information about. Check if it's something we
    // can safely outline.
    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);

    // We don't know what's going on with the callee at all. Don't touch it.
    if (!CalleeMF)
      return UnknownCallOutlineType;

    // Check if we know anything about the callee saves on the function. If we
    // don't, then don't touch it, since that implies that we haven't
    // computed anything about its stack frame yet.
    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
        MFI.getNumObjects() > 0)
      return UnknownCallOutlineType;

    // At this point, we can say that CalleeMF ought to not pass anything on
    // the stack. Therefore, we can outline it.
    return outliner::InstrType::Legal;
  }

  // Don't outline positions.
  if (MI.isPosition())
    return outliner::InstrType::Illegal;

  // Don't touch the link register or W30.
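  // (W30 is the 32-bit view of X30/LR, so reading or writing either one
  // touches the return address.)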
  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
      MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    return outliner::InstrType::Illegal;

  // Don't outline BTI instructions, because that will prevent the outlining
  // site from being indirectly callable.
  if (MI.getOpcode() == AArch64::HINT) {
    int64_t Imm = MI.getOperand(0).getImm();
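    // HINT #32, #34, #36 and #38 encode BTI, BTI c, BTI j and BTI jc.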
    if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
      return outliner::InstrType::Illegal;
  }

  return outliner::InstrType::Legal;
}

void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
  for (MachineInstr &MI : MBB) {
    MachineOperand *Base;
    unsigned Width;
    int64_t Offset;

    // Is this a load or store with an immediate offset with SP as the base?
    if (!MI.mayLoadOrStore() ||
        !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
        (Base->isReg() && Base->getReg() != AArch64::SP))
      continue;

    // It is, so we have to fix it up.
    unsigned Scale;
    int64_t Dummy1, Dummy2;

    MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    assert(Scale != 0 && "Unexpected opcode!");

    // We've pushed the return address to the stack, so add 16 to the offset.
    // This is safe, since we already checked if it would overflow when we
    // checked if this instruction was legal to outline.
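    // The immediate stored in the instruction is in units of Scale bytes, so
    // convert the byte offset back into scaled units before writing it.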
    int64_t NewImm = (Offset + 16) / Scale;
    StackOffsetOperand.setImm(NewImm);
  }
}

void AArch64InstrInfo::buildOutlinedFrame(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const outliner::OutlinedFunction &OF) const {
  // For thunk outlining, rewrite the last instruction from a call to a
  // tail-call.
  if (OF.FrameConstructionID == MachineOutlinerThunk) {
    MachineInstr *Call = &*--MBB.instr_end();
    unsigned TailOpcode;
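    // BL's callee is a symbol, so it becomes the direct tail-call pseudo;
    // BLR's callee lives in a register, so the register form is used instead.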
    if (Call->getOpcode() == AArch64::BL) {
      TailOpcode = AArch64::TCRETURNdi;
    } else {
      assert(Call->getOpcode() == AArch64::BLR);
      TailOpcode = AArch64::TCRETURNriALL;
    }
    MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
                           .add(Call->getOperand(0))
                           .addImm(0);
    MBB.insert(MBB.end(), TC);
    Call->eraseFromParent();
  }

  // Is there a call in the outlined range?
  auto IsNonTailCall = [](MachineInstr &MI) {
    return MI.isCall() && !MI.isReturn();
  };
  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
    // Fix up the instructions in the range, since we're going to modify the
    // stack.
    assert(OF.FrameConstructionID != MachineOutlinerDefault &&
           "Can only fix up stack references once");
    fixupPostOutline(MBB);

    // LR has to be a live in so that we can save it.
    MBB.addLiveIn(AArch64::LR);

    MachineBasicBlock::iterator It = MBB.begin();
    MachineBasicBlock::iterator Et = MBB.end();

    if (OF.FrameConstructionID == MachineOutlinerTailCall ||
        OF.FrameConstructionID == MachineOutlinerThunk)
      Et = std::prev(MBB.end());

    // Insert a save before the outlined region
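    // STRXpre here is the pre-indexed store "str x30, [sp, #-16]!": it stores
    // LR and decrements SP by 16 in a single instruction.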
    MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
                                .addReg(AArch64::SP, RegState::Define)
                                .addReg(AArch64::LR)
                                .addReg(AArch64::SP)
                                .addImm(-16);
    It = MBB.insert(It, STRXpre);

    const TargetSubtargetInfo &STI = MF.getSubtarget();
    const MCRegisterInfo *MRI = STI.getRegisterInfo();
    unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);

    // Add a CFI saying the stack was moved 16 B down.
    int64_t StackPosEntry =
        MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(StackPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Add a CFI saying that the LR that we want to find is now 16 B higher
    // than before.
    int64_t LRPosEntry =
        MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(LRPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Insert a restore before the terminator for the function.
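    // LDRXpost is the post-indexed load "ldr x30, [sp], #16": it reloads LR
    // and pops the 16 bytes back off the stack.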
    MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                                 .addReg(AArch64::SP, RegState::Define)
                                 .addReg(AArch64::LR, RegState::Define)
                                 .addReg(AArch64::SP)
                                 .addImm(16);
    Et = MBB.insert(Et, LDRXpost);
  }

  // If this is a tail call outlined function, then there's already a return.
  if (OF.FrameConstructionID == MachineOutlinerTailCall ||
      OF.FrameConstructionID == MachineOutlinerThunk)
    return;

  // It's not a tail call, so we have to insert the return ourselves.
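  // The return goes through LR, which holds the address that the call into
  // this outlined function set up.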
  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
                          .addReg(AArch64::LR, RegState::Undef);
  MBB.insert(MBB.end(), ret);

  // Did we have to modify the stack by saving the link register?
  if (OF.FrameConstructionID != MachineOutlinerDefault)
    return;

  // We modified the stack.
  // Walk over the basic block and fix up all the stack accesses.
  fixupPostOutline(MBB);
}

MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    MachineFunction &MF, const outliner::Candidate &C) const {

  // Are we tail calling?
  if (C.CallConstructionID == MachineOutlinerTailCall) {
    // If yes, then we can just branch to the label.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
                            .addGlobalAddress(M.getNamedValue(MF.getName()))
                            .addImm(0));
    return It;
  }

  // Are we saving the link register?
  if (C.CallConstructionID == MachineOutlinerNoLRSave ||
      C.CallConstructionID == MachineOutlinerThunk) {
    // No, so just insert the call.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                            .addGlobalAddress(M.getNamedValue(MF.getName())));
    return It;
  }

  // We want to return the spot where we inserted the call.
  MachineBasicBlock::iterator CallPt;

  // Instructions for saving and restoring LR around the call instruction we're
  // going to insert.
  MachineInstr *Save;
  MachineInstr *Restore;
  // Can we save to a register?
  if (C.CallConstructionID == MachineOutlinerRegSave) {
    // FIXME: This logic should be sunk into a target-specific interface so that
    // we don't have to recompute the register.
    unsigned Reg = findRegisterToSaveLRTo(C);
    assert(Reg != 0 && "No callee-saved register available?");

    // Save and restore LR from that register.
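    // ORRXrs Xd, XZR, Xm, #0 is the canonical "mov Xd, Xm" encoding, so these
    // two instructions copy LR into Reg and back without touching memory.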
    Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
               .addReg(AArch64::XZR)
               .addReg(AArch64::LR)
               .addImm(0);
    Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
                  .addReg(AArch64::XZR)
                  .addReg(Reg)
                  .addImm(0);
  } else {
    // We have the default case. Save and restore from SP.
    Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
               .addReg(AArch64::SP, RegState::Define)
               .addReg(AArch64::LR)
               .addReg(AArch64::SP)
               .addImm(-16);
    Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                  .addReg(AArch64::SP, RegState::Define)
                  .addReg(AArch64::LR, RegState::Define)
                  .addReg(AArch64::SP)
                  .addImm(16);
  }

  It = MBB.insert(It, Save);
  It++;

  // Insert the call.
  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                          .addGlobalAddress(M.getNamedValue(MF.getName())));
  CallPt = It;
  It++;

  It = MBB.insert(It, Restore);
  return CallPt;
}

bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
    MachineFunction &MF) const {
  return MF.getFunction().optForMinSize();
}

#define GET_INSTRINFO_HELPERS
#include "AArch64GenInstrInfo.inc"