2014-05-24 20:50:23 +08:00
|
|
|
//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
|
2014-03-29 18:18:08 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2014-03-29 18:18:08 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2014-05-24 20:50:23 +08:00
|
|
|
// This file contains the AArch64 implementation of the TargetInstrInfo class.
|
2014-03-29 18:18:08 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-08-14 00:26:38 +08:00
|
|
|
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
|
|
|
|
#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
#include "AArch64.h"
|
|
|
|
#include "AArch64RegisterInfo.h"
|
2019-03-27 21:16:19 +08:00
|
|
|
#include "llvm/ADT/Optional.h"
|
2014-08-08 05:40:58 +08:00
|
|
|
#include "llvm/CodeGen/MachineCombinerPattern.h"
|
2017-11-08 09:01:31 +08:00
|
|
|
#include "llvm/CodeGen/TargetInstrInfo.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
#define GET_INSTRINFO_HEADER
|
2014-05-24 20:50:23 +08:00
|
|
|
#include "AArch64GenInstrInfo.inc"
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
namespace llvm {
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
class AArch64Subtarget;
|
|
|
|
class AArch64TargetMachine;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2017-07-15 05:44:12 +08:00
|
|
|
/// AArch64-specific memory-operand flag, aliased to the generic
/// MachineMemOperand::MOTargetFlag1 slot.
/// NOTE(review): presumably the flag set by suppressLdStPair() and queried by
/// isLdStPairSuppressed() -- confirm against the implementation file.
static const MachineMemOperand::Flags MOSuppressPair =
|
|
|
|
MachineMemOperand::MOTargetFlag1;
|
|
|
|
/// AArch64-specific memory-operand flag, aliased to the generic
/// MachineMemOperand::MOTargetFlag2 slot.
/// NOTE(review): presumably the flag queried by isStridedAccess() -- confirm
/// against the implementation file.
static const MachineMemOperand::Flags MOStridedAccess =
|
|
|
|
MachineMemOperand::MOTargetFlag2;
|
|
|
|
|
|
|
|
#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
|
|
|
|
|
2016-07-27 22:31:46 +08:00
|
|
|
class AArch64InstrInfo final : public AArch64GenInstrInfo {
|
2015-03-19 04:37:30 +08:00
|
|
|
const AArch64RegisterInfo RI;
|
2014-05-24 20:50:23 +08:00
|
|
|
const AArch64Subtarget &Subtarget;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
public:
|
2014-05-24 20:50:23 +08:00
|
|
|
explicit AArch64InstrInfo(const AArch64Subtarget &STI);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-03-19 04:37:30 +08:00
|
|
|
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
|
|
|
|
/// such, whenever a client has an instance of instruction info, it should
|
|
|
|
/// always be able to get register info as well (through this method).
|
|
|
|
const AArch64RegisterInfo &getRegisterInfo() const { return RI; }
|
|
|
|
|
2016-07-29 16:16:16 +08:00
|
|
|
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2016-06-30 08:01:54 +08:00
|
|
|
bool isAsCheapAsAMove(const MachineInstr &MI) const override;
|
2014-07-29 10:09:26 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
|
|
|
|
unsigned &DstReg, unsigned &SubIdx) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-09-08 22:43:48 +08:00
|
|
|
bool
|
2019-04-19 17:08:38 +08:00
|
|
|
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
|
|
|
|
const MachineInstr &MIb,
|
2014-09-08 22:43:48 +08:00
|
|
|
AliasAnalysis *AA = nullptr) const override;
|
|
|
|
|
2016-06-30 08:01:54 +08:00
|
|
|
unsigned isLoadFromStackSlot(const MachineInstr &MI,
|
2014-03-30 15:25:18 +08:00
|
|
|
int &FrameIndex) const override;
|
2016-06-30 08:01:54 +08:00
|
|
|
unsigned isStoreToStackSlot(const MachineInstr &MI,
|
2014-03-30 15:25:18 +08:00
|
|
|
int &FrameIndex) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Does this instruction set its full destination register to zero?
|
2018-02-10 00:14:41 +08:00
|
|
|
static bool isGPRZero(const MachineInstr &MI);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Does this instruction rename a GPR without modifying bits?
|
2018-02-10 00:14:41 +08:00
|
|
|
static bool isGPRCopy(const MachineInstr &MI);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Does this instruction rename an FPR without modifying bits?
|
2018-02-10 00:14:41 +08:00
|
|
|
static bool isFPRCopy(const MachineInstr &MI);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
/// Return true if pairing the given load or store is hinted to be
|
|
|
|
/// unprofitable.
|
2018-02-10 00:14:41 +08:00
|
|
|
static bool isLdStPairSuppressed(const MachineInstr &MI);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2017-07-15 05:44:12 +08:00
|
|
|
/// Return true if the given load or store is a strided memory access.
|
2018-02-10 00:14:41 +08:00
|
|
|
static bool isStridedAccess(const MachineInstr &MI);
|
2017-07-15 05:44:12 +08:00
|
|
|
|
2016-03-10 01:29:48 +08:00
|
|
|
/// Return true if this is an unscaled load/store.
|
2018-02-10 00:14:41 +08:00
|
|
|
static bool isUnscaledLdSt(unsigned Opc);
|
|
|
|
static bool isUnscaledLdSt(MachineInstr &MI) {
|
|
|
|
return isUnscaledLdSt(MI.getOpcode());
|
2016-08-12 23:26:00 +08:00
|
|
|
}
|
|
|
|
|
2019-03-27 21:16:19 +08:00
|
|
|
/// Returns the unscaled load/store for the scaled load/store opcode,
|
|
|
|
/// if there is a corresponding unscaled variant available.
|
|
|
|
static Optional<unsigned> getUnscaledLdSt(unsigned Opc);
|
|
|
|
|
|
|
|
|
|
|
|
/// Returns the index for the immediate for a given instruction.
|
|
|
|
static unsigned getLoadStoreImmIdx(unsigned Opc);
|
|
|
|
|
2018-02-10 00:14:41 +08:00
|
|
|
/// Return true if the given load or store may be paired with another.
|
|
|
|
static bool isPairableLdStInst(const MachineInstr &MI);
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Return the opcode that sets flags when possible. The caller is
|
[AArch64] Prefer Bcc to CBZ/CBNZ/TBZ/TBNZ when NZCV flags can be set for "free".
This patch contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions into a
conditional branch (Bcc), when the NZCV flags can be set for "free". This is
preferred on targets that have more flexibility when scheduling Bcc
instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming all other variables are
equal). This can reduce register pressure and is also the default behavior for
GCC.
A few examples:
add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS.
cbz w8, .LBB_2 -> b.eq .LBB0_2 ; single def/use of w8 removed.
add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses.
cbz w8, .LBB1_2 -> b.eq .LBB1_2
sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2
In looking at all current sub-target machine descriptions, this transformation
appears to be either positive or neutral.
Differential Revision: https://reviews.llvm.org/D34220.
llvm-svn: 306144
2017-06-24 03:20:12 +08:00
|
|
|
/// responsible for ensuring the opc has a flag setting equivalent.
|
2018-02-10 00:14:41 +08:00
|
|
|
static unsigned convertToFlagSettingOpc(unsigned Opc, bool &Is64Bit);
|
[AArch64] Prefer Bcc to CBZ/CBNZ/TBZ/TBNZ when NZCV flags can be set for "free".
This patch contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions into a
conditional branch (Bcc), when the NZCV flags can be set for "free". This is
preferred on targets that have more flexibility when scheduling Bcc
instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming all other variables are
equal). This can reduce register pressure and is also the default behavior for
GCC.
A few examples:
add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS.
cbz w8, .LBB_2 -> b.eq .LBB0_2 ; single def/use of w8 removed.
add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses.
cbz w8, .LBB1_2 -> b.eq .LBB1_2
sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2
In looking at all current sub-target machine descriptions, this transformation
appears to be either positive or neutral.
Differential Revision: https://reviews.llvm.org/D34220.
llvm-svn: 306144
2017-06-24 03:20:12 +08:00
|
|
|
|
2016-03-19 03:21:02 +08:00
|
|
|
/// Return true if this is a load/store that can be potentially paired/merged.
|
2019-04-19 17:08:38 +08:00
|
|
|
bool isCandidateToMergeOrPair(const MachineInstr &MI) const;
|
2016-03-19 03:21:02 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
/// Hint that pairing the given load or store is unprofitable.
|
2018-02-10 00:14:41 +08:00
|
|
|
static void suppressLdStPair(MachineInstr &MI);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2019-04-19 17:08:38 +08:00
|
|
|
bool getMemOperandWithOffset(const MachineInstr &MI,
|
|
|
|
const MachineOperand *&BaseOp,
|
2018-11-28 20:00:20 +08:00
|
|
|
int64_t &Offset,
|
|
|
|
const TargetRegisterInfo *TRI) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2019-04-19 17:08:38 +08:00
|
|
|
bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
|
|
|
|
const MachineOperand *&BaseOp,
|
2018-11-28 20:00:20 +08:00
|
|
|
int64_t &Offset, unsigned &Width,
|
|
|
|
const TargetRegisterInfo *TRI) const;
|
2014-09-08 22:43:48 +08:00
|
|
|
|
2017-03-18 06:26:55 +08:00
|
|
|
/// Return the immediate offset of the base register in a load/store \p LdSt.
|
|
|
|
MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Returns true if opcode \p Opc is a memory operation. If it is, set
|
2017-03-18 06:26:55 +08:00
|
|
|
/// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
|
|
|
|
///
|
|
|
|
/// For unscaled instructions, \p Scale is set to 1.
|
2019-03-27 21:16:19 +08:00
|
|
|
static bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
|
|
|
|
int64_t &MinOffset, int64_t &MaxOffset);
|
2017-03-18 06:26:55 +08:00
|
|
|
|
2019-04-19 17:08:38 +08:00
|
|
|
bool shouldClusterMemOps(const MachineOperand &BaseOp1,
|
|
|
|
const MachineOperand &BaseOp2,
|
2016-06-30 08:01:54 +08:00
|
|
|
unsigned NumLoads) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
2016-06-12 23:39:02 +08:00
|
|
|
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
|
2014-03-29 18:18:08 +08:00
|
|
|
bool KillSrc, unsigned Opcode,
|
|
|
|
llvm::ArrayRef<unsigned> Indices) const;
|
2019-02-07 18:35:34 +08:00
|
|
|
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
|
|
|
DebugLoc DL, unsigned DestReg, unsigned SrcReg,
|
|
|
|
bool KillSrc, unsigned Opcode, unsigned ZeroReg,
|
|
|
|
llvm::ArrayRef<unsigned> Indices) const;
|
2014-03-30 15:25:18 +08:00
|
|
|
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
2016-06-12 23:39:02 +08:00
|
|
|
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
|
2014-03-30 15:25:18 +08:00
|
|
|
bool KillSrc) const override;
|
|
|
|
|
|
|
|
void storeRegToStackSlot(MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator MBBI, unsigned SrcReg,
|
|
|
|
bool isKill, int FrameIndex,
|
|
|
|
const TargetRegisterClass *RC,
|
|
|
|
const TargetRegisterInfo *TRI) const override;
|
|
|
|
|
|
|
|
void loadRegFromStackSlot(MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator MBBI, unsigned DestReg,
|
|
|
|
int FrameIndex, const TargetRegisterClass *RC,
|
|
|
|
const TargetRegisterInfo *TRI) const override;
|
|
|
|
|
2017-01-06 05:51:42 +08:00
|
|
|
// This tells target independent code that it is okay to pass instructions
|
|
|
|
// with subreg operands to foldMemoryOperandImpl.
|
|
|
|
bool isSubregFoldable() const override { return true; }
|
|
|
|
|
2014-07-31 20:58:50 +08:00
|
|
|
using TargetInstrInfo::foldMemoryOperandImpl;
|
2016-06-30 08:01:54 +08:00
|
|
|
MachineInstr *
|
|
|
|
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
|
|
|
|
ArrayRef<unsigned> Ops,
|
|
|
|
MachineBasicBlock::iterator InsertPt, int FrameIndex,
|
2019-06-08 14:19:15 +08:00
|
|
|
LiveIntervals *LIS = nullptr,
|
|
|
|
VirtRegMap *VRM = nullptr) const override;
|
2014-03-30 15:25:18 +08:00
|
|
|
|
2016-08-02 16:06:17 +08:00
|
|
|
/// \returns true if a branch from an instruction with opcode \p BranchOpc
|
2016-10-06 23:38:09 +08:00
|
|
|
/// is capable of jumping to a position \p BrOffset bytes away.
|
|
|
|
bool isBranchOffsetInRange(unsigned BranchOpc,
|
|
|
|
int64_t BrOffset) const override;
|
|
|
|
|
|
|
|
MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
|
2016-08-02 16:06:17 +08:00
|
|
|
|
2016-07-15 22:41:04 +08:00
|
|
|
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
|
2014-03-30 15:25:18 +08:00
|
|
|
MachineBasicBlock *&FBB,
|
|
|
|
SmallVectorImpl<MachineOperand> &Cond,
|
|
|
|
bool AllowModify = false) const override;
|
2016-09-15 04:43:16 +08:00
|
|
|
unsigned removeBranch(MachineBasicBlock &MBB,
|
2016-09-15 01:23:48 +08:00
|
|
|
int *BytesRemoved = nullptr) const override;
|
2016-09-15 01:24:15 +08:00
|
|
|
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
2015-06-12 03:30:37 +08:00
|
|
|
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
|
2016-09-15 01:23:48 +08:00
|
|
|
const DebugLoc &DL,
|
|
|
|
int *BytesAdded = nullptr) const override;
|
2014-03-30 15:25:18 +08:00
|
|
|
bool
|
2016-09-15 04:43:16 +08:00
|
|
|
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
|
2015-06-12 03:30:37 +08:00
|
|
|
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
|
|
|
|
unsigned, unsigned, int &, int &, int &) const override;
|
2014-03-30 15:25:18 +08:00
|
|
|
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
|
2016-06-12 23:39:02 +08:00
|
|
|
const DebugLoc &DL, unsigned DstReg,
|
|
|
|
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
|
|
|
|
unsigned FalseReg) const override;
|
2017-04-22 05:48:41 +08:00
|
|
|
void getNoop(MCInst &NopInst) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-10-31 03:24:51 +08:00
|
|
|
bool isSchedulingBoundary(const MachineInstr &MI,
|
|
|
|
const MachineBasicBlock *MBB,
|
|
|
|
const MachineFunction &MF) const override;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
/// analyzeCompare - For a comparison instruction, return the source registers
|
|
|
|
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
|
|
|
|
/// Return true if the comparison instruction can be analyzed.
|
2016-06-30 08:01:54 +08:00
|
|
|
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
|
2014-03-30 15:25:18 +08:00
|
|
|
unsigned &SrcReg2, int &CmpMask,
|
|
|
|
int &CmpValue) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
/// optimizeCompareInstr - Convert the instruction supplying the argument to
|
|
|
|
/// the comparison into one that sets the zero bit in the flags register.
|
2016-06-30 08:01:54 +08:00
|
|
|
bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
|
2014-03-30 15:25:18 +08:00
|
|
|
unsigned SrcReg2, int CmpMask, int CmpValue,
|
|
|
|
const MachineRegisterInfo *MRI) const override;
|
2016-06-30 08:01:54 +08:00
|
|
|
bool optimizeCondBranch(MachineInstr &MI) const override;
|
2016-04-24 13:14:01 +08:00
|
|
|
|
|
|
|
/// Return true when a code sequence can improve throughput. It
|
|
|
|
/// should be called only for instructions in loops.
|
|
|
|
/// \param Pattern - combiner pattern
|
|
|
|
bool isThroughputPattern(MachineCombinerPattern Pattern) const override;
|
2015-06-20 07:21:42 +08:00
|
|
|
/// Return true when there is potentially a faster code sequence
|
2017-07-11 06:11:50 +08:00
|
|
|
/// for an instruction chain ending in ``Root``. All potential patterns are
|
|
|
|
/// listed in the ``Patterns`` array.
|
2017-07-29 10:55:46 +08:00
|
|
|
bool getMachineCombinerPatterns(
|
|
|
|
MachineInstr &Root,
|
|
|
|
SmallVectorImpl<MachineCombinerPattern> &Patterns) const override;
|
2016-01-07 12:01:02 +08:00
|
|
|
/// Return true when Inst is associative and commutative so that it can be
|
|
|
|
/// reassociated.
|
|
|
|
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
|
2015-06-20 07:21:42 +08:00
|
|
|
/// When getMachineCombinerPatterns() finds patterns, this function generates
|
|
|
|
/// the instructions that could replace the original code sequence
|
2014-09-03 19:41:21 +08:00
|
|
|
void genAlternativeCodeSequence(
|
2015-11-06 03:34:57 +08:00
|
|
|
MachineInstr &Root, MachineCombinerPattern Pattern,
|
2014-08-08 05:40:58 +08:00
|
|
|
SmallVectorImpl<MachineInstr *> &InsInstrs,
|
|
|
|
SmallVectorImpl<MachineInstr *> &DelInstrs,
|
2014-09-03 19:41:21 +08:00
|
|
|
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
|
2016-05-02 22:56:21 +08:00
|
|
|
/// AArch64 supports MachineCombiner.
|
2014-09-03 19:41:21 +08:00
|
|
|
bool useMachineCombiner() const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2016-06-30 08:01:54 +08:00
|
|
|
bool expandPostRAPseudo(MachineInstr &MI) const override;
|
2015-08-19 06:52:15 +08:00
|
|
|
|
|
|
|
std::pair<unsigned, unsigned>
|
|
|
|
decomposeMachineOperandsTargetFlags(unsigned TF) const override;
|
|
|
|
ArrayRef<std::pair<unsigned, const char *>>
|
|
|
|
getSerializableDirectMachineOperandTargetFlags() const override;
|
|
|
|
ArrayRef<std::pair<unsigned, const char *>>
|
|
|
|
getSerializableBitmaskMachineOperandTargetFlags() const override;
|
2017-07-13 10:28:54 +08:00
|
|
|
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
|
|
|
|
getSerializableMachineMemOperandTargetFlags() const override;
|
2015-08-19 06:52:15 +08:00
|
|
|
|
[MachineOutliner] Disable outlining from LinkOnceODRs by default
Say you have two identical linkonceodr functions, one in M1 and one in M2.
Say that the outliner outlines A,B,C from one function, and D,E,F from another
function (where letters are instructions). Now those functions are not
identical, and cannot be deduped. Locally to M1 and M2, these outlining
choices would be good-- to the whole program, however, this might not be true!
To mitigate this, this commit makes it so that the outliner sees linkonceodr
functions as unsafe to outline from. It also adds a flag,
-enable-linkonceodr-outlining, which allows the user to specify that they
want to outline from such functions when they know what they're doing.
Changing this handles most code size regressions in the test suite caused by
competing with linker dedupe. It also doesn't have a huge impact on the code
size improvements from the outliner. There are 6 tests that regress > 5% from
outlining WITH linkonceodrs to outlining WITHOUT linkonceodrs. Overall, most
tests either improve or are not impacted.
Not outlined vs outlined without linkonceodrs:
https://hastebin.com/raw/qeguxavuda
Not outlined vs outlined with linkonceodrs:
https://hastebin.com/raw/edepoqoqic
Outlined with linkonceodrs vs outlined without linkonceodrs:
https://hastebin.com/raw/awiqifiheb
Numbers generated using compare.py with -m size.__text. Tests run for AArch64
with -Oz -mllvm -enable-machine-outliner -mno-red-zone.
llvm-svn: 315136
2017-10-07 08:16:34 +08:00
|
|
|
bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
|
|
|
|
bool OutlineFromLinkOnceODRs) const override;
|
2018-07-25 04:13:10 +08:00
|
|
|
outliner::OutlinedFunction getOutliningCandidateInfo(
|
2018-06-05 05:14:16 +08:00
|
|
|
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
|
|
|
|
outliner::InstrType
|
[MachineOutliner] AArch64: Handle instrs that use SP and will never need fixups
This commit does two things. Firstly, it adds a collection of flags which can
be passed along to the target to encode information about the MBB that an
instruction lives in to the outliner.
Second, it adds some of those flags to the AArch64 outliner in order to add
more stack instructions to the list of legal instructions that are handled
by the outliner. The two flags added check if
- There are calls in the MachineBasicBlock containing the instruction
- The link register is available in the entire block
If the link register is available and there are no calls, then a stack
instruction can always be outlined without fixups, regardless of what it is,
since in this case, the outliner will never modify the stack to create a
call or outlined frame.
The motivation for doing this was checking which instructions are most often
missed by the outliner. Instructions like, say
%sp<def> = ADDXri %sp, 32, 0; flags: FrameDestroy
are very common, but cannot be outlined in the case that the outliner might
modify the stack. This commit allows us to outline instructions like this.
llvm-svn: 322048
2018-01-09 08:26:18 +08:00
|
|
|
getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override;
|
2018-11-13 07:51:32 +08:00
|
|
|
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
|
|
|
|
unsigned &Flags) const override;
|
2018-06-20 05:14:48 +08:00
|
|
|
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
|
2018-07-25 04:13:10 +08:00
|
|
|
const outliner::OutlinedFunction &OF) const override;
|
2017-03-18 06:26:55 +08:00
|
|
|
MachineBasicBlock::iterator
|
|
|
|
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
|
2017-07-29 10:55:46 +08:00
|
|
|
MachineBasicBlock::iterator &It, MachineFunction &MF,
|
2018-07-25 01:42:11 +08:00
|
|
|
const outliner::Candidate &C) const override;
|
2018-07-28 04:18:27 +08:00
|
|
|
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
|
2017-04-08 11:30:15 +08:00
|
|
|
/// Returns true if the instruction has a shift by immediate that can be
|
|
|
|
/// executed in one cycle less.
|
2018-11-07 06:17:14 +08:00
|
|
|
static bool isFalkorShiftExtFast(const MachineInstr &MI);
|
2018-10-31 03:24:51 +08:00
|
|
|
/// Return true if the instruction is a SEH instruction used for unwinding
|
|
|
|
/// on Windows.
|
|
|
|
static bool isSEHInstruction(const MachineInstr &MI);
|
2017-03-18 06:26:55 +08:00
|
|
|
|
2018-11-28 04:58:27 +08:00
|
|
|
#define GET_INSTRINFO_HELPER_DECLS
|
2018-11-27 05:47:28 +08:00
|
|
|
#include "AArch64GenInstrInfo.inc"
|
|
|
|
|
2019-05-23 02:48:58 +08:00
|
|
|
protected:
|
|
|
|
/// If the specific machine instruction is an instruction that moves/copies
|
|
|
|
/// value from one register to another register return true along with
|
|
|
|
/// @Source machine operand and @Destination machine operand.
|
|
|
|
bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
|
|
|
|
const MachineOperand *&Destination) const override;
|
|
|
|
|
2017-07-29 10:55:46 +08:00
|
|
|
private:
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Sets the offsets on outlined instructions in \p MBB which use SP
|
2017-03-18 06:26:55 +08:00
|
|
|
/// so that they will be valid post-outlining.
|
|
|
|
///
|
|
|
|
/// \param MBB A \p MachineBasicBlock in an outlined function.
|
|
|
|
void fixupPostOutline(MachineBasicBlock &MBB) const;
|
|
|
|
|
2016-06-12 23:39:02 +08:00
|
|
|
void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
|
2014-03-29 18:18:08 +08:00
|
|
|
MachineBasicBlock *TBB,
|
2015-06-12 03:30:37 +08:00
|
|
|
ArrayRef<MachineOperand> Cond) const;
|
2016-06-30 08:01:54 +08:00
|
|
|
bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
|
|
|
|
const MachineRegisterInfo *MRI) const;
|
2018-07-31 01:45:28 +08:00
|
|
|
|
|
|
|
/// Returns an unused general-purpose register which can be used for
|
|
|
|
/// constructing an outlined call if one exists. Returns 0 otherwise.
|
|
|
|
unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
|
|
|
|
/// plus Offset. This is intended to be used from within the prolog/epilog
|
|
|
|
/// insertion (PEI) pass, where a virtual scratch register may be allocated
|
|
|
|
/// if necessary, to be replaced by the scavenger at the end of PEI.
|
|
|
|
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
2016-06-12 23:39:02 +08:00
|
|
|
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
|
|
|
|
int Offset, const TargetInstrInfo *TII,
|
2014-03-29 18:18:08 +08:00
|
|
|
MachineInstr::MIFlag = MachineInstr::NoFlags,
|
2019-05-16 05:23:41 +08:00
|
|
|
bool SetNZCV = false, bool NeedsWinCFI = false,
|
|
|
|
bool *HasWinCFI = nullptr);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
|
2014-03-29 18:18:08 +08:00
|
|
|
/// FP. Return false if the offset could not be handled directly in MI, and
|
|
|
|
/// return the left-over portion by reference.
|
2014-05-24 20:50:23 +08:00
|
|
|
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
2017-07-29 10:55:46 +08:00
|
|
|
unsigned FrameReg, int &Offset,
|
|
|
|
const AArch64InstrInfo *TII);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Use to report the frame offset status in isAArch64FrameOffsetLegal.
|
2014-05-24 20:50:23 +08:00
|
|
|
/// Status values describing whether a frame offset can be applied to an
/// instruction. The non-zero values occupy distinct bits (0x1, 0x2) and the
/// documentation of isAArch64FrameOffsetLegal tests them with '&', so a
/// result may carry more than one of them at once.
enum AArch64FrameOffsetStatus {
|
|
|
|
AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
|
|
|
|
AArch64FrameOffsetIsLegal = 0x1, ///< Offset is legal.
|
|
|
|
AArch64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly.
|
2014-03-29 18:18:08 +08:00
|
|
|
};
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Check if the @p Offset is a valid frame offset for @p MI.
|
2014-03-29 18:18:08 +08:00
|
|
|
/// The returned value reports the validity of the frame offset for @p MI.
|
2014-05-24 20:50:23 +08:00
|
|
|
/// It uses the values defined by AArch64FrameOffsetStatus for that.
|
|
|
|
/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to
|
2014-03-29 18:18:08 +08:00
|
|
|
/// use an offset.
|
2014-05-24 20:50:23 +08:00
|
|
|
/// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be
|
2017-07-16 15:48:48 +08:00
|
|
|
/// rewritten in @p MI.
|
2014-05-24 20:50:23 +08:00
|
|
|
/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the
|
2014-03-29 18:18:08 +08:00
|
|
|
/// amount that is off the limit of the legal offset.
|
|
|
|
/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
|
|
|
|
/// turned into an unscaled operator, whose opcode is in @p OutUnscaledOp.
|
|
|
|
/// If set, @p EmittableOffset contains the amount that can be set in @p MI
|
|
|
|
/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
|
|
|
|
/// is a legal offset.
|
2014-05-24 20:50:23 +08:00
|
|
|
int isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
|
2017-07-29 10:55:46 +08:00
|
|
|
bool *OutUseUnscaledOp = nullptr,
|
|
|
|
unsigned *OutUnscaledOp = nullptr,
|
|
|
|
int *EmittableOffset = nullptr);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
/// Return true if \p Opc is the unconditional branch opcode AArch64::B.
static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
static inline bool isCondBranchOpcode(int Opc) {
|
|
|
|
switch (Opc) {
|
2014-05-24 20:50:23 +08:00
|
|
|
case AArch64::Bcc:
|
|
|
|
case AArch64::CBZW:
|
|
|
|
case AArch64::CBZX:
|
|
|
|
case AArch64::CBNZW:
|
|
|
|
case AArch64::CBNZX:
|
|
|
|
case AArch64::TBZW:
|
|
|
|
case AArch64::TBZX:
|
|
|
|
case AArch64::TBNZW:
|
|
|
|
case AArch64::TBNZX:
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-07-29 10:55:46 +08:00
|
|
|
static inline bool isIndirectBranchOpcode(int Opc) {
|
|
|
|
return Opc == AArch64::BR;
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[AArch64][SVE] Asm: Add MOVPRFX instructions.
This patch adds predicated and unpredicated MOVPRFX instructions, which
can be prepended to SVE instructions that are destructive on their first
source operand, to make them a constructive operation, e.g.
add z1.s, p0/m, z1.s, z2.s <=> z1 = z1 + z2
can be made constructive:
movprfx z0, z1
add z0.s, p0/m, z0.s, z2.s <=> z0 = z1 + z2
The predicated MOVPRFX instruction can additionally be used to zero
inactive elements, e.g.
movprfx z0.s, p0/z, z1.s
add z0.s, p0/m, z0.s, z2.s
Not all instructions can be prefixed with the MOVPRFX instruction
which is why this patch also adds a mechanism to validate prefixed
instructions. The exact rules when a MOVPRFX applies is detailed in
the SVE supplement of the Architectural Reference Manual.
This is patch [1/2] in a series to add MOVPRFX instructions:
- Patch [1/2]: https://reviews.llvm.org/D49592
- Patch [2/2]: https://reviews.llvm.org/D49593
Reviewers: rengolin, SjoerdMeijer, samparker, fhahn, javed.absar
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D49592
llvm-svn: 338258
2018-07-30 23:42:46 +08:00
|
|
|
// struct TSFlags {
|
|
|
|
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
|
|
|
|
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 1-bit
|
|
|
|
// }
|
|
|
|
|
|
|
|
namespace AArch64 {
|
|
|
|
|
|
|
|
/// Values of the element-size field, built with TSFLAG_ELEMENT_SIZE_TYPE
/// (annotated as a 3-bit field, unshifted, where that macro is defined).
/// ElementSizeMask (0x7) covers the whole field; the remaining enumerators
/// are the individual field values.
/// NOTE(review): B/H/S/D presumably denote byte/halfword/word/doubleword
/// elements -- confirm against the TableGen definitions.
enum ElementSizeType {
|
|
|
|
ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
|
|
|
|
ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
|
|
|
|
ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
|
|
|
|
ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
|
|
|
|
ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
|
|
|
|
ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Values of the destructive-instruction field, built with
/// TSFLAG_DESTRUCTIVE_INST_TYPE (annotated as a 1-bit field, shifted left by
/// 3, where that macro is defined). DestructiveInstTypeMask selects the
/// field; NotDestructive and Destructive are its two possible values.
enum DestructiveInstType {
|
|
|
|
DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
|
|
|
|
NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
|
|
|
|
Destructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
|
|
|
|
};
|
|
|
|
|
|
|
|
#undef TSFLAG_ELEMENT_SIZE_TYPE
|
|
|
|
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
|
|
|
|
}
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
} // end namespace llvm
|
|
|
|
|
|
|
|
#endif
|