2014-05-24 20:50:23 +08:00
|
|
|
//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
|
2014-03-29 18:18:08 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2014-05-24 20:50:23 +08:00
|
|
|
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
|
2014-03-29 18:18:08 +08:00
|
|
|
// selection DAG.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-08-14 00:26:38 +08:00
|
|
|
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
|
|
|
|
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-07 09:11:31 +08:00
|
|
|
#include "AArch64.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/CodeGen/CallingConvLower.h"
|
|
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
|
|
|
#include "llvm/IR/CallingConv.h"
|
2015-02-24 03:15:16 +08:00
|
|
|
#include "llvm/IR/Instruction.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/Target/TargetLowering.h"
|
|
|
|
|
|
|
|
namespace llvm {
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
namespace AArch64ISD {
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-05-08 05:33:59 +08:00
|
|
|
enum NodeType : unsigned {
|
2014-03-29 18:18:08 +08:00
|
|
|
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
|
|
|
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
|
|
|
|
CALL, // Function call.
|
|
|
|
|
Fix PR22408 - LLVM producing AArch64 TLS relocations that GNU linkers cannot handle yet.
As is described at http://llvm.org/bugs/show_bug.cgi?id=22408, the GNU linkers
ld.bfd and ld.gold currently only support a subset of the whole range of AArch64
ELF TLS relocations. Furthermore, they assume that some of the code sequences to
access thread-local variables are produced in a very specific sequence.
When the sequence is not as the linker expects, it can silently mis-relaxe/mis-optimize
the instructions.
Even if that wouldn't be the case, it's good to produce the exact sequence,
as that ensures that linkers can perform optimizing relaxations.
This patch:
* implements support for 16MiB TLS area size instead of 4GiB TLS area size. Ideally clang
would grow an -mtls-size option to allow support for both, but that's not part of this patch.
* by default doesn't produce local dynamic access patterns, as even modern ld.bfd and ld.gold
linkers do not support the associated relocations. An option (-aarch64-elf-ldtls-generation)
is added to enable generation of local dynamic code sequence, but is off by default.
* makes sure that the exact expected code sequence for local dynamic and general dynamic
accesses is produced, by making use of a new pseudo instruction. The patch also removes
two (AArch64ISD::TLSDESC_BLR, AArch64ISD::TLSDESC_CALL) pre-existing AArch64-specific pseudo
SDNode instructions that are superseded by the new one (TLSDESC_CALLSEQ).
llvm-svn: 231227
2015-03-04 17:12:08 +08:00
|
|
|
// Produces the full sequence of instructions for getting the thread pointer
|
|
|
|
// offset of a variable into X0, using the TLSDesc model.
|
|
|
|
TLSDESC_CALLSEQ,
|
2014-03-29 18:18:08 +08:00
|
|
|
ADRP, // Page address of a TargetGlobalAddress operand.
|
|
|
|
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
|
|
|
|
LOADgot, // Load from automatically generated descriptor (e.g. Global
|
|
|
|
// Offset Table, TLS record).
|
|
|
|
RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
|
|
|
|
BRCOND, // Conditional branch instruction; "b.cond".
|
|
|
|
CSEL,
|
|
|
|
FCSEL, // Conditional move instruction.
|
|
|
|
CSINV, // Conditional select invert.
|
|
|
|
CSNEG, // Conditional select negate.
|
|
|
|
CSINC, // Conditional select increment.
|
|
|
|
|
|
|
|
// Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
|
|
|
|
// ELF.
|
|
|
|
THREAD_POINTER,
|
|
|
|
ADC,
|
|
|
|
SBC, // adc, sbc instructions
|
|
|
|
|
|
|
|
// Arithmetic instructions which write flags.
|
|
|
|
ADDS,
|
|
|
|
SUBS,
|
|
|
|
ADCS,
|
|
|
|
SBCS,
|
|
|
|
ANDS,
|
|
|
|
|
2015-07-17 04:02:37 +08:00
|
|
|
// Conditional compares. Operands: left,right,falsecc,cc,flags
|
|
|
|
CCMP,
|
|
|
|
CCMN,
|
|
|
|
FCCMP,
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Floating point comparison
|
|
|
|
FCMP,
|
|
|
|
|
|
|
|
// Scalar extract
|
|
|
|
EXTR,
|
|
|
|
|
|
|
|
// Scalar-to-vector duplication
|
|
|
|
DUP,
|
|
|
|
DUPLANE8,
|
|
|
|
DUPLANE16,
|
|
|
|
DUPLANE32,
|
|
|
|
DUPLANE64,
|
|
|
|
|
|
|
|
// Vector immedate moves
|
|
|
|
MOVI,
|
|
|
|
MOVIshift,
|
|
|
|
MOVIedit,
|
|
|
|
MOVImsl,
|
|
|
|
FMOV,
|
|
|
|
MVNIshift,
|
|
|
|
MVNImsl,
|
|
|
|
|
|
|
|
// Vector immediate ops
|
|
|
|
BICi,
|
|
|
|
ORRi,
|
|
|
|
|
2014-04-18 17:31:01 +08:00
|
|
|
// Vector bit select: similar to ISD::VSELECT but not all bits within an
|
|
|
|
// element must be identical.
|
|
|
|
BSL,
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Vector arithmetic negation
|
|
|
|
NEG,
|
|
|
|
|
|
|
|
// Vector shuffles
|
|
|
|
ZIP1,
|
|
|
|
ZIP2,
|
|
|
|
UZP1,
|
|
|
|
UZP2,
|
|
|
|
TRN1,
|
|
|
|
TRN2,
|
|
|
|
REV16,
|
|
|
|
REV32,
|
|
|
|
REV64,
|
|
|
|
EXT,
|
|
|
|
|
|
|
|
// Vector shift by scalar
|
|
|
|
VSHL,
|
|
|
|
VLSHR,
|
|
|
|
VASHR,
|
|
|
|
|
|
|
|
// Vector shift by scalar (again)
|
|
|
|
SQSHL_I,
|
|
|
|
UQSHL_I,
|
|
|
|
SQSHLU_I,
|
|
|
|
SRSHR_I,
|
|
|
|
URSHR_I,
|
|
|
|
|
|
|
|
// Vector comparisons
|
|
|
|
CMEQ,
|
|
|
|
CMGE,
|
|
|
|
CMGT,
|
|
|
|
CMHI,
|
|
|
|
CMHS,
|
|
|
|
FCMEQ,
|
|
|
|
FCMGE,
|
|
|
|
FCMGT,
|
|
|
|
|
|
|
|
// Vector zero comparisons
|
|
|
|
CMEQz,
|
|
|
|
CMGEz,
|
|
|
|
CMGTz,
|
|
|
|
CMLEz,
|
|
|
|
CMLTz,
|
|
|
|
FCMEQz,
|
|
|
|
FCMGEz,
|
|
|
|
FCMGTz,
|
|
|
|
FCMLEz,
|
|
|
|
FCMLTz,
|
|
|
|
|
[AArch64] Avoid going through GPRs for across-vector instructions.
This adds new node types for each intrinsic.
For instance, for addv, we have AArch64ISD::UADDV, such that:
(v4i32 (uaddv ...))
is the same as
(v4i32 (scalar_to_vector (i32 (int_aarch64_neon_uaddv ...))))
that is,
(v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(i32 (int_aarch64_neon_uaddv ...)), ssub)
In a combine, we transform all such across-vector-lanes intrinsics to:
(i32 (extract_vector_elt (uaddv ...), 0))
This has one big advantage: by making the extract_element explicit, we
enable the existing patterns for lane-aware instructions to fire.
This lets us avoid needlessly going through the GPRs. Consider:
uint32x4_t test_mul(uint32x4_t a, uint32x4_t b) {
return vmulq_n_u32(a, vaddvq_u32(b));
}
We now generate:
addv.4s s1, v1
mul.4s v0, v0, v1[0]
instead of the previous:
addv.4s s1, v1
fmov w8, s1
dup.4s v1, w8
mul.4s v0, v1, v0
rdar://20044838
llvm-svn: 231840
2015-03-11 04:45:38 +08:00
|
|
|
// Vector across-lanes addition
|
|
|
|
// Only the lower result lane is defined.
|
|
|
|
SADDV,
|
|
|
|
UADDV,
|
|
|
|
|
|
|
|
// Vector across-lanes min/max
|
|
|
|
// Only the lower result lane is defined.
|
|
|
|
SMINV,
|
|
|
|
UMINV,
|
|
|
|
SMAXV,
|
|
|
|
UMAXV,
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Vector bitwise negation
|
|
|
|
NOT,
|
|
|
|
|
|
|
|
// Vector bitwise selection
|
|
|
|
BIT,
|
|
|
|
|
|
|
|
// Compare-and-branch
|
|
|
|
CBZ,
|
|
|
|
CBNZ,
|
|
|
|
TBZ,
|
|
|
|
TBNZ,
|
|
|
|
|
|
|
|
// Tail calls
|
|
|
|
TC_RETURN,
|
|
|
|
|
|
|
|
// Custom prefetch handling
|
|
|
|
PREFETCH,
|
|
|
|
|
|
|
|
// {s|u}int to FP within a FP register.
|
|
|
|
SITOF,
|
2014-05-08 15:38:13 +08:00
|
|
|
UITOF,
|
|
|
|
|
2014-09-04 17:46:14 +08:00
|
|
|
/// Natural vector cast. ISD::BITCAST is not natural in the big-endian
|
|
|
|
/// world w.r.t vectors; which causes additional REV instructions to be
|
|
|
|
/// generated to compensate for the byte-swapping. But sometimes we do
|
|
|
|
/// need to re-interpret the data in SIMD vector registers in big-endian
|
|
|
|
/// mode without emitting such REV instructions.
|
|
|
|
NVCAST,
|
|
|
|
|
2014-10-08 10:31:24 +08:00
|
|
|
SMULL,
|
|
|
|
UMULL,
|
|
|
|
|
2016-11-15 07:29:01 +08:00
|
|
|
// Reciprocal estimates and steps.
|
|
|
|
FRECPE, FRECPS,
|
|
|
|
FRSQRTE, FRSQRTS,
|
2016-10-25 00:14:58 +08:00
|
|
|
|
2014-05-08 15:38:13 +08:00
|
|
|
// NEON Load/Store with post-increment base updates
|
|
|
|
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
|
|
|
LD3post,
|
|
|
|
LD4post,
|
|
|
|
ST2post,
|
|
|
|
ST3post,
|
|
|
|
ST4post,
|
|
|
|
LD1x2post,
|
|
|
|
LD1x3post,
|
|
|
|
LD1x4post,
|
|
|
|
ST1x2post,
|
|
|
|
ST1x3post,
|
|
|
|
ST1x4post,
|
2014-05-16 17:39:02 +08:00
|
|
|
LD1DUPpost,
|
2014-05-08 15:38:13 +08:00
|
|
|
LD2DUPpost,
|
|
|
|
LD3DUPpost,
|
|
|
|
LD4DUPpost,
|
2014-05-16 17:39:02 +08:00
|
|
|
LD1LANEpost,
|
2014-05-08 15:38:13 +08:00
|
|
|
LD2LANEpost,
|
|
|
|
LD3LANEpost,
|
|
|
|
LD4LANEpost,
|
|
|
|
ST2LANEpost,
|
|
|
|
ST3LANEpost,
|
|
|
|
ST4LANEpost
|
2014-03-29 18:18:08 +08:00
|
|
|
};
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
} // end namespace AArch64ISD
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Any instruction that defines a 32-bit result zeros out the high half of the
|
|
|
|
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
|
|
|
|
// be copying from a truncate. But any other 32-bit operation will zero-extend
|
|
|
|
// up to 64 bits.
|
|
|
|
// FIXME: X86 also checks for CMOV here. Do we need something similar?
|
|
|
|
static inline bool isDef32(const SDNode &N) {
|
|
|
|
unsigned Opc = N.getOpcode();
|
|
|
|
return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
|
|
|
|
Opc != ISD::CopyFromReg;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
class AArch64Subtarget;
|
|
|
|
class AArch64TargetMachine;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
class AArch64TargetLowering : public TargetLowering {
|
2014-03-29 18:18:08 +08:00
|
|
|
public:
|
2015-01-29 08:19:42 +08:00
|
|
|
explicit AArch64TargetLowering(const TargetMachine &TM,
|
|
|
|
const AArch64Subtarget &STI);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-08-30 05:53:01 +08:00
|
|
|
/// Selects the correct CCAssignFn for a given CallingConvention value.
|
2014-03-29 18:18:08 +08:00
|
|
|
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
|
|
|
|
|
2016-08-11 05:44:01 +08:00
|
|
|
/// Selects the correct CCAssignFn for a given CallingConvention value.
|
|
|
|
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
|
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Determine which of the bits specified in Mask are known to be either zero
|
|
|
|
/// or one and return them in the KnownZero/KnownOne bitsets.
|
2014-05-15 05:14:37 +08:00
|
|
|
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
|
2017-03-31 19:24:16 +08:00
|
|
|
APInt &KnownOne, const APInt &DemandedElts,
|
|
|
|
const SelectionDAG &DAG,
|
2014-05-15 05:14:37 +08:00
|
|
|
unsigned Depth = 0) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-07-09 23:12:23 +08:00
|
|
|
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Returns true if the target allows unaligned memory accesses of the
|
|
|
|
/// specified type.
|
2014-07-28 01:46:40 +08:00
|
|
|
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
|
|
|
|
unsigned Align = 1,
|
2015-07-29 22:17:26 +08:00
|
|
|
bool *Fast = nullptr) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Provide custom lowering hooks for some operations.
|
2014-03-30 15:25:18 +08:00
|
|
|
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
const char *getTargetNodeName(unsigned Opcode) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
/// Returns true if a cast between SrcAS and DestAS is a noop.
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
|
2014-03-29 18:18:08 +08:00
|
|
|
// Addrspacecasts are always noops.
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// This method returns a target specific FastISel object, or null if the
|
|
|
|
/// target does not support "fast" ISel.
|
2014-03-30 15:25:18 +08:00
|
|
|
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
|
|
|
|
const TargetLibraryInfo *libInfo) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Return true if the given shuffle mask can be codegen'd directly, or if it
|
|
|
|
/// should be stack expanded.
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Return the ISD::SETCC ValueType.
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
|
|
|
|
EVT VT) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
|
2016-07-01 06:52:52 +08:00
|
|
|
MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
|
2014-03-29 18:18:08 +08:00
|
|
|
MachineBasicBlock *BB) const;
|
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
MachineBasicBlock *
|
2016-07-01 06:52:52 +08:00
|
|
|
EmitInstrWithCustomInserter(MachineInstr &MI,
|
2014-03-30 15:25:18 +08:00
|
|
|
MachineBasicBlock *MBB) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
|
|
|
|
unsigned Intrinsic) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
|
|
|
|
bool isTruncateFree(EVT VT1, EVT VT2) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-02-24 03:15:16 +08:00
|
|
|
bool isProfitableToHoist(Instruction *I) const override;
|
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isZExtFree(Type *Ty1, Type *Ty2) const override;
|
|
|
|
bool isZExtFree(EVT VT1, EVT VT2) const override;
|
|
|
|
bool isZExtFree(SDValue Val, EVT VT2) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[AArch64] Lower interleaved memory accesses to ldN/stN intrinsics. This patch also adds a function to calculate the cost of interleaved memory accesses.
E.g. Lower an interleaved load:
%wide.vec = load <8 x i32>, <8 x i32>* %ptr
%v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>
%v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>
into:
%ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
%vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
%vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
E.g. Lower an interleaved store:
%i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
store <12 x i32> %i.vec, <12 x i32>* %ptr
into:
%sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
%sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
%sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
Differential Revision: http://reviews.llvm.org/D10533
llvm-svn: 240754
2015-06-26 10:32:07 +08:00
|
|
|
/// Largest interleave factor this target reports as supported for interleaved
/// memory accesses. The value is the constant 4.
unsigned getMaxSupportedInterleaveFactor() const override {
  return 4;
}
|
|
|
|
|
|
|
|
bool lowerInterleavedLoad(LoadInst *LI,
|
|
|
|
ArrayRef<ShuffleVectorInst *> Shuffles,
|
|
|
|
ArrayRef<unsigned> Indices,
|
|
|
|
unsigned Factor) const override;
|
|
|
|
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
|
|
|
|
unsigned Factor) const override;
|
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isLegalAddImmediate(int64_t) const override;
|
|
|
|
bool isLegalICmpImmediate(int64_t) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
|
|
|
|
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
|
|
|
|
MachineFunction &MF) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Return true if the addressing mode represented by AM is legal for this
|
|
|
|
/// target, for a load/store of the specified type.
|
2015-07-09 10:09:40 +08:00
|
|
|
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
|
2015-06-01 13:31:59 +08:00
|
|
|
unsigned AS) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
/// \brief Return the cost of the scaling factor used in the addressing
|
|
|
|
/// mode represented by AM for this target, for a load/store
|
|
|
|
/// of the specified type.
|
|
|
|
/// If the AM is supported, the return value must be >= 0.
|
|
|
|
/// If the AM is not supported, it returns a negative value.
|
2015-07-09 10:09:40 +08:00
|
|
|
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
|
2015-06-01 13:31:59 +08:00
|
|
|
unsigned AS) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Return true if an FMA operation is faster than a pair of fmul and fadd
|
|
|
|
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
|
|
|
|
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-04-04 13:16:06 +08:00
|
|
|
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[ARM64] Prevent bit extraction to be adjusted by following shift
For pattern like ((x >> C1) & Mask) << C2, DAG combiner may convert it
into (x >> (C1-C2)) & (Mask << C2), which makes pattern matching of ubfx
more difficult.
For example:
Given
%shr = lshr i64 %x, 4
%and = and i64 %shr, 15
%arrayidx = getelementptr inbounds [8 x [64 x i64]]* @arr, i64 0, %i64 2, i64 %and
%0 = load i64* %arrayidx
With current shift folding, it takes 3 instrs to compute base address:
lsr x8, x0, #1
and x8, x8, #0x78
add x8, x9, x8
If using ubfx, it only needs 2 instrs:
ubfx x8, x0, #4, #4
add x8, x9, x8, lsl #3
This fixes bug 19589
llvm-svn: 207702
2014-05-01 05:07:24 +08:00
|
|
|
/// \brief Returns false if N is a bit extraction pattern of (X >> C) & Mask.
|
|
|
|
bool isDesirableToCommuteWithShift(const SDNode *N) const override;
|
|
|
|
|
2014-04-18 04:00:33 +08:00
|
|
|
/// \brief Returns true if it is beneficial to convert a load of a constant
|
|
|
|
/// to just the constant itself.
|
2014-03-30 15:25:18 +08:00
|
|
|
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
|
|
|
|
Type *Ty) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-04-18 04:00:33 +08:00
|
|
|
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
|
|
|
|
AtomicOrdering Ord) const override;
|
|
|
|
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
|
|
|
|
Value *Addr, AtomicOrdering Ord) const override;
|
|
|
|
|
2015-09-23 01:21:44 +08:00
|
|
|
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
|
|
|
|
|
2015-09-12 01:08:28 +08:00
|
|
|
TargetLoweringBase::AtomicExpansionKind
|
|
|
|
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
|
2014-09-04 05:29:59 +08:00
|
|
|
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
|
2015-09-12 01:08:17 +08:00
|
|
|
TargetLoweringBase::AtomicExpansionKind
|
Mutate TargetLowering::shouldExpandAtomicRMWInIR to specifically dictate how AtomicRMWInsts are expanded.
Summary:
In PNaCl, most atomic instructions have their own @llvm.nacl.atomic.* function, each one, with a few exceptions, represents a consistent behaviour across all NaCl-supported targets. Unfortunately, the atomic RMW operations nand, [u]min, and [u]max aren't directly represented by any such @llvm.nacl.atomic.* function. This patch refines shouldExpandAtomicRMWInIR in TargetLowering so that a future `Le32TargetLowering` class can selectively inform the caller how the target desires the atomic RMW instruction to be expanded (ie via load-linked/store-conditional for ARM/AArch64, via cmpxchg for X86/others?, or not at all for Mips) if at all.
This does not represent a behavioural change and as such no tests were added.
Patch by: Richard Diamond.
Reviewers: jfb
Reviewed By: jfb
Subscribers: jfb, aemerson, t.p.northover, llvm-commits
Differential Revision: http://reviews.llvm.org/D7713
llvm-svn: 231250
2015-03-04 23:47:57 +08:00
|
|
|
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
|
2014-04-18 04:00:33 +08:00
|
|
|
|
2015-09-12 01:08:28 +08:00
|
|
|
bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
|
|
|
|
|
2014-07-26 03:31:34 +08:00
|
|
|
bool useLoadStackGuardNode() const override;
|
2014-07-03 08:23:43 +08:00
|
|
|
TargetLoweringBase::LegalizeTypeAction
|
|
|
|
getPreferredVectorAction(EVT VT) const override;
|
|
|
|
|
2016-04-06 06:41:50 +08:00
|
|
|
/// If the target has a standard location for the stack protector cookie,
|
|
|
|
/// returns the address of that location. Otherwise, returns nullptr.
|
2016-04-09 05:26:31 +08:00
|
|
|
Value *getIRStackGuard(IRBuilder<> &IRB) const override;
|
2016-04-06 06:41:50 +08:00
|
|
|
|
2015-10-27 02:28:25 +08:00
|
|
|
/// If the target has a standard location for the unsafe stack pointer,
|
|
|
|
/// returns the address of that location. Otherwise, returns nullptr.
|
|
|
|
Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
|
|
|
|
|
2015-11-07 09:11:31 +08:00
|
|
|
/// If a physical register, this returns the register that receives the
|
|
|
|
/// exception address on entry to an EH pad.
|
|
|
|
unsigned
|
|
|
|
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
|
|
|
|
// FIXME: This is a guess. Has this been defined yet?
|
|
|
|
return AArch64::X0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// If a physical register, this returns the register that receives the
|
|
|
|
/// exception typeid on entry to a landing pad.
|
|
|
|
unsigned
|
|
|
|
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
|
|
|
|
// FIXME: This is a guess. Has this been defined yet?
|
|
|
|
return AArch64::X1;
|
|
|
|
}
|
|
|
|
|
Rename AttributeSet to AttributeList
Summary:
This class is a list of AttributeSetNodes corresponding to the function
prototype of a call or function declaration. This class used to be
called ParamAttrListPtr, then AttrListPtr, then AttributeSet. It is
typically accessed by parameter and return value index, so
"AttributeList" seems like a more intuitive name.
Rename AttributeSetImpl to AttributeListImpl to follow suit.
It's useful to rename this class so that we can rename AttributeSetNode
to AttributeSet later. AttributeSet is the set of attributes that apply
to a single function, argument, or return value.
Reviewers: sanjoy, javed.absar, chandlerc, pete
Reviewed By: pete
Subscribers: pete, jholewinski, arsenm, dschuff, mehdi_amini, jfb, nhaehnle, sbc100, void, llvm-commits
Differential Revision: https://reviews.llvm.org/D31102
llvm-svn: 298393
2017-03-22 00:57:19 +08:00
|
|
|
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
|
2016-03-29 02:17:07 +08:00
|
|
|
|
[AArch64] add overrides for isCheapToSpeculateCttz() and isCheapToSpeculateCtlz()
AArch64 has instructions for efficient count-leading/trailing-zeros, so this should be
considered a cheap operation (and therefore fair game for speculation) for any AArch64
implementation.
The net result of allowing this speculation for the regression tests in this
patch is that we get this code:
ctlz:
clz w0, w0
ret
cttz:
rbit w8, w0
clz w0, w8
ret
Instead of:
ctlz:
cbz w0, .LBB0_2
clz w0, w0
ret
.LBB0_2:
orr w0, wzr, #0x20
ret
cttz:
cbz w0, .LBB1_2
rbit w8, w0
clz w0, w8
ret
.LBB1_2:
orr w0, wzr, #0x20
ret
See D14469 for the larger motivation.
Differential Revision: http://reviews.llvm.org/D14505
llvm-svn: 252625
2015-11-11 02:11:37 +08:00
|
|
|
/// Count-trailing-zeros is always reported as cheap enough to speculate on
/// this target.
bool isCheapToSpeculateCttz() const override { return true; }
|
|
|
|
|
|
|
|
/// Count-leading-zeros is always reported as cheap enough to speculate on
/// this target.
bool isCheapToSpeculateCtlz() const override { return true; }
|
2016-06-03 04:01:37 +08:00
|
|
|
|
2017-02-22 02:53:14 +08:00
|
|
|
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
|
|
|
|
|
2016-11-30 06:28:58 +08:00
|
|
|
/// Reports that the target has an and-not-then-compare operation ('bics').
/// The operand is ignored; the answer is unconditionally true.
bool hasAndNotCompare(SDValue) const override { return true; }
|
|
|
|
|
2016-06-03 04:01:37 +08:00
|
|
|
/// Returns true only when \p VT is exactly f32 or f64.
bool hasBitPreservingFPLogic(EVT VT) const override {
  // FIXME: Is this always true? It should be true for vectors at least.
  if (VT == MVT::f32)
    return true;
  return VT == MVT::f64;
}
|
|
|
|
|
2015-12-17 05:04:19 +08:00
|
|
|
/// Split callee-saved-register handling is enabled only for functions that
/// both use the CXX_FAST_TLS calling convention and carry the NoUnwind
/// function attribute.
bool supportSplitCSR(MachineFunction *MF) const override {
  const auto *Fn = MF->getFunction();
  if (Fn->getCallingConv() != CallingConv::CXX_FAST_TLS)
    return false;
  return Fn->hasFnAttribute(Attribute::NoUnwind);
}
|
|
|
|
void initializeSplitCSR(MachineBasicBlock *Entry) const override;
|
|
|
|
void insertCopiesSplitCSR(
|
|
|
|
MachineBasicBlock *Entry,
|
|
|
|
const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
|
[AArch64] add overrides for isCheapToSpeculateCttz() and isCheapToSpeculateCtlz()
AArch64 has instructions for efficient count-leading/trailing-zeros, so this should be
considered a cheap operation (and therefore fair game for speculation) for any AArch64
implementation.
The net result of allowing this speculation for the regression tests in this
patch is that we get this code:
ctlz:
clz w0, w0
ret
cttz:
rbit w8, w0
clz w0, w8
ret
Instead of:
ctlz:
cbz w0, .LBB0_2
clz w0, w0
ret
.LBB0_2:
orr w0, wzr, #0x20
ret
cttz:
cbz w0, .LBB1_2
rbit w8, w0
clz w0, w8
ret
.LBB1_2:
orr w0, wzr, #0x20
ret
See D14469 for the larger motivation.
Differential Revision: http://reviews.llvm.org/D14505
llvm-svn: 252625
2015-11-11 02:11:37 +08:00
|
|
|
|
2016-04-12 05:08:06 +08:00
|
|
|
/// Reports that this target supports swifterror; unconditionally true.
bool supportSwiftError() const override { return true; }
|
|
|
|
|
2017-02-09 01:57:20 +08:00
|
|
|
/// Returns the size of the platform's va_list object.
|
|
|
|
unsigned getVaListSizeInBits(const DataLayout &DL) const override;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
private:
|
2015-04-01 04:52:32 +08:00
|
|
|
bool isExtFreeImpl(const Instruction *Ext) const override;
|
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Keep a pointer to the AArch64Subtarget around so that we can
|
2014-03-29 18:18:08 +08:00
|
|
|
/// make the right decision when generating code for different targets.
|
2014-05-24 20:50:23 +08:00
|
|
|
const AArch64Subtarget *Subtarget;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2016-04-15 14:20:21 +08:00
|
|
|
void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
|
2014-03-29 18:18:08 +08:00
|
|
|
void addDRTypeForNEON(MVT VT);
|
|
|
|
void addQRTypeForNEON(MVT VT);
|
|
|
|
|
2016-06-12 23:39:02 +08:00
|
|
|
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
|
|
|
|
bool isVarArg,
|
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
|
|
const SDLoc &DL, SelectionDAG &DAG,
|
|
|
|
SmallVectorImpl<SDValue> &InVals) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
SDValue LowerCall(CallLoweringInfo & /*CLI*/,
|
|
|
|
SmallVectorImpl<SDValue> &InVals) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
|
|
|
|
CallingConv::ID CallConv, bool isVarArg,
|
2016-06-12 23:39:02 +08:00
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
|
|
const SDLoc &DL, SelectionDAG &DAG,
|
|
|
|
SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
|
|
|
|
SDValue ThisVal) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-07-28 21:03:31 +08:00
|
|
|
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
bool isEligibleForTailCallOptimization(
|
|
|
|
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
|
|
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
|
|
const SmallVectorImpl<SDValue> &OutVals,
|
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
|
|
|
|
|
2014-05-15 09:33:17 +08:00
|
|
|
/// Finds the incoming stack arguments which overlap the given fixed stack
|
|
|
|
/// object and incorporates their load into the current chain. This prevents
|
|
|
|
/// an upcoming store from clobbering the stack argument before it's used.
|
|
|
|
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
|
2016-07-29 02:40:00 +08:00
|
|
|
MachineFrameInfo &MFI, int ClobberedFI) const;
|
2014-05-15 09:33:17 +08:00
|
|
|
|
|
|
|
bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
|
|
|
|
|
2016-06-12 23:39:02 +08:00
|
|
|
void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
|
2014-03-29 18:18:08 +08:00
|
|
|
SDValue &Chain) const;
|
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
|
|
|
|
bool isVarArg,
|
|
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
|
|
LLVMContext &Context) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
|
|
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
2016-06-12 23:39:02 +08:00
|
|
|
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
|
2014-03-30 15:25:18 +08:00
|
|
|
SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
2016-06-12 23:39:02 +08:00
|
|
|
SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
|
Fix PR22408 - LLVM producing AArch64 TLS relocations that GNU linkers cannot handle yet.
As is described at http://llvm.org/bugs/show_bug.cgi?id=22408, the GNU linkers
ld.bfd and ld.gold currently only support a subset of the whole range of AArch64
ELF TLS relocations. Furthermore, they assume that some of the code sequences to
access thread-local variables are produced in a very specific sequence.
When the sequence is not as the linker expects, it can silently mis-relax/mis-optimize
the instructions.
Even if that wouldn't be the case, it's good to produce the exact sequence,
as that ensures that linkers can perform optimizing relaxations.
This patch:
* implements support for 16MiB TLS area size instead of 4GiB TLS area size. Ideally clang
would grow an -mtls-size option to allow support for both, but that's not part of this patch.
* by default doesn't produce local dynamic access patterns, as even modern ld.bfd and ld.gold
linkers do not support the associated relocations. An option (-aarch64-elf-ldtls-generation)
is added to enable generation of local dynamic code sequence, but is off by default.
* makes sure that the exact expected code sequence for local dynamic and general dynamic
accesses is produced, by making use of a new pseudo instruction. The patch also removes
two (AArch64ISD::TLSDESC_BLR, AArch64ISD::TLSDESC_CALL) pre-existing AArch64-specific pseudo
SDNode instructions that are superseded by the new one (TLSDESC_CALLSEQ).
llvm-svn: 231227
2015-03-04 17:12:08 +08:00
|
|
|
SelectionDAG &DAG) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
2015-04-08 01:33:05 +08:00
|
|
|
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
|
2016-06-12 23:39:02 +08:00
|
|
|
SDValue TVal, SDValue FVal, const SDLoc &dl,
|
2015-04-08 01:33:05 +08:00
|
|
|
SelectionDAG &DAG) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
|
|
|
|
RTLIB::Libcall Call) const;
|
|
|
|
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
|
[AArch64] Lower sdiv x, pow2 using add + select + shift.
The target-independent DAGcombiner will generate:
asr w1, X, #31 w1 = splat sign bit.
add X, X, w1, lsr #28 X = X + 0 or pow2-1
asr w0, X, #4 w0 = X/pow2
However, the add + shifts is expensive, so generate:
add w0, X, 15 w0 = X + pow2-1
cmp X, wzr X - 0
csel X, w0, X, lt X = (X < 0) ? X + pow2-1 : X;
asr w0, X, #4 w0 = X/pow2
llvm-svn: 213758
2014-07-23 22:57:52 +08:00
|
|
|
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
|
2014-09-03 19:41:21 +08:00
|
|
|
std::vector<SDNode *> *Created) const override;
|
2016-11-11 07:31:06 +08:00
|
|
|
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
|
|
|
|
int &ExtraSteps, bool &UseOneConst,
|
|
|
|
bool Reciprocal) const override;
|
2016-10-25 00:14:58 +08:00
|
|
|
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
|
|
|
|
int &ExtraSteps) const override;
|
2015-07-29 07:05:48 +08:00
|
|
|
unsigned combineRepeatedFPDivisors() const override;
|
[AArch64] Lower sdiv x, pow2 using add + select + shift.
The target-independent DAGcombiner will generate:
asr w1, X, #31 w1 = splat sign bit.
add X, X, w1, lsr #28 X = X + 0 or pow2-1
asr w0, X, #4 w0 = X/pow2
However, the add + shifts is expensive, so generate:
add w0, X, 15 w0 = X + pow2-1
cmp X, wzr X - 0
csel X, w0, X, lt X = (X < 0) ? X + pow2-1 : X;
asr w0, X, #4 w0 = X/pow2
llvm-svn: 213758
2014-07-23 22:57:52 +08:00
|
|
|
|
2015-07-06 03:29:18 +08:00
|
|
|
ConstraintType getConstraintType(StringRef Constraint) const override;
|
2015-07-10 01:40:29 +08:00
|
|
|
unsigned getRegisterByName(const char* RegName, EVT VT,
|
|
|
|
SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
/// Examine constraint string and operand type and determine a weight value.
|
|
|
|
/// The operand object must already have been set up with the operand type.
|
2014-04-29 15:58:25 +08:00
|
|
|
ConstraintWeight
|
|
|
|
getSingleConstraintMatchWeight(AsmOperandInfo &info,
|
|
|
|
const char *constraint) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
std::pair<unsigned, const TargetRegisterClass *>
|
2015-02-27 06:38:43 +08:00
|
|
|
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
2015-07-06 03:29:18 +08:00
|
|
|
StringRef Constraint, MVT VT) const override;
|
2016-05-09 19:10:44 +08:00
|
|
|
|
|
|
|
const char *LowerXConstraint(EVT ConstraintVT) const override;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
|
|
|
|
std::vector<SDValue> &Ops,
|
2014-04-29 15:58:25 +08:00
|
|
|
SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-07-06 03:29:18 +08:00
|
|
|
/// Translate an inline-asm memory constraint code into the unsigned
/// constraint identifier returned to the caller.
///
/// \param ConstraintCode the constraint string to classify.
/// \returns InlineAsm::Constraint_Q when the code is exactly "Q"; otherwise
/// whatever TargetLowering::getInlineAsmMemConstraint returns for the same
/// code.
unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
|
2015-03-23 19:33:15 +08:00
|
|
|
// "Q" is the only code this override answers itself.
if (ConstraintCode == "Q")
|
|
|
|
return InlineAsm::Constraint_Q;
|
|
|
|
// FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
|
|
|
|
// followed by llvm_unreachable so we'll leave them unimplemented in
|
|
|
|
// the backend for now.
|
|
|
|
// Every other code is delegated unchanged to the base-class implementation.
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
|
2015-03-16 21:13:41 +08:00
|
|
|
}
|
|
|
|
|
2014-04-29 15:58:25 +08:00
|
|
|
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
|
|
|
|
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
|
|
|
|
ISD::MemIndexedMode &AM, bool &IsInc,
|
|
|
|
SelectionDAG &DAG) const;
|
|
|
|
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
|
|
|
|
ISD::MemIndexedMode &AM,
|
2014-04-29 15:58:25 +08:00
|
|
|
SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
|
|
|
|
SDValue &Offset, ISD::MemIndexedMode &AM,
|
2014-04-29 15:58:25 +08:00
|
|
|
SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
|
2014-04-29 15:58:25 +08:00
|
|
|
SelectionDAG &DAG) const override;
|
2014-11-28 05:02:42 +08:00
|
|
|
|
|
|
|
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
|
|
|
|
CallingConv::ID CallConv,
|
2014-11-28 11:58:26 +08:00
|
|
|
bool isVarArg) const override;
|
2015-07-17 04:02:37 +08:00
|
|
|
|
|
|
|
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
};
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
namespace AArch64 {
|
2014-03-29 18:18:08 +08:00
|
|
|
/// Declaration of the free function that hands back a FastISel pointer.
///
/// \param funcInfo the FunctionLoweringInfo passed by reference.
/// \param libInfo pointer to a TargetLibraryInfo that the callee may not
/// modify (it is const-qualified).
/// \returns a pointer to a FastISel object. Only the declaration is visible
/// in this section; ownership of the returned pointer is not stated here.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
|
|
|
|
const TargetLibraryInfo *libInfo);
|
2014-05-24 20:50:23 +08:00
|
|
|
} // end namespace AArch64
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
} // end namespace llvm
|
|
|
|
|
2014-08-14 00:26:38 +08:00
|
|
|
#endif
|