2014-05-24 20:50:23 +08:00
|
|
|
//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
|
2014-03-29 18:18:08 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2014-03-29 18:18:08 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2014-05-24 20:50:23 +08:00
|
|
|
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
|
2014-03-29 18:18:08 +08:00
|
|
|
// selection DAG.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-08-14 00:26:38 +08:00
|
|
|
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
|
|
|
|
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-07 09:11:31 +08:00
|
|
|
#include "AArch64.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/CodeGen/CallingConvLower.h"
|
|
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
2017-11-17 09:07:10 +08:00
|
|
|
#include "llvm/CodeGen/TargetLowering.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/IR/CallingConv.h"
|
2015-02-24 03:15:16 +08:00
|
|
|
#include "llvm/IR/Instruction.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
namespace llvm {
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
namespace AArch64ISD {
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-05-08 05:33:59 +08:00
|
|
|
enum NodeType : unsigned {
|
2014-03-29 18:18:08 +08:00
|
|
|
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
|
|
|
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
|
|
|
|
CALL, // Function call.
|
|
|
|
|
Fix PR22408 - LLVM producing AArch64 TLS relocations that GNU linkers cannot handle yet.
As is described at http://llvm.org/bugs/show_bug.cgi?id=22408, the GNU linkers
ld.bfd and ld.gold currently only support a subset of the whole range of AArch64
ELF TLS relocations. Furthermore, they assume that some of the code sequences to
access thread-local variables are produced in a very specific sequence.
When the sequence is not as the linker expects, it can silently mis-relaxe/mis-optimize
the instructions.
Even if that wouldn't be the case, it's good to produce the exact sequence,
as that ensures that linkers can perform optimizing relaxations.
This patch:
* implements support for 16MiB TLS area size instead of 4GiB TLS area size. Ideally clang
would grow an -mtls-size option to allow support for both, but that's not part of this patch.
* by default doesn't produce local dynamic access patterns, as even modern ld.bfd and ld.gold
linkers do not support the associated relocations. An option (-aarch64-elf-ldtls-generation)
is added to enable generation of local dynamic code sequence, but is off by default.
* makes sure that the exact expected code sequence for local dynamic and general dynamic
accesses is produced, by making use of a new pseudo instruction. The patch also removes
two (AArch64ISD::TLSDESC_BLR, AArch64ISD::TLSDESC_CALL) pre-existing AArch64-specific pseudo
SDNode instructions that are superseded by the new one (TLSDESC_CALLSEQ).
llvm-svn: 231227
2015-03-04 17:12:08 +08:00
|
|
|
// Produces the full sequence of instructions for getting the thread pointer
|
|
|
|
// offset of a variable into X0, using the TLSDesc model.
|
|
|
|
TLSDESC_CALLSEQ,
|
2014-03-29 18:18:08 +08:00
|
|
|
ADRP, // Page address of a TargetGlobalAddress operand.
|
2018-08-22 19:31:39 +08:00
|
|
|
ADR, // ADR
|
2014-03-29 18:18:08 +08:00
|
|
|
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
|
|
|
|
LOADgot, // Load from automatically generated descriptor (e.g. Global
|
|
|
|
// Offset Table, TLS record).
|
|
|
|
RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
|
|
|
|
BRCOND, // Conditional branch instruction; "b.cond".
|
|
|
|
CSEL,
|
|
|
|
FCSEL, // Conditional move instruction.
|
|
|
|
CSINV, // Conditional select invert.
|
|
|
|
CSNEG, // Conditional select negate.
|
|
|
|
CSINC, // Conditional select increment.
|
|
|
|
|
|
|
|
// Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
|
|
|
|
// ELF.
|
|
|
|
THREAD_POINTER,
|
|
|
|
ADC,
|
|
|
|
SBC, // adc, sbc instructions
|
|
|
|
|
|
|
|
// Arithmetic instructions which write flags.
|
|
|
|
ADDS,
|
|
|
|
SUBS,
|
|
|
|
ADCS,
|
|
|
|
SBCS,
|
|
|
|
ANDS,
|
|
|
|
|
2015-07-17 04:02:37 +08:00
|
|
|
// Conditional compares. Operands: left,right,falsecc,cc,flags
|
|
|
|
CCMP,
|
|
|
|
CCMN,
|
|
|
|
FCCMP,
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Floating point comparison
|
|
|
|
FCMP,
|
|
|
|
|
|
|
|
// Scalar extract
|
|
|
|
EXTR,
|
|
|
|
|
|
|
|
// Scalar-to-vector duplication
|
|
|
|
DUP,
|
|
|
|
DUPLANE8,
|
|
|
|
DUPLANE16,
|
|
|
|
DUPLANE32,
|
|
|
|
DUPLANE64,
|
|
|
|
|
|
|
|
// Vector immedate moves
|
|
|
|
MOVI,
|
|
|
|
MOVIshift,
|
|
|
|
MOVIedit,
|
|
|
|
MOVImsl,
|
|
|
|
FMOV,
|
|
|
|
MVNIshift,
|
|
|
|
MVNImsl,
|
|
|
|
|
|
|
|
// Vector immediate ops
|
|
|
|
BICi,
|
|
|
|
ORRi,
|
|
|
|
|
2014-04-18 17:31:01 +08:00
|
|
|
// Vector bit select: similar to ISD::VSELECT but not all bits within an
|
|
|
|
// element must be identical.
|
|
|
|
BSL,
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Vector arithmetic negation
|
|
|
|
NEG,
|
|
|
|
|
|
|
|
// Vector shuffles
|
|
|
|
ZIP1,
|
|
|
|
ZIP2,
|
|
|
|
UZP1,
|
|
|
|
UZP2,
|
|
|
|
TRN1,
|
|
|
|
TRN2,
|
|
|
|
REV16,
|
|
|
|
REV32,
|
|
|
|
REV64,
|
|
|
|
EXT,
|
|
|
|
|
|
|
|
// Vector shift by scalar
|
|
|
|
VSHL,
|
|
|
|
VLSHR,
|
|
|
|
VASHR,
|
|
|
|
|
|
|
|
// Vector shift by scalar (again)
|
|
|
|
SQSHL_I,
|
|
|
|
UQSHL_I,
|
|
|
|
SQSHLU_I,
|
|
|
|
SRSHR_I,
|
|
|
|
URSHR_I,
|
|
|
|
|
|
|
|
// Vector comparisons
|
|
|
|
CMEQ,
|
|
|
|
CMGE,
|
|
|
|
CMGT,
|
|
|
|
CMHI,
|
|
|
|
CMHS,
|
|
|
|
FCMEQ,
|
|
|
|
FCMGE,
|
|
|
|
FCMGT,
|
|
|
|
|
|
|
|
// Vector zero comparisons
|
|
|
|
CMEQz,
|
|
|
|
CMGEz,
|
|
|
|
CMGTz,
|
|
|
|
CMLEz,
|
|
|
|
CMLTz,
|
|
|
|
FCMEQz,
|
|
|
|
FCMGEz,
|
|
|
|
FCMGTz,
|
|
|
|
FCMLEz,
|
|
|
|
FCMLTz,
|
|
|
|
|
[AArch64] Avoid going through GPRs for across-vector instructions.
This adds new node types for each intrinsic.
For instance, for addv, we have AArch64ISD::UADDV, such that:
(v4i32 (uaddv ...))
is the same as
(v4i32 (scalar_to_vector (i32 (int_aarch64_neon_uaddv ...))))
that is,
(v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(i32 (int_aarch64_neon_uaddv ...)), ssub)
In a combine, we transform all such across-vector-lanes intrinsics to:
(i32 (extract_vector_elt (uaddv ...), 0))
This has one big advantage: by making the extract_element explicit, we
enable the existing patterns for lane-aware instructions to fire.
This lets us avoid needlessly going through the GPRs. Consider:
uint32x4_t test_mul(uint32x4_t a, uint32x4_t b) {
return vmulq_n_u32(a, vaddvq_u32(b));
}
We now generate:
addv.4s s1, v1
mul.4s v0, v0, v1[0]
instead of the previous:
addv.4s s1, v1
fmov w8, s1
dup.4s v1, w8
mul.4s v0, v1, v0
rdar://20044838
llvm-svn: 231840
2015-03-11 04:45:38 +08:00
|
|
|
// Vector across-lanes addition
|
|
|
|
// Only the lower result lane is defined.
|
|
|
|
SADDV,
|
|
|
|
UADDV,
|
|
|
|
|
|
|
|
// Vector across-lanes min/max
|
|
|
|
// Only the lower result lane is defined.
|
|
|
|
SMINV,
|
|
|
|
UMINV,
|
|
|
|
SMAXV,
|
|
|
|
UMAXV,
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Vector bitwise negation
|
|
|
|
NOT,
|
|
|
|
|
|
|
|
// Vector bitwise selection
|
|
|
|
BIT,
|
|
|
|
|
|
|
|
// Compare-and-branch
|
|
|
|
CBZ,
|
|
|
|
CBNZ,
|
|
|
|
TBZ,
|
|
|
|
TBNZ,
|
|
|
|
|
|
|
|
// Tail calls
|
|
|
|
TC_RETURN,
|
|
|
|
|
|
|
|
// Custom prefetch handling
|
|
|
|
PREFETCH,
|
|
|
|
|
|
|
|
// {s|u}int to FP within a FP register.
|
|
|
|
SITOF,
|
2014-05-08 15:38:13 +08:00
|
|
|
UITOF,
|
|
|
|
|
2014-09-04 17:46:14 +08:00
|
|
|
/// Natural vector cast. ISD::BITCAST is not natural in the big-endian
|
|
|
|
/// world w.r.t vectors; which causes additional REV instructions to be
|
|
|
|
/// generated to compensate for the byte-swapping. But sometimes we do
|
|
|
|
/// need to re-interpret the data in SIMD vector registers in big-endian
|
|
|
|
/// mode without emitting such REV instructions.
|
|
|
|
NVCAST,
|
|
|
|
|
2014-10-08 10:31:24 +08:00
|
|
|
SMULL,
|
|
|
|
UMULL,
|
|
|
|
|
2016-11-15 07:29:01 +08:00
|
|
|
// Reciprocal estimates and steps.
|
|
|
|
FRECPE, FRECPS,
|
|
|
|
FRSQRTE, FRSQRTS,
|
2016-10-25 00:14:58 +08:00
|
|
|
|
2014-05-08 15:38:13 +08:00
|
|
|
// NEON Load/Store with post-increment base updates
|
|
|
|
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
|
|
|
LD3post,
|
|
|
|
LD4post,
|
|
|
|
ST2post,
|
|
|
|
ST3post,
|
|
|
|
ST4post,
|
|
|
|
LD1x2post,
|
|
|
|
LD1x3post,
|
|
|
|
LD1x4post,
|
|
|
|
ST1x2post,
|
|
|
|
ST1x3post,
|
|
|
|
ST1x4post,
|
2014-05-16 17:39:02 +08:00
|
|
|
LD1DUPpost,
|
2014-05-08 15:38:13 +08:00
|
|
|
LD2DUPpost,
|
|
|
|
LD3DUPpost,
|
|
|
|
LD4DUPpost,
|
2014-05-16 17:39:02 +08:00
|
|
|
LD1LANEpost,
|
2014-05-08 15:38:13 +08:00
|
|
|
LD2LANEpost,
|
|
|
|
LD3LANEpost,
|
|
|
|
LD4LANEpost,
|
|
|
|
ST2LANEpost,
|
|
|
|
ST3LANEpost,
|
|
|
|
ST4LANEpost
|
2014-03-29 18:18:08 +08:00
|
|
|
};
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
} // end namespace AArch64ISD
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Any instruction that defines a 32-bit result zeros out the high half of the
|
|
|
|
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
|
|
|
|
// be copying from a truncate. But any other 32-bit operation will zero-extend
|
|
|
|
// up to 64 bits.
|
|
|
|
// FIXME: X86 also checks for CMOV here. Do we need something similar?
|
|
|
|
static inline bool isDef32(const SDNode &N) {
|
|
|
|
unsigned Opc = N.getOpcode();
|
|
|
|
return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
|
|
|
|
Opc != ISD::CopyFromReg;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
class AArch64Subtarget;
|
|
|
|
class AArch64TargetMachine;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
class AArch64TargetLowering : public TargetLowering {
|
2014-03-29 18:18:08 +08:00
|
|
|
public:
|
2015-01-29 08:19:42 +08:00
|
|
|
explicit AArch64TargetLowering(const TargetMachine &TM,
|
|
|
|
const AArch64Subtarget &STI);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-08-30 05:53:01 +08:00
|
|
|
/// Selects the correct CCAssignFn for a given CallingConvention value.
|
2014-03-29 18:18:08 +08:00
|
|
|
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
|
|
|
|
|
2016-08-11 05:44:01 +08:00
|
|
|
/// Selects the correct CCAssignFn for a given CallingConvention value.
|
|
|
|
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
|
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Determine which bits of Op are known to be either zero or one, for the
/// vector elements requested in DemandedElts, and return them in Known.
|
2017-04-28 13:31:46 +08:00
|
|
|
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
|
|
|
|
const APInt &DemandedElts,
|
2017-03-31 19:24:16 +08:00
|
|
|
const SelectionDAG &DAG,
|
2014-05-15 05:14:37 +08:00
|
|
|
unsigned Depth = 0) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2017-04-22 02:53:12 +08:00
|
|
|
bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
|
|
|
|
TargetLoweringOpt &TLO) const override;
|
|
|
|
|
2015-07-09 23:12:23 +08:00
|
|
|
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Returns true if the target allows unaligned memory accesses of the
|
|
|
|
/// specified type.
|
2014-07-28 01:46:40 +08:00
|
|
|
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
|
|
|
|
unsigned Align = 1,
|
2015-07-29 22:17:26 +08:00
|
|
|
bool *Fast = nullptr) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Provide custom lowering hooks for some operations.
|
2014-03-30 15:25:18 +08:00
|
|
|
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
const char *getTargetNodeName(unsigned Opcode) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
/// Returns true if a cast between SrcAS and DestAS is a noop.
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
  // Neither SrcAS nor DestAS is inspected: addrspacecasts are always noops.
  return true;
}
|
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// This method returns a target specific FastISel object, or null if the
|
|
|
|
/// target does not support "fast" ISel.
|
2014-03-30 15:25:18 +08:00
|
|
|
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
|
|
|
|
const TargetLibraryInfo *libInfo) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Return true if the given shuffle mask can be codegen'd directly, or if it
|
|
|
|
/// should be stack expanded.
|
2017-07-26 16:06:58 +08:00
|
|
|
bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Return the ISD::SETCC ValueType.
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
|
|
|
|
EVT VT) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
|
2016-07-01 06:52:52 +08:00
|
|
|
MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
|
2014-03-29 18:18:08 +08:00
|
|
|
MachineBasicBlock *BB) const;
|
|
|
|
|
2018-11-10 07:33:30 +08:00
|
|
|
MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
|
|
|
|
MachineBasicBlock *BB) const;
|
|
|
|
|
|
|
|
MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
|
|
|
|
MachineBasicBlock *BB) const;
|
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
MachineBasicBlock *
|
2016-07-01 06:52:52 +08:00
|
|
|
EmitInstrWithCustomInserter(MachineInstr &MI,
|
2014-03-30 15:25:18 +08:00
|
|
|
MachineBasicBlock *MBB) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
|
2017-12-15 06:34:10 +08:00
|
|
|
MachineFunction &MF,
|
2014-03-30 15:25:18 +08:00
|
|
|
unsigned Intrinsic) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-03-23 22:47:07 +08:00
|
|
|
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
|
|
|
|
EVT NewVT) const override;
|
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
|
|
|
|
bool isTruncateFree(EVT VT1, EVT VT2) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-02-24 03:15:16 +08:00
|
|
|
bool isProfitableToHoist(Instruction *I) const override;
|
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isZExtFree(Type *Ty1, Type *Ty2) const override;
|
|
|
|
bool isZExtFree(EVT VT1, EVT VT2) const override;
|
|
|
|
bool isZExtFree(SDValue Val, EVT VT2) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[CGP] Add support for sinking operands to their users, if they are free.
This patch improves code generation for some AArch64 ACLE intrinsics. It adds
support to CGP to duplicate and sink operands to their user, if they can be
folded into a target instruction, like zexts and sub into usubl. It adds a
TargetLowering hook shouldSinkOperands, which looks at the operands of
instructions to see if sinking is profitable.
I decided to add a new target hook, as for the sinking to be profitable,
at least on AArch64, we have to look at multiple operands of an
instruction, instead of looking at the users of a zext for example.
The sinking is done in CGP, because it works around an instruction
selection limitation. If instruction selection is not limited to a
single basic block, this patch should not be needed any longer.
Alternatively this could be done in the LoopSink pass, which tries to
undo LICM for instructions in blocks that are not executed frequently.
Note that we do not force the operands to sink to have a single user,
because we duplicate them before sinking. Therefore this is only
desirable if they really can be done for free. Additionally we could
consider the impact on live ranges later on.
This should fix https://bugs.llvm.org/show_bug.cgi?id=40025.
As for performance, we have internal code that uses intrinsics and can
be sped up by 10% by this change.
Reviewers: SjoerdMeijer, t.p.northover, samparker, efriedma, RKSimon, spatel
Reviewed By: samparker
Differential Revision: https://reviews.llvm.org/D57377
llvm-svn: 353152
2019-02-05 18:27:40 +08:00
|
|
|
bool shouldSinkOperands(Instruction *I,
|
|
|
|
SmallVectorImpl<Use *> &Ops) const override;
|
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[AArch64] Lower interleaved memory accesses to ldN/stN intrinsics. This patch also adds a function to calculate the cost of interleaved memory accesses.
E.g. Lower an interleaved load:
%wide.vec = load <8 x i32>, <8 x i32>* %ptr
%v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>
%v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>
into:
%ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
%vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
%vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
E.g. Lower an interleaved store:
%i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
store <12 x i32> %i.vec, <12 x i32>* %ptr
into:
%sub.v0 = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 1, 2, 3>
%sub.v1 = shuffle <8 x i32> %v0, <8 x i32> %v1, <4, 5, 6, 7>
%sub.v2 = shuffle <8 x i32> %v0, <8 x i32> %v1, <8, 9, 10, 11>
call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
Differential Revision: http://reviews.llvm.org/D10533
llvm-svn: 240754
2015-06-26 10:32:07 +08:00
|
|
|
// Reports 4 as the largest interleave factor supported for interleaved
// memory accesses.
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
|
|
|
|
|
|
|
|
bool lowerInterleavedLoad(LoadInst *LI,
|
|
|
|
ArrayRef<ShuffleVectorInst *> Shuffles,
|
|
|
|
ArrayRef<unsigned> Indices,
|
|
|
|
unsigned Factor) const override;
|
|
|
|
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
|
|
|
|
unsigned Factor) const override;
|
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isLegalAddImmediate(int64_t) const override;
|
|
|
|
bool isLegalICmpImmediate(int64_t) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[CGP] Split large data structures to sink more GEPs
Accessing the members of a large data structure needs a lot of GEPs which
usually have large offsets due to the size of the underlying data structure. If
the offsets are too large to fit into the r+i addressing mode, these GEPs cannot
be sunk to their users' blocks and many extra registers are needed then to carry
the values of these GEPs.
This patch tries to split a large data struct starting from %base like the
following.
Before:
BB0:
%base =
BB1:
%gep0 = gep %base, off0
%gep1 = gep %base, off1
%gep2 = gep %base, off2
BB2:
%load1 = load %gep0
%load2 = load %gep1
%load3 = load %gep2
After:
BB0:
%base =
%new_base = gep %base, off0
BB1:
%new_gep0 = %new_base
%new_gep1 = gep %new_base, off1 - off0
%new_gep2 = gep %new_base, off2 - off0
BB2:
%load1 = load i32, i32* %new_gep0
%load2 = load i32, i32* %new_gep1
%load3 = load i32, i32* %new_gep2
In the above example, the struct is split into two parts. The first part still
starts from %base and the second part starts from %new_base. After the
splitting, %new_gep1 and %new_gep2 have smaller offsets and then can be sunk to
BB2 and folded into their users.
The algorithm to split data structure is simple and very similar to the work of
merging SExts. First, it collects GEPs that have large offsets when iterating
the blocks. Second, it splits the underlying data structures and updates the
collected GEPs to use smaller offsets.
Differential Revision: https://reviews.llvm.org/D42759
llvm-svn: 332015
2018-05-11 02:27:36 +08:00
|
|
|
bool shouldConsiderGEPOffsetSplit() const override;
|
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
|
|
|
|
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
|
|
|
|
MachineFunction &MF) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Return true if the addressing mode represented by AM is legal for this
|
|
|
|
/// target, for a load/store of the specified type.
|
2015-07-09 10:09:40 +08:00
|
|
|
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
|
2017-07-21 19:59:37 +08:00
|
|
|
unsigned AS,
|
|
|
|
Instruction *I = nullptr) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Return the cost of the scaling factor used in the addressing
|
2014-03-29 18:18:08 +08:00
|
|
|
/// mode represented by AM for this target, for a load/store
|
|
|
|
/// of the specified type.
|
|
|
|
/// If the AM is supported, the return value must be >= 0.
|
|
|
|
/// If the AM is not supported, it returns a negative value.
|
2015-07-09 10:09:40 +08:00
|
|
|
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
|
2015-06-01 13:31:59 +08:00
|
|
|
unsigned AS) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Return true if an FMA operation is faster than a pair of fmul and fadd
|
|
|
|
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
|
|
|
|
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
|
2014-03-30 15:25:18 +08:00
|
|
|
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-04-04 13:16:06 +08:00
|
|
|
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
|
2018-08-15 06:10:25 +08:00
|
|
|
bool isDesirableToCommuteWithShift(const SDNode *N,
|
|
|
|
CombineLevel Level) const override;
|
[ARM64] Prevent bit extraction to be adjusted by following shift
For pattern like ((x >> C1) & Mask) << C2, DAG combiner may convert it
into (x >> (C1-C2)) & (Mask << C2), which makes pattern matching of ubfx
more difficult.
For example:
Given
%shr = lshr i64 %x, 4
%and = and i64 %shr, 15
%arrayidx = getelementptr inbounds [8 x [64 x i64]]* @arr, i64 0, %i64 2, i64 %and
%0 = load i64* %arrayidx
With current shift folding, it takes 3 instrs to compute base address:
lsr x8, x0, #1
and x8, x8, #0x78
add x8, x9, x8
If using ubfx, it only needs 2 instrs:
ubfx x8, x0, #4, #4
add x8, x9, x8, lsl #3
This fixes bug 19589
llvm-svn: 207702
2014-05-01 05:07:24 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Returns true if it is beneficial to convert a load of a constant
|
2014-04-18 04:00:33 +08:00
|
|
|
/// to just the constant itself.
|
2014-03-30 15:25:18 +08:00
|
|
|
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
|
|
|
|
Type *Ty) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-03-07 00:54:55 +08:00
|
|
|
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
|
|
|
|
/// with this index.
|
|
|
|
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
|
|
|
|
unsigned Index) const override;
|
|
|
|
|
2014-04-18 04:00:33 +08:00
|
|
|
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
|
|
|
|
AtomicOrdering Ord) const override;
|
|
|
|
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
|
|
|
|
Value *Addr, AtomicOrdering Ord) const override;
|
|
|
|
|
2015-09-23 01:21:44 +08:00
|
|
|
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
|
|
|
|
|
2015-09-12 01:08:28 +08:00
|
|
|
TargetLoweringBase::AtomicExpansionKind
|
|
|
|
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
|
2014-09-04 05:29:59 +08:00
|
|
|
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
|
2015-09-12 01:08:17 +08:00
|
|
|
TargetLoweringBase::AtomicExpansionKind
|
Mutate TargetLowering::shouldExpandAtomicRMWInIR to specifically dictate how AtomicRMWInsts are expanded.
Summary:
In PNaCl, most atomic instructions have their own @llvm.nacl.atomic.* function, each one, with a few exceptions, represents a consistent behaviour across all NaCl-supported targets. Unfortunately, the atomic RMW operations nand, [u]min, and [u]max aren't directly represented by any such @llvm.nacl.atomic.* function. This patch refines shouldExpandAtomicRMWInIR in TargetLowering so that a future `Le32TargetLowering` class can selectively inform the caller how the target desires the atomic RMW instruction to be expanded (ie via load-linked/store-conditional for ARM/AArch64, via cmpxchg for X86/others?, or not at all for Mips) if at all.
This does not represent a behavioural change and as such no tests were added.
Patch by: Richard Diamond.
Reviewers: jfb
Reviewed By: jfb
Subscribers: jfb, aemerson, t.p.northover, llvm-commits
Differential Revision: http://reviews.llvm.org/D7713
llvm-svn: 231250
2015-03-04 23:47:57 +08:00
|
|
|
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
|
2014-04-18 04:00:33 +08:00
|
|
|
|
2018-09-19 22:51:42 +08:00
|
|
|
TargetLoweringBase::AtomicExpansionKind
|
|
|
|
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
|
2015-09-12 01:08:28 +08:00
|
|
|
|
2014-07-26 03:31:34 +08:00
|
|
|
bool useLoadStackGuardNode() const override;
|
2014-07-03 08:23:43 +08:00
|
|
|
TargetLoweringBase::LegalizeTypeAction
|
2018-11-06 07:26:13 +08:00
|
|
|
getPreferredVectorAction(MVT VT) const override;
|
2014-07-03 08:23:43 +08:00
|
|
|
|
2016-04-06 06:41:50 +08:00
|
|
|
/// If the target has a standard location for the stack protector cookie,
|
|
|
|
/// returns the address of that location. Otherwise, returns nullptr.
|
2016-04-09 05:26:31 +08:00
|
|
|
Value *getIRStackGuard(IRBuilder<> &IRB) const override;
|
2016-04-06 06:41:50 +08:00
|
|
|
|
2018-11-09 10:48:36 +08:00
|
|
|
void insertSSPDeclarations(Module &M) const override;
|
|
|
|
Value *getSDagStackGuard(const Module &M) const override;
|
2019-02-02 04:43:25 +08:00
|
|
|
Function *getSSPStackGuardCheck(const Module &M) const override;
|
2018-11-09 10:48:36 +08:00
|
|
|
|
2015-10-27 02:28:25 +08:00
|
|
|
/// If the target has a standard location for the unsafe stack pointer,
|
|
|
|
/// returns the address of that location. Otherwise, returns nullptr.
|
|
|
|
Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
|
|
|
|
|
2015-11-07 09:11:31 +08:00
|
|
|
/// If a physical register, this returns the register that receives the
|
|
|
|
/// exception address on entry to an EH pad.
|
|
|
|
unsigned
|
|
|
|
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
|
|
|
|
// FIXME: This is a guess. Has this been defined yet?
|
|
|
|
return AArch64::X0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// If a physical register, this returns the register that receives the
|
|
|
|
/// exception typeid on entry to a landing pad.
|
|
|
|
unsigned
|
|
|
|
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
|
|
|
|
// FIXME: This is a guess. Has this been defined yet?
|
|
|
|
return AArch64::X1;
|
|
|
|
}
|
|
|
|
|
Rename AttributeSet to AttributeList
Summary:
This class is a list of AttributeSetNodes corresponding to the function
prototype of a call or function declaration. This class used to be
called ParamAttrListPtr, then AttrListPtr, then AttributeSet. It is
typically accessed by parameter and return value index, so
"AttributeList" seems like a more intuitive name.
Rename AttributeSetImpl to AttributeListImpl to follow suit.
It's useful to rename this class so that we can rename AttributeSetNode
to AttributeSet later. AttributeSet is the set of attributes that apply
to a single function, argument, or return value.
Reviewers: sanjoy, javed.absar, chandlerc, pete
Reviewed By: pete
Subscribers: pete, jholewinski, arsenm, dschuff, mehdi_amini, jfb, nhaehnle, sbc100, void, llvm-commits
Differential Revision: https://reviews.llvm.org/D31102
llvm-svn: 298393
2017-03-22 00:57:19 +08:00
|
|
|
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
|
2016-03-29 02:17:07 +08:00
|
|
|
|
2017-07-17 23:09:47 +08:00
|
|
|
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
|
|
|
|
const SelectionDAG &DAG) const override {
|
|
|
|
// Do not merge to float value size (128 bytes) if no implicit
|
|
|
|
// float attribute is set.
|
|
|
|
|
2017-12-16 06:22:58 +08:00
|
|
|
bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
|
2017-07-17 23:09:47 +08:00
|
|
|
Attribute::NoImplicitFloat);
|
|
|
|
|
|
|
|
if (NoFloat)
|
|
|
|
return (MemVT.getSizeInBits() <= 64);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
[AArch64] add overrides for isCheapToSpeculateCttz() and isCheapToSpeculateCtlz()
AArch64 has instructions for efficient count-leading/trailing-zeros, so this should be
considered a cheap operation (and therefore fair game for speculation) for any AArch64
implementation.
The net result of allowing this speculation for the regression tests in this
patch is that we get this code:
ctlz:
clz w0, w0
ret
cttz:
rbit w8, w0
clz w0, w8
ret
Instead of:
ctlz:
cbz w0, .LBB0_2
clz w0, w0
ret
.LBB0_2:
orr w0, wzr, #0x20
ret
cttz:
cbz w0, .LBB1_2
rbit w8, w0
clz w0, w8
ret
.LBB1_2:
orr w0, wzr, #0x20
ret
See D14469 for the larger motivation.
Differential Revision: http://reviews.llvm.org/D14505
llvm-svn: 252625
2015-11-11 02:11:37 +08:00
|
|
|
// Count-trailing-zeros is always reported as cheap to speculate.
bool isCheapToSpeculateCttz() const override { return true; }
|
|
|
|
|
|
|
|
// Count-leading-zeros is always reported as cheap to speculate.
bool isCheapToSpeculateCtlz() const override { return true; }
|
2016-06-03 04:01:37 +08:00
|
|
|
|
2017-02-22 02:53:14 +08:00
|
|
|
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
|
|
|
|
|
2018-05-22 05:41:02 +08:00
|
|
|
bool hasAndNotCompare(SDValue V) const override {
  // We can use bics for any scalar.
  const EVT Ty = V.getValueType();
  return Ty.isScalarInteger();
}
|
|
|
|
|
|
|
|
bool hasAndNot(SDValue Y) const override {
  const EVT Ty = Y.getValueType();

  // Vectors qualify when they are at least 64 bits wide (vector 'bic');
  // everything else defers to the scalar and-not-compare answer.
  if (Ty.isVector())
    return Ty.getSizeInBits() >= 64;

  return hasAndNotCompare(Y);
}
|
|
|
|
|
2019-01-31 16:07:30 +08:00
|
|
|
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
  // Shifts are expanded unless the enclosing function is optimised for
  // minimum size; N itself is not inspected.
  const Function &F = DAG.getMachineFunction().getFunction();
  return !F.optForMinSize();
}
|
|
|
|
|
2018-07-16 20:44:10 +08:00
|
|
|
bool shouldTransformSignedTruncationCheck(EVT XVT,
                                          unsigned KeptBits) const override {
  // For vectors, we don't have a preference.
  if (XVT.isVector())
    return false;

  // Accepts exactly i8, i16, i32 and i64.
  auto IsPlainInt = [](EVT Ty) {
    return Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32 || Ty == MVT::i64;
  };

  // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
  // XVT will be larger than KeptBitsVT.
  const MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
  if (!IsPlainInt(XVT))
    return false;
  return IsPlainInt(KeptBitsVT);
}
|
|
|
|
|
2016-06-03 04:01:37 +08:00
|
|
|
/// Bit-preserving FP logic is reported for exactly the f32 and f64 types.
bool hasBitPreservingFPLogic(EVT VT) const override {
  // FIXME: Is this always true? It should be true for vectors at least.
  const bool IsSingle = (VT == MVT::f32);
  const bool IsDouble = (VT == MVT::f64);
  return IsSingle || IsDouble;
}
|
|
|
|
|
2015-12-17 05:04:19 +08:00
|
|
|
/// Split CSR is supported only for functions that use the CXX_FAST_TLS
/// calling convention and also carry the NoUnwind attribute.
bool supportSplitCSR(MachineFunction *MF) const override {
  const auto &Fn = MF->getFunction();
  if (Fn.getCallingConv() != CallingConv::CXX_FAST_TLS)
    return false;
  return Fn.hasFnAttribute(Attribute::NoUnwind);
}
|
|
|
|
void initializeSplitCSR(MachineBasicBlock *Entry) const override;
|
|
|
|
void insertCopiesSplitCSR(
|
|
|
|
MachineBasicBlock *Entry,
|
|
|
|
const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
|
[AArch64] add overrides for isCheapToSpeculateCttz() and isCheapToSpeculateCtlz()
AArch64 has instructions for efficient count-leading/trailing-zeros, so this should be
considered a cheap operation (and therefore fair game for speculation) for any AArch64
implementation.
The net result of allowing this speculation for the regression tests in this
patch is that we get this code:
ctlz:
clz w0, w0
ret
cttz:
rbit w8, w0
clz w0, w8
ret
Instead of:
ctlz:
cbz w0, .LBB0_2
clz w0, w0
ret
.LBB0_2:
orr w0, wzr, #0x20
ret
cttz:
cbz w0, .LBB1_2
rbit w8, w0
clz w0, w8
ret
.LBB1_2:
orr w0, wzr, #0x20
ret
See D14469 for the larger motivation.
Differential Revision: http://reviews.llvm.org/D14505
llvm-svn: 252625
2015-11-11 02:11:37 +08:00
|
|
|
|
2016-04-12 05:08:06 +08:00
|
|
|
/// Unconditionally answers yes to the swifterror support query.
bool supportSwiftError() const override { return true; }
|
|
|
|
|
2018-01-26 05:55:39 +08:00
|
|
|
/// Enable aggressive FMA fusion on targets that want it.
|
|
|
|
bool enableAggressiveFMAFusion(EVT VT) const override;
|
|
|
|
|
2017-02-09 01:57:20 +08:00
|
|
|
/// Returns the size of the platform's va_list object.
|
|
|
|
unsigned getVaListSizeInBits(const DataLayout &DL) const override;
|
|
|
|
|
2017-04-11 02:34:37 +08:00
|
|
|
/// Returns true if \p VecTy is a legal interleaved access type. This
|
|
|
|
/// function checks the vector element type and the overall width of the
|
|
|
|
/// vector.
|
|
|
|
bool isLegalInterleavedAccessType(VectorType *VecTy,
|
|
|
|
const DataLayout &DL) const;
|
|
|
|
|
|
|
|
/// Returns the number of interleaved accesses that will be generated when
|
|
|
|
/// lowering accesses of the given type.
|
|
|
|
unsigned getNumInterleavedAccesses(VectorType *VecTy,
|
|
|
|
const DataLayout &DL) const;
|
|
|
|
|
2017-07-15 05:44:12 +08:00
|
|
|
MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
|
|
|
|
|
2017-08-22 05:56:11 +08:00
|
|
|
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
|
|
|
|
CallingConv::ID CallConv,
|
|
|
|
bool isVarArg) const override;
|
2018-11-10 07:33:30 +08:00
|
|
|
/// Used for exception handling on Win64.
|
|
|
|
bool needsFixedCatchObjects() const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
private:
|
2015-11-10 03:18:26 +08:00
|
|
|
/// Keep a pointer to the AArch64Subtarget around so that we can
|
2014-03-29 18:18:08 +08:00
|
|
|
/// make the right decision when generating code for different targets.
|
2014-05-24 20:50:23 +08:00
|
|
|
const AArch64Subtarget *Subtarget;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-05-29 23:58:50 +08:00
|
|
|
bool isExtFreeImpl(const Instruction *Ext) const override;
|
|
|
|
|
2016-04-15 14:20:21 +08:00
|
|
|
void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
|
2014-03-29 18:18:08 +08:00
|
|
|
void addDRTypeForNEON(MVT VT);
|
|
|
|
void addQRTypeForNEON(MVT VT);
|
|
|
|
|
2016-06-12 23:39:02 +08:00
|
|
|
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
|
|
|
|
bool isVarArg,
|
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
|
|
const SDLoc &DL, SelectionDAG &DAG,
|
|
|
|
SmallVectorImpl<SDValue> &InVals) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
SDValue LowerCall(CallLoweringInfo & /*CLI*/,
|
|
|
|
SmallVectorImpl<SDValue> &InVals) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
|
|
|
|
CallingConv::ID CallConv, bool isVarArg,
|
2016-06-12 23:39:02 +08:00
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
|
|
const SDLoc &DL, SelectionDAG &DAG,
|
|
|
|
SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
|
|
|
|
SDValue ThisVal) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[AArch64] Add custom lowering for v4i8 trunc store
This patch adds a custom trunc store lowering for v4i8 vector types.
Since there is no v.4b register, the v4i8 is promoted to v4i16 (v.4h)
and the default action for v4i8 is to extract each element and issue 4
byte stores.
A better strategy would be to extend the promoted v4i16 to v8i16
(with undef elements) and extract and store the word lane which
represents the v4i8 subvectors. The construction:
define void @foo(<4 x i16> %x, i8* nocapture %p) {
%0 = trunc <4 x i16> %x to <4 x i8>
%1 = bitcast i8* %p to <4 x i8>*
store <4 x i8> %0, <4 x i8>* %1, align 4, !tbaa !2
ret void
}
Can be optimized from:
umov w8, v0.h[3]
umov w9, v0.h[2]
umov w10, v0.h[1]
umov w11, v0.h[0]
strb w8, [x0, #3]
strb w9, [x0, #2]
strb w10, [x0, #1]
strb w11, [x0]
ret
To:
xtn v0.8b, v0.8h
str s0, [x0]
ret
The patch also adjusts the memory cost for autovectorization, so the C
code:
void foo (const int *src, int width, unsigned char *dst)
{
for (int i = 0; i < width; i++)
*dst++ = *src++;
}
can be vectorized to:
.LBB0_4: // %vector.body
// =>This Inner Loop Header: Depth=1
ldr q0, [x0], #16
subs x12, x12, #4 // =4
xtn v0.4h, v0.4s
xtn v0.8b, v0.8h
st1 { v0.s }[0], [x2], #4
b.ne .LBB0_4
Instead of byte operations.
llvm-svn: 335735
2018-06-27 21:58:46 +08:00
|
|
|
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
|
2015-07-28 21:03:31 +08:00
|
|
|
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
bool isEligibleForTailCallOptimization(
|
|
|
|
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
|
|
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
|
|
const SmallVectorImpl<SDValue> &OutVals,
|
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
|
|
|
|
|
2014-05-15 09:33:17 +08:00
|
|
|
/// Finds the incoming stack arguments which overlap the given fixed stack
|
|
|
|
/// object and incorporates their load into the current chain. This prevents
|
|
|
|
/// an upcoming store from clobbering the stack argument before it's used.
|
|
|
|
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
|
2016-07-29 02:40:00 +08:00
|
|
|
MachineFrameInfo &MFI, int ClobberedFI) const;
|
2014-05-15 09:33:17 +08:00
|
|
|
|
|
|
|
bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
|
|
|
|
|
2016-06-12 23:39:02 +08:00
|
|
|
void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
|
2014-03-29 18:18:08 +08:00
|
|
|
SDValue &Chain) const;
|
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
|
|
|
|
bool isVarArg,
|
|
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
|
|
LLVMContext &Context) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-03-30 15:25:18 +08:00
|
|
|
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
|
|
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
2016-06-12 23:39:02 +08:00
|
|
|
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
|
2014-03-30 15:25:18 +08:00
|
|
|
SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2017-04-22 01:31:03 +08:00
|
|
|
SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
|
|
|
|
unsigned Flag) const;
|
|
|
|
SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
|
|
|
|
unsigned Flag) const;
|
|
|
|
SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
|
|
|
|
unsigned Flag) const;
|
|
|
|
SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
|
|
|
|
unsigned Flag) const;
|
|
|
|
template <class NodeTy>
|
2017-10-25 15:25:18 +08:00
|
|
|
SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
|
|
|
|
template <class NodeTy>
|
|
|
|
SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
|
|
|
|
template <class NodeTy>
|
|
|
|
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
|
2018-08-22 19:31:39 +08:00
|
|
|
template <class NodeTy>
|
|
|
|
SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
|
2018-01-12 06:53:30 +08:00
|
|
|
SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
2016-06-12 23:39:02 +08:00
|
|
|
SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
|
Fix PR22408 - LLVM producing AArch64 TLS relocations that GNU linkers cannot handle yet.
As is described at http://llvm.org/bugs/show_bug.cgi?id=22408, the GNU linkers
ld.bfd and ld.gold currently only support a subset of the whole range of AArch64
ELF TLS relocations. Furthermore, they assume that some of the code sequences to
access thread-local variables are produced in a very specific sequence.
When the sequence is not as the linker expects, it can silently mis-relax/mis-optimize
the instructions.
Even if that wouldn't be the case, it's good to produce the exact sequence,
as that ensures that linkers can perform optimizing relaxations.
This patch:
* implements support for 16MiB TLS area size instead of 4GiB TLS area size. Ideally clang
would grow an -mtls-size option to allow support for both, but that's not part of this patch.
* by default doesn't produce local dynamic access patterns, as even modern ld.bfd and ld.gold
linkers do not support the associated relocations. An option (-aarch64-elf-ldtls-generation)
is added to enable generation of local dynamic code sequence, but is off by default.
* makes sure that the exact expected code sequence for local dynamic and general dynamic
accesses is produced, by making use of a new pseudo instruction. The patch also removes
two (AArch64ISD::TLSDESC_BLR, AArch64ISD::TLSDESC_CALL) pre-existing AArch64-specific pseudo
SDNode instructions that are superseded by the new one (TLSDESC_CALLSEQ).
llvm-svn: 231227
2015-03-04 17:12:08 +08:00
|
|
|
SelectionDAG &DAG) const;
|
2018-03-11 03:05:21 +08:00
|
|
|
SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
2015-04-08 01:33:05 +08:00
|
|
|
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
|
2016-06-12 23:39:02 +08:00
|
|
|
SDValue TVal, SDValue FVal, const SDLoc &dl,
|
2015-04-08 01:33:05 +08:00
|
|
|
SelectionDAG &DAG) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
|
2018-10-25 04:19:09 +08:00
|
|
|
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
|
2017-07-14 01:03:12 +08:00
|
|
|
SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
|
[COFF, ARM64] Implement Intrinsic.sponentry for AArch64
Summary: This patch adds Intrinsic.sponentry. This intrinsic is required to correctly support setjmp for AArch64 Windows platform.
Patch by: Yin Ma (yinma@codeaurora.org)
Reviewers: mgrang, ssijaric, eli.friedman, TomTan, mstorsjo, rnk, compnerd, efriedma
Reviewed By: efriedma
Subscribers: efriedma, javed.absar, kristof.beyls, chrib, llvm-commits
Differential Revision: https://reviews.llvm.org/D53996
llvm-svn: 345909
2018-11-02 07:22:25 +08:00
|
|
|
SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
|
2018-06-20 20:09:01 +08:00
|
|
|
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
|
|
|
|
RTLIB::Libcall Call) const;
|
|
|
|
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
|
2017-05-17 05:29:22 +08:00
|
|
|
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
|
2018-02-12 22:22:03 +08:00
|
|
|
SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
|
2018-02-13 01:03:11 +08:00
|
|
|
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
|
2018-02-17 22:26:32 +08:00
|
|
|
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
|
|
|
|
SDValue &Size,
|
|
|
|
SelectionDAG &DAG) const;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[AArch64] Lower sdiv x, pow2 using add + select + shift.
The target-independent DAGcombiner will generate:
asr w1, X, #31 w1 = splat sign bit.
add X, X, w1, lsr #28 X = X + 0 or pow2-1
asr w0, X, asr #4 w0 = X/pow2
However, the add + shifts is expensive, so generate:
add w0, X, 15 w0 = X + pow2-1
cmp X, wzr X - 0
csel X, w0, X, lt X = (X < 0) ? X + pow2-1 : X;
asr w0, X, asr 4 w0 = X/pow2
llvm-svn: 213758
2014-07-23 22:57:52 +08:00
|
|
|
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
|
2018-07-31 07:22:00 +08:00
|
|
|
SmallVectorImpl<SDNode *> &Created) const override;
|
2016-11-11 07:31:06 +08:00
|
|
|
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
|
|
|
|
int &ExtraSteps, bool &UseOneConst,
|
|
|
|
bool Reciprocal) const override;
|
2016-10-25 00:14:58 +08:00
|
|
|
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
|
|
|
|
int &ExtraSteps) const override;
|
2015-07-29 07:05:48 +08:00
|
|
|
unsigned combineRepeatedFPDivisors() const override;
|
[AArch64] Lower sdiv x, pow2 using add + select + shift.
The target-independent DAGcombiner will generate:
asr w1, X, #31 w1 = splat sign bit.
add X, X, w1, lsr #28 X = X + 0 or pow2-1
asr w0, X, asr #4 w0 = X/pow2
However, the add + shifts is expensive, so generate:
add w0, X, 15 w0 = X + pow2-1
cmp X, wzr X - 0
csel X, w0, X, lt X = (X < 0) ? X + pow2-1 : X;
asr w0, X, asr 4 w0 = X/pow2
llvm-svn: 213758
2014-07-23 22:57:52 +08:00
|
|
|
|
2015-07-06 03:29:18 +08:00
|
|
|
ConstraintType getConstraintType(StringRef Constraint) const override;
|
2015-07-10 01:40:29 +08:00
|
|
|
unsigned getRegisterByName(const char* RegName, EVT VT,
|
|
|
|
SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
/// Examine constraint string and operand type and determine a weight value.
|
|
|
|
/// The operand object must already have been set up with the operand type.
|
2014-04-29 15:58:25 +08:00
|
|
|
ConstraintWeight
|
|
|
|
getSingleConstraintMatchWeight(AsmOperandInfo &info,
|
|
|
|
const char *constraint) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
std::pair<unsigned, const TargetRegisterClass *>
|
2015-02-27 06:38:43 +08:00
|
|
|
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
2015-07-06 03:29:18 +08:00
|
|
|
StringRef Constraint, MVT VT) const override;
|
2016-05-09 19:10:44 +08:00
|
|
|
|
|
|
|
const char *LowerXConstraint(EVT ConstraintVT) const override;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
|
|
|
|
std::vector<SDValue> &Ops,
|
2014-04-29 15:58:25 +08:00
|
|
|
SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-07-06 03:29:18 +08:00
|
|
|
/// Translate an inline-asm memory constraint code into its constraint ID.
/// Only "Q" is handled here; every other code is forwarded to the
/// TargetLowering implementation.
unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
  // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
  //        followed by llvm_unreachable so we'll leave them unimplemented in
  //        the backend for now.
  if (ConstraintCode != "Q")
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  return InlineAsm::Constraint_Q;
}
|
|
|
|
|
2014-04-29 15:58:25 +08:00
|
|
|
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
|
2017-04-19 05:16:46 +08:00
|
|
|
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
|
|
|
|
ISD::MemIndexedMode &AM, bool &IsInc,
|
|
|
|
SelectionDAG &DAG) const;
|
|
|
|
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
|
|
|
|
ISD::MemIndexedMode &AM,
|
2014-04-29 15:58:25 +08:00
|
|
|
SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
|
|
|
|
SDValue &Offset, ISD::MemIndexedMode &AM,
|
2014-04-29 15:58:25 +08:00
|
|
|
SelectionDAG &DAG) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
|
2014-04-29 15:58:25 +08:00
|
|
|
SelectionDAG &DAG) const override;
|
2014-11-28 05:02:42 +08:00
|
|
|
|
2015-07-17 04:02:37 +08:00
|
|
|
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
|
2018-01-19 11:16:36 +08:00
|
|
|
|
|
|
|
void finalizeLowering(MachineFunction &MF) const override;
|
2014-03-29 18:18:08 +08:00
|
|
|
};
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
namespace AArch64 {
|
2014-03-29 18:18:08 +08:00
|
|
|
/// Declaration only: returns a FastISel pointer built from \p funcInfo and
/// \p libInfo. Presumably the AArch64 fast instruction selector factory;
/// confirm against the out-of-line definition, and check there who owns
/// the returned object.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
|
|
|
|
const TargetLibraryInfo *libInfo);
|
2014-05-24 20:50:23 +08:00
|
|
|
} // end namespace AArch64
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
} // end namespace llvm
|
|
|
|
|
2014-08-14 00:26:38 +08:00
|
|
|
#endif
|