//===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLoweringBase class.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

static cl::opt<bool> JumpIsExpensiveOverride(
    "jump-is-expensive", cl::init(false),
    cl::desc("Do not create extra branches to split comparison logic."),
    cl::Hidden);

static cl::opt<unsigned> MinimumJumpTableEntries
  ("min-jump-table-entries", cl::init(4), cl::Hidden,
   cl::desc("Set minimum number of entries to use a jump table."));

static cl::opt<unsigned> MaximumJumpTableSize
  ("max-jump-table-size", cl::init(UINT_MAX), cl::Hidden,
   cl::desc("Set maximum size of jump tables."));

/// Minimum jump table density for normal functions.
static cl::opt<unsigned>
    JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
                     cl::desc("Minimum density for building a jump table in "
                              "a normal function"));

/// Minimum jump table density for -Os or -Oz functions.
static cl::opt<unsigned> OptsizeJumpTableDensity(
    "optsize-jump-table-density", cl::init(40), cl::Hidden,
    cl::desc("Minimum density for building a jump table in "
             "an optsize function"));

// FIXME: This option is only used to test whether strict FP operations are
// processed correctly, by preventing strict FP operations from being mutated
// into normal FP operations during development. Once the backend fully
// supports strict FP operations, this option will be meaningless.
static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation",
       cl::desc("Don't mutate strict-float node to a legalize node"),
       cl::init(false), cl::Hidden);

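// Added note: returns true when this Darwin target ships the
// __sincosf_stret/__sincos_stret entry points (64-bit macOS >= 10.9,
// iOS >= 7.0, and newer Darwin OSes such as watchOS/tvOS).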
static bool darwinHasSinCos(const Triple &TT) {
  assert(TT.isOSDarwin() && "should be called with darwin triple");
  // Don't bother with 32 bit x86.
  if (TT.getArch() == Triple::x86)
    return false;
  // Macos < 10.9 has no sincos_stret.
  if (TT.isMacOSX())
    return !TT.isMacOSXVersionLT(10, 9) && TT.isArch64Bit();
  // iOS < 7.0 has no sincos_stret.
  if (TT.isiOS())
    return !TT.isOSVersionLT(7, 0);
  // Any other darwin such as WatchOS/TvOS is new enough.
  return true;
}

// Although this default value is arbitrary, it is not random. It is assumed
// that a condition that evaluates the same way by a higher percentage than this
// is best represented as control flow. Therefore, the default value N should be
// set such that the win from N% correct executions is greater than the loss
// from (100 - N)% mispredicted executions for the majority of intended targets.
static cl::opt<int> MinPercentageForPredictableBranch(
    "min-predictable-branch", cl::init(99),
    cl::desc("Minimum percentage (0-100) that a condition must be either true "
             "or false to assume that the condition is predictable"),
    cl::Hidden);

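// Added note: InitLibcalls assigns every RTLIB entry its default name by
// expanding HANDLE_LIBCALL over RuntimeLibcalls.def, gives all libcalls the C
// calling convention, and then applies the per-triple overrides below (PPC
// quad precision, Darwin, GNU/Fuchsia/Android sincos, PS4, OpenBSD).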
void TargetLoweringBase::InitLibcalls(const Triple &TT) {
#define HANDLE_LIBCALL(code, name) \
  setLibcallName(RTLIB::code, name);
#include "llvm/IR/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL

  // Initialize calling conventions to their default.
  for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
    setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);

  // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
  if (TT.getArch() == Triple::ppc || TT.isPPC64()) {
    setLibcallName(RTLIB::ADD_F128, "__addkf3");
    setLibcallName(RTLIB::SUB_F128, "__subkf3");
    setLibcallName(RTLIB::MUL_F128, "__mulkf3");
    setLibcallName(RTLIB::DIV_F128, "__divkf3");
    setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
    setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
    setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
    setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
    setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
    setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
    setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
    setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
    setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
    setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
    setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
    setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
    setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
    setLibcallName(RTLIB::UNE_F128, "__nekf2");
    setLibcallName(RTLIB::OGE_F128, "__gekf2");
    setLibcallName(RTLIB::OLT_F128, "__ltkf2");
    setLibcallName(RTLIB::OLE_F128, "__lekf2");
    setLibcallName(RTLIB::OGT_F128, "__gtkf2");
    setLibcallName(RTLIB::UO_F128, "__unordkf2");
  }

  // A few names are different on particular architectures or environments.
  if (TT.isOSDarwin()) {
    // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
    // of the gnueabi-style __gnu_*_ieee.
    // FIXME: What about other targets?
    setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
    setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

    // Some darwins have an optimized __bzero/bzero function.
    switch (TT.getArch()) {
    case Triple::x86:
    case Triple::x86_64:
      if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
        setLibcallName(RTLIB::BZERO, "__bzero");
      break;
    case Triple::aarch64:
    case Triple::aarch64_32:
      setLibcallName(RTLIB::BZERO, "bzero");
      break;
    default:
      break;
    }

    if (darwinHasSinCos(TT)) {
      setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret");
      setLibcallName(RTLIB::SINCOS_STRET_F64, "__sincos_stret");
      if (TT.isWatchABI()) {
        setLibcallCallingConv(RTLIB::SINCOS_STRET_F32,
                              CallingConv::ARM_AAPCS_VFP);
        setLibcallCallingConv(RTLIB::SINCOS_STRET_F64,
                              CallingConv::ARM_AAPCS_VFP);
      }
    }
  } else {
    setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee");
    setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee");
  }

  if (TT.isGNUEnvironment() || TT.isOSFuchsia() ||
      (TT.isAndroid() && !TT.isAndroidVersionLT(9))) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
    setLibcallName(RTLIB::SINCOS_F80, "sincosl");
    setLibcallName(RTLIB::SINCOS_F128, "sincosl");
    setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
  }

  if (TT.isPS4CPU()) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
  }

  if (TT.isOSOpenBSD()) {
    setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
  }
}

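// Added note: the helpers below map an (operand type, result type) pair to the
// matching RTLIB enumerator, returning UNKNOWN_LIBCALL when no libcall exists
// for that combination. For example, getFPEXT(MVT::f32, MVT::f64) returns
// FPEXT_F32_F64.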
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f16) {
    if (RetVT == MVT::f32)
      return FPEXT_F16_F32;
  } else if (OpVT == MVT::f32) {
    if (RetVT == MVT::f64)
      return FPEXT_F32_F64;
    if (RetVT == MVT::f128)
      return FPEXT_F32_F128;
    if (RetVT == MVT::ppcf128)
      return FPEXT_F32_PPCF128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::f128)
      return FPEXT_F64_F128;
    else if (RetVT == MVT::ppcf128)
      return FPEXT_F64_PPCF128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::f128)
      return FPEXT_F80_F128;
  }

  return UNKNOWN_LIBCALL;
}

/// getFPROUND - Return the FPROUND_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
  if (RetVT == MVT::f16) {
    if (OpVT == MVT::f32)
      return FPROUND_F32_F16;
    if (OpVT == MVT::f64)
      return FPROUND_F64_F16;
    if (OpVT == MVT::f80)
      return FPROUND_F80_F16;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F16;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F16;
  } else if (RetVT == MVT::f32) {
    if (OpVT == MVT::f64)
      return FPROUND_F64_F32;
    if (OpVT == MVT::f80)
      return FPROUND_F80_F32;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F32;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F32;
  } else if (RetVT == MVT::f64) {
    if (OpVT == MVT::f80)
      return FPROUND_F80_F64;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F64;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F64;
  } else if (RetVT == MVT::f80) {
    if (OpVT == MVT::f128)
      return FPROUND_F128_F80;
  }

  return UNKNOWN_LIBCALL;
}

/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f32) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F32_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F32_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F32_I128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F64_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F64_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F64_I128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F80_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F80_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F80_I128;
  } else if (OpVT == MVT::f128) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F128_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F128_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F128_I128;
  } else if (OpVT == MVT::ppcf128) {
    if (RetVT == MVT::i32)
      return FPTOSINT_PPCF128_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_PPCF128_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_PPCF128_I128;
  }
  return UNKNOWN_LIBCALL;
}

/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f32) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F32_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F32_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F32_I128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F64_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F64_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F64_I128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F80_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F80_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F80_I128;
  } else if (OpVT == MVT::f128) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F128_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F128_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F128_I128;
  } else if (OpVT == MVT::ppcf128) {
    if (RetVT == MVT::i32)
      return FPTOUINT_PPCF128_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_PPCF128_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_PPCF128_I128;
  }
  return UNKNOWN_LIBCALL;
}

/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::i32) {
    if (RetVT == MVT::f32)
      return SINTTOFP_I32_F32;
    if (RetVT == MVT::f64)
      return SINTTOFP_I32_F64;
    if (RetVT == MVT::f80)
      return SINTTOFP_I32_F80;
    if (RetVT == MVT::f128)
      return SINTTOFP_I32_F128;
    if (RetVT == MVT::ppcf128)
      return SINTTOFP_I32_PPCF128;
  } else if (OpVT == MVT::i64) {
    if (RetVT == MVT::f32)
      return SINTTOFP_I64_F32;
    if (RetVT == MVT::f64)
      return SINTTOFP_I64_F64;
    if (RetVT == MVT::f80)
      return SINTTOFP_I64_F80;
    if (RetVT == MVT::f128)
      return SINTTOFP_I64_F128;
    if (RetVT == MVT::ppcf128)
      return SINTTOFP_I64_PPCF128;
  } else if (OpVT == MVT::i128) {
    if (RetVT == MVT::f32)
      return SINTTOFP_I128_F32;
    if (RetVT == MVT::f64)
      return SINTTOFP_I128_F64;
    if (RetVT == MVT::f80)
      return SINTTOFP_I128_F80;
    if (RetVT == MVT::f128)
      return SINTTOFP_I128_F128;
    if (RetVT == MVT::ppcf128)
      return SINTTOFP_I128_PPCF128;
  }
  return UNKNOWN_LIBCALL;
}

/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::i32) {
    if (RetVT == MVT::f32)
      return UINTTOFP_I32_F32;
    if (RetVT == MVT::f64)
      return UINTTOFP_I32_F64;
    if (RetVT == MVT::f80)
      return UINTTOFP_I32_F80;
    if (RetVT == MVT::f128)
      return UINTTOFP_I32_F128;
    if (RetVT == MVT::ppcf128)
      return UINTTOFP_I32_PPCF128;
  } else if (OpVT == MVT::i64) {
    if (RetVT == MVT::f32)
      return UINTTOFP_I64_F32;
    if (RetVT == MVT::f64)
      return UINTTOFP_I64_F64;
    if (RetVT == MVT::f80)
      return UINTTOFP_I64_F80;
    if (RetVT == MVT::f128)
      return UINTTOFP_I64_F128;
    if (RetVT == MVT::ppcf128)
      return UINTTOFP_I64_PPCF128;
  } else if (OpVT == MVT::i128) {
    if (RetVT == MVT::f32)
      return UINTTOFP_I128_F32;
    if (RetVT == MVT::f64)
      return UINTTOFP_I128_F64;
    if (RetVT == MVT::f80)
      return UINTTOFP_I128_F80;
    if (RetVT == MVT::f128)
      return UINTTOFP_I128_F128;
    if (RetVT == MVT::ppcf128)
      return UINTTOFP_I128_PPCF128;
  }
  return UNKNOWN_LIBCALL;
}

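// Added note: getSYNC maps an ISD::ATOMIC_* opcode plus the access width
// (taken from the MVT) to the matching __sync_* libcall; the OP_TO_LIBCALL
// macro below picks the _1/_2/_4/_8/_16 variant. For example,
// getSYNC(ISD::ATOMIC_LOAD_ADD, MVT::i32) yields SYNC_FETCH_AND_ADD_4.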
RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
#define OP_TO_LIBCALL(Name, Enum) \
  case Name: \
    switch (VT.SimpleTy) { \
    default: \
      return UNKNOWN_LIBCALL; \
    case MVT::i8: \
      return Enum##_1; \
    case MVT::i16: \
      return Enum##_2; \
    case MVT::i32: \
      return Enum##_4; \
    case MVT::i64: \
      return Enum##_8; \
    case MVT::i128: \
      return Enum##_16; \
    }

  switch (Opc) {
    OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET)
    OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN)
    OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN)
  }

#undef OP_TO_LIBCALL

  return UNKNOWN_LIBCALL;
}

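// Added note: the three helpers below select the element-size-suffixed variant
// of the unordered-atomic memcpy/memmove/memset libcalls; only element sizes
// of 1, 2, 4, 8, and 16 bytes are supported.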
RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1;
  case 2:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2;
  case 4:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4;
  case 8:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
  case 16:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
  default:
    return UNKNOWN_LIBCALL;
  }
}

RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1;
  case 2:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2;
  case 4:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4;
  case 8:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8;
  case 16:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16;
  default:
    return UNKNOWN_LIBCALL;
  }
}

RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
  case 2:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
  case 4:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
  case 8:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
  case 16:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
  default:
    return UNKNOWN_LIBCALL;
  }
}

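// Added note: soft-float comparison libcalls return an integer that must be
// compared against zero; InitCmpLibcallCCs records which condition code
// interprets that result for each comparison libcall.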
/// InitCmpLibcallCCs - Set default comparison libcall CC.
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
  memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
  CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
  CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
  CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
  CCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
  CCs[RTLIB::UNE_F32] = ISD::SETNE;
  CCs[RTLIB::UNE_F64] = ISD::SETNE;
  CCs[RTLIB::UNE_F128] = ISD::SETNE;
  CCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
  CCs[RTLIB::OGE_F32] = ISD::SETGE;
  CCs[RTLIB::OGE_F64] = ISD::SETGE;
  CCs[RTLIB::OGE_F128] = ISD::SETGE;
  CCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
  CCs[RTLIB::OLT_F32] = ISD::SETLT;
  CCs[RTLIB::OLT_F64] = ISD::SETLT;
  CCs[RTLIB::OLT_F128] = ISD::SETLT;
  CCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
  CCs[RTLIB::OLE_F32] = ISD::SETLE;
  CCs[RTLIB::OLE_F64] = ISD::SETLE;
  CCs[RTLIB::OLE_F128] = ISD::SETLE;
  CCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
  CCs[RTLIB::OGT_F32] = ISD::SETGT;
  CCs[RTLIB::OGT_F64] = ISD::SETGT;
  CCs[RTLIB::OGT_F128] = ISD::SETGT;
  CCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
  CCs[RTLIB::UO_F32] = ISD::SETNE;
  CCs[RTLIB::UO_F64] = ISD::SETNE;
  CCs[RTLIB::UO_F128] = ISD::SETNE;
  CCs[RTLIB::UO_PPCF128] = ISD::SETNE;
}

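// Added note: the constructor records the one-time defaults (memcpy/memset
// expansion limits, boolean contents, libcall names and comparison CCs);
// the per-type legalization defaults live in initActions() below.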
/// NOTE: The TargetMachine owns TLOF.
TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
  initActions();

  // Perform these initializations only once.
  MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove =
      MaxLoadsPerMemcmp = 8;
  MaxGluedStoresPerMemcpy = 0;
  MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
      MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
  HasMultipleConditionRegisters = false;
  HasExtractBitsInsn = false;
  JumpIsExpensive = JumpIsExpensiveOverride;
  PredictableSelectIsExpensive = false;
  EnableExtLdPromotion = false;
  StackPointerRegisterToSaveRestore = 0;
  BooleanContents = UndefinedBooleanContent;
  BooleanFloatContents = UndefinedBooleanContent;
  BooleanVectorContents = UndefinedBooleanContent;
  SchedPreferenceInfo = Sched::ILP;
  GatherAllAliasesMaxDepth = 18;
  IsStrictFPEnabled = DisableStrictNodeMutation;

  // TODO: the default will be switched to 0 in the next commit, along
  // with the Target-specific changes necessary.
  MaxAtomicSizeInBitsSupported = 1024;

  MinCmpXchgSizeInBits = 0;
  SupportsUnalignedAtomics = false;

  std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr);

  InitLibcalls(TM.getTargetTriple());
  InitCmpLibcallCCs(CmpLibcallCCs);
}

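// Added note: initActions zeroes every per-type action table, which makes
// every operation Legal by default, and then marks the operations that
// targets must explicitly opt in to as Expand or Promote.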
void TargetLoweringBase::initActions() {
  // All operations default to being supported.
  memset(OpActions, 0, sizeof(OpActions));
  memset(LoadExtActions, 0, sizeof(LoadExtActions));
  memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
  memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
  memset(CondCodeActions, 0, sizeof(CondCodeActions));
  std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
  std::fill(std::begin(TargetDAGCombineArray),
            std::end(TargetDAGCombineArray), 0);

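  // Added note: promote ATOMIC_SWAP on floating-point types to the integer
  // type of the same width, so targets only need to handle the integer form.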
  for (MVT VT : MVT::fp_valuetypes()) {
    MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits().getFixedSize());
    if (IntVT.isValid()) {
      setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
      AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
    }
  }

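  // Added note: everything below is set once per value type; targets can
  // override individual entries in their own TargetLowering constructors.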
  // Set default actions for various operations.
  for (MVT VT : MVT::all_valuetypes()) {
    // Default all indexed load / store to expand.
    for (unsigned IM = (unsigned)ISD::PRE_INC;
         IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
      setIndexedLoadAction(IM, VT, Expand);
      setIndexedStoreAction(IM, VT, Expand);
      setIndexedMaskedLoadAction(IM, VT, Expand);
      setIndexedMaskedStoreAction(IM, VT, Expand);
    }

    // Most backends expect to see the node which just returns the value loaded.
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);

    // These operations default to expand.
    setOperationAction(ISD::FGETSIGN, VT, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
    setOperationAction(ISD::FMINNUM, VT, Expand);
    setOperationAction(ISD::FMAXNUM, VT, Expand);
    setOperationAction(ISD::FMINNUM_IEEE, VT, Expand);
    setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand);
    setOperationAction(ISD::FMINIMUM, VT, Expand);
    setOperationAction(ISD::FMAXIMUM, VT, Expand);
    setOperationAction(ISD::FMAD, VT, Expand);
    setOperationAction(ISD::SMIN, VT, Expand);
    setOperationAction(ISD::SMAX, VT, Expand);
    setOperationAction(ISD::UMIN, VT, Expand);
    setOperationAction(ISD::UMAX, VT, Expand);
    setOperationAction(ISD::ABS, VT, Expand);
    setOperationAction(ISD::FSHL, VT, Expand);
    setOperationAction(ISD::FSHR, VT, Expand);
    setOperationAction(ISD::SADDSAT, VT, Expand);
    setOperationAction(ISD::UADDSAT, VT, Expand);
    setOperationAction(ISD::SSUBSAT, VT, Expand);
    setOperationAction(ISD::USUBSAT, VT, Expand);
    setOperationAction(ISD::SMULFIX, VT, Expand);
    setOperationAction(ISD::SMULFIXSAT, VT, Expand);
    setOperationAction(ISD::UMULFIX, VT, Expand);
    setOperationAction(ISD::UMULFIXSAT, VT, Expand);
    setOperationAction(ISD::SDIVFIX, VT, Expand);
    setOperationAction(ISD::SDIVFIXSAT, VT, Expand);
    setOperationAction(ISD::UDIVFIX, VT, Expand);
    setOperationAction(ISD::UDIVFIXSAT, VT, Expand);

    // Overflow operations default to expand
    setOperationAction(ISD::SADDO, VT, Expand);
    setOperationAction(ISD::SSUBO, VT, Expand);
    setOperationAction(ISD::UADDO, VT, Expand);
    setOperationAction(ISD::USUBO, VT, Expand);
    setOperationAction(ISD::SMULO, VT, Expand);
    setOperationAction(ISD::UMULO, VT, Expand);

    // ADDCARRY operations default to expand
    setOperationAction(ISD::ADDCARRY, VT, Expand);
    setOperationAction(ISD::SUBCARRY, VT, Expand);
    setOperationAction(ISD::SETCCCARRY, VT, Expand);

    // ADDC/ADDE/SUBC/SUBE default to expand.
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);

    // These default to Expand so they will be expanded to CTLZ/CTTZ by default.
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);

    setOperationAction(ISD::BITREVERSE, VT, Expand);

    // These library functions default to expand.
    setOperationAction(ISD::FROUND, VT, Expand);
    setOperationAction(ISD::FROUNDEVEN, VT, Expand);
    setOperationAction(ISD::FPOWI, VT, Expand);

    // These operations default to expand for vector types.
    if (VT.isVector()) {
      setOperationAction(ISD::FCOPYSIGN, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
      setOperationAction(ISD::SPLAT_VECTOR, VT, Expand);
    }

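    // Added note: ConstrainedOps.def expands the DAG_INSTRUCTION macro once
    // per constrained FP operation, so every STRICT_* node defaults to Expand.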
    // Constrained floating-point operations default to expand.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
    setOperationAction(ISD::STRICT_##DAGN, VT, Expand);
#include "llvm/IR/ConstrainedOps.def"

[stack-protection] Add support for MSVC buffer security check
Summary:
This patch is adding support for the MSVC buffer security check implementation
The buffer security check is turned on with the '/GS' compiler switch.
* https://msdn.microsoft.com/en-us/library/8dbf701c.aspx
* To be added to clang here: http://reviews.llvm.org/D20347
Some overview of buffer security check feature and implementation:
* https://msdn.microsoft.com/en-us/library/aa290051(VS.71).aspx
* http://www.ksyash.com/2011/01/buffer-overflow-protection-3/
* http://blog.osom.info/2012/02/understanding-vs-c-compilers-buffer.html
For the following example:
```
int example(int offset, int index) {
char buffer[10];
memset(buffer, 0xCC, index);
return buffer[index];
}
```
The MSVC compiler is adding these instructions to perform stack integrity check:
```
push ebp
mov ebp,esp
sub esp,50h
[1] mov eax,dword ptr [__security_cookie (01068024h)]
[2] xor eax,ebp
[3] mov dword ptr [ebp-4],eax
push ebx
push esi
push edi
mov eax,dword ptr [index]
push eax
push 0CCh
lea ecx,[buffer]
push ecx
call _memset (010610B9h)
add esp,0Ch
mov eax,dword ptr [index]
movsx eax,byte ptr buffer[eax]
pop edi
pop esi
pop ebx
[4] mov ecx,dword ptr [ebp-4]
[5] xor ecx,ebp
[6] call @__security_check_cookie@4 (01061276h)
mov esp,ebp
pop ebp
ret
```
The instrumentation above is:
* [1] is loading the global security canary,
* [3] is storing the local computed ([2]) canary to the guard slot,
* [4] is loading the guard slot and ([5]) re-compute the global canary,
* [6] is validating the resulting canary with the '__security_check_cookie' and performs error handling.
Overview of the current stack-protection implementation:
* lib/CodeGen/StackProtector.cpp
* There is a default stack-protection implementation applied on intermediate representation.
* The target can overload 'getIRStackGuard' method if it has a standard location for the stack protector cookie.
* An intrinsic 'Intrinsic::stackprotector' is added to the prologue. It will be expanded by the instruction selection pass (DAG or Fast).
* Basic Blocks are added to every instrumented function to receive the code for handling stack guard validation and errors handling.
* Guard manipulation and comparison are added directly to the intermediate representation.
* lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
* lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
* There is an implementation that adds instrumentation during instruction selection (for better handling of sibbling calls).
* see long comment above 'class StackProtectorDescriptor' declaration.
* The target needs to override 'getSDagStackGuard' to activate SDAG stack protection generation. (note: getIRStackGuard MUST be nullptr).
* 'getSDagStackGuard' returns the appropriate stack guard (security cookie)
* The code is generated by 'SelectionDAGBuilder.cpp' and 'SelectionDAGISel.cpp'.
* include/llvm/Target/TargetLowering.h
* Contains function to retrieve the default Guard 'Value'; should be overriden by each target to select which implementation is used and provide Guard 'Value'.
* lib/Target/X86/X86ISelLowering.cpp
* Contains the x86 specialisation; Guard 'Value' used by the SelectionDAG algorithm.
Function-based Instrumentation:
* MSVC doesn't inline the stack guard comparison in every function. Instead, a call to '__security_check_cookie' is added to the epilogue before every return instruction.
* To support function-based instrumentation, this patch is
* adding a function to get the function-based check (llvm 'Value', see include/llvm/Target/TargetLowering.h),
* If provided, the stack protection instrumentation won't be inlined and a call to that function will be added to the prologue.
* modifying (SelectionDAGISel.cpp) to avoid producing basic blocks used for inline instrumentation,
* generating the function-based instrumentation during the ISEL pass (SelectionDAGBuilder.cpp),
* if FastISEL (not SelectionDAG), using the fallback which relies on the same function-based check implemented over the intermediate representation (StackProtector.cpp).
Modifications
* adding support for MSVC (lib/Target/X86/X86ISelLowering.cpp)
* adding support for function-based instrumentation (lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp, .h)
Results
* IR generated instrumentation:
```
clang-cl /GS test.cc /Od /c -mllvm -print-isel-input
```
```
*** Final LLVM Code input to ISel ***
; Function Attrs: nounwind sspstrong
define i32 @"\01?example@@YAHHH@Z"(i32 %offset, i32 %index) #0 {
entry:
%StackGuardSlot = alloca i8* <<<-- Allocated guard slot
%0 = call i8* @llvm.stackguard() <<<-- Loading Stack Guard value
call void @llvm.stackprotector(i8* %0, i8** %StackGuardSlot) <<<-- Prologue intrinsic call (store to Guard slot)
%index.addr = alloca i32, align 4
%offset.addr = alloca i32, align 4
%buffer = alloca [10 x i8], align 1
store i32 %index, i32* %index.addr, align 4
store i32 %offset, i32* %offset.addr, align 4
%arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %buffer, i32 0, i32 0
%1 = load i32, i32* %index.addr, align 4
call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -52, i32 %1, i32 1, i1 false)
%2 = load i32, i32* %index.addr, align 4
%arrayidx = getelementptr inbounds [10 x i8], [10 x i8]* %buffer, i32 0, i32 %2
%3 = load i8, i8* %arrayidx, align 1
%conv = sext i8 %3 to i32
%4 = load volatile i8*, i8** %StackGuardSlot <<<-- Loading Guard slot
call void @__security_check_cookie(i8* %4) <<<-- Epilogue function-based check
ret i32 %conv
}
```
* SelectionDAG generated instrumentation:
```
clang-cl /GS test.cc /O1 /c /FA
```
```
"?example@@YAHHH@Z": # @"\01?example@@YAHHH@Z"
# BB#0: # %entry
pushl %esi
subl $16, %esp
movl ___security_cookie, %eax <<<-- Loading Stack Guard value
movl 28(%esp), %esi
movl %eax, 12(%esp) <<<-- Store to Guard slot
leal 2(%esp), %eax
pushl %esi
pushl $204
pushl %eax
calll _memset
addl $12, %esp
movsbl 2(%esp,%esi), %esi
movl 12(%esp), %ecx <<<-- Loading Guard slot
calll @__security_check_cookie@4 <<<-- Epilogue function-based check
movl %esi, %eax
addl $16, %esp
popl %esi
retl
```
Reviewers: kcc, pcc, eugenis, rnk
Subscribers: majnemer, llvm-commits, hans, thakis, rnk
Differential Revision: http://reviews.llvm.org/D20346
llvm-svn: 272053
2016-06-08 04:15:35 +08:00
|
|
|
// For most targets @llvm.get.dynamic.area.offset just returns 0.
|
2015-12-01 19:40:55 +08:00
|
|
|
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
|
[SDAG][AArch64] Legalize VECREDUCE
Fixes https://bugs.llvm.org/show_bug.cgi?id=36796.
Implement basic legalizations (PromoteIntRes, PromoteIntOp,
ExpandIntRes, ScalarizeVecOp, WidenVecOp) for VECREDUCE opcodes.
There are more legalizations missing (esp float legalizations),
but there's no way to test them right now, so I'm not adding them.
This also includes a few more changes to make this work somewhat
reasonably:
* Add support for expanding VECREDUCE in SDAG. Usually
experimental.vector.reduce is expanded prior to codegen, but if the
target does have native vector reduce, it may of course still be
necessary to expand due to legalization issues. This uses a shuffle
reduction if possible, followed by a naive scalar reduction.
* Allow the result type of integer VECREDUCE to be larger than the
vector element type. For example we need to be able to reduce a v8i8
into an (nominally) i32 result type on AArch64.
* Use the vector operand type rather than the scalar result type to
determine the action, so we can control exactly which vector types are
supported. Also change the legalize vector op code to handle
operations that only have vector operands, but no vector results, as
is the case for VECREDUCE.
* Default VECREDUCE to Expand. On AArch64 (only target using VECREDUCE),
explicitly specify for which vector types the reductions are supported.
This does not handle anything related to VECREDUCE_STRICT_*.
Differential Revision: https://reviews.llvm.org/D58015
llvm-svn: 355860
2019-03-12 04:22:13 +08:00
|
|
|
|
|
|
|
  // Vector reductions default to expand.
|
|
|
|
setOperationAction(ISD::VECREDUCE_FADD, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_ADD, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_MUL, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_AND, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_OR, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_XOR, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand);
|
|
|
|
setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
|
2013-01-12 04:05:37 +08:00
|
|
|
}
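The loop above leaves every VECREDUCE_* opcode at Expand, matching the commit message earlier in this hunk: a backend has to opt back in for the vector types it can reduce natively. A minimal sketch of that opt-in, assuming a hypothetical target constructor that runs after this base-class initialization (the type list is illustrative, not taken from any in-tree target):
```
// Hypothetical target constructor: undo the Expand default for the vector
// types this ISA has native reduction instructions for.
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
  setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
  setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
  setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
}
```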
|
|
|
|
|
|
|
|
// Most targets ignore the @llvm.prefetch intrinsic.
|
|
|
|
setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
|
|
|
|
|
2015-08-28 09:49:59 +08:00
|
|
|
// Most targets also ignore the @llvm.readcyclecounter intrinsic.
|
|
|
|
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
|
|
|
|
|
2013-01-12 04:05:37 +08:00
|
|
|
// ConstantFP nodes default to expand. Targets can either change this to
|
|
|
|
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
|
|
|
|
// to optimize expansions for certain constants.
|
|
|
|
setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
|
|
|
|
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
|
|
|
|
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
|
|
|
|
setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
|
|
|
|
|
|
|
|
// These library functions default to expand.
|
2015-03-27 07:21:03 +08:00
|
|
|
for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
|
[DAGCombiner] try to convert pow(x, 1/3) to cbrt(x)
This is a follow-up suggested in D51630 and originally proposed as an IR transform in D49040.
Copying the motivational statement by @evandro from that patch:
"This transformation helps some benchmarks in SPEC CPU2000 and CPU2006, such as 188.ammp,
447.dealII, 453.povray, and especially 300.twolf, as well as some proprietary benchmarks.
Otherwise, no regressions on x86-64 or A64."
I'm proposing to add only the minimum support for a DAG node here. Since we don't have an
LLVM IR intrinsic for cbrt, and there are no other DAG ways to create a FCBRT node yet, I
don't think we need to worry about DAG builder, legalization, a strict variant, etc. We
should be able to expand as needed when adding more functionality/transforms. For reference,
these are transform suggestions currently listed in SimplifyLibCalls.cpp:
// * cbrt(expN(X)) -> expN(x/3)
// * cbrt(sqrt(x)) -> pow(x,1/6)
// * cbrt(cbrt(x)) -> pow(x,1/9)
Also, given that we bail out on long double for now, there should not be any logical
differences between platforms (unless there's some platform out there that has pow()
but not cbrt()).
Differential Revision: https://reviews.llvm.org/D51753
llvm-svn: 342348
2018-09-17 00:50:26 +08:00
|
|
|
setOperationAction(ISD::FCBRT, VT, Expand);
|
2015-03-27 07:21:03 +08:00
|
|
|
setOperationAction(ISD::FLOG , VT, Expand);
|
|
|
|
setOperationAction(ISD::FLOG2, VT, Expand);
|
|
|
|
setOperationAction(ISD::FLOG10, VT, Expand);
|
|
|
|
setOperationAction(ISD::FEXP , VT, Expand);
|
|
|
|
setOperationAction(ISD::FEXP2, VT, Expand);
|
|
|
|
setOperationAction(ISD::FFLOOR, VT, Expand);
|
|
|
|
setOperationAction(ISD::FNEARBYINT, VT, Expand);
|
|
|
|
setOperationAction(ISD::FCEIL, VT, Expand);
|
|
|
|
setOperationAction(ISD::FRINT, VT, Expand);
|
|
|
|
setOperationAction(ISD::FTRUNC, VT, Expand);
|
|
|
|
setOperationAction(ISD::FROUND, VT, Expand);
|
2020-05-26 20:24:05 +08:00
|
|
|
setOperationAction(ISD::FROUNDEVEN, VT, Expand);
|
2019-05-16 21:15:27 +08:00
|
|
|
setOperationAction(ISD::LROUND, VT, Expand);
|
|
|
|
setOperationAction(ISD::LLROUND, VT, Expand);
|
2019-05-29 04:47:44 +08:00
|
|
|
setOperationAction(ISD::LRINT, VT, Expand);
|
|
|
|
setOperationAction(ISD::LLRINT, VT, Expand);
|
2015-03-27 07:21:03 +08:00
|
|
|
}
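For these floating-point math nodes, Expand typically ends up as a call to the corresponding libm routine once the DAG is legalized. A short sketch of how client code can query the defaults just set, assuming TLI is a reference to the TargetLoweringBase for the current subtarget:
```
// Sketch only: inspect the action recorded by the loop above.
if (!TLI.isOperationLegalOrCustom(ISD::FEXP2, MVT::f64)) {
  // FEXP2 stayed at Expand, so legalization is expected to lower it to the
  // exp2() runtime call for this target.
}
```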
|
2013-01-12 04:05:37 +08:00
|
|
|
|
|
|
|
// Default ISD::TRAP to expand (which turns it into abort).
|
|
|
|
setOperationAction(ISD::TRAP, MVT::Other, Expand);
|
|
|
|
|
|
|
|
// On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
|
|
|
|
// here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
|
|
|
|
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
|
|
|
|
}
|
|
|
|
|
2015-07-09 23:12:23 +08:00
|
|
|
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
|
|
|
|
EVT) const {
|
2019-05-09 16:07:36 +08:00
|
|
|
return MVT::getIntegerVT(DL.getPointerSizeInBits(0));
|
2013-01-12 04:05:37 +08:00
|
|
|
}
|
|
|
|
|
2018-02-21 01:41:05 +08:00
|
|
|
EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
|
|
|
|
bool LegalTypes) const {
|
2013-03-02 02:40:30 +08:00
|
|
|
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
|
|
|
|
if (LHSTy.isVector())
|
|
|
|
return LHSTy;
|
2018-02-21 01:41:05 +08:00
|
|
|
return LegalTypes ? getScalarShiftAmountTy(DL, LHSTy)
|
|
|
|
: getPointerTy(DL);
|
2013-03-02 02:40:30 +08:00
|
|
|
}
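A backend that wants a different shift-amount type overrides getScalarShiftAmountTy, and getShiftAmountTy above then returns it whenever LegalTypes is set. A minimal sketch for a hypothetical target whose shift instructions take an 8-bit amount (the class name and the choice of MVT::i8 are assumptions):
```
// Hypothetical override: all scalar shift amounts are materialized as i8.
MVT MyTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                             EVT VT) const {
  return MVT::i8;
}
```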
|
|
|
|
|
2013-01-12 04:05:37 +08:00
|
|
|
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
|
|
|
|
assert(isTypeLegal(VT));
|
|
|
|
switch (Op) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case ISD::SDIV:
|
|
|
|
case ISD::UDIV:
|
|
|
|
case ISD::SREM:
|
|
|
|
case ISD::UREM:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
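canOpTrap exists so that callers deciding whether to speculate a node know that integer division and remainder can fault on a zero divisor, while the remaining opcodes report false by default. A small usage sketch, assuming TLI is the current target lowering and both types are legal on it:
```
// Sketch: SDIV may trap, so it must not be hoisted above the check that the
// divisor is non-zero; FADD falls into the default case and reports false.
bool DivMayTrap = TLI.canOpTrap(ISD::SDIV, MVT::i32); // true
bool AddMayTrap = TLI.canOpTrap(ISD::FADD, MVT::f32); // false
```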
|
|
|
|
|
2015-07-02 02:10:20 +08:00
|
|
|
void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
|
|
|
|
// If the command-line option was specified, ignore this request.
|
|
|
|
if (!JumpIsExpensiveOverride.getNumOccurrences())
|
|
|
|
JumpIsExpensive = isExpensive;
|
|
|
|
}
|
|
|
|
|
2015-02-26 06:41:30 +08:00
|
|
|
TargetLoweringBase::LegalizeKind
|
|
|
|
TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
|
|
|
|
// If this is a simple type, use the ComputeRegisterProp mechanism.
|
|
|
|
if (VT.isSimple()) {
|
|
|
|
MVT SVT = VT.getSimpleVT();
|
|
|
|
assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType));
|
|
|
|
MVT NVT = TransformToType[SVT.SimpleTy];
|
|
|
|
LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
|
|
|
|
|
|
|
|
assert((LA == TypeLegal || LA == TypeSoftenFloat ||
|
2020-02-01 14:42:07 +08:00
|
|
|
LA == TypeSoftPromoteHalf ||
|
2019-08-16 02:58:25 +08:00
|
|
|
(NVT.isVector() ||
|
|
|
|
ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) &&
|
2015-02-26 06:41:30 +08:00
|
|
|
"Promote may not follow Expand or Promote");
|
|
|
|
|
|
|
|
if (LA == TypeSplitVector)
|
2020-06-05 23:03:13 +08:00
|
|
|
return LegalizeKind(LA,
|
|
|
|
EVT::getVectorVT(Context, SVT.getVectorElementType(),
|
2020-06-05 23:46:25 +08:00
|
|
|
SVT.getVectorElementCount() / 2));
|
2015-02-26 06:41:30 +08:00
|
|
|
if (LA == TypeScalarizeVector)
|
|
|
|
return LegalizeKind(LA, SVT.getVectorElementType());
|
|
|
|
return LegalizeKind(LA, NVT);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle Extended Scalar Types.
|
|
|
|
if (!VT.isVector()) {
|
|
|
|
assert(VT.isInteger() && "Float types must be simple");
|
|
|
|
unsigned BitSize = VT.getSizeInBits();
|
|
|
|
// First promote to a power-of-two size, then expand if necessary.
|
|
|
|
if (BitSize < 8 || !isPowerOf2_32(BitSize)) {
|
|
|
|
EVT NVT = VT.getRoundIntegerType(Context);
|
|
|
|
assert(NVT != VT && "Unable to round integer VT");
|
|
|
|
LegalizeKind NextStep = getTypeConversion(Context, NVT);
|
|
|
|
// Avoid multi-step promotion.
|
|
|
|
if (NextStep.first == TypePromoteInteger)
|
|
|
|
return NextStep;
|
|
|
|
// Return rounded integer type.
|
|
|
|
return LegalizeKind(TypePromoteInteger, NVT);
|
|
|
|
}
|
|
|
|
|
|
|
|
return LegalizeKind(TypeExpandInteger,
|
|
|
|
EVT::getIntegerVT(Context, VT.getSizeInBits() / 2));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle vector types.
|
2020-06-05 23:46:25 +08:00
|
|
|
ElementCount NumElts = VT.getVectorElementCount();
|
2015-02-26 06:41:30 +08:00
|
|
|
EVT EltVT = VT.getVectorElementType();
|
|
|
|
|
|
|
|
// Vectors with only one element are always scalarized.
|
|
|
|
if (NumElts == 1)
|
|
|
|
return LegalizeKind(TypeScalarizeVector, EltVT);
|
|
|
|
|
2020-06-05 23:46:25 +08:00
|
|
|
if (VT.getVectorElementCount() == ElementCount(1, true))
|
|
|
|
report_fatal_error("Cannot legalize this vector");
|
|
|
|
|
2015-02-26 06:41:30 +08:00
|
|
|
// Try to widen vector elements until the element type is a power of two and
|
|
|
|
// promote it to a legal type later on, for example:
|
|
|
|
// <3 x i8> -> <4 x i8> -> <4 x i32>
|
|
|
|
if (EltVT.isInteger()) {
|
|
|
|
// Vectors with a number of elements that is not a power of two are always
|
|
|
|
// widened, for example <3 x i8> -> <4 x i8>.
|
|
|
|
if (!VT.isPow2VectorType()) {
|
2020-06-05 23:46:25 +08:00
|
|
|
NumElts = NumElts.NextPowerOf2();
|
2015-02-26 06:41:30 +08:00
|
|
|
EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
|
|
|
|
return LegalizeKind(TypeWidenVector, NVT);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Examine the element type.
|
|
|
|
LegalizeKind LK = getTypeConversion(Context, EltVT);
|
|
|
|
|
|
|
|
// If type is to be expanded, split the vector.
|
|
|
|
// <4 x i140> -> <2 x i140>
|
|
|
|
if (LK.first == TypeExpandInteger)
|
|
|
|
return LegalizeKind(TypeSplitVector,
|
|
|
|
EVT::getVectorVT(Context, EltVT, NumElts / 2));
|
|
|
|
|
|
|
|
// Promote the integer element types until a legal vector type is found
|
|
|
|
// or until the element integer type is too big. If a legal type was not
|
|
|
|
// found, fallback to the usual mechanism of widening/splitting the
|
|
|
|
// vector.
|
|
|
|
EVT OldEltVT = EltVT;
|
2017-09-22 07:20:16 +08:00
|
|
|
while (true) {
|
2015-02-26 06:41:30 +08:00
|
|
|
// Increase the bitwidth of the element to the next pow-of-two
|
|
|
|
// (which is greater than 8 bits).
|
|
|
|
EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
|
|
|
|
.getRoundIntegerType(Context);
|
|
|
|
|
|
|
|
// Stop trying when getting a non-simple element type.
|
|
|
|
// Note that vector elements may be greater than legal vector element
|
|
|
|
      // types. Example: X86 XMM registers hold 64-bit elements on 32-bit
|
|
|
|
// systems.
|
|
|
|
if (!EltVT.isSimple())
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Build a new vector type and check if it is legal.
|
|
|
|
MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
|
|
|
|
// Found a legal promoted vector type.
|
|
|
|
if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
|
|
|
|
return LegalizeKind(TypePromoteInteger,
|
|
|
|
EVT::getVectorVT(Context, EltVT, NumElts));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reset the type to the unexpanded type if we did not find a legal vector
|
|
|
|
// type with a promoted vector element type.
|
|
|
|
EltVT = OldEltVT;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try to widen the vector until a legal type is found.
|
|
|
|
// If there is no wider legal type, split the vector.
|
2017-09-22 07:20:16 +08:00
|
|
|
while (true) {
|
2015-02-26 06:41:30 +08:00
|
|
|
// Round up to the next power of 2.
|
2020-06-05 23:46:25 +08:00
|
|
|
NumElts = NumElts.NextPowerOf2();
|
2015-02-26 06:41:30 +08:00
|
|
|
|
|
|
|
// If there is no simple vector type with this many elements then there
|
|
|
|
// cannot be a larger legal vector type. Note that this assumes that
|
|
|
|
// there are no skipped intermediate vector types in the simple types.
|
|
|
|
if (!EltVT.isSimple())
|
|
|
|
break;
|
|
|
|
MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
|
|
|
|
if (LargerVector == MVT())
|
|
|
|
break;
|
|
|
|
|
|
|
|
// If this type is legal then widen the vector.
|
|
|
|
if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal)
|
|
|
|
return LegalizeKind(TypeWidenVector, LargerVector);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Widen odd vectors to next power of two.
|
|
|
|
if (!VT.isPow2VectorType()) {
|
|
|
|
EVT NVT = VT.getPow2VectorType(Context);
|
|
|
|
return LegalizeKind(TypeWidenVector, NVT);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Vectors with illegal element types are expanded.
|
2020-06-05 23:46:25 +08:00
|
|
|
EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorElementCount() / 2);
|
2015-02-26 06:41:30 +08:00
|
|
|
return LegalizeKind(TypeSplitVector, NVT);
|
|
|
|
}
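Each call to getTypeConversion answers one legalization step, so an illegal type is walked to a legal one step by step, exactly like the <3 x i8> -> <4 x i8> -> <4 x i32> example in the comments above. A sketch of following that chain from outside, assuming Ctx is an LLVMContext and TLI is a target where v4i32 is legal but the small i8 vectors are not:
```
// Sketch: getTypeToTransformTo applies one step of the conversion computed
// above, e.g. v3i8 -> v4i8 (widen) -> v4i32 (promote) on the assumed target.
EVT VT = EVT::getVectorVT(Ctx, MVT::i8, 3);
while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
  VT = TLI.getTypeToTransformTo(Ctx, VT);
```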
|
2013-01-12 04:05:37 +08:00
|
|
|
|
|
|
|
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
|
|
|
|
unsigned &NumIntermediates,
|
|
|
|
MVT &RegisterVT,
|
|
|
|
TargetLoweringBase *TLI) {
|
|
|
|
// Figure out the right, legal destination reg to copy into.
|
2020-04-10 07:10:52 +08:00
|
|
|
ElementCount EC = VT.getVectorElementCount();
|
2013-01-12 04:05:37 +08:00
|
|
|
MVT EltTy = VT.getVectorElementType();
|
|
|
|
|
|
|
|
unsigned NumVectorRegs = 1;
|
|
|
|
|
2020-06-06 01:29:43 +08:00
|
|
|
// Scalable vectors cannot be scalarized, so splitting or widening is
|
|
|
|
// required.
|
|
|
|
if (VT.isScalableVector() && !isPowerOf2_32(EC.Min))
|
|
|
|
llvm_unreachable(
|
|
|
|
"Splitting or widening of non-power-of-2 MVTs is not implemented.");
|
|
|
|
|
2020-04-10 07:10:52 +08:00
|
|
|
// FIXME: We don't support non-power-of-2-sized vectors for now.
|
|
|
|
// Ideally we could break down into LHS/RHS like LegalizeDAG does.
|
|
|
|
if (!isPowerOf2_32(EC.Min)) {
|
|
|
|
// Split EC to unit size (scalable property is preserved).
|
|
|
|
NumVectorRegs = EC.Min;
|
|
|
|
EC = EC / NumVectorRegs;
|
2013-01-12 04:05:37 +08:00
|
|
|
}
|
|
|
|
|
2020-04-10 07:10:52 +08:00
|
|
|
// Divide the input until we get to a supported size. This will
|
|
|
|
  // always end up with an EC that represents a scalar or a scalable
|
|
|
|
// scalar.
|
|
|
|
while (EC.Min > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
|
|
|
|
EC.Min >>= 1;
|
2013-01-12 04:05:37 +08:00
|
|
|
NumVectorRegs <<= 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
NumIntermediates = NumVectorRegs;
|
|
|
|
|
2020-04-10 07:10:52 +08:00
|
|
|
MVT NewVT = MVT::getVectorVT(EltTy, EC);
|
2013-01-12 04:05:37 +08:00
|
|
|
if (!TLI->isTypeLegal(NewVT))
|
|
|
|
NewVT = EltTy;
|
|
|
|
IntermediateVT = NewVT;
|
|
|
|
|
2020-04-10 07:10:52 +08:00
|
|
|
unsigned LaneSizeInBits = NewVT.getScalarSizeInBits().getFixedSize();
|
2013-01-12 04:05:37 +08:00
|
|
|
|
|
|
|
// Convert sizes such as i33 to i64.
|
2020-04-10 07:10:52 +08:00
|
|
|
if (!isPowerOf2_32(LaneSizeInBits))
|
|
|
|
LaneSizeInBits = NextPowerOf2(LaneSizeInBits);
|
2013-01-12 04:05:37 +08:00
|
|
|
|
|
|
|
MVT DestVT = TLI->getRegisterType(NewVT);
|
|
|
|
RegisterVT = DestVT;
|
|
|
|
if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
|
2020-04-10 07:10:52 +08:00
|
|
|
return NumVectorRegs *
|
|
|
|
(LaneSizeInBits / DestVT.getScalarSizeInBits().getFixedSize());
|
2013-01-12 04:05:37 +08:00
|
|
|
|
|
|
|
// Otherwise, promotion or legal types use the same number of registers as
|
|
|
|
// the vector decimated to the appropriate level.
|
|
|
|
return NumVectorRegs;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// isLegalRC - Return true if the value types that can be represented by the
|
|
|
|
/// specified register class are all legal.
|
2017-04-25 03:51:12 +08:00
|
|
|
bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI,
|
|
|
|
const TargetRegisterClass &RC) const {
|
|
|
|
for (auto I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I)
|
2013-01-12 04:05:37 +08:00
|
|
|
if (isTypeLegal(*I))
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-11-29 11:07:54 +08:00
|
|
|
/// Replace/modify any TargetFrameIndex operands with a target-dependent
|
|
|
|
/// sequence of memory operands that is recognized by PrologEpilogInserter.
|
2016-07-01 06:52:52 +08:00
|
|
|
MachineBasicBlock *
|
|
|
|
TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
|
2013-11-29 11:07:54 +08:00
|
|
|
MachineBasicBlock *MBB) const {
|
2016-07-01 06:52:52 +08:00
|
|
|
MachineInstr *MI = &InitialMI;
|
2017-10-11 07:50:49 +08:00
|
|
|
MachineFunction &MF = *MI->getMF();
|
2016-07-29 02:40:00 +08:00
|
|
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
2015-12-24 07:44:28 +08:00
|
|
|
|
|
|
|
// We're handling multiple types of operands here:
|
|
|
|
// PATCHPOINT MetaArgs - live-in, read only, direct
|
|
|
|
// STATEPOINT Deopt Spill - live-through, read only, indirect
|
|
|
|
// STATEPOINT Deopt Alloca - live-through, read only, direct
|
|
|
|
// (We're currently conservative and mark the deopt slots read/write in
|
2018-07-31 03:41:25 +08:00
|
|
|
// practice.)
|
2015-12-24 07:44:28 +08:00
|
|
|
// STATEPOINT GC Spill - live-through, read/write, indirect
|
|
|
|
// STATEPOINT GC Alloca - live-through, read/write, direct
|
|
|
|
// The live-in vs live-through is handled already (the live through ones are
|
|
|
|
// all stack slots), but we need to handle the different type of stackmap
|
|
|
|
// operands and memory effects here.
|
2013-11-29 11:07:54 +08:00
|
|
|
|
2020-06-05 00:11:13 +08:00
|
|
|
if (!llvm::any_of(MI->operands(),
|
|
|
|
[](MachineOperand &Operand) { return Operand.isFI(); }))
|
|
|
|
return MBB;
|
|
|
|
|
|
|
|
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());
|
|
|
|
|
|
|
|
// Inherit previous memory operands.
|
|
|
|
MIB.cloneMemRefs(*MI);
|
|
|
|
|
|
|
|
for (auto &MO : MI->operands()) {
|
|
|
|
if (!MO.isFI()) {
|
2020-06-05 20:52:14 +08:00
|
|
|
MIB.add(MO);
|
2013-11-29 11:07:54 +08:00
|
|
|
continue;
|
2020-06-05 00:11:13 +08:00
|
|
|
}
|
2013-11-29 11:07:54 +08:00
|
|
|
|
|
|
|
// foldMemoryOperand builds a new MI after replacing a single FI operand
|
|
|
|
// with the canonical set of five x86 addressing-mode operands.
|
|
|
|
int FI = MO.getIndex();
|
|
|
|
|
2015-12-24 07:44:28 +08:00
|
|
|
// Add frame index operands recognized by stackmaps.cpp
|
|
|
|
if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
|
|
|
|
// indirect-mem-ref tag, size, #FI, offset.
|
|
|
|
// Used for spills inserted by StatepointLowering. This codepath is not
|
|
|
|
      // used for patchpoints/stackmaps at all; for these, spilling is done via
|
|
|
|
// foldMemoryOperand callback only.
|
|
|
|
assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
|
|
|
|
MIB.addImm(StackMaps::IndirectMemRefOp);
|
|
|
|
MIB.addImm(MFI.getObjectSize(FI));
|
2020-06-05 20:52:14 +08:00
|
|
|
MIB.add(MO);
|
2015-12-24 07:44:28 +08:00
|
|
|
MIB.addImm(0);
|
|
|
|
} else {
|
|
|
|
// direct-mem-ref tag, #FI, offset.
|
|
|
|
// Used by patchpoint, and direct alloca arguments to statepoints
|
|
|
|
MIB.addImm(StackMaps::DirectMemRefOp);
|
2020-06-05 20:52:14 +08:00
|
|
|
MIB.add(MO);
|
2015-12-24 07:44:28 +08:00
|
|
|
MIB.addImm(0);
|
|
|
|
}
|
2013-11-29 11:07:54 +08:00
|
|
|
|
|
|
|
assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
|
|
|
|
|
|
|
|
// Add a new memory operand for this FI.
|
|
|
|
assert(MFI.getObjectOffset(FI) != -1);
|
2014-12-02 06:52:56 +08:00
|
|
|
|
2019-03-13 03:12:33 +08:00
|
|
|
// Note: STATEPOINT MMOs are added during SelectionDAG. STACKMAP, and
|
|
|
|
// PATCHPOINT should be updated to do the same. (TODO)
|
|
|
|
if (MI->getOpcode() != TargetOpcode::STATEPOINT) {
|
|
|
|
auto Flags = MachineMemOperand::MOLoad;
|
|
|
|
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
|
|
|
MachinePointerInfo::getFixedStack(MF, FI), Flags,
|
2020-03-31 17:43:50 +08:00
|
|
|
MF.getDataLayout().getPointerSize(), MFI.getObjectAlign(FI));
|
2019-03-13 03:12:33 +08:00
|
|
|
MIB->addMemOperand(MF, MMO);
|
2014-12-02 06:52:56 +08:00
|
|
|
}
|
2013-11-29 11:07:54 +08:00
|
|
|
}
|
2020-06-05 00:11:13 +08:00
|
|
|
MBB->insert(MachineBasicBlock::iterator(MI), MIB);
|
|
|
|
MI->eraseFromParent();
|
2013-11-29 11:07:54 +08:00
|
|
|
return MBB;
|
|
|
|
}
|
|
|
|
|
[XRay][compiler-rt+llvm] Update XRay register stashing semantics
Summary:
This change expands the set of registers stashed by the entry and
`__xray_CustomEvent` trampolines.
We've found that since the `__xray_CustomEvent` trampoline calls can show up in
situations where the scratch registers are being used, and since we don't
typically want to affect the code-gen around the disabled
`__xray_customevent(...)` intrinsic calls, we need to save and restore the
state of even the scratch registers in the handling of these custom events.
Reviewers: pcc, pelikan, dblaikie, eizan, kpw, echristo, chandlerc
Reviewed By: echristo
Subscribers: chandlerc, echristo, hiraditya, davide, dblaikie, llvm-commits
Differential Revision: https://reviews.llvm.org/D40894
llvm-svn: 323940
2018-02-01 10:21:54 +08:00
|
|
|
MachineBasicBlock *
|
|
|
|
TargetLoweringBase::emitXRayCustomEvent(MachineInstr &MI,
|
|
|
|
MachineBasicBlock *MBB) const {
|
|
|
|
assert(MI.getOpcode() == TargetOpcode::PATCHABLE_EVENT_CALL &&
|
|
|
|
"Called emitXRayCustomEvent on the wrong MI!");
|
|
|
|
auto &MF = *MI.getMF();
|
|
|
|
auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
|
|
|
|
for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
|
|
|
|
MIB.add(MI.getOperand(OpIdx));
|
|
|
|
|
|
|
|
MBB->insert(MachineBasicBlock::iterator(MI), MIB);
|
|
|
|
MI.eraseFromParent();
|
2018-04-18 05:30:29 +08:00
|
|
|
return MBB;
|
|
|
|
}
|
|
|
|
|
|
|
|
MachineBasicBlock *
|
|
|
|
TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI,
|
|
|
|
MachineBasicBlock *MBB) const {
|
|
|
|
assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL &&
|
|
|
|
"Called emitXRayTypedEvent on the wrong MI!");
|
|
|
|
auto &MF = *MI.getMF();
|
|
|
|
auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
|
|
|
|
for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
|
|
|
|
MIB.add(MI.getOperand(OpIdx));
|
|
|
|
|
|
|
|
MBB->insert(MachineBasicBlock::iterator(MI), MIB);
|
|
|
|
MI.eraseFromParent();
|
2018-02-01 10:21:54 +08:00
|
|
|
return MBB;
|
|
|
|
}
|
|
|
|
|
2013-01-12 04:05:37 +08:00
|
|
|
/// findRepresentativeClass - Return the largest legal super-reg register class
|
|
|
|
/// of the register class for the specified type and its associated "cost".
|
2015-03-04 03:47:14 +08:00
|
|
|
// This function is in TargetLowering because it uses RegClassForVT which would
|
|
|
|
// need to be moved to TargetRegisterInfo and would necessitate moving
|
|
|
|
// isTypeLegal over as well - a massive change that would just require
|
|
|
|
// TargetLowering having a TargetRegisterInfo class member that it would use.
|
2015-02-26 08:00:24 +08:00
|
|
|
std::pair<const TargetRegisterClass *, uint8_t>
|
|
|
|
TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
|
|
|
|
MVT VT) const {
|
2013-01-12 04:05:37 +08:00
|
|
|
const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
|
|
|
|
if (!RC)
|
|
|
|
return std::make_pair(RC, 0);
|
|
|
|
|
|
|
|
// Compute the set of all super-register classes.
|
|
|
|
BitVector SuperRegRC(TRI->getNumRegClasses());
|
|
|
|
for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
|
|
|
|
SuperRegRC.setBitsInMask(RCI.getMask());
|
|
|
|
|
|
|
|
// Find the first legal register class with the largest spill size.
|
|
|
|
const TargetRegisterClass *BestRC = RC;
|
2017-05-17 09:07:53 +08:00
|
|
|
for (unsigned i : SuperRegRC.set_bits()) {
|
2013-01-12 04:05:37 +08:00
|
|
|
const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
|
|
|
|
// We want the largest possible spill size.
|
2017-04-25 02:55:33 +08:00
|
|
|
if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC))
|
2013-01-12 04:05:37 +08:00
|
|
|
continue;
|
2017-04-25 03:51:12 +08:00
|
|
|
if (!isLegalRC(*TRI, *SuperRC))
|
2013-01-12 04:05:37 +08:00
|
|
|
continue;
|
|
|
|
BestRC = SuperRC;
|
|
|
|
}
|
|
|
|
return std::make_pair(BestRC, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// computeRegisterProperties - Once all of the register classes are added,
|
|
|
|
/// this allows us to compute derived properties we expose.
|
2015-02-26 08:00:24 +08:00
|
|
|
void TargetLoweringBase::computeRegisterProperties(
|
|
|
|
const TargetRegisterInfo *TRI) {
|
2014-11-17 08:26:50 +08:00
|
|
|
static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
|
|
|
|
"Too many value types for ValueTypeActions to hold!");
|
2013-01-12 04:05:37 +08:00
|
|
|
|
|
|
|
// Everything defaults to needing one register.
|
|
|
|
for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
|
|
|
|
NumRegistersForVT[i] = 1;
|
|
|
|
RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
|
|
|
|
}
|
|
|
|
// ...except isVoid, which doesn't need any registers.
|
|
|
|
NumRegistersForVT[MVT::isVoid] = 0;
|
|
|
|
|
|
|
|
// Find the largest integer register class.
|
|
|
|
unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
|
2014-04-14 08:51:57 +08:00
|
|
|
for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg)
|
2013-01-12 04:05:37 +08:00
|
|
|
assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
|
|
|
|
|
|
|
|
// Every integer value type larger than this largest register takes twice as
|
|
|
|
// many registers to represent as the previous ValueType.
|
|
|
|
for (unsigned ExpandedReg = LargestIntReg + 1;
|
|
|
|
ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
|
|
|
|
NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
|
|
|
|
RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
|
|
|
|
TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
|
|
|
|
ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
|
|
|
|
TypeExpandInteger);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Inspect all of the ValueType's smaller than the largest integer
|
|
|
|
// register to see which ones need promotion.
|
|
|
|
unsigned LegalIntReg = LargestIntReg;
|
|
|
|
for (unsigned IntReg = LargestIntReg - 1;
|
|
|
|
IntReg >= (unsigned)MVT::i1; --IntReg) {
|
|
|
|
MVT IVT = (MVT::SimpleValueType)IntReg;
|
|
|
|
if (isTypeLegal(IVT)) {
|
|
|
|
LegalIntReg = IntReg;
|
|
|
|
} else {
|
|
|
|
RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
|
2018-11-08 00:17:30 +08:00
|
|
|
(MVT::SimpleValueType)LegalIntReg;
|
2013-01-12 04:05:37 +08:00
|
|
|
ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ppcf128 type is really two f64's.
|
|
|
|
if (!isTypeLegal(MVT::ppcf128)) {
|
2016-02-04 22:43:50 +08:00
|
|
|
if (isTypeLegal(MVT::f64)) {
|
|
|
|
NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
|
|
|
|
RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
|
|
|
|
TransformToType[MVT::ppcf128] = MVT::f64;
|
|
|
|
ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
|
|
|
|
} else {
|
|
|
|
NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128];
|
|
|
|
RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128];
|
|
|
|
TransformToType[MVT::ppcf128] = MVT::i128;
|
|
|
|
ValueTypeActions.setTypeAction(MVT::ppcf128, TypeSoftenFloat);
|
|
|
|
}
|
2013-01-12 04:05:37 +08:00
|
|
|
}
|
|
|
|
|
2013-03-02 05:11:44 +08:00
|
|
|
// Decide how to handle f128. If the target does not have native f128 support,
|
|
|
|
// expand it to i128 and we will be generating soft float library calls.
|
|
|
|
if (!isTypeLegal(MVT::f128)) {
|
|
|
|
NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
|
|
|
|
RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
|
|
|
|
TransformToType[MVT::f128] = MVT::i128;
|
|
|
|
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
|
|
|
|
}
|
|
|
|
|
2013-01-12 04:05:37 +08:00
|
|
|
// Decide how to handle f64. If the target does not have native f64 support,
|
|
|
|
// expand it to i64 and we will be generating soft float library calls.
|
|
|
|
if (!isTypeLegal(MVT::f64)) {
|
|
|
|
NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
|
|
|
|
RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
|
|
|
|
TransformToType[MVT::f64] = MVT::i64;
|
|
|
|
ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
|
|
|
|
}
|
|
|
|
|
[CodeGen] "PromoteInteger" f32 to f64 doesn't make sense.
The original f32->f64 promotion logic was refactored into roughly the
currently shape in r37781. However, starting with r132263, the
legalizer has been split into different kinds, and the previous
"Promote" (which did the right thing) was search-and-replace'd into
"PromoteInteger". The divide gradually deepened, with type legalization
("PromoteInteger") being separated from ops legalization
("Promote", which still works for floating point ops).
Fast-forward to today: there's no in-tree target with legal f64 but
illegal f32 (rather: no tests were harmed in the making of this patch).
With such a target, i.e., if you trick the legalizer into going through
the PromoteInteger path for FP, you get the expected brokenness.
For instance, there's no PromoteIntRes_FADD (the name itself sounds
wrong), so we'll just hit some assert in the PromoteInteger path.
Don't pretend we can promote f32 to f64. Instead, always soften.
llvm-svn: 233464
2015-03-28 09:22:37 +08:00
|
|
|
// Decide how to handle f32. If the target does not have native f32 support,
|
|
|
|
// expand it to i32 and we will be generating soft float library calls.
|
2013-01-12 04:05:37 +08:00
|
|
|
if (!isTypeLegal(MVT::f32)) {
|
[CodeGen] "PromoteInteger" f32 to f64 doesn't make sense.
The original f32->f64 promotion logic was refactored into roughly the
currently shape in r37781. However, starting with r132263, the
legalizer has been split into different kinds, and the previous
"Promote" (which did the right thing) was search-and-replace'd into
"PromoteInteger". The divide gradually deepened, with type legalization
("PromoteInteger") being separated from ops legalization
("Promote", which still works for floating point ops).
Fast-forward to today: there's no in-tree target with legal f64 but
illegal f32 (rather: no tests were harmed in the making of this patch).
With such a target, i.e., if you trick the legalizer into going through
the PromoteInteger path for FP, you get the expected brokenness.
For instance, there's no PromoteIntRes_FADD (the name itself sounds
wrong), so we'll just hit some assert in the PromoteInteger path.
Don't pretend we can promote f32 to f64. Instead, always soften.
llvm-svn: 233464
2015-03-28 09:22:37 +08:00
|
|
|
NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
|
|
|
|
RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
|
|
|
|
TransformToType[MVT::f32] = MVT::i32;
|
|
|
|
ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
|
2013-01-12 04:05:37 +08:00
|
|
|
}
|
|
|
|
|
2015-11-09 19:03:18 +08:00
|
|
|
// Decide how to handle f16. If the target does not have native f16 support,
|
|
|
|
// promote it to f32, because there are no f16 library calls (except for
|
|
|
|
// conversions).
|
2014-07-18 20:41:46 +08:00
|
|
|
if (!isTypeLegal(MVT::f16)) {
|
2020-02-01 14:42:07 +08:00
|
|
|
// Allow targets to control how we legalize half.
|
|
|
|
if (softPromoteHalfType()) {
|
|
|
|
NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
|
|
|
|
RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
|
|
|
|
TransformToType[MVT::f16] = MVT::f32;
|
|
|
|
ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf);
|
|
|
|
} else {
|
|
|
|
NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
|
|
|
|
RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
|
|
|
|
TransformToType[MVT::f16] = MVT::f32;
|
|
|
|
ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
|
|
|
|
}
|
2014-07-18 20:41:46 +08:00
|
|
|
}
|
|
|
|
|
2013-01-12 04:05:37 +08:00
|
|
|
// Loop over all of the vector value types to see which need transformations.
|
|
|
|
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
|
|
|
|
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
|
2014-07-03 08:23:43 +08:00
|
|
|
MVT VT = (MVT::SimpleValueType) i;
|
|
|
|
if (isTypeLegal(VT))
|
|
|
|
continue;
|
2013-01-12 04:05:37 +08:00
|
|
|
|
|
|
|
MVT EltVT = VT.getVectorElementType();
|
2020-06-05 23:46:25 +08:00
|
|
|
ElementCount EC = VT.getVectorElementCount();
|
2014-07-03 08:23:43 +08:00
|
|
|
bool IsLegalWiderType = false;
|
2019-10-18 19:48:35 +08:00
|
|
|
bool IsScalable = VT.isScalableVector();
|
2014-07-03 08:23:43 +08:00
|
|
|
LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
|
|
|
|
switch (PreferredAction) {
|
2019-10-18 19:48:35 +08:00
|
|
|
case TypePromoteInteger: {
|
|
|
|
MVT::SimpleValueType EndVT = IsScalable ?
|
|
|
|
MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE :
|
|
|
|
MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE;
|
2014-07-03 08:23:43 +08:00
|
|
|
// Try to promote the elements of integer vectors. If no legal
|
|
|
|
// promotion was found, fall through to the widen-vector method.
|
2019-09-17 18:19:23 +08:00
|
|
|
for (unsigned nVT = i + 1;
|
2019-10-18 19:48:35 +08:00
|
|
|
(MVT::SimpleValueType)nVT <= EndVT; ++nVT) {
|
2014-07-03 08:23:43 +08:00
|
|
|
MVT SVT = (MVT::SimpleValueType) nVT;
|
2013-01-12 04:05:37 +08:00
|
|
|
// Promote vectors of integers to vectors with the same number
|
|
|
|
// of elements, with a wider element type.
|
2016-09-15 00:37:15 +08:00
|
|
|
if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() &&
|
2020-06-05 23:46:25 +08:00
|
|
|
SVT.getVectorElementCount() == EC && isTypeLegal(SVT)) {
|
2013-01-12 04:05:37 +08:00
|
|
|
TransformToType[i] = SVT;
|
|
|
|
RegisterTypeForVT[i] = SVT;
|
|
|
|
NumRegistersForVT[i] = 1;
|
|
|
|
ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
|
|
|
|
IsLegalWiderType = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2014-07-03 08:23:43 +08:00
|
|
|
if (IsLegalWiderType)
|
|
|
|
break;
|
2017-06-03 13:11:14 +08:00
|
|
|
LLVM_FALLTHROUGH;
|
2019-10-18 19:48:35 +08:00
|
|
|
}
|
2017-09-22 07:20:16 +08:00
|
|
|
|
|
|
|
case TypeWidenVector:
|
2020-06-05 23:46:25 +08:00
|
|
|
if (isPowerOf2_32(EC.Min)) {
|
2019-08-18 14:28:06 +08:00
|
|
|
// Try to widen the vector.
|
|
|
|
for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
|
|
|
|
MVT SVT = (MVT::SimpleValueType) nVT;
|
2020-06-05 23:46:25 +08:00
|
|
|
if (SVT.getVectorElementType() == EltVT &&
|
|
|
|
SVT.isScalableVector() == IsScalable &&
|
|
|
|
SVT.getVectorElementCount().Min > EC.Min && isTypeLegal(SVT)) {
|
2019-08-18 14:28:06 +08:00
|
|
|
TransformToType[i] = SVT;
|
|
|
|
RegisterTypeForVT[i] = SVT;
|
|
|
|
NumRegistersForVT[i] = 1;
|
|
|
|
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
|
|
|
|
IsLegalWiderType = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (IsLegalWiderType)
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
// Only widen to the next power of 2 to keep consistency with EVT.
|
|
|
|
MVT NVT = VT.getPow2VectorType();
|
|
|
|
if (isTypeLegal(NVT)) {
|
|
|
|
TransformToType[i] = NVT;
|
2013-01-12 04:05:37 +08:00
|
|
|
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
|
2019-08-18 14:28:06 +08:00
|
|
|
RegisterTypeForVT[i] = NVT;
|
|
|
|
NumRegistersForVT[i] = 1;
|
2013-01-12 04:05:37 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2017-06-03 13:11:14 +08:00
|
|
|
LLVM_FALLTHROUGH;
|
2017-09-22 07:20:16 +08:00
|
|
|
|
2014-07-03 08:23:43 +08:00
|
|
|
case TypeSplitVector:
|
|
|
|
case TypeScalarizeVector: {
|
|
|
|
MVT IntermediateVT;
|
|
|
|
MVT RegisterVT;
|
|
|
|
unsigned NumIntermediates;
|
2019-11-14 04:09:34 +08:00
|
|
|
unsigned NumRegisters = getVectorTypeBreakdownMVT(VT, IntermediateVT,
|
2014-07-03 08:23:43 +08:00
|
|
|
NumIntermediates, RegisterVT, this);
|
2019-11-14 04:09:34 +08:00
|
|
|
NumRegistersForVT[i] = NumRegisters;
|
|
|
|
assert(NumRegistersForVT[i] == NumRegisters &&
|
|
|
|
"NumRegistersForVT size cannot represent NumRegisters!");
|
2014-07-03 08:23:43 +08:00
|
|
|
RegisterTypeForVT[i] = RegisterVT;
|
|
|
|
|
|
|
|
MVT NVT = VT.getPow2VectorType();
|
|
|
|
if (NVT == VT) {
|
|
|
|
// Type is already a power of 2. The default action is to split.
|
|
|
|
TransformToType[i] = MVT::Other;
|
|
|
|
if (PreferredAction == TypeScalarizeVector)
|
|
|
|
ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
|
2014-10-31 10:35:34 +08:00
|
|
|
else if (PreferredAction == TypeSplitVector)
|
2014-07-03 08:23:43 +08:00
|
|
|
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
|
2020-06-05 23:46:25 +08:00
|
|
|
else if (EC.Min > 1)
|
|
|
|
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
|
2014-10-31 10:35:34 +08:00
|
|
|
else
|
2020-06-05 23:46:25 +08:00
|
|
|
ValueTypeActions.setTypeAction(VT, EC.Scalable
|
|
|
|
? TypeScalarizeScalableVector
|
|
|
|
: TypeScalarizeVector);
|
2014-07-03 08:23:43 +08:00
|
|
|
} else {
|
|
|
|
TransformToType[i] = NVT;
|
|
|
|
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown vector legalization action!");
|
2013-01-12 04:05:37 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Determine the 'representative' register class for each value type.
|
|
|
|
  // A representative register class is the largest (meaning one which is
|
|
|
|
// not a sub-register class / subreg register class) legal register class for
|
|
|
|
// a group of value types. For example, on i386, i8, i16, and i32
|
|
|
|
// representative would be GR32; while on x86_64 it's GR64.
|
|
|
|
for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
|
|
|
|
const TargetRegisterClass* RRC;
|
|
|
|
uint8_t Cost;
|
2015-02-26 08:00:24 +08:00
|
|
|
std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i);
|
2013-01-12 04:05:37 +08:00
|
|
|
RepRegClassForVT[i] = RRC;
|
|
|
|
RepRegClassCostForVT[i] = Cost;
|
|
|
|
}
|
|
|
|
}
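Once computeRegisterProperties has filled in RepRegClassForVT and RepRegClassCostForVT, the representative class described in the comment above is available through the public accessors. A sketch, assuming TLI was initialized for an x86-64 subtarget, where the comment says the answer for the small integer types would be GR64:
```
// Sketch: read back the representative register class and its cost.
const TargetRegisterClass *RRC = TLI.getRepRegClassFor(MVT::i32);
uint8_t Cost = TLI.getRepRegClassCostFor(MVT::i32);
```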
|
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &,
|
|
|
|
EVT VT) const {
|
2013-01-12 04:05:37 +08:00
|
|
|
assert(!VT.isVector() && "No default SetCC type for vectors!");
|
2015-07-09 10:09:04 +08:00
|
|
|
return getPointerTy(DL).SimpleTy;
|
2013-01-12 04:05:37 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
|
|
|
|
return MVT::i32; // return the default value
|
|
|
|
}
|
|
|
|
|
|
|
|
/// getVectorTypeBreakdown - Vector types are broken down into some number of
|
|
|
|
/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
|
|
|
|
/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
|
|
|
|
/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
|
|
|
|
///
|
|
|
|
/// This method returns the number of registers needed, and the VT for each
|
|
|
|
/// register. It also returns the VT and quantity of the intermediate values
|
|
|
|
/// before they are promoted/expanded.
|
|
|
|
unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
|
|
|
|
EVT &IntermediateVT,
|
|
|
|
unsigned &NumIntermediates,
|
|
|
|
MVT &RegisterVT) const {
|
[SVE][CodeGen] Fix legalisation for scalable types
Summary:
This patch handles illegal scalable types when lowering IR operations,
addressing several places where the value of isScalableVector() is
ignored.
For types such as <vscale x 8 x i32>, this means splitting the
operations. In this example, we would split it into two
operations of type <vscale x 4 x i32> for the low and high halves.
In cases such as <vscale x 2 x i32>, the elements in the vector
will be promoted. In this case they will be promoted to
i64 (with a vector of type <vscale x 2 x i64>)
Reviewers: sdesmalen, efriedma, huntergr
Reviewed By: efriedma
Subscribers: david-arm, tschuett, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78812
2020-05-07 17:01:31 +08:00
|
|
|
ElementCount EltCnt = VT.getVectorElementCount();
|
2013-01-12 04:05:37 +08:00
|
|
|
|
|
|
|
// If there is a wider vector type with the same element type as this one,
|
|
|
|
// or a promoted vector type that has the same number of elements which
|
|
|
|
// are wider, then we should convert to that legal vector type.
|
|
|
|
// This handles things like <2 x float> -> <4 x float> and
|
|
|
|
// <4 x i1> -> <4 x i32>.
|
|
|
|
LegalizeTypeAction TA = getTypeAction(Context, VT);
|
2020-05-07 17:01:31 +08:00
|
|
|
if (EltCnt.Min != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
|
2013-01-12 04:05:37 +08:00
|
|
|
EVT RegisterEVT = getTypeToTransformTo(Context, VT);
|
|
|
|
if (isTypeLegal(RegisterEVT)) {
|
|
|
|
IntermediateVT = RegisterEVT;
|
|
|
|
RegisterVT = RegisterEVT.getSimpleVT();
|
|
|
|
NumIntermediates = 1;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Figure out the right, legal destination reg to copy into.
|
|
|
|
EVT EltTy = VT.getVectorElementType();
|
|
|
|
|
|
|
|
unsigned NumVectorRegs = 1;
|
|
|
|
|
2020-06-06 01:29:43 +08:00
|
|
|
// Scalable vectors cannot be scalarized, so handle the legalisation of the
|
|
|
|
// types like done elsewhere in SelectionDAG.
|
|
|
|
if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) {
|
|
|
|
LegalizeKind LK;
|
|
|
|
EVT PartVT = VT;
|
|
|
|
do {
|
|
|
|
// Iterate until we've found a legal (part) type to hold VT.
|
|
|
|
LK = getTypeConversion(Context, PartVT);
|
|
|
|
PartVT = LK.second;
|
|
|
|
} while (LK.first != TypeLegal);
|
|
|
|
|
|
|
|
NumIntermediates =
|
|
|
|
VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min;
|
|
|
|
|
|
|
|
// FIXME: This code needs to be extended to handle more complex vector
|
|
|
|
// breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only
|
|
|
|
// supported cases are vectors that are broken down into equal parts
|
|
|
|
// such as nxv6i64 -> 3 x nxv2i64.
|
|
|
|
assert(NumIntermediates * PartVT.getVectorElementCount().Min ==
|
|
|
|
VT.getVectorElementCount().Min &&
|
|
|
|
"Expected an integer multiple of PartVT");
|
|
|
|
IntermediateVT = PartVT;
|
|
|
|
RegisterVT = getRegisterType(Context, IntermediateVT);
|
|
|
|
return NumIntermediates;
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: We don't support non-power-of-2-sized vectors for now. Ideally
|
|
|
|
// we could break down into LHS/RHS like LegalizeDAG does.
|
2020-05-07 17:01:31 +08:00
|
|
|
if (!isPowerOf2_32(EltCnt.Min)) {
|
|
|
|
NumVectorRegs = EltCnt.Min;
|
|
|
|
EltCnt.Min = 1;
|
2013-01-12 04:05:37 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Divide the input until we get to a supported size. This will always
|
|
|
|
// end with a scalar if the target doesn't support vectors.
|
2020-05-07 17:01:31 +08:00
|
|
|
while (EltCnt.Min > 1 &&
|
|
|
|
!isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) {
|
|
|
|
EltCnt.Min >>= 1;
|
2013-01-12 04:05:37 +08:00
|
|
|
NumVectorRegs <<= 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
NumIntermediates = NumVectorRegs;
|
|
|
|
|
2020-05-07 17:01:31 +08:00
|
|
|
EVT NewVT = EVT::getVectorVT(Context, EltTy, EltCnt);
|
2013-01-12 04:05:37 +08:00
|
|
|
if (!isTypeLegal(NewVT))
|
|
|
|
NewVT = EltTy;
|
|
|
|
IntermediateVT = NewVT;
|
|
|
|
|
|
|
|
MVT DestVT = getRegisterType(Context, NewVT);
|
|
|
|
RegisterVT = DestVT;
|
|
|
|
|
2020-06-11 14:47:23 +08:00
|
|
|
if (EVT(DestVT).bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16.
|
|
|
|
TypeSize NewVTSize = NewVT.getSizeInBits();
|
|
|
|
// Convert sizes such as i33 to i64.
|
|
|
|
if (!isPowerOf2_32(NewVTSize.getKnownMinSize()))
|
|
|
|
NewVTSize = NewVTSize.NextPowerOf2();
|
2013-01-12 04:05:37 +08:00
|
|
|
return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
|
2020-06-11 14:47:23 +08:00
|
|
|
}
|
2013-01-12 04:05:37 +08:00
|
|
|
|
|
|
|
// Otherwise, promotion or legal types use the same number of registers as
|
|
|
|
// the vector decimated to the appropriate level.
|
|
|
|
return NumVectorRegs;
|
|
|
|
}
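The MVT::v8f32 example from the doc comment can be reproduced directly with this entry point. A sketch, assuming Ctx is an LLVMContext and TLI is a target whose widest legal vector type is v4f32 (the SSE1/Altivec case the comment mentions):
```
// Sketch: break v8f32 into its legal pieces on the assumed target.
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs = TLI.getVectorTypeBreakdown(
    Ctx, EVT::getVectorVT(Ctx, MVT::f32, 8), IntermediateVT, NumIntermediates,
    RegisterVT);
// Expected here: NumRegs == 2, NumIntermediates == 2, and both
// IntermediateVT and RegisterVT are v4f32.
```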
|
|
|
|
|
2019-11-01 04:58:52 +08:00
|
|
|
bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI,
|
|
|
|
uint64_t NumCases,
|
|
|
|
uint64_t Range,
|
|
|
|
ProfileSummaryInfo *PSI,
|
|
|
|
BlockFrequencyInfo *BFI) const {
|
|
|
|
  // FIXME: This function checks the maximum table size and density, but the
|
|
|
|
// minimum size is not checked. It would be nice if the minimum size is
|
|
|
|
// also combined within this function. Currently, the minimum size check is
|
|
|
|
  // performed in findJumpTable() in SelectionDAGBuilder and
|
|
|
|
// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
|
|
|
|
const bool OptForSize =
|
|
|
|
SI->getParent()->getParent()->hasOptSize() ||
|
|
|
|
llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI);
|
|
|
|
const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
|
|
|
|
const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
|
|
|
|
|
|
|
|
// Check whether the number of cases is small enough and
|
|
|
|
// the range is dense enough for a jump table.
|
|
|
|
return (OptForSize || Range <= MaxJumpTableSize) &&
|
|
|
|
(NumCases * 100 >= Range * MinDensity);
|
|
|
|
}
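The final return is a pure size-and-density test, which is easy to sanity check with concrete numbers. The 40% minimum below is only an assumption for illustration; the real value comes from getMinimumJumpTableDensity and differs when optimizing for size:
```
// Worked example of the check above: 40 cases over a label range of 100 is a
// 40% density, which passes a 40% minimum exactly.
uint64_t NumCases = 40, Range = 100;
unsigned MinDensity = 40;
bool DenseEnough = NumCases * 100 >= Range * MinDensity; // 4000 >= 4000: true
```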
|
|
|
|
|
2013-01-12 04:05:37 +08:00
|
|
|
/// Get the EVTs and ArgFlags collections that represent the legalized return
|
|
|
|
/// type of the given function. This does not require a DAG or a return value,
|
|
|
|
/// and is suitable for use before any DAGs for the function are constructed.
|
|
|
|
/// TODO: Move this out of TargetLowering.cpp.
|
2018-07-28 21:25:19 +08:00
|
|
|
void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
|
|
|
|
AttributeList attr,
|
2013-01-12 04:05:37 +08:00
|
|
|
SmallVectorImpl<ISD::OutputArg> &Outs,
|
2015-07-09 09:57:34 +08:00
|
|
|
const TargetLowering &TLI, const DataLayout &DL) {
|
2013-01-12 04:05:37 +08:00
|
|
|
SmallVector<EVT, 4> ValueVTs;
|
2015-07-09 09:57:34 +08:00
|
|
|
ComputeValueVTs(TLI, DL, ReturnType, ValueVTs);
|
2013-01-12 04:05:37 +08:00
|
|
|
unsigned NumValues = ValueVTs.size();
|
|
|
|
if (NumValues == 0) return;
|
|
|
|
|
|
|
|
for (unsigned j = 0, f = NumValues; j != f; ++j) {
|
|
|
|
EVT VT = ValueVTs[j];
|
|
|
|
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
|
|
|
|
|
Rename AttributeSet to AttributeList
Summary:
This class is a list of AttributeSetNodes corresponding to the function
prototype of a call or function declaration. This class used to be
called ParamAttrListPtr, then AttrListPtr, then AttributeSet. It is
typically accessed by parameter and return value index, so
"AttributeList" seems like a more intuitive name.
Rename AttributeSetImpl to AttributeListImpl to follow suit.
It's useful to rename this class so that we can rename AttributeSetNode
to AttributeSet later. AttributeSet is the set of attributes that apply
to a single function, argument, or return value.
Reviewers: sanjoy, javed.absar, chandlerc, pete
Reviewed By: pete
Subscribers: pete, jholewinski, arsenm, dschuff, mehdi_amini, jfb, nhaehnle, sbc100, void, llvm-commits
Differential Revision: https://reviews.llvm.org/D31102
llvm-svn: 298393
2017-03-22 00:57:19 +08:00
|
|
|
if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
|
2013-01-12 04:05:37 +08:00
|
|
|
ExtendKind = ISD::SIGN_EXTEND;
|
2017-03-22 00:57:19 +08:00
|
|
|
else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
|
2013-01-12 04:05:37 +08:00
|
|
|
ExtendKind = ISD::ZERO_EXTEND;
|
|
|
|
|
|
|
|
// FIXME: C calling convention requires the return type to be promoted to
|
|
|
|
// at least 32-bit. But this is not necessary for non-C calling
|
|
|
|
// conventions. The frontend should mark functions whose return values
|
|
|
|
// require promoting with signext or zeroext attributes.
|
|
|
|
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
|
|
|
|
MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
|
|
|
|
if (VT.bitsLT(MinVT))
|
|
|
|
VT = MinVT;
|
|
|
|
}
|
|
|
|
|
Reland "[SelectionDAG] Enable target specific vector scalarization of calls and returns"
By target hookifying getRegisterType, getNumRegisters, getVectorBreakdown,
backends can request that LLVM to scalarize vector types for calls
and returns.
The MIPS vector ABI requires that vector arguments and returns are passed in
integer registers. With SelectionDAG's new hooks, the MIPS backend can now
handle LLVM-IR with vector types in calls and returns. E.g.
'call @foo(<4 x i32> %4)'.
Previously these cases would be scalarized for the MIPS O32/N32/N64 ABI for
calls and returns if vector types were not legal. If vector types were legal,
a single 128bit vector argument would be assigned to a single 32 bit / 64 bit
integer register.
By teaching the MIPS backend to inspect the original types, it can now
implement the MIPS vector ABI which requires a particular method of
scalarizing vectors.
Previously, the MIPS backend relied on clang to scalarize types such as "call
@foo(<4 x float> %a) into "call @foo(i32 inreg %1, i32 inreg %2, i32 inreg %3,
i32 inreg %4)".
This patch enables the MIPS backend to take either form for vector types.
The previous version of this patch had a "conditional move or jump depends on
uninitialized value".
Reviewers: zoran.jovanovic, jaydeep, vkalintiris, slthakur
Differential Revision: https://reviews.llvm.org/D27845
llvm-svn: 305083
2017-06-09 22:37:08 +08:00
|
|
|
unsigned NumParts =
|
2018-07-28 21:25:19 +08:00
|
|
|
TLI.getNumRegistersForCallingConv(ReturnType->getContext(), CC, VT);
|
Reland "[SelectionDAG] Enable target specific vector scalarization of calls and returns"
By target hookifying getRegisterType, getNumRegisters, getVectorBreakdown,
backends can request that LLVM to scalarize vector types for calls
and returns.
The MIPS vector ABI requires that vector arguments and returns are passed in
integer registers. With SelectionDAG's new hooks, the MIPS backend can now
handle LLVM-IR with vector types in calls and returns. E.g.
'call @foo(<4 x i32> %4)'.
Previously these cases would be scalarized for the MIPS O32/N32/N64 ABI for
calls and returns if vector types were not legal. If vector types were legal,
a single 128bit vector argument would be assigned to a single 32 bit / 64 bit
integer register.
By teaching the MIPS backend to inspect the original types, it can now
implement the MIPS vector ABI which requires a particular method of
scalarizing vectors.
Previously, the MIPS backend relied on clang to scalarize types such as "call
@foo(<4 x float> %a) into "call @foo(i32 inreg %1, i32 inreg %2, i32 inreg %3,
i32 inreg %4)".
This patch enables the MIPS backend to take either form for vector types.
The previous version of this patch had a "conditional move or jump depends on
uninitialized value".
Reviewers: zoran.jovanovic, jaydeep, vkalintiris, slthakur
Differential Revision: https://reviews.llvm.org/D27845
llvm-svn: 305083
2017-06-09 22:37:08 +08:00
|
|
|
MVT PartVT =
|
2018-07-28 21:25:19 +08:00
|
|
|
TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), CC, VT);
|
2013-01-12 04:05:37 +08:00
|
|
|
|
|
|
|
// 'inreg' on function refers to return value
|
|
|
|
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
|
Rename AttributeSet to AttributeList
Summary:
This class is a list of AttributeSetNodes corresponding the function
prototype of a call or function declaration. This class used to be
called ParamAttrListPtr, then AttrListPtr, then AttributeSet. It is
typically accessed by parameter and return value index, so
"AttributeList" seems like a more intuitive name.
Rename AttributeSetImpl to AttributeListImpl to follow suit.
It's useful to rename this class so that we can rename AttributeSetNode
to AttributeSet later. AttributeSet is the set of attributes that apply
to a single function, argument, or return value.
Reviewers: sanjoy, javed.absar, chandlerc, pete
Reviewed By: pete
Subscribers: pete, jholewinski, arsenm, dschuff, mehdi_amini, jfb, nhaehnle, sbc100, void, llvm-commits
Differential Revision: https://reviews.llvm.org/D31102
llvm-svn: 298393
2017-03-22 00:57:19 +08:00
|
|
|
if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::InReg))
|
2013-01-12 04:05:37 +08:00
|
|
|
Flags.setInReg();
|
|
|
|
|
|
|
|
// Propagate extension type if any
|
Rename AttributeSet to AttributeList
Summary:
This class is a list of AttributeSetNodes corresponding the function
prototype of a call or function declaration. This class used to be
called ParamAttrListPtr, then AttrListPtr, then AttributeSet. It is
typically accessed by parameter and return value index, so
"AttributeList" seems like a more intuitive name.
Rename AttributeSetImpl to AttributeListImpl to follow suit.
It's useful to rename this class so that we can rename AttributeSetNode
to AttributeSet later. AttributeSet is the set of attributes that apply
to a single function, argument, or return value.
Reviewers: sanjoy, javed.absar, chandlerc, pete
Reviewed By: pete
Subscribers: pete, jholewinski, arsenm, dschuff, mehdi_amini, jfb, nhaehnle, sbc100, void, llvm-commits
Differential Revision: https://reviews.llvm.org/D31102
llvm-svn: 298393
2017-03-22 00:57:19 +08:00
|
|
|
if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
|
2013-01-12 04:05:37 +08:00
|
|
|
Flags.setSExt();
|
Rename AttributeSet to AttributeList
Summary:
This class is a list of AttributeSetNodes corresponding the function
prototype of a call or function declaration. This class used to be
called ParamAttrListPtr, then AttrListPtr, then AttributeSet. It is
typically accessed by parameter and return value index, so
"AttributeList" seems like a more intuitive name.
Rename AttributeSetImpl to AttributeListImpl to follow suit.
It's useful to rename this class so that we can rename AttributeSetNode
to AttributeSet later. AttributeSet is the set of attributes that apply
to a single function, argument, or return value.
Reviewers: sanjoy, javed.absar, chandlerc, pete
Reviewed By: pete
Subscribers: pete, jholewinski, arsenm, dschuff, mehdi_amini, jfb, nhaehnle, sbc100, void, llvm-commits
Differential Revision: https://reviews.llvm.org/D31102
llvm-svn: 298393
2017-03-22 00:57:19 +08:00
|
|
|
else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
|
2013-01-12 04:05:37 +08:00
|
|
|
Flags.setZExt();
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < NumParts; ++i)
|
2019-07-16 12:46:31 +08:00
|
|
|
Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
|
2013-01-12 04:05:37 +08:00
|
|
|
}
|
|
|
|
}
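
// Illustrative example (not from the original source; assumes a typical target
// where i32 is the smallest legal integer register type): for a function
// declared as 'define signext i8 @f()', the loop above promotes VT from i8 to
// i32 and produces a single ISD::OutputArg with the SExt flag set, recording
// that the value is returned sign-extended in an i32-sized register part.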

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
                                                   const DataLayout &DL) const {
  return DL.getABITypeAlign(Ty).value();
}

bool TargetLoweringBase::allowsMemoryAccessForAlignment(
    LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
    Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
  // Check if the specified alignment is sufficient based on the data layout.
  // TODO: While using the data layout works in practice, a better solution
  // would be to implement this check directly (make this a virtual function).
  // For example, the ABI alignment may change based on software platform while
  // this function should only be affected by hardware implementation.
  Type *Ty = VT.getTypeForEVT(Context);
  if (Alignment >= DL.getABITypeAlign(Ty)) {
    // Assume that an access that meets the ABI-specified alignment is fast.
    if (Fast != nullptr)
      *Fast = true;
    return true;
  }

  // This is a misaligned access.
  return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment.value(), Flags,
                                        Fast);
}
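
// Illustrative example (assumed data layout, not from the original source): if
// the ABI alignment of i64 is 8 bytes, a query with Align(8) takes the fast
// path above, while a query with Align(4) is forwarded to
// allowsMisalignedMemoryAccesses so the target can decide.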

bool TargetLoweringBase::allowsMemoryAccessForAlignment(
    LLVMContext &Context, const DataLayout &DL, EVT VT,
    const MachineMemOperand &MMO, bool *Fast) const {
  return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),
                                        MMO.getAlign(), MMO.getFlags(), Fast);
}

bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
                                            const DataLayout &DL, EVT VT,
                                            unsigned AddrSpace, Align Alignment,
                                            MachineMemOperand::Flags Flags,
                                            bool *Fast) const {
  return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment,
                                        Flags, Fast);
}

bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
                                            const DataLayout &DL, EVT VT,
                                            const MachineMemOperand &MMO,
                                            bool *Fast) const {
  return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
                            MMO.getFlags(), Fast);
}

BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
  return BranchProbability(MinPercentageForPredictableBranch, 100);
}

//===----------------------------------------------------------------------===//
//  TargetTransformInfo Helpers
//===----------------------------------------------------------------------===//

int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
  enum InstructionOpcodes {
#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
#include "llvm/IR/Instruction.def"
  };
  switch (static_cast<InstructionOpcodes>(Opcode)) {
  case Ret:            return 0;
  case Br:             return 0;
  case Switch:         return 0;
  case IndirectBr:     return 0;
  case Invoke:         return 0;
  case CallBr:         return 0;
  case Resume:         return 0;
  case Unreachable:    return 0;
  case CleanupRet:     return 0;
  case CatchRet:       return 0;
  case CatchPad:       return 0;
  case CatchSwitch:    return 0;
  case CleanupPad:     return 0;
  case FNeg:           return ISD::FNEG;
  case Add:            return ISD::ADD;
  case FAdd:           return ISD::FADD;
  case Sub:            return ISD::SUB;
  case FSub:           return ISD::FSUB;
  case Mul:            return ISD::MUL;
  case FMul:           return ISD::FMUL;
  case UDiv:           return ISD::UDIV;
  case SDiv:           return ISD::SDIV;
  case FDiv:           return ISD::FDIV;
  case URem:           return ISD::UREM;
  case SRem:           return ISD::SREM;
  case FRem:           return ISD::FREM;
  case Shl:            return ISD::SHL;
  case LShr:           return ISD::SRL;
  case AShr:           return ISD::SRA;
  case And:            return ISD::AND;
  case Or:             return ISD::OR;
  case Xor:            return ISD::XOR;
  case Alloca:         return 0;
  case Load:           return ISD::LOAD;
  case Store:          return ISD::STORE;
  case GetElementPtr:  return 0;
  case Fence:          return 0;
  case AtomicCmpXchg:  return 0;
  case AtomicRMW:      return 0;
  case Trunc:          return ISD::TRUNCATE;
  case ZExt:           return ISD::ZERO_EXTEND;
  case SExt:           return ISD::SIGN_EXTEND;
  case FPToUI:         return ISD::FP_TO_UINT;
  case FPToSI:         return ISD::FP_TO_SINT;
  case UIToFP:         return ISD::UINT_TO_FP;
  case SIToFP:         return ISD::SINT_TO_FP;
  case FPTrunc:        return ISD::FP_ROUND;
  case FPExt:          return ISD::FP_EXTEND;
  case PtrToInt:       return ISD::BITCAST;
  case IntToPtr:       return ISD::BITCAST;
  case BitCast:        return ISD::BITCAST;
  case AddrSpaceCast:  return ISD::ADDRSPACECAST;
  case ICmp:           return ISD::SETCC;
  case FCmp:           return ISD::SETCC;
  case PHI:            return 0;
  case Call:           return 0;
  case Select:         return ISD::SELECT;
  case UserOp1:        return 0;
  case UserOp2:        return 0;
  case VAArg:          return 0;
  case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
  case InsertElement:  return ISD::INSERT_VECTOR_ELT;
  case ShuffleVector:  return ISD::VECTOR_SHUFFLE;
  case ExtractValue:   return ISD::MERGE_VALUES;
  case InsertValue:    return ISD::MERGE_VALUES;
  case LandingPad:     return 0;
  case Freeze:         return ISD::FREEZE;
  }

  llvm_unreachable("Unknown instruction type encountered!");
}
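
// Illustrative example (not from the original source): mapping an IR opcode to
// its closest SelectionDAG node, e.g. in a cost-model hook:
//
//   int ISDOpc = TLI->InstructionOpcodeToISD(Instruction::Add);
//   // ISDOpc == ISD::ADD; opcodes with no single DAG equivalent, such as
//   // Instruction::Call, map to 0.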

std::pair<int, MVT>
TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
                                            Type *Ty) const {
  LLVMContext &C = Ty->getContext();
  EVT MTy = getValueType(DL, Ty);

  int Cost = 1;
  // We keep legalizing the type until we find a legal kind. We assume that
  // the only operation that costs anything is the split. After splitting
  // we need to handle two types.
  while (true) {
    LegalizeKind LK = getTypeConversion(C, MTy);

    if (LK.first == TypeLegal)
      return std::make_pair(Cost, MTy.getSimpleVT());

    if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
      Cost *= 2;

    // Do not loop with f128 type.
    if (MTy == LK.second)
      return std::make_pair(Cost, MTy.getSimpleVT());

    // Keep legalizing the type.
    MTy = LK.second;
  }
}
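
// Illustrative example (assumed target, not from the original source): on a
// target whose widest legal vector type is v4i32, a v16i32 value is split
// twice (v16i32 -> 2 x v8i32 -> 4 x v4i32), so this returns {4, v4i32}.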

Value *TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
                                                              bool UseTLS) const {
  // compiler-rt provides a variable with a magic name. Targets that do not
  // link with compiler-rt may also provide such a variable.
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
  auto UnsafeStackPtr =
      dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar));

  Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());

  if (!UnsafeStackPtr) {
    auto TLSModel = UseTLS ?
        GlobalValue::InitialExecTLSModel :
        GlobalValue::NotThreadLocal;
    // The global variable is not defined yet, define it ourselves.
    // We use the initial-exec TLS model because we do not support the
    // variable living anywhere other than in the main executable.
    UnsafeStackPtr = new GlobalVariable(
        *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
        UnsafeStackPtrVar, nullptr, TLSModel);
  } else {
    // The variable exists, check its type and attributes.
    if (UnsafeStackPtr->getValueType() != StackPtrTy)
      report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
    if (UseTLS != UnsafeStackPtr->isThreadLocal())
      report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
                         (UseTLS ? "" : "not ") + "be thread-local");
  }
  return UnsafeStackPtr;
}

Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
  if (!TM.getTargetTriple().isAndroid())
    return getDefaultSafeStackPointerLocation(IRB, true);

  // Android provides a libc function to retrieve the address of the current
  // thread's unsafe stack pointer.
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
  FunctionCallee Fn = M->getOrInsertFunction("__safestack_pointer_address",
                                             StackPtrTy->getPointerTo(0));
  return IRB.CreateCall(Fn);
}

//===----------------------------------------------------------------------===//
//  Loop Strength Reduction hooks
//===----------------------------------------------------------------------===//

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
                                               unsigned AS, Instruction *I) const {
  // The default implementation supports only a conservative RISCy set of
  // addressing modes: r+r and r+i.

  // Allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Only support r+r.
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default: // Don't allow n * r
    return false;
  }

  return true;
}
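
// Illustrative example (not from the original source): with this default
// implementation, an AddrMode with HasBaseReg == true, Scale == 1 and
// BaseOffs == 0 (plain "r+r") is accepted, while the same mode with a nonzero
// BaseOffs ("r+r+i") or with Scale == 4 ("4*r") is rejected, so Loop Strength
// Reduction will not form such addresses unless the target overrides this.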

//===----------------------------------------------------------------------===//
//  Stack Protector
//===----------------------------------------------------------------------===//

// For OpenBSD return its special guard variable. Otherwise return nullptr,
// so that SelectionDAG handles SSP.
Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
  if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
    Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
    PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
    return M.getOrInsertGlobal("__guard_local", PtrTy);
  }
  return nullptr;
}

// Currently only support "standard" __stack_chk_guard.
// TODO: add LOAD_STACK_GUARD support.
void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
  if (!M.getNamedValue("__stack_chk_guard"))
    new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,
                       GlobalVariable::ExternalLinkage,
                       nullptr, "__stack_chk_guard");
}

// Currently only support "standard" __stack_chk_guard.
// TODO: add LOAD_STACK_GUARD support.
Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
  return M.getNamedValue("__stack_chk_guard");
}

Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
  return nullptr;
}

unsigned TargetLoweringBase::getMinimumJumpTableEntries() const {
  return MinimumJumpTableEntries;
}

void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) {
  MinimumJumpTableEntries = Val;
}

unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const {
  return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
}

unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
  return MaximumJumpTableSize;
}

void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
  MaximumJumpTableSize = Val;
}

bool TargetLoweringBase::isJumpTableRelative() const {
  return getTargetMachine().isPositionIndependent();
}

//===----------------------------------------------------------------------===//
//  Reciprocal Estimates
//===----------------------------------------------------------------------===//

/// Get the reciprocal estimate attribute string for a function that will
/// override the target defaults.
static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
  const Function &F = MF.getFunction();
  return F.getFnAttribute("reciprocal-estimates").getValueAsString();
}

/// Construct a string for the given reciprocal operation of the given type.
/// This string should match the corresponding option to the front-end's
/// "-mrecip" flag assuming those strings have been passed through in an
/// attribute string. For example, "vec-divf" for a division of a vXf32.
static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
  std::string Name = VT.isVector() ? "vec-" : "";

  Name += IsSqrt ? "sqrt" : "div";

  // TODO: Handle "half" or other float types?
  if (VT.getScalarType() == MVT::f64) {
    Name += "d";
  } else {
    assert(VT.getScalarType() == MVT::f32 &&
           "Unexpected FP type for reciprocal estimate");
    Name += "f";
  }

  return Name;
}
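
// Illustrative example (not from the original source): the name is composed
// from the three pieces above, e.g.
//   getReciprocalOpName(/*IsSqrt=*/true,  MVT::v4f32) == "vec-sqrtf"
//   getReciprocalOpName(/*IsSqrt=*/false, MVT::f64)   == "divd"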

/// Return the character position and value (a single numeric character) of a
/// customized refinement operation in the input string if it exists. Return
/// false if there is no customized refinement step count.
static bool parseRefinementStep(StringRef In, size_t &Position,
                                uint8_t &Value) {
  const char RefStepToken = ':';
  Position = In.find(RefStepToken);
  if (Position == StringRef::npos)
    return false;

  StringRef RefStepString = In.substr(Position + 1);
  // Allow exactly one numeric character for the additional refinement
  // step parameter.
  if (RefStepString.size() == 1) {
    char RefStepChar = RefStepString[0];
    if (RefStepChar >= '0' && RefStepChar <= '9') {
      Value = RefStepChar - '0';
      return true;
    }
  }
  report_fatal_error("Invalid refinement step for -recip.");
}
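
// Illustrative example (not from the original source): for In == "vec-sqrtf:3"
// this sets Position to 9 (the index of ':'), Value to 3, and returns true;
// "sqrtf" (no ':') returns false; "sqrtf:12" reports a fatal error because the
// refinement step must be a single digit.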

/// For the input attribute string, return one of the ReciprocalEstimate enum
/// status values (enabled, disabled, or not specified) for this operation on
/// the specified data type.
static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
  if (Override.empty())
    return TargetLoweringBase::ReciprocalEstimate::Unspecified;

  SmallVector<StringRef, 4> OverrideVector;
  Override.split(OverrideVector, ',');
  unsigned NumArgs = OverrideVector.size();

  // Check if "all", "none", or "default" was specified.
  if (NumArgs == 1) {
    // Look for an optional setting of the number of refinement steps needed
    // for this type of reciprocal operation.
    size_t RefPos;
    uint8_t RefSteps;
    if (parseRefinementStep(Override, RefPos, RefSteps)) {
      // Split the string for further processing.
      Override = Override.substr(0, RefPos);
    }

    // All reciprocal types are enabled.
    if (Override == "all")
      return TargetLoweringBase::ReciprocalEstimate::Enabled;

    // All reciprocal types are disabled.
    if (Override == "none")
      return TargetLoweringBase::ReciprocalEstimate::Disabled;

    // Target defaults for enablement are used.
    if (Override == "default")
      return TargetLoweringBase::ReciprocalEstimate::Unspecified;
  }

  // The attribute string may omit the size suffix ('f'/'d').
  std::string VTName = getReciprocalOpName(IsSqrt, VT);
  std::string VTNameNoSize = VTName;
  VTNameNoSize.pop_back();
  static const char DisabledPrefix = '!';

  for (StringRef RecipType : OverrideVector) {
    size_t RefPos;
    uint8_t RefSteps;
    if (parseRefinementStep(RecipType, RefPos, RefSteps))
      RecipType = RecipType.substr(0, RefPos);

    // Ignore the disablement token for string matching.
    bool IsDisabled = RecipType[0] == DisabledPrefix;
    if (IsDisabled)
      RecipType = RecipType.substr(1);

    if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
      return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled
                        : TargetLoweringBase::ReciprocalEstimate::Enabled;
  }

  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
}
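
// Illustrative example (not from the original source): with the attribute
// string "!sqrtf,vec-divf:2", a query for scalar f32 square root returns
// Disabled (the '!' prefix), a query for v4f32 division returns Enabled, and
// any other operation/type combination returns Unspecified.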

/// For the input attribute string, return the customized refinement step count
/// for this operation on the specified data type. If the step count does not
/// exist, return the ReciprocalEstimate enum value for unspecified.
static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
  if (Override.empty())
    return TargetLoweringBase::ReciprocalEstimate::Unspecified;

  SmallVector<StringRef, 4> OverrideVector;
  Override.split(OverrideVector, ',');
  unsigned NumArgs = OverrideVector.size();

  // Check if "all", "default", or "none" was specified.
  if (NumArgs == 1) {
    // Look for an optional setting of the number of refinement steps needed
    // for this type of reciprocal operation.
    size_t RefPos;
    uint8_t RefSteps;
    if (!parseRefinementStep(Override, RefPos, RefSteps))
      return TargetLoweringBase::ReciprocalEstimate::Unspecified;

    // Split the string for further processing.
    Override = Override.substr(0, RefPos);
    assert(Override != "none" &&
           "Disabled reciprocals, but specified refinement steps?");

    // If this is a general override, return the specified number of steps.
    if (Override == "all" || Override == "default")
      return RefSteps;
  }

  // The attribute string may omit the size suffix ('f'/'d').
  std::string VTName = getReciprocalOpName(IsSqrt, VT);
  std::string VTNameNoSize = VTName;
  VTNameNoSize.pop_back();

  for (StringRef RecipType : OverrideVector) {
    size_t RefPos;
    uint8_t RefSteps;
    if (!parseRefinementStep(RecipType, RefPos, RefSteps))
      continue;

    RecipType = RecipType.substr(0, RefPos);
    if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
      return RefSteps;
  }

  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
}
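
// Illustrative example (not from the original source): with the attribute
// string "sqrtf:2,vec-divf:1", a scalar f32 square-root query returns 2
// refinement steps, a v4f32 division query returns 1, and a query with no
// matching entry returns ReciprocalEstimate::Unspecified.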

int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT,
                                                    MachineFunction &MF) const {
  return getOpEnabled(true, VT, getRecipEstimateForFunc(MF));
}

int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT,
                                                   MachineFunction &MF) const {
  return getOpEnabled(false, VT, getRecipEstimateForFunc(MF));
}

int TargetLoweringBase::getSqrtRefinementSteps(EVT VT,
                                               MachineFunction &MF) const {
  return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF));
}

int TargetLoweringBase::getDivRefinementSteps(EVT VT,
                                              MachineFunction &MF) const {
  return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
}

void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
  MF.getRegInfo().freezeReservedRegs(MF);
}

MachineMemOperand::Flags
TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI,
                                           const DataLayout &DL) const {
  MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad;
  if (LI.isVolatile())
    Flags |= MachineMemOperand::MOVolatile;

  if (LI.hasMetadata(LLVMContext::MD_nontemporal))
    Flags |= MachineMemOperand::MONonTemporal;

  if (LI.hasMetadata(LLVMContext::MD_invariant_load))
    Flags |= MachineMemOperand::MOInvariant;

  if (isDereferenceablePointer(LI.getPointerOperand(), LI.getType(), DL))
    Flags |= MachineMemOperand::MODereferenceable;

  Flags |= getTargetMMOFlags(LI);
  return Flags;
}

MachineMemOperand::Flags
TargetLoweringBase::getStoreMemOperandFlags(const StoreInst &SI,
                                            const DataLayout &DL) const {
  MachineMemOperand::Flags Flags = MachineMemOperand::MOStore;

  if (SI.isVolatile())
    Flags |= MachineMemOperand::MOVolatile;

  if (SI.hasMetadata(LLVMContext::MD_nontemporal))
    Flags |= MachineMemOperand::MONonTemporal;

  // FIXME: Not preserving dereferenceable
  Flags |= getTargetMMOFlags(SI);
  return Flags;
}

MachineMemOperand::Flags
TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI,
                                             const DataLayout &DL) const {
  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;

  if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(&AI)) {
    if (RMW->isVolatile())
      Flags |= MachineMemOperand::MOVolatile;
  } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(&AI)) {
    if (CmpX->isVolatile())
      Flags |= MachineMemOperand::MOVolatile;
  } else
    llvm_unreachable("not an atomic instruction");

  // FIXME: Not preserving dereferenceable
  Flags |= getTargetMMOFlags(AI);
  return Flags;
}
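
// Illustrative example (not from the original source): a volatile load that
// also carries !nontemporal metadata produces
//   MOLoad | MOVolatile | MONonTemporal
// plus whatever target-specific bits getTargetMMOFlags() adds.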

//===----------------------------------------------------------------------===//
//  GlobalISel Hooks
//===----------------------------------------------------------------------===//

bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI,
                                        const TargetTransformInfo *TTI) const {
  auto &MF = *MI.getMF();
  auto &MRI = MF.getRegInfo();
  // Assuming a spill and reload of a value has a cost of 1 instruction each,
  // this helper function computes the maximum number of uses we should consider
  // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
  // break even in terms of code size when the original MI has 2 users vs
  // choosing to potentially spill. Any more than 2 users and we have a net code
  // size increase. This doesn't take into account register pressure though.
  auto maxUses = [](unsigned RematCost) {
    // A cost of 1 means remats are basically free.
    if (RematCost == 1)
      return UINT_MAX;
    if (RematCost == 2)
      return 2U;

    // Remat is too expensive, only sink if there's one user.
    if (RematCost > 2)
      return 1U;
    llvm_unreachable("Unexpected remat cost");
  };

  // Helper to walk through uses and terminate if we've reached a limit. Saves
  // us spending time traversing uses if all we want to know is if it's >= min.
  auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
    unsigned NumUses = 0;
    auto UI = MRI.use_instr_nodbg_begin(Reg), UE = MRI.use_instr_nodbg_end();
    for (; UI != UE && NumUses < MaxUses; ++UI) {
      NumUses++;
    }
    // If we haven't reached the end yet then there are more than MaxUses users.
    return UI == UE;
  };

  switch (MI.getOpcode()) {
  default:
    return false;
  // Constant-like instructions should be close to their users.
  // We don't want long live-ranges for them.
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FRAME_INDEX:
  case TargetOpcode::G_INTTOPTR:
    return true;
  case TargetOpcode::G_GLOBAL_VALUE: {
    unsigned RematCost = TTI->getGISelRematGlobalCost();
    Register Reg = MI.getOperand(0).getReg();
    unsigned MaxUses = maxUses(RematCost);
    if (MaxUses == UINT_MAX)
      return true; // Remats are "free" so always localize.
    bool B = isUsesAtMost(Reg, MaxUses);
    return B;
  }
  }
}
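
// Illustrative example (not from the original source): if the target's TTI
// reports a rematerialization cost of 2 for a G_GLOBAL_VALUE, the instruction
// is localized only when it has at most two non-debug uses; with a cost of 1
// it is always localized, and with a higher cost only single-use values are.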