2014-05-24 20:50:23 +08:00
|
|
|
//==-- AArch64.h - Top-level interface for AArch64 --------------*- C++ -*-==//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2014-05-24 20:50:23 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file contains the entry points for global functions defined in the LLVM
|
|
|
|
// AArch64 back-end.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-08-14 00:26:38 +08:00
|
|
|
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64_H
|
|
|
|
#define LLVM_LIB_TARGET_AARCH64_AARCH64_H
|
2014-05-24 20:50:23 +08:00
|
|
|
|
|
|
|
#include "MCTargetDesc/AArch64MCTargetDesc.h"
|
2014-07-25 19:42:14 +08:00
|
|
|
#include "Utils/AArch64BaseInfo.h"
|
2014-05-24 20:50:23 +08:00
|
|
|
#include "llvm/Support/DataTypes.h"
|
2014-07-25 19:42:14 +08:00
|
|
|
#include "llvm/Target/TargetMachine.h"
|
2014-05-24 20:50:23 +08:00
|
|
|
|
|
|
|
namespace llvm {
|
|
|
|
|
2017-04-06 17:49:34 +08:00
|
|
|
class AArch64RegisterBankInfo;
|
|
|
|
class AArch64Subtarget;
|
2014-05-24 20:50:23 +08:00
|
|
|
class AArch64TargetMachine;
|
|
|
|
class FunctionPass;
|
2017-04-06 17:49:34 +08:00
|
|
|
class InstructionSelector;
|
2014-05-24 20:50:23 +08:00
|
|
|
class MachineFunctionPass;
|
|
|
|
|
|
|
|
FunctionPass *createAArch64DeadRegisterDefinitions();
|
[AArch64] Add pass to remove redundant copy after RA
Summary:
This change will add a pass to remove unnecessary zero copies in target blocks
of cbz/cbnz instructions. E.g., the copy instruction in the code below can be
removed because the cbz jumps to BB1 when x0 is zero :
BB0:
cbz x0, .BB1
BB1:
mov x0, xzr
Jun
Reviewers: gberry, jmolloy, HaoLiu, MatzeB, mcrosier
Subscribers: mcrosier, mssimpso, haicheng, bmakam, llvm-commits, aemerson, rengolin
Differential Revision: http://reviews.llvm.org/D16203
llvm-svn: 261004
2016-02-17 04:02:39 +08:00
|
|
|
FunctionPass *createAArch64RedundantCopyEliminationPass();
|
[AArch64] Prefer Bcc to CBZ/CBNZ/TBZ/TBNZ when NZCV flags can be set for "free".
This patch contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions into a
conditional branch (Bcc), when the NZCV flags can be set for "free". This is
preferred on targets that have more flexibility when scheduling Bcc
instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming all other variables are
equal). This can reduce register pressure and is also the default behavior for
GCC.
A few examples:
add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS.
cbz w8, .LBB_2 -> b.eq .LBB0_2 ; single def/use of w8 removed.
add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses.
cbz w8, .LBB1_2 -> b.eq .LBB1_2
sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2
In looking at all current sub-target machine descriptions, this transformation
appears to be either positive or neutral.
Differential Revision: https://reviews.llvm.org/D34220.
llvm-svn: 306144
2017-06-24 03:20:12 +08:00
|
|
|
FunctionPass *createAArch64CondBrTuning();
|
2018-10-25 04:19:09 +08:00
|
|
|
FunctionPass *createAArch64CompressJumpTablesPass();
|
2014-05-24 20:50:23 +08:00
|
|
|
FunctionPass *createAArch64ConditionalCompares();
|
|
|
|
FunctionPass *createAArch64AdvSIMDScalar();
|
|
|
|
FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
|
|
|
|
CodeGenOpt::Level OptLevel);
|
|
|
|
FunctionPass *createAArch64StorePairSuppressPass();
|
|
|
|
FunctionPass *createAArch64ExpandPseudoPass();
|
Introduce control flow speculation tracking pass for AArch64
The pass implements tracking of control flow miss-speculation into a "taint"
register. That taint register can then be used to mask off registers with
sensitive data when executing under miss-speculation, a.k.a. "transient
execution".
This pass is aimed at mitigating against SpectreV1-style vulnarabilities.
At the moment, it implements the tracking of miss-speculation of control
flow into a taint register, but doesn't implement a mechanism yet to then
use that taint register to mask off vulnerable data in registers (something
for a follow-on improvement). Possible strategies to mask out vulnerable
data that can be implemented on top of this are:
- speculative load hardening to automatically mask of data loaded
in registers.
- using intrinsics to mask of data in registers as indicated by the
programmer (see https://lwn.net/Articles/759423/).
For AArch64, the following implementation choices are made.
Some of these are different than the implementation choices made in
the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
the instruction set characteristics result in different trade-offs.
- The speculation hardening is done after register allocation. With a
relative abundance of registers, one register is reserved (X16) to be
the taint register. X16 is expected to not clash with other register
reservation mechanisms with very high probability because:
. The AArch64 ABI doesn't guarantee X16 to be retained across any call.
. The only way to request X16 to be used as a programmer is through
inline assembly. In the rare case a function explicitly demands to
use X16/W16, this pass falls back to hardening against speculation
by inserting a DSB SYS/ISB barrier pair which will prevent control
flow speculation.
- It is easy to insert mask operations at this late stage as we have
mask operations available that don't set flags.
- The taint variable contains all-ones when no miss-speculation is detected,
and contains all-zeros when miss-speculation is detected. Therefore, when
masking, an AND instruction (which only changes the register to be masked,
no other side effects) can easily be inserted anywhere that's needed.
- The tracking of miss-speculation is done by using a data-flow conditional
select instruction (CSEL) to evaluate the flags that were also used to
make conditional branch direction decisions. Speculation of the CSEL
instruction can be limited with a CSDB instruction - so the combination of
CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
aren't speculated. When conditional branch direction gets miss-speculated,
the semantics of the inserted CSEL instruction is such that the taint
register will contain all zero bits.
One key requirement for this to work is that the conditional branch is
followed by an execution of the CSEL instruction, where the CSEL
instruction needs to use the same flags status as the conditional branch.
This means that the conditional branches must not be implemented as one
of the AArch64 conditional branches that do not use the flags as input
(CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
selectors to not produce these instructions when speculation hardening
is enabled. This pass will assert if it does encounter such an instruction.
- On function call boundaries, the miss-speculation state is transferred from
the taint register X16 to be encoded in the SP register as value 0.
Future extensions/improvements could be:
- Implement this functionality using full speculation barriers, akin to the
x86-slh-lfence option. This may be more useful for the intrinsics-based
approach than for the SLH approach to masking.
Note that this pass already inserts the full speculation barriers if the
function for some niche reason makes use of X16/W16.
- no indirect branch misprediction gets protected/instrumented; but this
could be done for some indirect branches, such as switch jump tables.
Differential Revision: https://reviews.llvm.org/D54896
llvm-svn: 349456
2018-12-18 16:50:02 +08:00
|
|
|
FunctionPass *createAArch64SpeculationHardeningPass();
|
2014-05-24 20:50:23 +08:00
|
|
|
FunctionPass *createAArch64LoadStoreOptimizationPass();
|
2017-12-08 08:58:49 +08:00
|
|
|
FunctionPass *createAArch64SIMDInstrOptPass();
|
2014-05-24 20:50:23 +08:00
|
|
|
ModulePass *createAArch64PromoteConstantPass();
|
2014-09-05 10:55:24 +08:00
|
|
|
FunctionPass *createAArch64ConditionOptimizerPass();
|
2014-08-08 20:33:21 +08:00
|
|
|
FunctionPass *createAArch64A57FPLoadBalancing();
|
2014-10-13 18:12:35 +08:00
|
|
|
FunctionPass *createAArch64A53Fix835769();
|
2017-07-19 00:14:22 +08:00
|
|
|
FunctionPass *createFalkorHWPFFixPass();
|
2017-07-15 05:44:12 +08:00
|
|
|
FunctionPass *createFalkorMarkStridedAccessesPass();
|
[AArch64][v8.5A] Branch Target Identification code-generation pass
The Branch Target Identification extension, introduced to AArch64 in
Armv8.5-A, adds the BTI instruction, which is used to mark valid targets
of indirect branches. When enabled, the processor will trap if an
instruction in a protected page tries to perform an indirect branch to
any instruction other than a BTI. The BTI instruction uses encodings
which were NOPs in earlier versions of the architecture, so BTI-enabled
code will still run on earlier hardware, just without the extra
protection.
There are 3 variants of the BTI instruction, which are valid targets for
different kinds or branches:
- BTI C can be targeted by call instructions, and is inteneded to be
used at function entry points. These are the BLR instruction, as well
as BR with x16 or x17. These BR instructions are allowed for use in
PLT entries, and we can also use them to allow indirect tail-calls.
- BTI J can be targeted by BR only, and is intended to be used by jump
tables.
- BTI JC acts ab both a BTI C and a BTI J instruction, and can be
targeted by any BLR or BR instruction.
Note that RET instructions are not restricted by branch target
identification, the reason for this is that return addresses can be
protected more effectively using return address signing. Direct branches
and calls are also unaffected, as it is assumed that an attacker cannot
modify executable pages (if they could, they wouldn't need to do a
ROP/JOP attack).
This patch adds a MachineFunctionPass which:
- Adds a BTI C at the start of every function which could be indirectly
called (either because it is address-taken, or externally visible so
could be address-taken in another translation unit).
- Adds a BTI J at the start of every basic block which could be
indirectly branched to. This could be either done by a jump table, or
by taking the address of the block (e.g. the using GCC label values
extension).
We only need to use BTI JC when a function is indirectly-callable, and
takes the address of the entry block. I've not been able to trigger this
from C or IR, but I've included a MIR test just in case.
Using BTI C at function entries relies on the fact that no other code in
BTI-protected pages uses indirect tail-calls, unless they use x16 or x17
to hold the address. I'll add that code-generation restriction as a
separate patch.
Differential revision: https://reviews.llvm.org/D52867
llvm-svn: 343967
2018-10-08 22:04:24 +08:00
|
|
|
FunctionPass *createAArch64BranchTargetsPass();
|
2014-05-24 20:50:23 +08:00
|
|
|
|
|
|
|
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
|
|
|
|
|
|
|
|
FunctionPass *createAArch64CollectLOHPass();
|
2017-04-06 17:49:34 +08:00
|
|
|
InstructionSelector *
|
|
|
|
createAArch64InstructionSelector(const AArch64TargetMachine &,
|
|
|
|
AArch64Subtarget &, AArch64RegisterBankInfo &);
|
Re-commit: [globalisel] Add a combiner helpers for extending loads and use them in a pre-legalize combiner for AArch64
Summary: Depends on D45541
Reviewers: ab, aditya_nandakumar, bogner, rtereshin, volkan, rovka, javed.absar, aemerson
Subscribers: aemerson, rengolin, mgorny, javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D45543
The previous commit failed portions of the test-suite on GreenDragon due to
duplicate COPY instructions and iterator invalidation. Both issues have now
been fixed. To assist with this, a helper (cloneVirtualRegister) has been added
to MachineRegisterInfo that can be used to get another register that has the same
type and class/bank as an existing one.
llvm-svn: 343654
2018-10-03 10:12:17 +08:00
|
|
|
FunctionPass *createAArch64PreLegalizeCombiner();
|
2016-04-02 07:14:52 +08:00
|
|
|
|
2016-08-01 13:56:57 +08:00
|
|
|
void initializeAArch64A53Fix835769Pass(PassRegistry&);
|
|
|
|
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
|
|
|
|
void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
|
[AArch64][v8.5A] Branch Target Identification code-generation pass
The Branch Target Identification extension, introduced to AArch64 in
Armv8.5-A, adds the BTI instruction, which is used to mark valid targets
of indirect branches. When enabled, the processor will trap if an
instruction in a protected page tries to perform an indirect branch to
any instruction other than a BTI. The BTI instruction uses encodings
which were NOPs in earlier versions of the architecture, so BTI-enabled
code will still run on earlier hardware, just without the extra
protection.
There are 3 variants of the BTI instruction, which are valid targets for
different kinds or branches:
- BTI C can be targeted by call instructions, and is inteneded to be
used at function entry points. These are the BLR instruction, as well
as BR with x16 or x17. These BR instructions are allowed for use in
PLT entries, and we can also use them to allow indirect tail-calls.
- BTI J can be targeted by BR only, and is intended to be used by jump
tables.
- BTI JC acts ab both a BTI C and a BTI J instruction, and can be
targeted by any BLR or BR instruction.
Note that RET instructions are not restricted by branch target
identification, the reason for this is that return addresses can be
protected more effectively using return address signing. Direct branches
and calls are also unaffected, as it is assumed that an attacker cannot
modify executable pages (if they could, they wouldn't need to do a
ROP/JOP attack).
This patch adds a MachineFunctionPass which:
- Adds a BTI C at the start of every function which could be indirectly
called (either because it is address-taken, or externally visible so
could be address-taken in another translation unit).
- Adds a BTI J at the start of every basic block which could be
indirectly branched to. This could be either done by a jump table, or
by taking the address of the block (e.g. the using GCC label values
extension).
We only need to use BTI JC when a function is indirectly-callable, and
takes the address of the entry block. I've not been able to trigger this
from C or IR, but I've included a MIR test just in case.
Using BTI C at function entries relies on the fact that no other code in
BTI-protected pages uses indirect tail-calls, unless they use x16 or x17
to hold the address. I'll add that code-generation restriction as a
separate patch.
Differential revision: https://reviews.llvm.org/D52867
llvm-svn: 343967
2018-10-08 22:04:24 +08:00
|
|
|
void initializeAArch64BranchTargetsPass(PassRegistry&);
|
2016-08-01 13:56:57 +08:00
|
|
|
void initializeAArch64CollectLOHPass(PassRegistry&);
|
[AArch64] Prefer Bcc to CBZ/CBNZ/TBZ/TBNZ when NZCV flags can be set for "free".
This patch contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions into a
conditional branch (Bcc), when the NZCV flags can be set for "free". This is
preferred on targets that have more flexibility when scheduling Bcc
instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming all other variables are
equal). This can reduce register pressure and is also the default behavior for
GCC.
A few examples:
add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS.
cbz w8, .LBB_2 -> b.eq .LBB0_2 ; single def/use of w8 removed.
add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses.
cbz w8, .LBB1_2 -> b.eq .LBB1_2
sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2
In looking at all current sub-target machine descriptions, this transformation
appears to be either positive or neutral.
Differential Revision: https://reviews.llvm.org/D34220.
llvm-svn: 306144
2017-06-24 03:20:12 +08:00
|
|
|
void initializeAArch64CondBrTuningPass(PassRegistry &);
|
2018-10-25 04:19:09 +08:00
|
|
|
void initializeAArch64CompressJumpTablesPass(PassRegistry&);
|
2016-08-01 13:56:57 +08:00
|
|
|
void initializeAArch64ConditionalComparesPass(PassRegistry&);
|
|
|
|
void initializeAArch64ConditionOptimizerPass(PassRegistry&);
|
|
|
|
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
|
2016-04-02 07:14:52 +08:00
|
|
|
void initializeAArch64ExpandPseudoPass(PassRegistry&);
|
Introduce control flow speculation tracking pass for AArch64
The pass implements tracking of control flow miss-speculation into a "taint"
register. That taint register can then be used to mask off registers with
sensitive data when executing under miss-speculation, a.k.a. "transient
execution".
This pass is aimed at mitigating against SpectreV1-style vulnarabilities.
At the moment, it implements the tracking of miss-speculation of control
flow into a taint register, but doesn't implement a mechanism yet to then
use that taint register to mask off vulnerable data in registers (something
for a follow-on improvement). Possible strategies to mask out vulnerable
data that can be implemented on top of this are:
- speculative load hardening to automatically mask of data loaded
in registers.
- using intrinsics to mask of data in registers as indicated by the
programmer (see https://lwn.net/Articles/759423/).
For AArch64, the following implementation choices are made.
Some of these are different than the implementation choices made in
the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
the instruction set characteristics result in different trade-offs.
- The speculation hardening is done after register allocation. With a
relative abundance of registers, one register is reserved (X16) to be
the taint register. X16 is expected to not clash with other register
reservation mechanisms with very high probability because:
. The AArch64 ABI doesn't guarantee X16 to be retained across any call.
. The only way to request X16 to be used as a programmer is through
inline assembly. In the rare case a function explicitly demands to
use X16/W16, this pass falls back to hardening against speculation
by inserting a DSB SYS/ISB barrier pair which will prevent control
flow speculation.
- It is easy to insert mask operations at this late stage as we have
mask operations available that don't set flags.
- The taint variable contains all-ones when no miss-speculation is detected,
and contains all-zeros when miss-speculation is detected. Therefore, when
masking, an AND instruction (which only changes the register to be masked,
no other side effects) can easily be inserted anywhere that's needed.
- The tracking of miss-speculation is done by using a data-flow conditional
select instruction (CSEL) to evaluate the flags that were also used to
make conditional branch direction decisions. Speculation of the CSEL
instruction can be limited with a CSDB instruction - so the combination of
CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
aren't speculated. When conditional branch direction gets miss-speculated,
the semantics of the inserted CSEL instruction is such that the taint
register will contain all zero bits.
One key requirement for this to work is that the conditional branch is
followed by an execution of the CSEL instruction, where the CSEL
instruction needs to use the same flags status as the conditional branch.
This means that the conditional branches must not be implemented as one
of the AArch64 conditional branches that do not use the flags as input
(CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
selectors to not produce these instructions when speculation hardening
is enabled. This pass will assert if it does encounter such an instruction.
- On function call boundaries, the miss-speculation state is transferred from
the taint register X16 to be encoded in the SP register as value 0.
Future extensions/improvements could be:
- Implement this functionality using full speculation barriers, akin to the
x86-slh-lfence option. This may be more useful for the intrinsics-based
approach than for the SLH approach to masking.
Note that this pass already inserts the full speculation barriers if the
function for some niche reason makes use of X16/W16.
- no indirect branch misprediction gets protected/instrumented; but this
could be done for some indirect branches, such as switch jump tables.
Differential Revision: https://reviews.llvm.org/D54896
llvm-svn: 349456
2018-12-18 16:50:02 +08:00
|
|
|
void initializeAArch64SpeculationHardeningPass(PassRegistry&);
|
2016-07-21 05:45:58 +08:00
|
|
|
void initializeAArch64LoadStoreOptPass(PassRegistry&);
|
2017-12-08 08:58:49 +08:00
|
|
|
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
|
Re-commit: [globalisel] Add a combiner helpers for extending loads and use them in a pre-legalize combiner for AArch64
Summary: Depends on D45541
Reviewers: ab, aditya_nandakumar, bogner, rtereshin, volkan, rovka, javed.absar, aemerson
Subscribers: aemerson, rengolin, mgorny, javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D45543
The previous commit failed portions of the test-suite on GreenDragon due to
duplicate COPY instructions and iterator invalidation. Both issues have now
been fixed. To assist with this, a helper (cloneVirtualRegister) has been added
to MachineRegisterInfo that can be used to get another register that has the same
type and class/bank as an existing one.
llvm-svn: 343654
2018-10-03 10:12:17 +08:00
|
|
|
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
|
2016-08-01 13:56:57 +08:00
|
|
|
void initializeAArch64PromoteConstantPass(PassRegistry&);
|
|
|
|
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
|
|
|
|
void initializeAArch64StorePairSuppressPass(PassRegistry&);
|
2017-07-19 00:14:22 +08:00
|
|
|
void initializeFalkorHWPFFixPass(PassRegistry&);
|
2017-07-15 05:44:12 +08:00
|
|
|
void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
|
2016-08-01 13:56:57 +08:00
|
|
|
void initializeLDTLSCleanupPass(PassRegistry&);
|
2014-05-24 20:50:23 +08:00
|
|
|
} // end namespace llvm
|
|
|
|
|
|
|
|
#endif
|