//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;
using namespace MIPatternMatch;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, GISelKnownBits &KB,
               CodeGenCoverage &CoverageInfo) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    MFReturnAddr = Register();

    processPHIs(MF);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  // Returns true if the instruction was modified.
  bool preISelLower(MachineInstr &I);

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I) const;

  // Do some preprocessing of G_PHIs before we begin selection.
  void processPHIs(MachineFunction &MF);
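
  // processPHIs homogenizes the register banks of G_PHI operands before
  // selection: sub-32-bit gpr-bank values all get gpr32 classes, so a G_PHI
  // can otherwise end up mixing gpr and fpr inputs. A rough sketch of the
  // fixup (register names are illustrative):
  //
  //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
  // =>
  //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)   ; inserted in %bb2
  //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2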

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  bool contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI);

  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
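
  // convertPtrAddToAdd is a pre-selection lowering: rewriting a pointer add
  // as an integer add lets the imported integer patterns (shifted/extended
  // register forms, folded immediates, etc.) apply to address arithmetic.
  // A rough sketch of the idea (types and register names are illustrative):
  //
  //   %addr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
  // is rewritten along the lines of
  //   %base_int:gpr(s64) = G_PTRTOINT %base
  //   %addr:gpr(s64) = G_ADD %base_int, %offset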

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  ///@{
  /// Helper functions for selectCompareBranch.
  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
                                    MachineIRBuilder &MIB) const;
  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB) const;
  ///@}

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;
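
  // For instance, a conditional branch fed by an integer compare against zero
  // can usually be selected without materializing the compare result
  // (illustrative MIR; the exact opcode depends on the predicate and types):
  //
  //   %c:gpr(s32) = G_ICMP intpred(eq), %x(s64), %zero
  //   G_BRCOND %c(s32), %bb.true
  // =>
  //   CBZX %x, %bb.true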

  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                              MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;

  // Emit an integer compare between LHS and RHS, which checks for Predicate.
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a floating point comparison between \p LHS and \p RHS.
  MachineInstr *emitFPCompare(Register LHS, Register RHS,
                              MachineIRBuilder &MIRBuilder) const;
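
  // Integer compares are emitted as flag-setting arithmetic (a SUBS against
  // the RHS, or a CMN/TST when that folds better), and the compare result,
  // when needed as a value, is recovered from NZCV with a conditional
  // increment. Roughly (illustrative):
  //
  //   %c:gpr(s32) = G_ICMP intpred(eq), %x(s32), %y
  // =>
  //   $wzr = SUBSWrr %x, %y, implicit-def $nzcv
  //   %c = CSINCWr $wzr, $wzr, 1 /* ne */, implicit $nzcv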

  MachineInstr *emitInstr(unsigned Opcode,
                          std::initializer_list<llvm::DstOp> DstOps,
                          std::initializer_list<llvm::SrcOp> SrcOps,
                          MachineIRBuilder &MIRBuilder,
                          const ComplexRendererFns &RenderFns = None) const;

  /// Helper function to emit an add or sub instruction.
  ///
  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants
  /// described below, in a specific order.
  ///
  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
  ///
  /// \code
  ///   const std::array<std::array<unsigned, 2>, 5> Table {
  ///    {{AArch64::ADDXri, AArch64::ADDWri},
  ///     {AArch64::ADDXrs, AArch64::ADDWrs},
  ///     {AArch64::ADDXrr, AArch64::ADDWrr},
  ///     {AArch64::SUBXri, AArch64::SUBWri},
  ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};
  /// \endcode
  ///
  /// Each row in the table corresponds to a different addressing mode. Each
  /// column corresponds to a different register size.
  ///
  /// \attention Rows must be structured as follows:
  ///   - Row 0: The ri opcode variants
  ///   - Row 1: The rs opcode variants
  ///   - Row 2: The rr opcode variants
  ///   - Row 3: The ri opcode variants for negative immediates
  ///   - Row 4: The rx opcode variants
  ///
  /// \attention Columns must be structured as follows:
  ///   - Column 0: The 64-bit opcode variants
  ///   - Column 1: The 32-bit opcode variants
  ///
  /// \p Dst is the destination register of the binop to emit.
  /// \p LHS is the left-hand operand of the binop to emit.
  /// \p RHS is the right-hand operand of the binop to emit.
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
                           AArch64CC::CondCode CC,
                           MachineIRBuilder &MIRBuilder) const;
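
  // emitSelect also covers constant-operand G_SELECTs that cannot be imported
  // from the AArch64csel patterns, for example:
  //   G_SELECT cc, 0, 1  -> CSINC zreg, zreg, cc
  //   G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
  //   G_SELECT cc, 1, f  -> CSINC f, zreg, inv_cc
  //   G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
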
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for an integer compare.
  ///
  /// \p DefReg is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;
  /// Emit a CSet for a FP compare.
  ///
  /// \p Dst is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit the overflow op for \p Opcode.
  ///
  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
  /// G_USUBO, etc.
  std::pair<MachineInstr *, AArch64CC::CondCode>
  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;

  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  /// \p IsNegative is true if the test should be "not zero".
  /// This will also optimize the test bit instruction when possible.
  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;

  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
                        MachineBasicBlock *DestMBB,
                        MachineIRBuilder &MIB) const;
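
  // These are used when a conditional branch can be reduced to a single
  // test-bit or compare-with-zero branch. For example (illustrative):
  //
  //   %t:gpr(s64) = G_AND %x, 8
  //   %c:gpr(s32) = G_ICMP intpred(ne), %t(s64), %zero
  //   G_BRCOND %c(s32), %bb.true
  // can be emitted as a single TBNZ testing bit 3 of %x, and a plain compare
  // of a register against zero can be emitted as a CBZ/CBNZ on that register.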

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
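
  // An arithmetic immediate is a 12-bit unsigned value, optionally shifted
  // left by 12. selectNegArithImmed handles the negated form, so that e.g. a
  // compare against a negative constant can be emitted as the opposite
  // operation with a positive immediate (illustrative):
  //
  //   %cst:gpr(s64) = G_CONSTANT i64 -10
  //   %cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s64), %cst
  // =>
  //   %adds = ADDSXri %reg0, 10, 0, implicit-def $nzcv
  //   %cmp = CSINCWr $wzr, $wzr, 1, implicit $nzcv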

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
  /// from complex pattern matchers like selectAddrModeIndexed().
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }
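
  // The indexed forms match the base + scaled-unsigned-immediate addressing
  // mode, where the byte offset must be a multiple of the access size and
  // fit in 12 bits after scaling. Illustrative example for an 8-byte access:
  //
  //   %addr:gpr(p0) = G_PTR_ADD %base, 16
  //   %val:gpr(s64) = G_LOAD %addr(p0)
  // can be selected to LDRXui %base, 2 (a byte offset of 16, scaled by 8).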

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  /// or not a shift + extend should be folded into an addressing mode. Returns
  /// None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    // TODO: selectShiftedRegister should allow for rotates on logical shifts.
    // For now, make them the same. The only difference between the two is that
    // logical shifts are allowed to fold in rotates. Otherwise, these are
    // functionally the same.
    return selectShiftedRegister(Root);
  }
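
  // A shifted-register operand folds a constant shift of one operand into the
  // instruction itself, e.g. (illustrative):
  //
  //   %s:gpr(s64) = G_SHL %y, 3
  //   %d:gpr(s64) = G_ADD %x, %s
  // can be selected to ADDXrs %x, %y, 3 (i.e. an add of "%y, lsl #3"),
  // provided folding the shift is considered profitable.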

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
  ///
  /// \returns Either \p Reg if no change was necessary, or the new register
  /// created by moving \p Reg.
  ///
  /// Note: This uses emitCopy right now.
  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
                              MachineIRBuilder &MIB) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned OpFlags) const;
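
  // For code models where an ADRP/ADD pair cannot reach the symbol, the
  // address is built 16 bits at a time, roughly (illustrative sketch):
  //
  //   %a   = MOVZXi @sym(g0), 0
  //   %b   = MOVKXi %a, @sym(g1), 16
  //   %c   = MOVKXi %b, @sym(g2), 32
  //   %dst = MOVKXi %c, @sym(g3), 48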

  // Optimization methods.
  bool tryOptSelect(MachineInstr &MI) const;
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  bool ProduceNonFlagSettingCondBr = false;

  // Some cached values used during selection.
  // We use LR as a live-in register, and we keep track of it here as it can be
  // clobbered by calls.
  Register MFReturnAddr;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

  // We declare the temporaries used by selectImpl() in the class to minimize
  // the cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}
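
// Example (taken from the cases above): a 32-bit scalar on the GPR bank maps
// to GPR32 (or GPR32all when GetAllRegSet is true):
//
//   const TargetRegisterClass *RC =
//       getRegClassForTypeOnBank(LLT::scalar(32), RB, RBI,
//                                /*GetAllRegSet=*/false); // GPR32RegClass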

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Returns the minimum size the given register bank can hold.
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
    return 32;
  case AArch64::FPRRegBankID:
    return 8;
  default:
    llvm_unreachable("Tried to get minimum size for unknown register bank.");
  }
}

static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  auto &MI = *Root.getParent();
  auto &MBB = *MI.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
    if (!ValAndVReg)
      return None;
    Immed = ValAndVReg->Value.getSExtValue();
  } else
    return None;
  return Immed;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - its operands are not all in the same register bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the AArch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g "To = COPY SrcReg:SubReg"
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}
|
|
|
|
|
2019-01-25 06:00:41 +08:00
|
|
|
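/// Select a COPY (or copy-like generic operation routed here, e.g. a bitcast)
/// by picking register classes for the source and destination based on their
/// register banks and sizes, inserting subregister copies or a SUBREG_TO_REG
/// when the sizes differ, and constraining the destination register.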
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
|
|
|
|
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
|
|
|
|
const RegisterBankInfo &RBI) {
|
2019-08-13 06:40:53 +08:00
|
|
|
Register DstReg = I.getOperand(0).getReg();
|
|
|
|
Register SrcReg = I.getOperand(1).getReg();
|
2019-01-25 06:00:41 +08:00
|
|
|
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
|
|
|
|
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
|
2019-05-04 06:37:46 +08:00
|
|
|
|
|
|
|
// Find the correct register classes for the source and destination registers.
|
|
|
|
const TargetRegisterClass *SrcRC;
|
|
|
|
const TargetRegisterClass *DstRC;
|
|
|
|
std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
|
|
|
|
|
2019-01-25 06:00:41 +08:00
|
|
|
if (!DstRC) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Unexpected dest size "
|
|
|
|
<< RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
|
2018-02-03 02:03:30 +08:00
|
|
|
return false;
|
2016-10-12 11:57:49 +08:00
|
|
|
}
|
|
|
|
|
2019-01-25 06:00:41 +08:00
|
|
|
// A couple helpers below, for making sure that the copy we produce is valid.
|
|
|
|
|
|
|
|
// Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
|
|
|
|
// to verify that the src and dst are the same size, since that's handled by
|
|
|
|
// the SUBREG_TO_REG.
|
|
|
|
bool KnownValid = false;
|
|
|
|
|
|
|
|
// Returns true, or asserts if something we don't expect happens. Instead of
|
|
|
|
// returning true, we return isValidCopy() to ensure that we verify the
|
|
|
|
// result.
|
2019-01-25 06:51:31 +08:00
|
|
|
auto CheckCopy = [&]() {
|
2019-01-25 06:00:41 +08:00
|
|
|
// If we have a bitcast or something, we can't have physical registers.
|
2019-08-02 07:27:28 +08:00
|
|
|
assert((I.isCopy() ||
|
|
|
|
(!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
|
|
|
|
!Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
|
|
|
|
"No phys reg on generic operator!");
|
2020-04-29 06:19:17 +08:00
|
|
|
bool ValidCopy = true;
|
|
|
|
#ifndef NDEBUG
|
|
|
|
ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
|
2020-04-29 05:45:05 +08:00
|
|
|
assert(ValidCopy && "Invalid copy.");
|
2020-04-29 06:19:17 +08:00
|
|
|
#endif
|
2020-04-29 05:45:05 +08:00
|
|
|
return ValidCopy;
|
2019-01-25 06:00:41 +08:00
|
|
|
};
|
|
|
|
|
2020-04-29 05:45:05 +08:00
|
|
|
// Is this a copy? If so, then we may need to insert a subregister copy.
|
2019-01-25 06:00:41 +08:00
|
|
|
if (I.isCopy()) {
|
|
|
|
// Yes. Check if there's anything to fix up.
|
2018-02-19 01:10:49 +08:00
|
|
|
if (!SrcRC) {
|
2019-01-25 06:00:41 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
|
|
|
|
return false;
|
2018-02-19 01:10:49 +08:00
|
|
|
}
|
2019-01-25 06:00:41 +08:00
|
|
|
|
2019-08-30 05:53:58 +08:00
|
|
|
unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
|
|
|
|
unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
|
2020-04-29 05:45:05 +08:00
|
|
|
unsigned SubReg;
|
|
|
|
|
|
|
|
// If the source bank doesn't support a subregister copy small enough,
|
|
|
|
// then we first need to copy to the destination bank.
|
|
|
|
if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
|
|
|
|
const TargetRegisterClass *DstTempRC =
|
|
|
|
getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
|
|
|
|
getSubRegForClass(DstRC, TRI, SubReg);
|
|
|
|
|
|
|
|
MachineIRBuilder MIB(I);
|
|
|
|
auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
|
|
|
|
copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
|
|
|
|
} else if (SrcSize > DstSize) {
|
|
|
|
// If the source register is bigger than the destination we need to
|
|
|
|
// perform a subregister copy.
|
|
|
|
const TargetRegisterClass *SubRegRC =
|
|
|
|
getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
|
|
|
|
getSubRegForClass(SubRegRC, TRI, SubReg);
|
|
|
|
copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
|
|
|
|
} else if (DstSize > SrcSize) {
|
|
|
|
// If the destination register is bigger than the source we need to do
|
|
|
|
// a promotion using SUBREG_TO_REG.
|
|
|
|
const TargetRegisterClass *PromotionRC =
|
|
|
|
getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
|
|
|
|
getSubRegForClass(SrcRC, TRI, SubReg);
|
|
|
|
|
|
|
|
Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
|
|
|
|
BuildMI(*I.getParent(), I, I.getDebugLoc(),
|
|
|
|
TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
|
|
|
|
.addImm(0)
|
|
|
|
.addUse(SrcReg)
|
|
|
|
.addImm(SubReg);
|
|
|
|
MachineOperand &RegOp = I.getOperand(1);
|
|
|
|
RegOp.setReg(PromoteReg);
|
|
|
|
|
|
|
|
// Promise that the copy is implicitly validated by the SUBREG_TO_REG.
|
|
|
|
KnownValid = true;
|
2018-02-19 01:10:49 +08:00
|
|
|
}
|
2019-01-25 06:00:41 +08:00
|
|
|
|
|
|
|
// If the destination is a physical register, then there's nothing to
|
|
|
|
// change, so we're done.
|
2019-08-02 07:27:28 +08:00
|
|
|
if (Register::isPhysicalRegister(DstReg))
|
2019-01-25 06:00:41 +08:00
|
|
|
return CheckCopy();
|
2018-02-19 01:10:49 +08:00
|
|
|
}
|
|
|
|
|
2019-01-25 06:00:41 +08:00
|
|
|
// No need to constrain SrcReg. It will get constrained when we hit another
|
|
|
|
// of its uses or defs. Copies do not have constraints.
|
|
|
|
if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
|
|
|
|
<< " operand\n");
|
2016-10-12 11:57:49 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
I.setDesc(TII.get(AArch64::COPY));
|
2019-01-25 06:00:41 +08:00
|
|
|
return CheckCopy();
|
2016-10-12 11:57:49 +08:00
|
|
|
}
|
|
|
|
|
2016-10-13 06:49:11 +08:00
|
|
|
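/// Return the AArch64 instruction implementing \p GenericOpc (G_SITOFP,
/// G_UITOFP, G_FPTOSI or G_FPTOUI) for the given scalar source and destination
/// types, or \p GenericOpc itself if the combination is not handled here.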
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
|
|
|
|
if (!DstTy.isScalar() || !SrcTy.isScalar())
|
|
|
|
return GenericOpc;
|
|
|
|
|
|
|
|
const unsigned DstSize = DstTy.getSizeInBits();
|
|
|
|
const unsigned SrcSize = SrcTy.getSizeInBits();
|
|
|
|
|
|
|
|
switch (DstSize) {
|
|
|
|
case 32:
|
|
|
|
switch (SrcSize) {
|
|
|
|
case 32:
|
|
|
|
switch (GenericOpc) {
|
|
|
|
case TargetOpcode::G_SITOFP:
|
|
|
|
return AArch64::SCVTFUWSri;
|
|
|
|
case TargetOpcode::G_UITOFP:
|
|
|
|
return AArch64::UCVTFUWSri;
|
|
|
|
case TargetOpcode::G_FPTOSI:
|
|
|
|
return AArch64::FCVTZSUWSr;
|
|
|
|
case TargetOpcode::G_FPTOUI:
|
|
|
|
return AArch64::FCVTZUUWSr;
|
|
|
|
default:
|
|
|
|
return GenericOpc;
|
|
|
|
}
|
|
|
|
case 64:
|
|
|
|
switch (GenericOpc) {
|
|
|
|
case TargetOpcode::G_SITOFP:
|
|
|
|
return AArch64::SCVTFUXSri;
|
|
|
|
case TargetOpcode::G_UITOFP:
|
|
|
|
return AArch64::UCVTFUXSri;
|
|
|
|
case TargetOpcode::G_FPTOSI:
|
|
|
|
return AArch64::FCVTZSUWDr;
|
|
|
|
case TargetOpcode::G_FPTOUI:
|
|
|
|
return AArch64::FCVTZUUWDr;
|
|
|
|
default:
|
|
|
|
return GenericOpc;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
return GenericOpc;
|
|
|
|
}
|
|
|
|
case 64:
|
|
|
|
switch (SrcSize) {
|
|
|
|
case 32:
|
|
|
|
switch (GenericOpc) {
|
|
|
|
case TargetOpcode::G_SITOFP:
|
|
|
|
return AArch64::SCVTFUWDri;
|
|
|
|
case TargetOpcode::G_UITOFP:
|
|
|
|
return AArch64::UCVTFUWDri;
|
|
|
|
case TargetOpcode::G_FPTOSI:
|
|
|
|
return AArch64::FCVTZSUXSr;
|
|
|
|
case TargetOpcode::G_FPTOUI:
|
|
|
|
return AArch64::FCVTZUUXSr;
|
|
|
|
default:
|
|
|
|
return GenericOpc;
|
|
|
|
}
|
|
|
|
case 64:
|
|
|
|
switch (GenericOpc) {
|
|
|
|
case TargetOpcode::G_SITOFP:
|
|
|
|
return AArch64::SCVTFUXDri;
|
|
|
|
case TargetOpcode::G_UITOFP:
|
|
|
|
return AArch64::UCVTFUXDri;
|
|
|
|
case TargetOpcode::G_FPTOSI:
|
|
|
|
return AArch64::FCVTZSUXDr;
|
|
|
|
case TargetOpcode::G_FPTOUI:
|
|
|
|
return AArch64::FCVTZUUXDr;
|
|
|
|
default:
|
|
|
|
return GenericOpc;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
return GenericOpc;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
return GenericOpc;
|
|
|
|
}
|
|
|
|
return GenericOpc;
|
|
|
|
}
|
|
|
|
|
[AArch64][GlobalISel] Select CSINC and CSINV for G_SELECT with constants
Select the following:
- G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
- G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
- G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
- G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
- G_SELECT cc, t, 1 -> CSINC t, zreg, cc
- G_SELECT cc, t, -1 -> CSINC t, zreg, cc
(IR example: https://godbolt.org/z/YfPna9)
These correspond to a bunch of the AArch64csel patterns in AArch64InstrInfo.td.
Unfortunately, it doesn't seem like we can import patterns that use NZCV like
those ones do. E.g.
```
def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
(CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
```
So we have to manually select these for now.
This replaces `selectSelectOpc` with an `emitSelect` function, which performs
these optimizations.
Differential Revision: https://reviews.llvm.org/D90701
2020-11-04 03:08:08 +08:00
|
|
|
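/// Emit a scalar select for \p Dst = \p CC ? \p True : \p False. Uses FCSEL
/// for FPR operands, and otherwise a CSEL-family instruction, folding
/// negated/inverted/incremented or constant operands into CSNEG/CSINV/CSINC
/// where possible. Returns nullptr for vector selects.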
MachineInstr *
|
|
|
|
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
|
|
|
|
Register False, AArch64CC::CondCode CC,
|
|
|
|
MachineIRBuilder &MIB) const {
|
|
|
|
MachineRegisterInfo &MRI = *MIB.getMRI();
|
|
|
|
assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
|
|
|
|
RBI.getRegBank(True, MRI, TRI)->getID() &&
|
|
|
|
"Expected both select operands to have the same regbank?");
|
|
|
|
LLT Ty = MRI.getType(True);
|
|
|
|
if (Ty.isVector())
|
|
|
|
return nullptr;
|
|
|
|
const unsigned Size = Ty.getSizeInBits();
|
|
|
|
assert((Size == 32 || Size == 64) &&
|
|
|
|
"Expected 32 bit or 64 bit select only?");
|
|
|
|
const bool Is32Bit = Size == 32;
|
|
|
|
if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
|
|
|
|
unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
|
|
|
|
auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
|
|
|
|
constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
|
|
|
|
return &*FCSel;
|
|
|
|
}
|
|
|
|
|
|
|
|
// By default, we'll try and emit a CSEL.
|
|
|
|
unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
|
[AArch64][GlobalISel] Select G_SELECT cc, t, (G_SUB 0, x) -> CSNEG t, x, cc
When we see
```
%sub = G_SUB 0, %x
%select = G_SELECT %cc, %t, %sub
```
Fold away the G_SUB by producing
```
%select = CSNEG %t, %x, cc
```
Simple IR example: https://godbolt.org/z/K8TEnh
This is valid on both sides of the select, but for now, just handle one side.
It may make more sense to handle swapping sides during post-legalizer lowering.
Differential Revision: https://reviews.llvm.org/D90723
2020-11-04 06:59:29 +08:00
|
|
|
bool Optimized = false;
|
2020-12-09 05:39:28 +08:00
|
|
|
auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
|
2020-12-09 06:05:38 +08:00
|
|
|
&Optimized](Register &Reg, Register &OtherReg,
|
|
|
|
bool Invert) {
|
2020-12-09 05:39:28 +08:00
|
|
|
if (Optimized)
|
|
|
|
return false;
|
|
|
|
|
2020-11-04 06:59:29 +08:00
|
|
|
// Attempt to fold:
|
|
|
|
//
|
[AArch64][GlobalISel] Fold binops on the true side of G_SELECT
This implements the following folds:
```
G_SELECT cc, (G_SUB 0, %x), %false -> CSNEG %x, %false, inv_cc
G_SELECT cc, (G_XOR x, -1), %false -> CSINV %x, %false, inv_cc
```
This is similar to the folds introduced in
5bc0bd05e6a8d788e08cdf3d154f3a33202aee53.
In 5bc0bd05e6a8d788e08cdf3d154f3a33202aee53 I mentioned that we may prefer to do
this in AArch64PostLegalizerLowering.
I think that it's probably better to do this in the selector. The way we select
G_SELECT depends on what register banks end up being assigned to it. If we did
this in AArch64PostLegalizerLowering, then we'd end up checking *every* G_SELECT
to see if it's worth swapping operands. Doing it in the selector allows us to
restrict the optimization to only relevant G_SELECTs.
Also fix up some comments in `TryFoldBinOpIntoSelect` which are kind of
confusing IMO.
Example IR: https://godbolt.org/z/3qPGca
Differential Revision: https://reviews.llvm.org/D92860
2020-12-09 01:34:42 +08:00
|
|
|
// %sub = G_SUB 0, %x
|
|
|
|
// %select = G_SELECT cc, %reg, %sub
|
2020-11-04 06:59:29 +08:00
|
|
|
//
|
|
|
|
// Into:
|
2020-12-09 01:34:42 +08:00
|
|
|
// %select = CSNEG %reg, %x, cc
|
2020-11-14 06:09:08 +08:00
|
|
|
Register MatchReg;
|
2020-12-09 01:34:42 +08:00
|
|
|
if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
|
2020-11-14 06:09:08 +08:00
|
|
|
Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
|
2020-12-09 01:34:42 +08:00
|
|
|
Reg = MatchReg;
|
2020-12-09 06:05:38 +08:00
|
|
|
if (Invert) {
|
2020-12-09 01:34:42 +08:00
|
|
|
CC = AArch64CC::getInvertedCondCode(CC);
|
2020-12-09 06:05:38 +08:00
|
|
|
std::swap(Reg, OtherReg);
|
|
|
|
}
|
2020-11-14 06:09:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Attempt to fold:
|
|
|
|
//
|
2020-12-09 01:34:42 +08:00
|
|
|
// %xor = G_XOR %x, -1
|
|
|
|
// %select = G_SELECT cc, %reg, %xor
|
2020-11-14 06:09:08 +08:00
|
|
|
//
|
|
|
|
// Into:
|
2020-12-09 01:34:42 +08:00
|
|
|
// %select = CSINV %reg, %x, cc
|
|
|
|
if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
|
2020-11-14 06:09:08 +08:00
|
|
|
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
|
2020-12-09 01:34:42 +08:00
|
|
|
Reg = MatchReg;
|
2020-12-09 06:05:38 +08:00
|
|
|
if (Invert) {
|
2020-12-09 01:34:42 +08:00
|
|
|
CC = AArch64CC::getInvertedCondCode(CC);
|
2020-12-09 06:05:38 +08:00
|
|
|
std::swap(Reg, OtherReg);
|
|
|
|
}
|
2020-11-14 06:09:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
[AArch64][GlobalISel] Fold G_SELECT cc, %t, (G_ADD %x, 1) -> CSINC %t, %x, cc
This implements
```
G_SELECT cc, %true, (G_ADD %x, 1) -> CSINC %true, %x, cc
G_SELECT cc, (G_ADD %x, 1), %false -> CSINC %x, %false, inv_cc
```
Godbolt example: https://godbolt.org/z/eoPqKq
Differential Revision: https://reviews.llvm.org/D92868
2020-12-09 02:20:44 +08:00
|
|
|
// Attempt to fold:
|
|
|
|
//
|
|
|
|
// %add = G_ADD %x, 1
|
|
|
|
// %select = G_SELECT cc, %reg, %add
|
|
|
|
//
|
|
|
|
// Into:
|
|
|
|
// %select = CSINC %reg, %x, cc
|
|
|
|
if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
|
|
|
|
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
|
|
|
|
Reg = MatchReg;
|
2020-12-09 06:05:38 +08:00
|
|
|
if (Invert) {
|
2020-12-09 02:20:44 +08:00
|
|
|
CC = AArch64CC::getInvertedCondCode(CC);
|
2020-12-09 06:05:38 +08:00
|
|
|
std::swap(Reg, OtherReg);
|
|
|
|
}
|
2020-12-09 02:20:44 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-11-14 06:09:08 +08:00
|
|
|
return false;
|
2020-11-04 06:59:29 +08:00
|
|
|
};
|
2020-11-04 03:08:08 +08:00
|
|
|
|
|
|
|
// Helper lambda which tries to use CSINC/CSINV for the instruction when its
|
|
|
|
// true/false values are constants.
|
|
|
|
// FIXME: All of these patterns already exist in tablegen. We should be
|
|
|
|
// able to import these.
|
2020-11-04 06:59:29 +08:00
|
|
|
auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
|
|
|
|
&Optimized]() {
|
|
|
|
if (Optimized)
|
|
|
|
return false;
|
2020-11-04 03:08:08 +08:00
|
|
|
auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
|
|
|
|
auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
|
|
|
|
if (!TrueCst && !FalseCst)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
|
|
|
|
if (TrueCst && FalseCst) {
|
2020-11-03 22:50:17 +08:00
|
|
|
int64_t T = TrueCst->Value.getSExtValue();
|
|
|
|
int64_t F = FalseCst->Value.getSExtValue();
|
2020-11-04 03:08:08 +08:00
|
|
|
|
|
|
|
if (T == 0 && F == 1) {
|
|
|
|
// G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
|
|
|
|
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
|
|
|
|
True = ZReg;
|
|
|
|
False = ZReg;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (T == 0 && F == -1) {
|
|
|
|
// G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
|
|
|
|
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
|
|
|
|
True = ZReg;
|
|
|
|
False = ZReg;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (TrueCst) {
|
2020-11-03 22:50:17 +08:00
|
|
|
int64_t T = TrueCst->Value.getSExtValue();
|
2020-11-04 03:08:08 +08:00
|
|
|
if (T == 1) {
|
|
|
|
// G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
|
|
|
|
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
|
|
|
|
True = False;
|
|
|
|
False = ZReg;
|
|
|
|
CC = AArch64CC::getInvertedCondCode(CC);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (T == -1) {
|
|
|
|
// G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
|
|
|
|
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
|
|
|
|
True = False;
|
|
|
|
False = ZReg;
|
|
|
|
CC = AArch64CC::getInvertedCondCode(CC);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FalseCst) {
|
2020-11-03 22:50:17 +08:00
|
|
|
int64_t F = FalseCst->Value.getSExtValue();
|
2020-11-04 03:08:08 +08:00
|
|
|
if (F == 1) {
|
|
|
|
// G_SELECT cc, t, 1 -> CSINC t, zreg, cc
|
|
|
|
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
|
|
|
|
False = ZReg;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (F == -1) {
|
|
|
|
// G_SELECT cc, t, -1 -> CSINV t, zreg, cc
|
|
|
|
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
|
|
|
|
False = ZReg;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
};
|
|
|
|
|
2020-12-09 06:05:38 +08:00
|
|
|
Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
|
|
|
|
Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
|
2020-11-04 06:59:29 +08:00
|
|
|
Optimized |= TryOptSelectCst();
|
2020-11-04 03:08:08 +08:00
|
|
|
auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
|
|
|
|
constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
|
|
|
|
return &*SelectInst;
|
2019-06-06 07:46:16 +08:00
|
|
|
}
|
|
|
|
|
2016-10-13 06:49:04 +08:00
|
|
|
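/// Convert an IR integer comparison predicate to the equivalent AArch64
/// condition code.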
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
|
|
|
|
switch (P) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown condition code!");
|
|
|
|
case CmpInst::ICMP_NE:
|
|
|
|
return AArch64CC::NE;
|
|
|
|
case CmpInst::ICMP_EQ:
|
|
|
|
return AArch64CC::EQ;
|
|
|
|
case CmpInst::ICMP_SGT:
|
|
|
|
return AArch64CC::GT;
|
|
|
|
case CmpInst::ICMP_SGE:
|
|
|
|
return AArch64CC::GE;
|
|
|
|
case CmpInst::ICMP_SLT:
|
|
|
|
return AArch64CC::LT;
|
|
|
|
case CmpInst::ICMP_SLE:
|
|
|
|
return AArch64CC::LE;
|
|
|
|
case CmpInst::ICMP_UGT:
|
|
|
|
return AArch64CC::HI;
|
|
|
|
case CmpInst::ICMP_UGE:
|
|
|
|
return AArch64CC::HS;
|
|
|
|
case CmpInst::ICMP_ULT:
|
|
|
|
return AArch64CC::LO;
|
|
|
|
case CmpInst::ICMP_ULE:
|
|
|
|
return AArch64CC::LS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-10-13 06:49:07 +08:00
|
|
|
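/// Convert an IR floating-point comparison predicate into up to two AArch64
/// condition codes. \p CondCode2 is left as AArch64CC::AL when a single
/// condition suffices.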
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
|
|
|
|
AArch64CC::CondCode &CondCode,
|
|
|
|
AArch64CC::CondCode &CondCode2) {
|
|
|
|
CondCode2 = AArch64CC::AL;
|
|
|
|
switch (P) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown FP condition!");
|
|
|
|
case CmpInst::FCMP_OEQ:
|
|
|
|
CondCode = AArch64CC::EQ;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_OGT:
|
|
|
|
CondCode = AArch64CC::GT;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_OGE:
|
|
|
|
CondCode = AArch64CC::GE;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_OLT:
|
|
|
|
CondCode = AArch64CC::MI;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_OLE:
|
|
|
|
CondCode = AArch64CC::LS;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_ONE:
|
|
|
|
CondCode = AArch64CC::MI;
|
|
|
|
CondCode2 = AArch64CC::GT;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_ORD:
|
|
|
|
CondCode = AArch64CC::VC;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_UNO:
|
|
|
|
CondCode = AArch64CC::VS;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_UEQ:
|
|
|
|
CondCode = AArch64CC::EQ;
|
|
|
|
CondCode2 = AArch64CC::VS;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_UGT:
|
|
|
|
CondCode = AArch64CC::HI;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_UGE:
|
|
|
|
CondCode = AArch64CC::PL;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_ULT:
|
|
|
|
CondCode = AArch64CC::LT;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_ULE:
|
|
|
|
CondCode = AArch64CC::LE;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_UNE:
|
|
|
|
CondCode = AArch64CC::NE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-30 05:28:30 +08:00
|
|
|
/// Return a register which can be used as a bit to test in a TB(N)Z.
|
[AArch64][GlobalISel] Fold G_XOR into TB(N)Z bit calculation
This ports the existing case for G_XOR from `getTestBitOperand` in
AArch64ISelLowering into GlobalISel.
The idea is to flip between TBZ and TBNZ while walking through G_XORs.
Let's say we have
```
tbz (xor x, c), b
```
Let's say the `b`-th bit in `c` is 1. Then
- If the `b`-th bit in `x` is 1, the `b`-th bit in `(xor x, c)` is 0.
- If the `b`-th bit in `x` is 0, then the `b`-th bit in `(xor x, c)` is 1.
So, then
```
tbz (xor x, c), b == tbnz x, b
```
Let's say the `b`-th bit in `c` is 0. Then
- If the `b`-th bit in `x` is 1, the `b`-th bit in `(xor x, c)` is 1.
- If the `b`-th bit in `x` is 0, then the `b`-th bit in `(xor x, c)` is 0.
So, then
```
tbz (xor x, c), b == tbz x, b
```
Differential Revision: https://reviews.llvm.org/D73929
2020-02-04 06:26:17 +08:00
|
|
|
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
|
2020-01-31 08:17:21 +08:00
|
|
|
MachineRegisterInfo &MRI) {
|
2020-01-30 05:28:30 +08:00
|
|
|
assert(Reg.isValid() && "Expected valid register!");
|
|
|
|
while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
|
|
|
|
unsigned Opc = MI->getOpcode();
|
2020-02-05 07:10:53 +08:00
|
|
|
|
|
|
|
if (!MI->getOperand(0).isReg() ||
|
2020-04-16 08:10:53 +08:00
|
|
|
!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
|
2020-02-05 07:10:53 +08:00
|
|
|
break;
|
|
|
|
|
2020-01-30 05:28:30 +08:00
|
|
|
// (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
|
2020-01-31 06:53:37 +08:00
|
|
|
//
|
|
|
|
// (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
|
|
|
|
// on the truncated x is the same as the bit number on x.
|
|
|
|
if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
|
2020-01-31 08:17:21 +08:00
|
|
|
Opc == TargetOpcode::G_TRUNC) {
|
|
|
|
Register NextReg = MI->getOperand(1).getReg();
|
|
|
|
// Did we find something worth folding?
|
2020-04-16 08:10:53 +08:00
|
|
|
if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
|
2020-01-31 08:17:21 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
// NextReg is worth folding. Keep looking.
|
|
|
|
Reg = NextReg;
|
|
|
|
continue;
|
|
|
|
}
|
2020-01-30 05:28:30 +08:00
|
|
|
|
2020-01-31 08:17:21 +08:00
|
|
|
// Attempt to find a suitable operation with a constant on one side.
|
|
|
|
Optional<uint64_t> C;
|
|
|
|
Register TestReg;
|
|
|
|
switch (Opc) {
|
|
|
|
default:
|
|
|
|
break;
|
2020-02-04 06:26:17 +08:00
|
|
|
case TargetOpcode::G_AND:
|
|
|
|
case TargetOpcode::G_XOR: {
|
2020-01-31 08:17:21 +08:00
|
|
|
TestReg = MI->getOperand(1).getReg();
|
|
|
|
Register ConstantReg = MI->getOperand(2).getReg();
|
|
|
|
auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
|
|
|
|
if (!VRegAndVal) {
|
|
|
|
// AND commutes, check the other side for a constant.
|
|
|
|
// FIXME: Can we canonicalize the constant so that it's always on the
|
|
|
|
// same side at some point earlier?
|
|
|
|
std::swap(ConstantReg, TestReg);
|
|
|
|
VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
|
|
|
|
}
|
|
|
|
if (VRegAndVal)
|
2020-11-03 22:50:17 +08:00
|
|
|
C = VRegAndVal->Value.getSExtValue();
|
2020-02-04 05:35:09 +08:00
|
|
|
break;
|
|
|
|
}
|
2020-02-04 07:11:14 +08:00
|
|
|
case TargetOpcode::G_ASHR:
|
2020-02-06 02:25:48 +08:00
|
|
|
case TargetOpcode::G_LSHR:
|
2020-02-04 05:35:09 +08:00
|
|
|
case TargetOpcode::G_SHL: {
|
|
|
|
TestReg = MI->getOperand(1).getReg();
|
|
|
|
auto VRegAndVal =
|
|
|
|
getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
|
|
|
|
if (VRegAndVal)
|
2020-11-03 22:50:17 +08:00
|
|
|
C = VRegAndVal->Value.getSExtValue();
|
2020-02-04 05:35:09 +08:00
|
|
|
break;
|
2020-01-31 08:17:21 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-05 07:10:53 +08:00
|
|
|
// Didn't find a constant or viable register. Bail out of the loop.
|
|
|
|
if (!C || !TestReg.isValid())
|
2020-01-30 05:28:30 +08:00
|
|
|
break;
|
|
|
|
|
2020-01-31 08:17:21 +08:00
|
|
|
// We found a suitable instruction with a constant. Check to see if we can
|
|
|
|
// walk through the instruction.
|
|
|
|
Register NextReg;
|
2020-02-04 07:11:14 +08:00
|
|
|
unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
|
2020-01-31 08:17:21 +08:00
|
|
|
switch (Opc) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case TargetOpcode::G_AND:
|
|
|
|
// (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
|
|
|
|
if ((*C >> Bit) & 1)
|
|
|
|
NextReg = TestReg;
|
|
|
|
break;
|
2020-02-04 05:35:09 +08:00
|
|
|
case TargetOpcode::G_SHL:
|
|
|
|
// (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
|
|
|
|
// the type of the register.
|
2020-02-04 07:11:14 +08:00
|
|
|
if (*C <= Bit && (Bit - *C) < TestRegSize) {
|
2020-02-04 05:35:09 +08:00
|
|
|
NextReg = TestReg;
|
|
|
|
Bit = Bit - *C;
|
|
|
|
}
|
|
|
|
break;
|
2020-02-04 07:11:14 +08:00
|
|
|
case TargetOpcode::G_ASHR:
|
|
|
|
// (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
|
|
|
|
// in x
|
|
|
|
NextReg = TestReg;
|
|
|
|
Bit = Bit + *C;
|
|
|
|
if (Bit >= TestRegSize)
|
|
|
|
Bit = TestRegSize - 1;
|
|
|
|
break;
|
2020-02-06 02:25:48 +08:00
|
|
|
case TargetOpcode::G_LSHR:
|
|
|
|
// (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
|
|
|
|
if ((Bit + *C) < TestRegSize) {
|
|
|
|
NextReg = TestReg;
|
|
|
|
Bit = Bit + *C;
|
|
|
|
}
|
|
|
|
break;
|
2020-02-04 06:26:17 +08:00
|
|
|
case TargetOpcode::G_XOR:
|
|
|
|
// We can walk through a G_XOR by inverting whether we use tbz/tbnz when
|
|
|
|
// appropriate.
|
|
|
|
//
|
|
|
|
// e.g. If x' = xor x, c, and the b-th bit is set in c then
|
|
|
|
//
|
|
|
|
// tbz x', b -> tbnz x, b
|
|
|
|
//
|
|
|
|
// Because x' only has the b-th bit set if x does not.
|
|
|
|
if ((*C >> Bit) & 1)
|
|
|
|
Invert = !Invert;
|
|
|
|
NextReg = TestReg;
|
|
|
|
break;
|
2020-01-31 08:17:21 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check if we found anything worth folding.
|
2020-02-05 07:10:53 +08:00
|
|
|
if (!NextReg.isValid())
|
|
|
|
return Reg;
|
2020-01-30 05:28:30 +08:00
|
|
|
Reg = NextReg;
|
|
|
|
}
|
2020-01-31 08:17:21 +08:00
|
|
|
|
2020-01-30 05:28:30 +08:00
|
|
|
return Reg;
|
|
|
|
}
|
|
|
|
|
2020-02-06 03:32:50 +08:00
|
|
|
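/// Emit a TB(N)Z testing bit \p Bit of \p TestReg and branching to \p DstMBB.
/// The test register is first walked through feeding instructions via
/// getTestBitReg, then moved to a W or X register class as the bit index
/// requires.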
MachineInstr *AArch64InstructionSelector::emitTestBit(
|
|
|
|
Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
|
|
|
|
MachineIRBuilder &MIB) const {
|
2020-02-12 02:22:38 +08:00
|
|
|
assert(TestReg.isValid());
|
2020-02-06 03:32:50 +08:00
|
|
|
assert(ProduceNonFlagSettingCondBr &&
|
|
|
|
"Cannot emit TB(N)Z with speculation tracking!");
|
2020-02-12 02:22:38 +08:00
|
|
|
MachineRegisterInfo &MRI = *MIB.getMRI();
|
2020-02-06 03:32:50 +08:00
|
|
|
|
|
|
|
// Attempt to optimize the test bit by walking over instructions.
|
|
|
|
TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
|
2020-02-12 02:22:38 +08:00
|
|
|
LLT Ty = MRI.getType(TestReg);
|
|
|
|
unsigned Size = Ty.getSizeInBits();
|
|
|
|
assert(!Ty.isVector() && "Expected a scalar!");
|
|
|
|
assert(Bit < 64 && "Bit is too large!");
|
2020-02-06 03:32:50 +08:00
|
|
|
|
|
|
|
// When the test register is a 64-bit register, we have to narrow to make
|
|
|
|
// TBNZW work.
|
2020-02-12 02:22:38 +08:00
|
|
|
bool UseWReg = Bit < 32;
|
|
|
|
unsigned NecessarySize = UseWReg ? 32 : 64;
|
2020-12-05 07:51:44 +08:00
|
|
|
if (Size != NecessarySize)
|
|
|
|
TestReg = moveScalarRegClass(
|
|
|
|
TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
|
|
|
|
MIB);
|
2020-02-06 03:32:50 +08:00
|
|
|
|
|
|
|
static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
|
|
|
|
{AArch64::TBZW, AArch64::TBNZW}};
|
|
|
|
unsigned Opc = OpcTable[UseWReg][IsNegative];
|
|
|
|
auto TestBitMI =
|
|
|
|
MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
|
|
|
|
constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
|
|
|
|
return &*TestBitMI;
|
|
|
|
}
|
|
|
|
|
2020-01-29 03:35:44 +08:00
|
|
|
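/// Try to fold \p AndInst, which feeds a compare-and-branch, into a single
/// TB(N)Z. This only applies when the AND's mask is a power of two.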
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
|
2020-12-02 03:58:19 +08:00
|
|
|
MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
|
|
|
|
MachineIRBuilder &MIB) const {
|
|
|
|
assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
|
2020-01-29 03:35:44 +08:00
|
|
|
// Given something like this:
|
|
|
|
//
|
|
|
|
// %x = ...Something...
|
|
|
|
// %one = G_CONSTANT i64 1
|
|
|
|
// %zero = G_CONSTANT i64 0
|
|
|
|
// %and = G_AND %x, %one
|
|
|
|
// %cmp = G_ICMP intpred(ne), %and, %zero
|
|
|
|
// %cmp_trunc = G_TRUNC %cmp
|
|
|
|
// G_BRCOND %cmp_trunc, %bb.3
|
|
|
|
//
|
|
|
|
// We want to try and fold the AND into the G_BRCOND and produce either a
|
|
|
|
// TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
|
|
|
|
//
|
|
|
|
// In this case, we'd get
|
|
|
|
//
|
|
|
|
// TBNZ %x %bb.3
|
|
|
|
//
|
|
|
|
|
|
|
|
// Check if the AND has a constant on its RHS which we can use as a mask.
|
|
|
|
// If it's a power of 2, then it's the same as checking a specific bit.
|
|
|
|
// (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
|
2020-12-02 03:58:19 +08:00
|
|
|
auto MaybeBit = getConstantVRegValWithLookThrough(
|
|
|
|
AndInst.getOperand(2).getReg(), *MIB.getMRI());
|
2020-11-03 22:50:17 +08:00
|
|
|
if (!MaybeBit)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
int32_t Bit = MaybeBit->Value.exactLogBase2();
|
|
|
|
if (Bit < 0)
|
2020-01-29 03:35:44 +08:00
|
|
|
return false;
|
2020-01-30 05:28:30 +08:00
|
|
|
|
2020-12-02 03:58:19 +08:00
|
|
|
Register TestReg = AndInst.getOperand(1).getReg();
|
2020-01-30 04:50:48 +08:00
|
|
|
|
2020-02-06 03:32:50 +08:00
|
|
|
// Emit a TB(N)Z.
|
|
|
|
emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
|
2020-01-29 03:35:44 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-12-01 09:21:21 +08:00
|
|
|
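/// Emit a CBZ (or CBNZ when \p IsNegative) comparing \p CompareReg against
/// zero and branching to \p DestMBB.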
MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
|
|
|
|
bool IsNegative,
|
|
|
|
MachineBasicBlock *DestMBB,
|
|
|
|
MachineIRBuilder &MIB) const {
|
|
|
|
assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
|
|
|
|
MachineRegisterInfo &MRI = *MIB.getMRI();
|
|
|
|
assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
|
|
|
|
AArch64::GPRRegBankID &&
|
|
|
|
"Expected GPRs only?");
|
|
|
|
auto Ty = MRI.getType(CompareReg);
|
|
|
|
unsigned Width = Ty.getSizeInBits();
|
|
|
|
assert(!Ty.isVector() && "Expected scalar only?");
|
|
|
|
assert(Width <= 64 && "Expected width to be at most 64?");
|
|
|
|
static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
|
|
|
|
{AArch64::CBNZW, AArch64::CBNZX}};
|
|
|
|
unsigned Opc = OpcTable[IsNegative][Width == 64];
|
|
|
|
auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
|
|
|
|
constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
|
|
|
|
return &*BranchMI;
|
|
|
|
}
|
|
|
|
|
2020-12-02 03:58:19 +08:00
|
|
|
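/// Lower a G_BRCOND fed by a G_FCMP by emitting a floating point compare
/// followed by one or two conditional branches.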
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
|
|
|
|
MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
|
|
|
|
assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
|
|
|
|
assert(I.getOpcode() == TargetOpcode::G_BRCOND);
|
|
|
|
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
|
|
|
|
// totally clean. Some of them require two branches to implement.
|
|
|
|
emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB);
|
|
|
|
AArch64CC::CondCode CC1, CC2;
|
|
|
|
changeFCMPPredToAArch64CC(
|
|
|
|
static_cast<CmpInst::Predicate>(FCmp.getOperand(1).getPredicate()), CC1,
|
|
|
|
CC2);
|
2017-03-28 00:35:31 +08:00
|
|
|
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
|
2020-12-02 03:58:19 +08:00
|
|
|
MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
|
|
|
|
if (CC2 != AArch64CC::AL)
|
|
|
|
MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2020-10-01 05:01:12 +08:00
|
|
|
|
2020-12-02 03:58:19 +08:00
|
|
|
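/// Try to lower a G_BRCOND fed by a G_ICMP into a single TB(N)Z or CB(N)Z
/// without materializing the compare. Returns false when no such form applies,
/// e.g. when only flag-setting branches may be produced.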
bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
|
|
|
|
MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
|
|
|
|
assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
|
|
|
|
assert(I.getOpcode() == TargetOpcode::G_BRCOND);
|
|
|
|
// Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
|
|
|
|
//
|
|
|
|
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
|
|
|
|
// instructions will not be produced, as they are conditional branch
|
|
|
|
// instructions that do not set flags.
|
|
|
|
if (!ProduceNonFlagSettingCondBr)
|
2017-03-28 00:35:31 +08:00
|
|
|
return false;
|
|
|
|
|
2020-12-02 03:58:19 +08:00
|
|
|
MachineRegisterInfo &MRI = *MIB.getMRI();
|
|
|
|
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
|
2020-10-01 05:01:12 +08:00
|
|
|
auto Pred =
|
2020-12-02 03:58:19 +08:00
|
|
|
static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
|
|
|
|
Register LHS = ICmp.getOperand(2).getReg();
|
|
|
|
Register RHS = ICmp.getOperand(3).getReg();
|
2020-10-01 05:01:12 +08:00
|
|
|
|
2020-12-02 03:58:19 +08:00
|
|
|
// We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
|
2019-07-11 03:21:43 +08:00
|
|
|
auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
|
2020-12-02 03:58:19 +08:00
|
|
|
MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
|
2020-02-08 01:07:47 +08:00
|
|
|
|
|
|
|
// When we can emit a TB(N)Z, prefer that.
|
|
|
|
//
|
|
|
|
// Handle non-commutative condition codes first.
|
|
|
|
// Note that we don't want to do this when we have a G_AND because it can
|
|
|
|
// become a tst. The tst will make the test bit in the TB(N)Z redundant.
|
2020-12-02 03:58:19 +08:00
|
|
|
if (VRegAndVal && !AndInst) {
|
2020-11-03 22:50:17 +08:00
|
|
|
int64_t C = VRegAndVal->Value.getSExtValue();
|
2020-02-08 01:07:47 +08:00
|
|
|
|
|
|
|
// When we have a greater-than comparison, we can just test if the msb is
|
|
|
|
// zero.
|
|
|
|
if (C == -1 && Pred == CmpInst::ICMP_SGT) {
|
|
|
|
uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
|
|
|
|
emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// When we have a less than comparison, we can just test if the msb is not
|
|
|
|
// zero.
|
|
|
|
if (C == 0 && Pred == CmpInst::ICMP_SLT) {
|
|
|
|
uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
|
|
|
|
emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-01 09:21:21 +08:00
|
|
|
// Attempt to handle commutative condition codes. Right now, that's only
|
|
|
|
// eq/ne.
|
|
|
|
if (ICmpInst::isEquality(Pred)) {
|
|
|
|
if (!VRegAndVal) {
|
|
|
|
std::swap(RHS, LHS);
|
|
|
|
VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
|
2020-12-02 03:58:19 +08:00
|
|
|
AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
|
2020-12-01 09:21:21 +08:00
|
|
|
}
|
2017-03-28 00:35:31 +08:00
|
|
|
|
2020-12-01 09:21:21 +08:00
|
|
|
if (VRegAndVal && VRegAndVal->Value == 0) {
|
|
|
|
// If there's a G_AND feeding into this branch, try to fold it away by
|
|
|
|
// emitting a TB(N)Z instead.
|
2020-12-02 03:58:19 +08:00
|
|
|
//
|
|
|
|
// Note: If we have LT, then it *is* possible to fold, but it wouldn't be
|
|
|
|
// beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
|
|
|
|
// would be redundant.
|
|
|
|
if (AndInst &&
|
|
|
|
tryOptAndIntoCompareBranch(
|
|
|
|
*AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
|
2020-12-01 09:21:21 +08:00
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2017-03-28 00:35:31 +08:00
|
|
|
|
2020-12-01 09:21:21 +08:00
|
|
|
// Otherwise, try to emit a CB(N)Z instead.
      auto LHSTy = MRI.getType(LHS);
      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
        emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
        I.eraseFromParent();
        return true;
      }
    }
  }

  return false;
}

bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
    return true;

  // Couldn't optimize. Emit a compare + a Bcc.
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  auto PredOp = ICmp.getOperand(1);
  emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
      static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  Register CondReg = I.getOperand(0).getReg();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
    CondReg = CCMI->getOperand(1).getReg();
    CCMI = MRI.getVRegDef(CondReg);
  }

  // Try to select the G_BRCOND using whatever is feeding the condition if
  // possible.
  MachineIRBuilder MIB(I);
  unsigned CCMIOpc = CCMI->getOpcode();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);

  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (ProduceNonFlagSettingCondBr) {
    emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
                I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
    return true;
  }

  // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
  auto TstMI =
      MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
  auto Bcc = MIB.buildInstr(AArch64::Bcc)
                 .addImm(AArch64CC::EQ)
                 .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
}

/// Returns the element immediate value of a vector shift operand if found.
/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
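/// For example (illustrative), a splat such as
///   %s:_(s32) = G_CONSTANT i32 3
///   %v:_(<4 x s32>) = G_BUILD_VECTOR %s, %s, %s, %s
/// yields the immediate 3.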
static Optional<int64_t> getVectorShiftImm(Register Reg,
                                           MachineRegisterInfo &MRI) {
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
  MachineInstr *OpMI = MRI.getVRegDef(Reg);
  assert(OpMI && "Expected to find a vreg def for vector shift operand");
  if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
    return None;

  // Check all operands are identical immediates.
  int64_t ImmVal = 0;
  for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
    auto VRegAndVal =
        getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
    if (!VRegAndVal)
      return None;

    if (Idx == 1)
      ImmVal = VRegAndVal->Value.getSExtValue();
    if (ImmVal != VRegAndVal->Value.getSExtValue())
      return None;
  }

  return ImmVal;
}

/// Matches and returns the shift immediate value for a SHL instruction given
/// a shift operand.
static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
                                         MachineRegisterInfo &MRI) {
  Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
  if (!ShiftImm)
    return None;
  // Check the immediate is in range for a SHL.
  int64_t Imm = *ShiftImm;
  if (Imm < 0)
    return None;
  switch (SrcTy.getElementType().getSizeInBits()) {
  default:
    LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
    return None;
  case 8:
    if (Imm > 7)
      return None;
    break;
  case 16:
    if (Imm > 15)
      return None;
    break;
  case 32:
    if (Imm > 31)
      return None;
    break;
  case 64:
    if (Imm > 63)
      return None;
    break;
  }
  return Imm;
}

bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // Check if we have a vector of constants on RHS that we can select as the
  // immediate form.
  Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);

  unsigned Opc = 0;
  if (Ty == LLT::vector(2, 64)) {
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  } else if (Ty == LLT::vector(4, 32)) {
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  } else if (Ty == LLT::vector(4, 16)) {
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
  } else if (Ty == LLT::vector(8, 16)) {
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
  } else if (Ty == LLT::vector(16, 8)) {
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
  } else if (Ty == LLT::vector(8, 8)) {
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  if (ImmVal)
    Shl.addImm(*ImmVal);
  else
    Shl.addUse(Src2Reg);
  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorAshrLshr(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;

  // We expect the immediate case to be lowered in the PostLegalCombiner to
  // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.

  // There is not a shift right register instruction, but the shift left
  // register instruction takes a signed value, where negative numbers specify
  // a right shift.
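  //
  // For example (illustrative), a v4s32 G_LSHR is selected roughly as:
  //   NEGv4i32 %neg, %amt
  //   USHLv4i32 %dst, %src, %neg
  // (SSHL instead of USHL for the arithmetic G_ASHR case).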

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC =
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
  if (Ty == LLT::vector(2, 64)) {
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
  } else if (Ty == LLT::vector(4, 32)) {
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
  } else if (Ty == LLT::vector(4, 16)) {
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
  } else if (Ty == LLT::vector(8, 16)) {
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
  } else if (Ty == LLT::vector(16, 8)) {
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
  } else if (Ty == LLT::vector(8, 8)) {
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  Register ListReg = I.getOperand(0).getReg();

  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
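
/// Materializes a GlobalValue or BlockAddress under the large code model as a
/// MOVZ plus three MOVKs, one 16-bit chunk at a time, e.g. (illustrative):
///   MOVZ %dst, #:abs_g0_nc:sym
///   MOVK %dst, #:abs_g1_nc:sym
///   MOVK %dst, #:abs_g2_nc:sym
///   MOVK %dst, #:abs_g3:sym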
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}

bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    // These shifts are legalized to have 64 bit shift amounts because we want
    // to take advantage of the existing imported selection patterns that
    // assume the immediates are s64s. However, if the shifted type is 32 bits
    // and for some reason we receive input GMIR that has an s64 shift amount
    // that's not a G_CONSTANT, insert a truncate so that we can still select
    // the s32 register-register variant.
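    //
    // For example (illustrative), for an s32 shift with a non-constant s64
    // amount %amt, a sub-register COPY is inserted so the W-register patterns
    // still apply:
    //   %amt32:gpr(s32) = COPY %amt.sub_32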
    Register SrcReg = I.getOperand(1).getReg();
    Register ShiftReg = I.getOperand(2).getReg();
    const LLT ShiftTy = MRI.getType(ShiftReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return false;
    assert(!ShiftTy.isVector() && "unexpected vector shift ty");
    if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
      return false;
    auto *AmtMI = MRI.getVRegDef(ShiftReg);
    assert(AmtMI && "could not find a vreg definition for shift amount");
    if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
      // Insert a subregister copy to implement a 64->32 trunc
      MachineIRBuilder MIB(I);
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));
    }
    return true;
  }
  case TargetOpcode::G_STORE:
    return contractCrossBankCopyIntoStore(I, MRI);
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    // For scalar loads of pointers, we try to convert the dest type from p0
    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
    // conversion, this should be ok because all users should have been
    // selected already, so the type doesn't matter for them.
    Register DstReg = I.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
    if (!DstTy.isPointer())
      return false;
    MRI.setType(DstReg, LLT::scalar(64));
    return true;
  }
  case AArch64::G_DUP: {
    // Convert the type from p0 to s64 to help selection.
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    if (!DstTy.getElementType().isPointer())
      return false;
    MachineIRBuilder MIB(I);
    auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
    MRI.setType(I.getOperand(0).getReg(),
                DstTy.changeElementType(LLT::scalar(64)));
    MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
    I.getOperand(1).setReg(NewSrc.getReg(0));
    return true;
  }
  default:
    return false;
  }
}

/// This lowering tries to look for G_PTR_ADD instructions and then converts
/// them to a standard G_ADD with a COPY on the source.
///
/// The motivation behind this is to expose the add semantics to the imported
/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
/// because the selector works bottom up, uses before defs. By the time we
/// end up trying to select a G_PTR_ADD, we should have already attempted to
/// fold this into addressing modes and were therefore unsuccessful.
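///
/// For example (illustrative):
///   %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
/// is rewritten to:
///   %intbase:gpr(s64) = G_PTRTOINT %base(p0)
///   %dst:gpr(s64) = G_ADD %intbase, %off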
bool AArch64InstructionSelector::convertPtrAddToAdd(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);
  if (PtrTy.getAddressSpace() != 0)
    return false;

  MachineIRBuilder MIB(I);
  const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
  // Set regbanks on the registers.
  if (PtrTy.isVector())
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
  else
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
  // %dst(intty) = G_ADD %intbase, off
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
    return false;
  }

  // Also take the opportunity here to try to do some optimization.
  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
  Register NegatedReg;
  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
    return true;
  I.getOperand(2).setReg(NegatedReg);
  I.setDesc(TII.get(TargetOpcode::G_SUB));
  return true;
}

bool AArch64InstructionSelector::earlySelectSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // We try to match the immediate variant of LSL, which is actually an alias
  // for a special case of UBFM. Otherwise, we fall back to the imported
  // selector which will match the register variant.
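  //
  // For example (illustrative), "LSL Xd, Xn, #4" is the alias of
  // "UBFM Xd, Xn, #60, #59", i.e. immr = (64 - 4) % 64 and imms = 63 - 4.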
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
  if (!VRegAndVal)
    return false;

  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  if (DstTy.isVector())
    return false;
  bool Is64Bit = DstTy.getSizeInBits() == 64;
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  MachineIRBuilder MIB(I);

  if (!Imm1Fn || !Imm2Fn)
    return false;

  auto NewI =
      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
                     {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});

  for (auto &RenderFn : *Imm1Fn)
    RenderFn(NewI);
  for (auto &RenderFn : *Imm2Fn)
    RenderFn(NewI);

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}

bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  // If we're storing a scalar, it doesn't matter what register bank that
  // scalar is on. All that matters is the size.
  //
  // So, if we see something like this (with a 32-bit scalar as an example):
  //
  // %x:gpr(s32) = ... something ...
  // %y:fpr(s32) = COPY %x:gpr(s32)
  // G_STORE %y:fpr(s32)
  //
  // We can fix this up into something like this:
  //
  // G_STORE %x:gpr(s32)
  //
  // And then continue the selection process normally.
  Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
  if (!DefDstReg.isValid())
    return false;
  LLT DefDstTy = MRI.getType(DefDstReg);
  Register StoreSrcReg = I.getOperand(0).getReg();
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);

  // If we get something strange like a physical register, then we shouldn't
  // go any further.
  if (!DefDstTy.isValid())
    return false;

  // Are the source and dst types the same size?
  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
    return false;

  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))
    return false;

  // We have a cross-bank copy, which is entering a store. Let's fold it.
  I.getOperand(0).setReg(DefDstReg);
  return true;
}

bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_BR: {
    // If the branch jumps to the fallthrough block, don't bother emitting it.
    // Only do this for -O0 for a good code size improvement, because when
    // optimizations are enabled we want to leave this choice to
    // MachineBlockPlacement.
    bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
    if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
      return false;
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;

    if (!IsZero)
      return false;

    Register DefReg = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(DefReg);
    if (Ty.getSizeInBits() == 64) {
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
    } else if (Ty.getSizeInBits() == 32) {
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
    } else
      return false;

    I.setDesc(TII.get(TargetOpcode::COPY));
    return true;
  }
  default:
    return false;
  }
}

bool AArch64InstructionSelector::select(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  const AArch64Subtarget *Subtarget =
      &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
  if (Subtarget->requiresStrictAlign()) {
    // We don't support this feature yet.
    LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
    return false;
  }

  unsigned Opcode = I.getOpcode();
  // G_PHI requires the same handling as PHI.
  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const RegClassOrRegBank &RegClassOrBank =
          MRI.getRegClassOrRegBank(DefReg);

      const TargetRegisterClass *DefRC =
          RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
      if (!DefRC) {
        if (!DefTy.isValid()) {
          LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
          return false;
        }
        const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
        DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
        if (!DefRC) {
          LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
          return false;
        }
      }

      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  // Try to do some lowering before we start instruction selecting. These
  // lowerings are purely transformations on the input G_MIR and so selection
  // must continue after any modification of the instruction.
  if (preISelLower(I)) {
    Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
  }

  // There may be patterns that the importer can't deal with optimally, but
  // that it still selects to a suboptimal sequence, so our custom C++
  // selection code later never has a chance to work on them. Therefore, we
  // have an early selection attempt here to give priority to certain
  // selection routines over the imported ones.
  if (earlySelect(I))
    return true;

  if (selectImpl(I, *CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND:
    return selectCompareBranch(I, MF, MRI);

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);

  case AArch64::G_ADD_LOW: {
    // This op may have been separated from its ADRP companion by the localizer
    // or some other code motion pass. Given that many CPUs will try to
    // macro fuse these operations anyway, select this into a MOVaddr pseudo
    // which will later be expanded into an ADRP+ADD pair after scheduling.
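    //
    // The expanded pair ends up as, e.g. (illustrative):
    //   adrp x0, sym
    //   add  x0, x0, :lo12:sym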
    MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
    if (BaseMI->getOpcode() != AArch64::ADRP) {
      I.setDesc(TII.get(AArch64::ADDXri));
      I.addOperand(MachineOperand::CreateImm(0));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }
    assert(TM.getCodeModel() == CodeModel::Small &&
           "Expected small code model");
    MachineIRBuilder MIB(I);
    auto Op1 = BaseMI->getOperand(1);
    auto Op2 = I.getOperand(2);
    auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
                       .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
                                         Op1.getTargetFlags())
                       .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
                                         Op2.getTargetFlags());
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
  }

  case TargetOpcode::G_BSWAP: {
    // Handle vector types for G_BSWAP directly.
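    //
    // For example (illustrative), a byte swap of each lane of a <4 x s32>
    // value is selected to "REV32 Vd.16B, Vn.16B", which reverses the bytes
    // within each 32-bit element.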
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstReg = I.getOperand(0).getReg();
|
2019-04-27 02:00:01 +08:00
|
|
|
LLT DstTy = MRI.getType(DstReg);
|
|
|
|
|
|
|
|
// We should only get vector types here; everything else is handled by the
|
|
|
|
// importer right now.
|
|
|
|
if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Only handle 4 and 2 element vectors for now.
|
|
|
|
// TODO: 16-bit elements.
|
|
|
|
unsigned NumElts = DstTy.getNumElements();
|
|
|
|
if (NumElts != 4 && NumElts != 2) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Choose the correct opcode for the supported types. Right now, that's
|
|
|
|
// v2s32, v4s32, and v2s64.
|
|
|
|
unsigned Opc = 0;
|
|
|
|
unsigned EltSize = DstTy.getElementType().getSizeInBits();
|
|
|
|
if (EltSize == 32)
|
|
|
|
Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
|
|
|
|
: AArch64::REV32v16i8;
|
|
|
|
else if (EltSize == 64)
|
|
|
|
Opc = AArch64::REV64v16i8;
|
|
|
|
|
|
|
|
// We should always get something by the time we get here...
|
|
|
|
assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
|
|
|
|
|
|
|
|
I.setDesc(TII.get(Opc));
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2016-10-19 03:47:57 +08:00
|
|
|
case TargetOpcode::G_FCONSTANT:
|
2016-10-11 05:49:42 +08:00
|
|
|
case TargetOpcode::G_CONSTANT: {
|
2016-10-19 03:47:57 +08:00
|
|
|
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
|
|
|
|
|
2019-06-22 00:43:50 +08:00
|
|
|
const LLT s8 = LLT::scalar(8);
|
|
|
|
const LLT s16 = LLT::scalar(16);
|
2016-10-19 03:47:57 +08:00
|
|
|
const LLT s32 = LLT::scalar(32);
|
|
|
|
const LLT s64 = LLT::scalar(64);
|
|
|
|
const LLT p0 = LLT::pointer(0, 64);
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
const Register DefReg = I.getOperand(0).getReg();
|
2016-10-19 03:47:57 +08:00
|
|
|
const LLT DefTy = MRI.getType(DefReg);
|
|
|
|
const unsigned DefSize = DefTy.getSizeInBits();
|
|
|
|
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
|
|
|
|
|
|
|
|
// FIXME: Redundant check, but even less readable when factored out.
|
|
|
|
if (isFP) {
|
|
|
|
if (Ty != s32 && Ty != s64) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
|
|
|
|
<< " constant, expected: " << s32 << " or " << s64
|
|
|
|
<< '\n');
|
2016-10-19 03:47:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (RB.getID() != AArch64::FPRRegBankID) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
|
|
|
|
<< " constant on bank: " << RB
|
|
|
|
<< ", expected: FPR\n");
|
2016-10-19 03:47:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
[globalisel][tablegen] Add support for fpimm and import of APInt/APFloat based ImmLeaf.
Summary:
There's only a tablegen testcase for IntImmLeaf and not a CodeGen one
because the relevant rules are rejected for other reasons at the moment.
On AArch64, it's because there's an SDNodeXForm attached to the operand.
On X86, it's because the rule either emits multiple instructions or has
another predicate using PatFrag which cannot easily be supported at the
same time.
Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar
Reviewed By: qcolombet
Subscribers: aemerson, javed.absar, igorb, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D36569
llvm-svn: 315761
2017-10-14 05:28:03 +08:00
|
|
|
|
|
|
|
// The case when we have 0.0 is covered by tablegen. Reject it here so we
|
|
|
|
// can be sure tablegen works correctly and isn't rescued by this code.
|
|
|
|
if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
|
|
|
|
return false;
|
2016-10-19 03:47:57 +08:00
|
|
|
} else {
|
2017-08-08 18:44:31 +08:00
|
|
|
// s32 and s64 are covered by tablegen.
|
2019-06-22 00:43:50 +08:00
|
|
|
if (Ty != p0 && Ty != s8 && Ty != s16) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
|
|
|
|
<< " constant, expected: " << s32 << ", " << s64
|
|
|
|
<< ", or " << p0 << '\n');
|
2016-10-19 03:47:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (RB.getID() != AArch64::GPRRegBankID) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
|
|
|
|
<< " constant on bank: " << RB
|
|
|
|
<< ", expected: GPR\n");
|
2016-10-19 03:47:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-22 00:43:50 +08:00
|
|
|
// We allow G_CONSTANT of types < 32b.
|
2016-10-19 03:47:57 +08:00
|
|
|
const unsigned MovOpc =
|
2019-06-22 00:43:50 +08:00
|
|
|
DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
|
2016-10-19 03:47:57 +08:00
|
|
|
|
|
|
|
if (isFP) {
|
2019-05-02 06:39:43 +08:00
|
|
|
// Either emit a FMOV, or emit a copy to emit a normal mov.
|
2016-10-19 03:47:57 +08:00
|
|
|
const TargetRegisterClass &GPRRC =
|
|
|
|
DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
|
|
|
|
const TargetRegisterClass &FPRRC =
|
|
|
|
DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
|
|
|
|
|
2019-05-02 06:39:43 +08:00
|
|
|
// Can we use a FMOV instruction to represent the immediate?
|
|
|
|
if (emitFMovForFConstant(I, MRI))
|
|
|
|
return true;
|
|
|
|
|
2020-06-16 07:14:47 +08:00
|
|
|
// For 64b values, emit a constant pool load instead.
|
|
|
|
if (DefSize == 64) {
|
|
|
|
auto *FPImm = I.getOperand(1).getFPImm();
|
|
|
|
MachineIRBuilder MIB(I);
|
|
|
|
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
|
|
|
|
if (!LoadMI) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
|
|
|
|
I.eraseFromParent();
|
|
|
|
return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
|
|
|
|
}
|
|
|
|
|
2019-05-02 06:39:43 +08:00
|
|
|
// Nope. Emit a copy and use a normal mov instead.
|
2019-06-25 00:16:12 +08:00
|
|
|
const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
|
2016-10-19 03:47:57 +08:00
|
|
|
MachineOperand &RegOp = I.getOperand(0);
|
|
|
|
RegOp.setReg(DefGPRReg);
|
2019-03-16 05:59:50 +08:00
|
|
|
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
|
|
|
|
MIB.buildCopy({DefReg}, {DefGPRReg});
|
2016-10-19 03:47:57 +08:00
|
|
|
|
|
|
|
if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
|
2016-10-19 03:47:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
MachineOperand &ImmOp = I.getOperand(1);
|
|
|
|
// FIXME: Is going through int64_t always correct?
|
|
|
|
ImmOp.ChangeToImmediate(
|
|
|
|
ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
|
[globalisel] Decouple src pattern operands from dst pattern operands.
Summary:
This isn't testable for AArch64 by itself so this patch also adds
support for constant immediates in the pattern and physical
register uses in the result.
The new IntOperandMatcher matches the constant in patterns such as
'(set $rd:GPR32, (G_XOR $rs:GPR32, -1))'. It's always safe to fold
immediates into an instruction so this is the first rule that will match
across multiple BB's.
The Renderer hierarchy is responsible for adding operands to the result
instruction. Renderers can copy operands (CopyRenderer) or add physical
registers (in particular %wzr and %xzr) to the result instruction
in any order (OperandMatchers now import the operand names from
SelectionDAG to allow renderers to access any operand). This allows us to
emit the result instruction for:
%1 = G_XOR %0, -1 --> %1 = ORNWrr %wzr, %0
%1 = G_XOR -1, %0 --> %1 = ORNWrr %wzr, %0
although the latter is untested since the matcher/importer has not been
taught about commutativity yet.
Added BuildMIAction which can build new instructions and mutate them where
possible. W.r.t the mutation aspect, MatchActions are now told the name of
an instruction they can recycle and BuildMIAction will emit mutation code
when the renderers are appropriate. They are appropriate when all operands
are rendered using CopyRenderer and the indices are the same as the matcher.
This currently assumes that all operands have at least one matcher.
Finally, this change also fixes a crash in
AArch64InstructionSelector::select() caused by an immediate operand
passing isImm() rather than isCImm(). This was uncovered by the other
changes and was detected by existing tests.
Depends on D29711
Reviewers: t.p.northover, ab, qcolombet, rovka, aditya_nandakumar, javed.absar
Reviewed By: rovka
Subscribers: aemerson, dberris, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D29712
llvm-svn: 296131
2017-02-24 23:43:30 +08:00
|
|
|
} else if (I.getOperand(1).isCImm()) {
|
2016-12-06 05:47:07 +08:00
|
|
|
uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
|
|
|
|
I.getOperand(1).ChangeToImmediate(Val);
|
[globalisel] Decouple src pattern operands from dst pattern operands.
Summary:
This isn't testable for AArch64 by itself so this patch also adds
support for constant immediates in the pattern and physical
register uses in the result.
The new IntOperandMatcher matches the constant in patterns such as
'(set $rd:GPR32, (G_XOR $rs:GPR32, -1))'. It's always safe to fold
immediates into an instruction so this is the first rule that will match
across multiple BB's.
The Renderer hierarchy is responsible for adding operands to the result
instruction. Renderers can copy operands (CopyRenderer) or add physical
registers (in particular %wzr and %xzr) to the result instruction
in any order (OperandMatchers now import the operand names from
SelectionDAG to allow renderers to access any operand). This allows us to
emit the result instruction for:
%1 = G_XOR %0, -1 --> %1 = ORNWrr %wzr, %0
%1 = G_XOR -1, %0 --> %1 = ORNWrr %wzr, %0
although the latter is untested since the matcher/importer has not been
taught about commutativity yet.
Added BuildMIAction which can build new instructions and mutate them where
possible. W.r.t the mutation aspect, MatchActions are now told the name of
an instruction they can recycle and BuildMIAction will emit mutation code
when the renderers are appropriate. They are appropriate when all operands
are rendered using CopyRenderer and the indices are the same as the matcher.
This currently assumes that all operands have at least one matcher.
Finally, this change also fixes a crash in
AArch64InstructionSelector::select() caused by an immediate operand
passing isImm() rather than isCImm(). This was uncovered by the other
changes and was detected by existing tests.
Depends on D29711
Reviewers: t.p.northover, ab, qcolombet, rovka, aditya_nandakumar, javed.absar
Reviewed By: rovka
Subscribers: aemerson, dberris, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D29712
llvm-svn: 296131
2017-02-24 23:43:30 +08:00
|
|
|
} else if (I.getOperand(1).isImm()) {
|
|
|
|
uint64_t Val = I.getOperand(1).getImm();
|
|
|
|
I.getOperand(1).ChangeToImmediate(Val);
|
2016-10-19 03:47:57 +08:00
|
|
|
}
|
|
|
|
|
2019-05-02 06:39:43 +08:00
|
|
|
I.setDesc(TII.get(MovOpc));
|
2016-10-19 03:47:57 +08:00
|
|
|
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
return true;
|
2016-10-11 05:49:42 +08:00
|
|
|
}
|
2017-07-21 06:58:38 +08:00
|
|
|
case TargetOpcode::G_EXTRACT: {
|
2019-07-24 06:05:13 +08:00
|
|
|
Register DstReg = I.getOperand(0).getReg();
|
|
|
|
Register SrcReg = I.getOperand(1).getReg();
|
|
|
|
LLT SrcTy = MRI.getType(SrcReg);
|
|
|
|
LLT DstTy = MRI.getType(DstReg);
|
2018-02-19 01:28:34 +08:00
|
|
|
(void)DstTy;
|
2018-02-19 01:03:02 +08:00
|
|
|
unsigned SrcSize = SrcTy.getSizeInBits();
|
2019-07-24 06:05:13 +08:00
|
|
|
|
|
|
|
if (SrcTy.getSizeInBits() > 64) {
|
|
|
|
// This should be an extract of an s128, which is like a vector extract.
|
|
|
|
if (SrcTy.getSizeInBits() != 128)
|
|
|
|
return false;
|
|
|
|
// Only support extracting 64 bits from an s128 at the moment.
|
|
|
|
if (DstTy.getSizeInBits() != 64)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
|
|
|
|
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
|
|
|
|
// Check we have the right regbank always.
|
|
|
|
assert(SrcRB.getID() == AArch64::FPRRegBankID &&
|
|
|
|
DstRB.getID() == AArch64::FPRRegBankID &&
|
|
|
|
"Wrong extract regbank!");
|
2019-07-24 09:59:44 +08:00
|
|
|
(void)SrcRB;
|
2019-07-24 06:05:13 +08:00
|
|
|
|
|
|
|
// Emit the same code as a vector extract.
|
|
|
|
// Offset must be a multiple of 64.
|
|
|
|
unsigned Offset = I.getOperand(2).getImm();
|
|
|
|
if (Offset % 64 != 0)
|
|
|
|
return false;
|
|
|
|
unsigned LaneIdx = Offset / 64;
|
|
|
|
MachineIRBuilder MIB(I);
|
|
|
|
MachineInstr *Extract = emitExtractVectorElt(
|
|
|
|
DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
|
|
|
|
if (!Extract)
|
|
|
|
return false;
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2017-07-21 06:58:38 +08:00
|
|
|
|
2018-02-19 01:03:02 +08:00
|
|
|
I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
|
2017-07-21 06:58:38 +08:00
|
|
|
MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
|
|
|
|
Ty.getSizeInBits() - 1);
|
|
|
|
|
2018-02-19 01:03:02 +08:00
|
|
|
if (SrcSize < 64) {
|
|
|
|
assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
|
|
|
|
"unexpected G_EXTRACT types");
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2019-07-24 06:05:13 +08:00
|
|
|
DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
|
2019-03-16 05:59:50 +08:00
|
|
|
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
|
2019-03-19 03:20:10 +08:00
|
|
|
MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
|
|
|
|
.addReg(DstReg, 0, AArch64::sub_32);
|
2017-07-21 06:58:38 +08:00
|
|
|
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
|
|
|
|
AArch64::GPR32RegClass, MRI);
|
|
|
|
I.getOperand(0).setReg(DstReg);
|
|
|
|
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
|
|
|
case TargetOpcode::G_INSERT: {
|
|
|
|
LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
|
2018-02-19 01:03:02 +08:00
|
|
|
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
|
|
|
|
unsigned DstSize = DstTy.getSizeInBits();
|
2017-07-21 06:58:38 +08:00
|
|
|
// Larger inserts are vectors, same-size ones should be something else by
|
|
|
|
// now (split up or turned into COPYs).
|
|
|
|
if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
|
|
|
|
return false;
|
|
|
|
|
2018-02-19 01:03:02 +08:00
|
|
|
I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
|
2017-07-21 06:58:38 +08:00
|
|
|
unsigned LSB = I.getOperand(3).getImm();
|
|
|
|
unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
|
2018-02-19 01:03:02 +08:00
|
|
|
I.getOperand(3).setImm((DstSize - LSB) % DstSize);
|
2017-07-21 06:58:38 +08:00
|
|
|
MachineInstrBuilder(MF, I).addImm(Width - 1);
|
|
|
|
|
2018-02-19 01:03:02 +08:00
|
|
|
if (DstSize < 64) {
|
|
|
|
assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
|
|
|
|
"unexpected G_INSERT types");
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
|
2017-07-21 06:58:38 +08:00
|
|
|
BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
|
|
|
|
TII.get(AArch64::SUBREG_TO_REG))
|
|
|
|
.addDef(SrcReg)
|
|
|
|
.addImm(0)
|
|
|
|
.addUse(I.getOperand(2).getReg())
|
|
|
|
.addImm(AArch64::sub_32);
|
|
|
|
RBI.constrainGenericRegister(I.getOperand(2).getReg(),
|
|
|
|
AArch64::GPR32RegClass, MRI);
|
|
|
|
I.getOperand(2).setReg(SrcReg);
|
|
|
|
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
2016-08-16 22:02:42 +08:00
|
|
|
case TargetOpcode::G_FRAME_INDEX: {
|
|
|
|
// allocas and G_FRAME_INDEX are only supported in addrspace(0).
|
2016-09-15 17:20:34 +08:00
|
|
|
if (Ty != LLT::pointer(0, 64)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
|
|
|
|
<< ", expected: " << LLT::pointer(0, 64) << '\n');
|
2016-08-16 22:02:42 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
I.setDesc(TII.get(AArch64::ADDXri));
|
|
|
|
|
|
|
|
// MOs for a #0 shifted immediate.
|
|
|
|
I.addOperand(MachineOperand::CreateImm(0));
|
|
|
|
I.addOperand(MachineOperand::CreateImm(0));
|
|
|
|
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
2016-10-11 05:50:00 +08:00
|
|
|
|
|
|
|
case TargetOpcode::G_GLOBAL_VALUE: {
|
|
|
|
auto GV = I.getOperand(1).getGlobal();
|
2019-08-09 17:32:38 +08:00
|
|
|
if (GV->isThreadLocal())
|
|
|
|
return selectTLSGlobalValue(I, MRI);
|
|
|
|
|
2019-08-01 04:14:09 +08:00
|
|
|
unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
|
2016-12-14 02:25:38 +08:00
|
|
|
if (OpFlags & AArch64II::MO_GOT) {
|
2016-10-11 05:50:00 +08:00
|
|
|
I.setDesc(TII.get(AArch64::LOADgot));
|
2016-12-14 02:25:38 +08:00
|
|
|
I.getOperand(1).setTargetFlags(OpFlags);
|
2018-01-19 03:21:27 +08:00
|
|
|
} else if (TM.getCodeModel() == CodeModel::Large) {
|
|
|
|
// Materialize the global using movz/movk instructions.
|
2018-07-31 08:09:02 +08:00
|
|
|
materializeLargeCMVal(I, GV, OpFlags);
|
2018-01-19 03:21:27 +08:00
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
2018-08-22 19:31:39 +08:00
|
|
|
} else if (TM.getCodeModel() == CodeModel::Tiny) {
|
|
|
|
I.setDesc(TII.get(AArch64::ADR));
|
|
|
|
I.getOperand(1).setTargetFlags(OpFlags);
|
2016-12-14 02:25:38 +08:00
|
|
|
} else {
|
2016-10-11 05:50:00 +08:00
|
|
|
I.setDesc(TII.get(AArch64::MOVaddr));
|
|
|
|
I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
|
|
|
|
MachineInstrBuilder MIB(MF, I);
|
|
|
|
MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
|
|
|
|
OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
|
|
|
|
}
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2019-06-06 15:58:37 +08:00
|
|
|
case TargetOpcode::G_ZEXTLOAD:
|
2016-07-30 00:56:16 +08:00
|
|
|
case TargetOpcode::G_LOAD:
|
|
|
|
case TargetOpcode::G_STORE: {
|
2019-06-06 15:58:37 +08:00
|
|
|
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
|
|
|
|
MachineIRBuilder MIB(I);
|
|
|
|
|
2016-09-09 19:46:34 +08:00
|
|
|
LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
|
2016-07-30 00:56:16 +08:00
|
|
|
|
2016-09-15 17:20:34 +08:00
|
|
|
if (PtrTy != LLT::pointer(0, 64)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
|
|
|
|
<< ", expected: " << LLT::pointer(0, 64) << '\n');
|
2016-07-30 00:56:16 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-12-05 13:52:07 +08:00
|
|
|
auto &MemOp = **I.memoperands_begin();
|
2020-09-10 00:45:54 +08:00
|
|
|
uint64_t MemSizeInBytes = MemOp.getSize();
|
2019-08-16 06:21:14 +08:00
|
|
|
if (MemOp.isAtomic()) {
|
2019-08-15 05:30:30 +08:00
|
|
|
// For now we just support s8 acquire loads to be able to compile stack
|
|
|
|
// protector code.
|
|
|
|
if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
|
2020-09-10 00:45:54 +08:00
|
|
|
MemSizeInBytes == 1) {
|
2019-08-15 05:30:30 +08:00
|
|
|
I.setDesc(TII.get(AArch64::LDARB));
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
|
2017-12-05 13:52:07 +08:00
|
|
|
return false;
|
|
|
|
}
|
2020-09-10 00:45:54 +08:00
|
|
|
unsigned MemSizeInBits = MemSizeInBytes * 8;
|
2017-12-05 13:52:07 +08:00
|
|
|
|
2017-03-28 02:14:20 +08:00
|
|
|
#ifndef NDEBUG
|
2020-09-10 11:23:59 +08:00
|
|
|
const Register PtrReg = I.getOperand(1).getReg();
|
2016-07-30 00:56:16 +08:00
|
|
|
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
|
2017-03-28 02:14:20 +08:00
|
|
|
// Sanity-check the pointer register.
|
2016-07-30 00:56:16 +08:00
|
|
|
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
|
|
|
|
"Load/Store pointer operand isn't a GPR");
|
2016-09-09 19:46:34 +08:00
|
|
|
assert(MRI.getType(PtrReg).isPointer() &&
|
|
|
|
"Load/Store pointer operand isn't a pointer");
|
2016-07-30 00:56:16 +08:00
|
|
|
#endif
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
const Register ValReg = I.getOperand(0).getReg();
|
2016-07-30 00:56:16 +08:00
|
|
|
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
|
|
|
|
|
2020-09-10 00:45:54 +08:00
|
|
|
// Helper lambda for partially selecting I. Either returns the original
|
|
|
|
// instruction with an updated opcode, or a new instruction.
|
|
|
|
auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
|
|
|
|
bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
|
|
|
|
const unsigned NewOpc =
|
|
|
|
selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
|
|
|
|
if (NewOpc == I.getOpcode())
|
|
|
|
return nullptr;
|
|
|
|
// Check if we can fold anything into the addressing mode.
|
2020-10-02 05:15:57 +08:00
|
|
|
auto AddrModeFns =
|
|
|
|
selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
|
2020-09-10 00:45:54 +08:00
|
|
|
if (!AddrModeFns) {
|
|
|
|
// Can't fold anything. Use the original instruction.
|
|
|
|
I.setDesc(TII.get(NewOpc));
|
|
|
|
I.addOperand(MachineOperand::CreateImm(0));
|
|
|
|
return &I;
|
2017-03-28 01:31:52 +08:00
|
|
|
}
|
|
|
|
|
2020-09-10 00:45:54 +08:00
|
|
|
// Folded something. Create a new instruction and return it.
|
|
|
|
auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
|
|
|
|
IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
|
|
|
|
NewInst.cloneMemRefs(I);
|
|
|
|
for (auto &Fn : *AddrModeFns)
|
|
|
|
Fn(NewInst);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return &*NewInst;
|
|
|
|
};
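// Illustrative example of the fold above (assumed MIR): when the address is a
// pointer add of a suitable constant, e.g.
//   %addr:gpr(p0) = G_PTR_ADD %base, %c   (with %c = G_CONSTANT i64 16)
//   %val:gpr(s64) = G_LOAD %addr(p0) :: (load 8)
// selectAddrModeIndexed can fold the offset into the load, giving roughly
//   %val:gpr64 = LDRXui %base, 2
// where the unsigned offset is scaled by the 8-byte access size, instead of
// computing %addr in a separate instruction.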
|
2017-03-28 01:31:56 +08:00
|
|
|
|
2020-09-10 00:45:54 +08:00
|
|
|
MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
|
|
|
|
if (!LoadStore)
|
|
|
|
return false;
|
2017-03-28 01:31:48 +08:00
|
|
|
|
|
|
|
// If we're storing a 0, use WZR/XZR.
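// E.g. (sketch): a G_STORE of a constant zero that was selected to STRWui ends
// up as  STRWui $wzr, %base, 0  rather than materializing 0 into a vreg first.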
|
2020-07-25 09:14:41 +08:00
|
|
|
if (Opcode == TargetOpcode::G_STORE) {
|
|
|
|
auto CVal = getConstantVRegValWithLookThrough(
|
2020-09-10 00:45:54 +08:00
|
|
|
LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
|
2020-07-25 09:14:41 +08:00
|
|
|
/*HandleFConstants = */ false);
|
|
|
|
if (CVal && CVal->Value == 0) {
|
2020-09-10 00:45:54 +08:00
|
|
|
switch (LoadStore->getOpcode()) {
|
2020-07-25 09:14:41 +08:00
|
|
|
case AArch64::STRWui:
|
|
|
|
case AArch64::STRHHui:
|
|
|
|
case AArch64::STRBBui:
|
2020-09-10 00:45:54 +08:00
|
|
|
LoadStore->getOperand(0).setReg(AArch64::WZR);
|
2020-07-25 09:14:41 +08:00
|
|
|
break;
|
|
|
|
case AArch64::STRXui:
|
2020-09-10 00:45:54 +08:00
|
|
|
LoadStore->getOperand(0).setReg(AArch64::XZR);
|
2020-07-25 09:14:41 +08:00
|
|
|
break;
|
2020-07-25 07:57:37 +08:00
|
|
|
}
|
2017-03-28 01:31:48 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-06 15:58:37 +08:00
|
|
|
if (IsZExtLoad) {
|
2020-09-10 00:45:54 +08:00
|
|
|
// The zextload from a smaller type to i32 should be handled by the
|
|
|
|
// importer.
|
|
|
|
if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
|
2019-06-06 15:58:37 +08:00
|
|
|
return false;
|
|
|
|
// If we have a ZEXTLOAD then change the load to produce a narrower 32-bit reg
|
2020-09-10 00:45:54 +08:00
|
|
|
// and zero-extend the result with SUBREG_TO_REG.
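// Sketch of the resulting sequence (assumed MIR, for illustration):
//   %d:gpr(s64) = G_ZEXTLOAD %p(p0) :: (load 4)
// becomes roughly
//   %w:gpr32 = LDRWui %p, 0
//   %d:gpr64all = SUBREG_TO_REG 0, %w, %subreg.sub_32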
|
2019-06-25 00:16:12 +08:00
|
|
|
Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
|
2020-09-10 00:45:54 +08:00
|
|
|
Register DstReg = LoadStore->getOperand(0).getReg();
|
|
|
|
LoadStore->getOperand(0).setReg(LdReg);
|
2019-06-06 15:58:37 +08:00
|
|
|
|
2020-09-10 00:45:54 +08:00
|
|
|
MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
|
2019-06-06 15:58:37 +08:00
|
|
|
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
|
|
|
|
.addImm(0)
|
|
|
|
.addUse(LdReg)
|
|
|
|
.addImm(AArch64::sub_32);
|
2020-09-10 00:45:54 +08:00
|
|
|
constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
|
2019-06-06 15:58:37 +08:00
|
|
|
return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
|
|
|
|
MRI);
|
|
|
|
}
|
2020-09-10 00:45:54 +08:00
|
|
|
return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
|
2016-08-16 22:37:46 +08:00
|
|
|
}
|
|
|
|
|
2017-02-09 05:22:25 +08:00
|
|
|
case TargetOpcode::G_SMULH:
|
|
|
|
case TargetOpcode::G_UMULH: {
|
|
|
|
// Reject the various things we don't support yet.
|
|
|
|
if (unsupportedBinOp(I, RBI, MRI, TRI))
|
|
|
|
return false;
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
const Register DefReg = I.getOperand(0).getReg();
|
2017-02-09 05:22:25 +08:00
|
|
|
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
|
|
|
|
|
|
|
|
if (RB.getID() != AArch64::GPRRegBankID) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
|
2017-02-09 05:22:25 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Ty != LLT::scalar(64)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
|
|
|
|
<< ", expected: " << LLT::scalar(64) << '\n');
|
2017-02-09 05:22:25 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
|
|
|
|
: AArch64::UMULHrr;
|
|
|
|
I.setDesc(TII.get(NewOpc));
|
|
|
|
|
|
|
|
// Now that we selected an opcode, we need to constrain the register
|
|
|
|
// operands to use appropriate classes.
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
2020-09-22 06:28:04 +08:00
|
|
|
case TargetOpcode::G_LSHR:
|
2019-04-10 05:22:43 +08:00
|
|
|
case TargetOpcode::G_ASHR:
|
|
|
|
if (MRI.getType(I.getOperand(0).getReg()).isVector())
|
2020-09-22 06:28:04 +08:00
|
|
|
return selectVectorAshrLshr(I, MRI);
|
2019-04-10 05:22:43 +08:00
|
|
|
LLVM_FALLTHROUGH;
|
2016-08-16 22:02:47 +08:00
|
|
|
case TargetOpcode::G_SHL:
|
2019-04-10 05:22:43 +08:00
|
|
|
if (Opcode == TargetOpcode::G_SHL &&
|
|
|
|
MRI.getType(I.getOperand(0).getReg()).isVector())
|
|
|
|
return selectVectorSHL(I, MRI);
|
|
|
|
LLVM_FALLTHROUGH;
|
2020-09-22 06:28:04 +08:00
|
|
|
case TargetOpcode::G_FADD:
|
|
|
|
case TargetOpcode::G_FSUB:
|
|
|
|
case TargetOpcode::G_FMUL:
|
|
|
|
case TargetOpcode::G_FDIV:
|
|
|
|
case TargetOpcode::G_OR: {
|
2016-08-16 22:37:40 +08:00
|
|
|
// Reject the various things we don't support yet.
|
|
|
|
if (unsupportedBinOp(I, RBI, MRI, TRI))
|
2016-07-29 01:15:15 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
const unsigned OpSize = Ty.getSizeInBits();
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
const Register DefReg = I.getOperand(0).getReg();
|
2016-07-27 22:31:55 +08:00
|
|
|
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
|
|
|
|
|
|
|
|
const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
|
|
|
|
if (NewOpc == I.getOpcode())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
I.setDesc(TII.get(NewOpc));
|
|
|
|
// FIXME: Should the type be always reset in setDesc?
|
|
|
|
|
|
|
|
// Now that we selected an opcode, we need to constrain the register
|
|
|
|
// operands to use appropriate classes.
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
2016-10-12 04:50:21 +08:00
|
|
|
|
[globalisel] Rename G_GEP to G_PTR_ADD
Summary:
G_GEP is rather poorly named. It's a simple pointer+scalar addition and
doesn't support any of the complexities of getelementptr. I therefore
propose that we rename it. There's a G_PTR_MASK so let's follow that
convention and go with G_PTR_ADD
Reviewers: volkan, aditya_nandakumar, bogner, rovka, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69734
2019-11-02 04:18:00 +08:00
|
|
|
case TargetOpcode::G_PTR_ADD: {
|
2019-07-25 07:11:01 +08:00
|
|
|
MachineIRBuilder MIRBuilder(I);
|
|
|
|
emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
|
|
|
|
MIRBuilder);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2020-12-04 06:31:43 +08:00
|
|
|
case TargetOpcode::G_SADDO:
|
|
|
|
case TargetOpcode::G_UADDO:
|
|
|
|
case TargetOpcode::G_SSUBO: {
|
|
|
|
// Emit the operation and get the correct condition code.
|
2019-03-15 06:54:29 +08:00
|
|
|
MachineIRBuilder MIRBuilder(I);
|
2020-12-04 06:31:43 +08:00
|
|
|
auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
|
|
|
|
I.getOperand(2), I.getOperand(3), MIRBuilder);
|
2019-03-15 06:54:29 +08:00
|
|
|
|
|
|
|
// Now, put the overflow result in the register given by the first operand
|
2020-12-04 06:31:43 +08:00
|
|
|
// to the overflow op. CSINC increments the result when the predicate is
|
|
|
|
// false, so to get the increment when it's true, we need to use the
|
|
|
|
// inverse. In this case, we want to increment when carry is set.
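// Sketch (illustrative): for
//   %res:_(s32), %ovf:_(s1) = G_UADDO %a, %b
// this emits roughly
//   %res:gpr32 = ADDSWrr %a, %b            (sets NZCV)
//   %ovf:gpr32 = CSINCWr $wzr, $wzr, <inverse of the overflow condition>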
|
|
|
|
Register ZReg = AArch64::WZR;
|
2020-10-02 05:15:57 +08:00
|
|
|
auto CsetMI = MIRBuilder
|
|
|
|
.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
|
2020-12-04 06:31:43 +08:00
|
|
|
{ZReg, ZReg})
|
|
|
|
.addImm(getInvertedCondCode(OpAndCC.second));
|
2019-03-15 06:54:29 +08:00
|
|
|
constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-05-16 06:33:01 +08:00
|
|
|
case TargetOpcode::G_PTRMASK: {
|
|
|
|
Register MaskReg = I.getOperand(2).getReg();
|
2020-11-03 22:50:17 +08:00
|
|
|
Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
|
2020-05-16 06:33:01 +08:00
|
|
|
// TODO: Implement arbitrary cases
|
|
|
|
if (!MaskVal || !isShiftedMask_64(*MaskVal))
|
2017-02-15 04:56:29 +08:00
|
|
|
return false;
|
|
|
|
|
2020-05-16 06:33:01 +08:00
|
|
|
uint64_t Mask = *MaskVal;
|
2017-02-15 04:56:29 +08:00
|
|
|
I.setDesc(TII.get(AArch64::ANDXri));
|
2020-05-16 06:33:01 +08:00
|
|
|
I.getOperand(2).ChangeToImmediate(
|
|
|
|
AArch64_AM::encodeLogicalImmediate(Mask, 64));
|
2017-02-15 04:56:29 +08:00
|
|
|
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
2016-11-01 02:31:09 +08:00
|
|
|
case TargetOpcode::G_PTRTOINT:
|
2016-10-13 06:49:15 +08:00
|
|
|
case TargetOpcode::G_TRUNC: {
|
|
|
|
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
|
|
|
|
const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
const Register DstReg = I.getOperand(0).getReg();
|
|
|
|
const Register SrcReg = I.getOperand(1).getReg();
|
2016-10-13 06:49:15 +08:00
|
|
|
|
|
|
|
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
|
|
|
|
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
|
|
|
|
|
|
|
|
if (DstRB.getID() != SrcRB.getID()) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
|
2016-10-13 06:49:15 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DstRB.getID() == AArch64::GPRRegBankID) {
|
|
|
|
const TargetRegisterClass *DstRC =
|
|
|
|
getRegClassForTypeOnBank(DstTy, DstRB, RBI);
|
|
|
|
if (!DstRC)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const TargetRegisterClass *SrcRC =
|
|
|
|
getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
|
|
|
|
if (!SrcRC)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
|
|
|
|
!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
|
2016-10-13 06:49:15 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DstRC == SrcRC) {
|
|
|
|
// Nothing to be done
|
2017-06-27 18:11:39 +08:00
|
|
|
} else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
|
|
|
|
SrcTy == LLT::scalar(64)) {
|
|
|
|
llvm_unreachable("TableGen can import this case");
|
|
|
|
return false;
|
2016-10-13 06:49:15 +08:00
|
|
|
} else if (DstRC == &AArch64::GPR32RegClass &&
|
|
|
|
SrcRC == &AArch64::GPR64RegClass) {
|
|
|
|
I.getOperand(1).setSubReg(AArch64::sub_32);
|
|
|
|
} else {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
|
2016-10-13 06:49:15 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
I.setDesc(TII.get(TargetOpcode::COPY));
|
|
|
|
return true;
|
|
|
|
} else if (DstRB.getID() == AArch64::FPRRegBankID) {
|
|
|
|
if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
|
|
|
|
I.setDesc(TII.get(AArch64::XTNv4i16));
|
|
|
|
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
return true;
|
|
|
|
}
|
2019-07-24 06:05:13 +08:00
|
|
|
|
|
|
|
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
|
|
|
|
MachineIRBuilder MIB(I);
|
|
|
|
MachineInstr *Extract = emitExtractVectorElt(
|
|
|
|
DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
|
|
|
|
if (!Extract)
|
|
|
|
return false;
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2020-06-09 03:02:04 +08:00
|
|
|
|
|
|
|
// We might have a vector G_PTRTOINT, in which case just emit a COPY.
|
|
|
|
if (Opcode == TargetOpcode::G_PTRTOINT) {
|
|
|
|
assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
|
|
|
|
I.setDesc(TII.get(TargetOpcode::COPY));
|
|
|
|
return true;
|
|
|
|
}
|
2016-10-13 06:49:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-10-12 04:50:21 +08:00
|
|
|
case TargetOpcode::G_ANYEXT: {
|
2019-06-25 00:16:12 +08:00
|
|
|
const Register DstReg = I.getOperand(0).getReg();
|
|
|
|
const Register SrcReg = I.getOperand(1).getReg();
|
2016-10-12 04:50:21 +08:00
|
|
|
|
2016-10-12 11:57:49 +08:00
|
|
|
const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
|
|
|
|
if (RBDst.getID() != AArch64::GPRRegBankID) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
|
|
|
|
<< ", expected: GPR\n");
|
2016-10-12 11:57:49 +08:00
|
|
|
return false;
|
|
|
|
}
|
2016-10-12 04:50:21 +08:00
|
|
|
|
2016-10-12 11:57:49 +08:00
|
|
|
const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
|
|
|
|
if (RBSrc.getID() != AArch64::GPRRegBankID) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
|
|
|
|
<< ", expected: GPR\n");
|
2016-10-12 04:50:21 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
|
|
|
|
|
|
|
|
if (DstSize == 0) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
|
2016-10-12 04:50:21 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-10-12 11:57:49 +08:00
|
|
|
if (DstSize != 64 && DstSize > 32) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
|
|
|
|
<< ", expected: 32 or 64\n");
|
2016-10-12 04:50:21 +08:00
|
|
|
return false;
|
|
|
|
}
|
2016-10-12 11:57:49 +08:00
|
|
|
// At this point G_ANYEXT is just like a plain COPY, but we need
|
|
|
|
// to explicitly form the 64-bit value if needed.
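// Sketch (illustrative): anyextending %w(s32) to s64 becomes
//   %tmp:gpr64all = SUBREG_TO_REG 0, %w, %subreg.sub_32
// followed by a plain COPY of %tmp into the destination.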
|
|
|
|
if (DstSize > 32) {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
|
2016-10-12 11:57:49 +08:00
|
|
|
BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
|
|
|
|
.addDef(ExtSrc)
|
|
|
|
.addImm(0)
|
|
|
|
.addUse(SrcReg)
|
|
|
|
.addImm(AArch64::sub_32);
|
|
|
|
I.getOperand(1).setReg(ExtSrc);
|
2016-10-12 04:50:21 +08:00
|
|
|
}
|
2016-10-12 11:57:49 +08:00
|
|
|
return selectCopy(I, TII, MRI, TRI, RBI);
|
2016-10-12 04:50:21 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
case TargetOpcode::G_ZEXT:
|
2020-06-17 13:11:41 +08:00
|
|
|
case TargetOpcode::G_SEXT_INREG:
|
2016-10-12 04:50:21 +08:00
|
|
|
case TargetOpcode::G_SEXT: {
|
|
|
|
unsigned Opcode = I.getOpcode();
|
2020-06-17 13:11:41 +08:00
|
|
|
const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
|
2019-06-25 00:16:12 +08:00
|
|
|
const Register DefReg = I.getOperand(0).getReg();
|
2020-06-17 13:11:41 +08:00
|
|
|
Register SrcReg = I.getOperand(1).getReg();
|
2019-07-26 08:01:09 +08:00
|
|
|
const LLT DstTy = MRI.getType(DefReg);
|
|
|
|
const LLT SrcTy = MRI.getType(SrcReg);
|
|
|
|
unsigned DstSize = DstTy.getSizeInBits();
|
|
|
|
unsigned SrcSize = SrcTy.getSizeInBits();
|
2016-10-12 04:50:21 +08:00
|
|
|
|
2020-06-17 13:11:41 +08:00
|
|
|
// SEXT_INREG has the same src reg size as dst; the size of the value to be
|
|
|
|
// extended is encoded in the immediate.
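// E.g. (sketch):  %d:gpr(s32) = G_SEXT_INREG %x, 8  is selected below as
//   %d:gpr32 = SBFMWri %x, 0, 7
// i.e. a sign-extension of the low 8 bits.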
|
|
|
|
if (Opcode == TargetOpcode::G_SEXT_INREG)
|
|
|
|
SrcSize = I.getOperand(2).getImm();
|
|
|
|
|
2019-10-28 08:04:47 +08:00
|
|
|
if (DstTy.isVector())
|
|
|
|
return false; // Should be handled by imported patterns.
|
|
|
|
|
2019-07-26 08:01:09 +08:00
|
|
|
assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
|
|
|
|
AArch64::GPRRegBankID &&
|
|
|
|
"Unexpected ext regbank");
|
2016-10-12 04:50:21 +08:00
|
|
|
|
2019-07-26 08:01:09 +08:00
|
|
|
MachineIRBuilder MIB(I);
|
2016-10-12 04:50:21 +08:00
|
|
|
MachineInstr *ExtI;
|
2019-07-26 08:01:09 +08:00
|
|
|
|
2019-08-03 05:15:36 +08:00
|
|
|
// If we're extending the result of a load whose destination type is
|
|
|
|
// smaller than 32 bits, then this zext is redundant: GPR32 is the smallest
|
|
|
|
// GPR register on AArch64, and all narrower loads automatically
|
|
|
|
// zero-extend the upper bits. E.g.
|
|
|
|
// %v(s8) = G_LOAD %p, :: (load 1)
|
|
|
|
// %v2(s32) = G_ZEXT %v(s8)
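// In that case the G_ZEXT can simply be selected as a COPY (the selectCopy
// call just below).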
|
|
|
|
if (!IsSigned) {
|
|
|
|
auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
|
2020-06-16 07:32:01 +08:00
|
|
|
bool IsGPR =
|
|
|
|
RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
|
|
|
|
if (LoadMI && IsGPR) {
|
2019-08-03 05:15:36 +08:00
|
|
|
const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
|
|
|
|
unsigned BytesLoaded = MemOp->getSize();
|
|
|
|
if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
|
|
|
|
return selectCopy(I, TII, MRI, TRI, RBI);
|
|
|
|
}
|
2020-06-16 07:32:01 +08:00
|
|
|
|
|
|
|
// If we are zero extending from 32 bits to 64 bits, it's possible that
|
|
|
|
// the instruction implicitly does the zero extend for us. In that case,
|
|
|
|
// we can just emit a SUBREG_TO_REG.
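// E.g. (sketch): if %w(s32) comes from an operation whose selected form is
// already known to zero the upper 32 bits (see isDef32), then
//   %x:gpr(s64) = G_ZEXT %w(s32)
// is selected as just  SUBREG_TO_REG 0, %w, %subreg.sub_32.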
|
|
|
|
if (IsGPR && SrcSize == 32 && DstSize == 64) {
|
|
|
|
// Unlike with the G_LOAD case, we don't want to look through copies
|
|
|
|
// here.
|
|
|
|
MachineInstr *Def = MRI.getVRegDef(SrcReg);
|
|
|
|
if (Def && isDef32(*Def)) {
|
|
|
|
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
|
|
|
|
.addImm(0)
|
|
|
|
.addUse(SrcReg)
|
|
|
|
.addImm(AArch64::sub_32);
|
|
|
|
|
|
|
|
if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
|
|
|
|
MRI)) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
|
|
|
|
MRI)) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2019-08-03 05:15:36 +08:00
|
|
|
}
|
|
|
|
|
2019-07-26 08:01:09 +08:00
|
|
|
if (DstSize == 64) {
|
2020-06-17 13:11:41 +08:00
|
|
|
if (Opcode != TargetOpcode::G_SEXT_INREG) {
|
|
|
|
// FIXME: Can we avoid manually doing this?
|
|
|
|
if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
|
|
|
|
MRI)) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
|
|
|
|
<< " operand\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
|
|
|
|
{&AArch64::GPR64RegClass}, {})
|
|
|
|
.addImm(0)
|
|
|
|
.addUse(SrcReg)
|
|
|
|
.addImm(AArch64::sub_32)
|
|
|
|
.getReg(0);
|
2016-10-12 04:50:21 +08:00
|
|
|
}
|
|
|
|
|
2019-07-26 08:01:09 +08:00
|
|
|
ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
|
2020-10-02 05:15:57 +08:00
|
|
|
{DefReg}, {SrcReg})
|
|
|
|
.addImm(0)
|
|
|
|
.addImm(SrcSize - 1);
|
2019-07-26 08:01:09 +08:00
|
|
|
} else if (DstSize <= 32) {
|
|
|
|
ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
|
2020-10-02 05:15:57 +08:00
|
|
|
{DefReg}, {SrcReg})
|
|
|
|
.addImm(0)
|
|
|
|
.addImm(SrcSize - 1);
|
2016-10-12 04:50:21 +08:00
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2016-10-12 06:29:23 +08:00
|
|
|
|
2016-10-13 06:49:11 +08:00
|
|
|
case TargetOpcode::G_SITOFP:
|
|
|
|
case TargetOpcode::G_UITOFP:
|
|
|
|
case TargetOpcode::G_FPTOSI:
|
|
|
|
case TargetOpcode::G_FPTOUI: {
|
|
|
|
const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
|
|
|
|
SrcTy = MRI.getType(I.getOperand(1).getReg());
|
|
|
|
const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
|
|
|
|
if (NewOpc == Opcode)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
I.setDesc(TII.get(NewOpc));
|
|
|
|
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-05-19 07:23:53 +08:00
|
|
|
case TargetOpcode::G_FREEZE:
|
|
|
|
return selectCopy(I, TII, MRI, TRI, RBI);
|
2016-10-13 06:49:11 +08:00
|
|
|
|
2016-10-12 06:29:23 +08:00
|
|
|
case TargetOpcode::G_INTTOPTR:
|
Re-commit: [globalisel][tablegen] Support zero-instruction emission.
Summary:
Support the case where an operand of a pattern is also the whole of the
result pattern. In this case the original result and all its uses must be
replaced by the operand. However, register class restrictions can require
a COPY. This patch handles both cases by always emitting the copy and
leaving it for the register allocator to optimize.
The previous commit failed on Windows machines due to a flaw in the sort
predicate which allowed both A < B < C and B == C to be satisfied
simultaneously. The cause of this was some sloppiness in the priority order of
G_CONSTANT instructions compared to other instructions. These had equal priority
because it makes no difference, however there were operands had higher priority
than G_CONSTANT but lower priority than any other instruction. As a result, a
priority order between G_CONSTANT and other instructions must be enforced to
ensure the predicate defines a strict weak order.
Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar
Subscribers: javed.absar, kristof.beyls, igorb, llvm-commits
Differential Revision: https://reviews.llvm.org/D36084
llvm-svn: 311076
2017-08-17 17:26:14 +08:00
|
|
|
// The importer is currently unable to import pointer types since they
|
|
|
|
// didn't exist in SelectionDAG.
|
2017-08-15 23:10:31 +08:00
|
|
|
return selectCopy(I, TII, MRI, TRI, RBI);
|
2017-08-15 21:50:09 +08:00
|
|
|
|
2017-08-17 17:26:14 +08:00
|
|
|
case TargetOpcode::G_BITCAST:
|
|
|
|
// Imported SelectionDAG rules can handle every bitcast except those that
|
|
|
|
// bitcast from a type to the same type. Ideally, these shouldn't occur
|
2019-04-12 04:32:24 +08:00
|
|
|
// but we might not run an optimizer that deletes them. The other exception
|
|
|
|
// is bitcasts involving pointer types, as SelectionDAG has no knowledge
|
|
|
|
// of them.
|
|
|
|
return selectCopy(I, TII, MRI, TRI, RBI);
|
2017-08-17 17:26:14 +08:00
|
|
|
|
2016-11-08 08:45:29 +08:00
|
|
|
case TargetOpcode::G_SELECT: {
|
|
|
|
if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
|
|
|
|
<< ", expected: " << LLT::scalar(1) << '\n');
|
2016-11-08 08:45:29 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
const Register CondReg = I.getOperand(1).getReg();
|
|
|
|
const Register TReg = I.getOperand(2).getReg();
|
|
|
|
const Register FReg = I.getOperand(3).getReg();
|
2016-11-08 08:45:29 +08:00
|
|
|
|
2019-07-03 03:44:16 +08:00
|
|
|
if (tryOptSelect(I))
|
2019-06-06 07:46:16 +08:00
|
|
|
return true;
|
2016-11-08 08:45:29 +08:00
|
|
|
|
2020-10-15 03:32:33 +08:00
|
|
|
// Make sure to use an unused vreg instead of wzr, so that the peephole
|
|
|
|
// optimizations will be able to optimize these.
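// Sketch of what gets emitted here (illustrative):
//   %dead:gpr32 = ANDSWri %cond, <logical imm for 1>   (sets NZCV from bit 0)
// followed by the CSEL/CSINC/CSINV that emitSelect produces on the NE
// condition.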
|
[AArch64][GlobalISel] Select CSINC and CSINV for G_SELECT with constants
Select the following:
- G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
- G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
- G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
- G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
- G_SELECT cc, t, 1 -> CSINC t, zreg, cc
- G_SELECT cc, t, -1 -> CSINC t, zreg, cc
(IR example: https://godbolt.org/z/YfPna9)
These correspond to a bunch of the AArch64csel patterns in AArch64InstrInfo.td.
Unfortunately, it doesn't seem like we can import patterns that use NZCV like
those ones do. E.g.
```
def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
(CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
```
So we have to manually select these for now.
This replaces `selectSelectOpc` with an `emitSelect` function, which performs
these optimizations.
Differential Revision: https://reviews.llvm.org/D90701
2020-11-04 03:08:08 +08:00
|
|
|
MachineIRBuilder MIB(I);
|
2020-10-15 03:32:33 +08:00
|
|
|
Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
|
2020-11-04 03:08:08 +08:00
|
|
|
auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
|
|
|
|
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
|
|
|
|
constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
|
|
|
|
if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
|
|
|
|
return false;
|
2016-11-08 08:45:29 +08:00
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2016-10-13 06:49:04 +08:00
|
|
|
case TargetOpcode::G_ICMP: {
|
2019-04-10 05:22:43 +08:00
|
|
|
if (Ty.isVector())
|
|
|
|
return selectVectorICmp(I, MRI);
|
|
|
|
|
2017-08-01 01:00:16 +08:00
|
|
|
if (Ty != LLT::scalar(32)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
|
|
|
|
<< ", expected: " << LLT::scalar(32) << '\n');
|
2016-10-13 06:49:04 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-06-18 02:40:06 +08:00
|
|
|
MachineIRBuilder MIRBuilder(I);
|
2020-10-21 04:17:39 +08:00
|
|
|
auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
|
|
|
|
emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
|
|
|
|
MIRBuilder);
|
2020-04-23 07:43:31 +08:00
|
|
|
emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
|
2016-10-13 06:49:04 +08:00
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-10-13 06:49:07 +08:00
|
|
|
case TargetOpcode::G_FCMP: {
|
2020-09-30 09:23:02 +08:00
|
|
|
MachineIRBuilder MIRBuilder(I);
|
|
|
|
CmpInst::Predicate Pred =
|
|
|
|
static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
|
|
|
|
if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
|
|
|
|
MIRBuilder) ||
|
|
|
|
!emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
|
2016-10-13 06:49:07 +08:00
|
|
|
return false;
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2017-02-09 01:57:27 +08:00
|
|
|
case TargetOpcode::G_VASTART:
|
|
|
|
return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
|
|
|
|
: selectVaStartAAPCS(I, MF, MRI);
|
2019-04-30 04:58:17 +08:00
|
|
|
case TargetOpcode::G_INTRINSIC:
|
|
|
|
return selectIntrinsic(I, MRI);
|
2018-04-25 22:43:59 +08:00
|
|
|
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
|
2019-04-03 03:57:26 +08:00
|
|
|
return selectIntrinsicWithSideEffects(I, MRI);
|
2018-07-31 08:09:02 +08:00
|
|
|
case TargetOpcode::G_IMPLICIT_DEF: {
|
2017-07-13 01:32:32 +08:00
|
|
|
I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
|
2018-02-02 09:44:43 +08:00
|
|
|
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
|
2019-06-25 00:16:12 +08:00
|
|
|
const Register DstReg = I.getOperand(0).getReg();
|
2018-02-02 09:44:43 +08:00
|
|
|
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
|
|
|
|
const TargetRegisterClass *DstRC =
|
|
|
|
getRegClassForTypeOnBank(DstTy, DstRB, RBI);
|
|
|
|
RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
|
2017-07-13 01:32:32 +08:00
|
|
|
return true;
|
2016-07-27 22:31:55 +08:00
|
|
|
}
|
2018-07-31 08:09:02 +08:00
|
|
|
case TargetOpcode::G_BLOCK_ADDR: {
|
|
|
|
if (TM.getCodeModel() == CodeModel::Large) {
|
|
|
|
materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
I.setDesc(TII.get(AArch64::MOVaddrBA));
|
|
|
|
auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
|
|
|
|
I.getOperand(0).getReg())
|
|
|
|
.addBlockAddress(I.getOperand(1).getBlockAddress(),
|
|
|
|
/* Offset */ 0, AArch64II::MO_PAGE)
|
|
|
|
.addBlockAddress(
|
|
|
|
I.getOperand(1).getBlockAddress(), /* Offset */ 0,
|
|
|
|
AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
}
|
2020-09-25 16:28:50 +08:00
|
|
|
case AArch64::G_DUP: {
|
|
|
|
// When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
|
|
|
|
// imported patterns. Do it manually here. Avoiding generating s16 gpr is
|
|
|
|
// difficult because at RBS we may end up pessimizing the fpr case if we
|
|
|
|
// decided to add an anyextend to fix this. Manual selection is the most
|
|
|
|
// robust solution for now.
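// E.g. (sketch):  %d:fpr(<8 x s16>) = G_DUP %w:gpr(s16)  is re-described as
//   %d = DUPv8i16gpr %w
// and the s8 case likewise uses DUPv16i8gpr.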
|
|
|
|
Register SrcReg = I.getOperand(1).getReg();
|
|
|
|
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
|
|
|
|
return false; // We expect the fpr regbank case to be imported.
|
|
|
|
LLT SrcTy = MRI.getType(SrcReg);
|
|
|
|
if (SrcTy.getSizeInBits() == 16)
|
|
|
|
I.setDesc(TII.get(AArch64::DUPv8i16gpr));
|
|
|
|
else if (SrcTy.getSizeInBits() == 8)
|
|
|
|
I.setDesc(TII.get(AArch64::DUPv16i8gpr));
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
2019-04-24 04:46:19 +08:00
|
|
|
case TargetOpcode::G_INTRINSIC_TRUNC:
|
|
|
|
return selectIntrinsicTrunc(I, MRI);
|
2019-04-24 07:03:03 +08:00
|
|
|
case TargetOpcode::G_INTRINSIC_ROUND:
|
|
|
|
return selectIntrinsicRound(I, MRI);
|
2018-12-11 02:44:58 +08:00
|
|
|
case TargetOpcode::G_BUILD_VECTOR:
|
|
|
|
return selectBuildVector(I, MRI);
|
2018-12-20 09:11:04 +08:00
|
|
|
case TargetOpcode::G_MERGE_VALUES:
|
|
|
|
return selectMergeValues(I, MRI);
|
2019-01-25 06:00:41 +08:00
|
|
|
case TargetOpcode::G_UNMERGE_VALUES:
|
|
|
|
return selectUnmergeValues(I, MRI);
|
2019-02-22 04:20:16 +08:00
|
|
|
case TargetOpcode::G_SHUFFLE_VECTOR:
|
|
|
|
return selectShuffleVector(I, MRI);
|
2019-03-12 06:18:01 +08:00
|
|
|
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
|
|
|
|
return selectExtractElt(I, MRI);
|
2019-03-15 02:01:30 +08:00
|
|
|
case TargetOpcode::G_INSERT_VECTOR_ELT:
|
|
|
|
return selectInsertElt(I, MRI);
|
2019-03-15 06:48:15 +08:00
|
|
|
case TargetOpcode::G_CONCAT_VECTORS:
|
|
|
|
return selectConcatVectors(I, MRI);
|
2019-06-22 02:10:41 +08:00
|
|
|
case TargetOpcode::G_JUMP_TABLE:
|
|
|
|
return selectJumpTable(I, MRI);
|
2020-10-10 03:38:39 +08:00
|
|
|
case TargetOpcode::G_VECREDUCE_FADD:
|
|
|
|
case TargetOpcode::G_VECREDUCE_ADD:
|
|
|
|
return selectReduction(I, MRI);
|
2018-07-31 08:09:02 +08:00
|
|
|
}
|
2016-07-27 22:31:55 +08:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
2017-03-15 05:32:08 +08:00
|
|
|
|
2020-10-10 03:38:39 +08:00
|
|
|
bool AArch64InstructionSelector::selectReduction(
|
|
|
|
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
|
|
|
Register VecReg = I.getOperand(1).getReg();
|
|
|
|
LLT VecTy = MRI.getType(VecReg);
|
|
|
|
if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
|
|
|
|
unsigned Opc = 0;
|
|
|
|
if (VecTy == LLT::vector(16, 8))
|
|
|
|
Opc = AArch64::ADDVv16i8v;
|
|
|
|
else if (VecTy == LLT::vector(8, 16))
|
|
|
|
Opc = AArch64::ADDVv8i16v;
|
|
|
|
else if (VecTy == LLT::vector(4, 32))
|
|
|
|
Opc = AArch64::ADDVv4i32v;
|
|
|
|
else if (VecTy == LLT::vector(2, 64))
|
|
|
|
Opc = AArch64::ADDPv2i64p;
|
|
|
|
else {
|
|
|
|
LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
I.setDesc(TII.get(Opc));
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
|
|
|
|
unsigned Opc = 0;
|
|
|
|
if (VecTy == LLT::vector(2, 32))
|
|
|
|
Opc = AArch64::FADDPv2i32p;
|
|
|
|
else if (VecTy == LLT::vector(2, 64))
|
|
|
|
Opc = AArch64::FADDPv2i64p;
|
|
|
|
else {
|
|
|
|
LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
I.setDesc(TII.get(Opc));
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-06-22 02:10:41 +08:00
|
|
|
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
|
|
|
|
MachineRegisterInfo &MRI) const {
|
|
|
|
assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
|
2019-06-25 00:16:12 +08:00
|
|
|
Register JTAddr = I.getOperand(0).getReg();
|
2019-06-22 02:10:41 +08:00
|
|
|
unsigned JTI = I.getOperand(1).getIndex();
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Index = I.getOperand(2).getReg();
|
2019-06-22 02:10:41 +08:00
|
|
|
MachineIRBuilder MIB(I);
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
|
|
|
|
Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
|
2020-09-08 18:08:25 +08:00
|
|
|
|
|
|
|
MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
|
2020-04-03 07:33:35 +08:00
|
|
|
auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
|
|
|
|
{TargetReg, ScratchReg}, {JTAddr, Index})
|
|
|
|
.addJumpTableIndex(JTI);
|
2019-06-22 02:10:41 +08:00
|
|
|
// Build the indirect branch.
|
|
|
|
MIB.buildInstr(AArch64::BR, {}, {TargetReg});
|
|
|
|
I.eraseFromParent();
|
2020-04-03 07:33:35 +08:00
|
|
|
return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
|
2019-06-22 02:10:41 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
bool AArch64InstructionSelector::selectJumpTable(
|
|
|
|
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
|
|
|
assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
|
|
|
|
assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstReg = I.getOperand(0).getReg();
|
2019-06-22 02:10:41 +08:00
|
|
|
unsigned JTI = I.getOperand(1).getIndex();
|
|
|
|
// We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
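// Roughly (illustrative), the pseudo is later expanded to something like
//   adrp xN, .LJTI0_0
//   add  xN, xN, :lo12:.LJTI0_0
// i.e. the page address of the jump table plus its low 12-bit page offset.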
|
|
|
|
MachineIRBuilder MIB(I);
|
|
|
|
auto MovMI =
|
2020-10-02 05:15:57 +08:00
|
|
|
MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
|
2019-06-22 02:10:41 +08:00
|
|
|
.addJumpTableIndex(JTI, AArch64II::MO_PAGE)
|
|
|
|
.addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2019-08-09 17:32:38 +08:00
|
|
|
bool AArch64InstructionSelector::selectTLSGlobalValue(
|
|
|
|
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
|
|
|
if (!STI.isTargetMachO())
|
|
|
|
return false;
|
|
|
|
MachineFunction &MF = *I.getParent()->getParent();
|
|
|
|
MF.getFrameInfo().setAdjustsStack(true);
|
|
|
|
|
|
|
|
const GlobalValue &GV = *I.getOperand(1).getGlobal();
|
|
|
|
MachineIRBuilder MIB(I);
|
|
|
|
|
2020-07-15 08:15:27 +08:00
|
|
|
auto LoadGOT =
|
|
|
|
MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
|
|
|
|
.addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
|
2019-08-09 17:32:38 +08:00
|
|
|
|
2019-08-13 14:55:32 +08:00
|
|
|
auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
|
2020-07-15 08:15:27 +08:00
|
|
|
{LoadGOT.getReg(0)})
|
2019-08-13 14:55:32 +08:00
|
|
|
.addImm(0);
|
2019-08-09 17:32:38 +08:00
|
|
|
|
2020-07-15 08:15:27 +08:00
|
|
|
MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
|
2019-08-09 17:32:38 +08:00
|
|
|
// TLS calls preserve all registers except those that absolutely must be
|
|
|
|
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
|
|
|
|
// silly).
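// The sequence emitted below is roughly (sketch): load the TLS variable's
// descriptor address via the GOT, load the resolver function pointer from it,
// pass the descriptor in x0, call the resolver, and read the result back from
// x0.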
|
2020-06-11 16:23:15 +08:00
|
|
|
MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
|
2020-07-15 08:15:27 +08:00
|
|
|
.addUse(AArch64::X0, RegState::Implicit)
|
2019-08-09 17:32:38 +08:00
|
|
|
.addDef(AArch64::X0, RegState::Implicit)
|
|
|
|
.addRegMask(TRI.getTLSCallPreservedMask());
|
|
|
|
|
|
|
|
MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
|
|
|
|
RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
|
|
|
|
MRI);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-04-24 04:46:19 +08:00
|
|
|
bool AArch64InstructionSelector::selectIntrinsicTrunc(
|
|
|
|
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
|
|
|
const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
|
|
|
|
|
|
|
|
// Select the correct opcode.
|
|
|
|
unsigned Opc = 0;
|
|
|
|
if (!SrcTy.isVector()) {
|
|
|
|
switch (SrcTy.getSizeInBits()) {
|
|
|
|
default:
|
|
|
|
case 16:
|
|
|
|
Opc = AArch64::FRINTZHr;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
Opc = AArch64::FRINTZSr;
|
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
Opc = AArch64::FRINTZDr;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
unsigned NumElts = SrcTy.getNumElements();
|
|
|
|
switch (SrcTy.getElementType().getSizeInBits()) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case 16:
|
|
|
|
if (NumElts == 4)
|
|
|
|
Opc = AArch64::FRINTZv4f16;
|
|
|
|
else if (NumElts == 8)
|
|
|
|
Opc = AArch64::FRINTZv8f16;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
if (NumElts == 2)
|
|
|
|
Opc = AArch64::FRINTZv2f32;
|
|
|
|
else if (NumElts == 4)
|
|
|
|
Opc = AArch64::FRINTZv4f32;
|
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
if (NumElts == 2)
|
|
|
|
Opc = AArch64::FRINTZv2f64;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Opc) {
|
|
|
|
// Didn't get an opcode above, bail.
|
|
|
|
LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Legalization would have set us up perfectly for this; we just need to
|
|
|
|
// set the opcode and move on.
|
|
|
|
I.setDesc(TII.get(Opc));
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2019-04-24 07:03:03 +08:00
|
|
|
bool AArch64InstructionSelector::selectIntrinsicRound(
|
|
|
|
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
|
|
|
const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
|
|
|
|
|
|
|
|
// Select the correct opcode.
|
|
|
|
unsigned Opc = 0;
|
|
|
|
if (!SrcTy.isVector()) {
|
|
|
|
switch (SrcTy.getSizeInBits()) {
|
|
|
|
default:
|
|
|
|
case 16:
|
|
|
|
Opc = AArch64::FRINTAHr;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
Opc = AArch64::FRINTASr;
|
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
Opc = AArch64::FRINTADr;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
unsigned NumElts = SrcTy.getNumElements();
|
|
|
|
switch (SrcTy.getElementType().getSizeInBits()) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case 16:
|
|
|
|
if (NumElts == 4)
|
|
|
|
Opc = AArch64::FRINTAv4f16;
|
|
|
|
else if (NumElts == 8)
|
|
|
|
Opc = AArch64::FRINTAv8f16;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
if (NumElts == 2)
|
|
|
|
Opc = AArch64::FRINTAv2f32;
|
|
|
|
else if (NumElts == 4)
|
|
|
|
Opc = AArch64::FRINTAv4f32;
|
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
if (NumElts == 2)
|
|
|
|
Opc = AArch64::FRINTAv2f64;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Opc) {
|
|
|
|
// Didn't get an opcode above, bail.
|
|
|
|
LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Legalization would have set us up perfectly for this; we just need to
|
|
|
|
// set the opcode and move on.
|
|
|
|
I.setDesc(TII.get(Opc));
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2019-04-10 05:22:43 +08:00
|
|
|
bool AArch64InstructionSelector::selectVectorICmp(
|
|
|
|
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstReg = I.getOperand(0).getReg();
|
2019-04-10 05:22:43 +08:00
|
|
|
LLT DstTy = MRI.getType(DstReg);
|
2019-06-25 00:16:12 +08:00
|
|
|
Register SrcReg = I.getOperand(2).getReg();
|
|
|
|
Register Src2Reg = I.getOperand(3).getReg();
|
2019-04-10 05:22:43 +08:00
|
|
|
LLT SrcTy = MRI.getType(SrcReg);
|
|
|
|
|
|
|
|
unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
|
|
|
|
unsigned NumElts = DstTy.getNumElements();
|
|
|
|
|
|
|
|
// First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
|
|
|
|
// Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
|
|
|
|
// Third index is cc opcode:
|
|
|
|
// 0 == eq
|
|
|
|
// 1 == ugt
|
|
|
|
// 2 == uge
|
|
|
|
// 3 == ult
|
|
|
|
// 4 == ule
|
|
|
|
// 5 == sgt
|
|
|
|
// 6 == sge
|
|
|
|
// 7 == slt
|
|
|
|
// 8 == sle
|
|
|
|
// ne is done by negating 'eq' result.
|
|
|
|
|
|
|
|
// This table below assumes that for some comparisons the operands will be
|
|
|
|
// commuted.
|
|
|
|
// ult op == commute + ugt op
|
|
|
|
// ule op == commute + uge op
|
|
|
|
// slt op == commute + sgt op
|
|
|
|
// sle op == commute + sge op
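// E.g. (sketch): an unsigned-less-than compare of two <4 x s32> operands swaps
// the operands and uses CMHIv4i32 (the 'ugt' entry), while 'ne' uses the CMEQ
// entry followed by the NOT emitted further down.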
|
|
|
|
unsigned PredIdx = 0;
|
|
|
|
bool SwapOperands = false;
|
|
|
|
CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
|
|
|
|
switch (Pred) {
|
|
|
|
case CmpInst::ICMP_NE:
|
|
|
|
case CmpInst::ICMP_EQ:
|
|
|
|
PredIdx = 0;
|
|
|
|
break;
|
|
|
|
case CmpInst::ICMP_UGT:
|
|
|
|
PredIdx = 1;
|
|
|
|
break;
|
|
|
|
case CmpInst::ICMP_UGE:
|
|
|
|
PredIdx = 2;
|
|
|
|
break;
|
|
|
|
case CmpInst::ICMP_ULT:
|
|
|
|
PredIdx = 3;
|
|
|
|
SwapOperands = true;
|
|
|
|
break;
|
|
|
|
case CmpInst::ICMP_ULE:
|
|
|
|
PredIdx = 4;
|
|
|
|
SwapOperands = true;
|
|
|
|
break;
|
|
|
|
case CmpInst::ICMP_SGT:
|
|
|
|
PredIdx = 5;
|
|
|
|
break;
|
|
|
|
case CmpInst::ICMP_SGE:
|
|
|
|
PredIdx = 6;
|
|
|
|
break;
|
|
|
|
case CmpInst::ICMP_SLT:
|
|
|
|
PredIdx = 7;
|
|
|
|
SwapOperands = true;
|
|
|
|
break;
|
|
|
|
case CmpInst::ICMP_SLE:
|
|
|
|
PredIdx = 8;
|
|
|
|
SwapOperands = true;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unhandled icmp predicate");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// This table obviously should be tablegen'd when we have our GISel native
|
|
|
|
// tablegen selector.
|
|
|
|
|
|
|
|
static const unsigned OpcTable[4][4][9] = {
|
2020-10-02 05:15:57 +08:00
|
|
|
{
|
|
|
|
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */},
|
|
|
|
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */},
|
|
|
|
{AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
|
|
|
|
AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
|
|
|
|
AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
|
|
|
|
{AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
|
|
|
|
AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
|
|
|
|
AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */},
|
|
|
|
{AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
|
|
|
|
AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
|
|
|
|
AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
|
|
|
|
{AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
|
|
|
|
AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
|
|
|
|
AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
|
|
|
|
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
|
|
|
|
AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
|
|
|
|
AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
|
|
|
|
{AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
|
|
|
|
AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
|
|
|
|
AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
|
|
|
|
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */},
|
|
|
|
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
|
|
|
|
AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
|
|
|
|
AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
|
|
|
|
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */},
|
|
|
|
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */},
|
|
|
|
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
|
|
|
|
0 /* invalid */}
|
|
|
|
},
|
2019-04-10 05:22:43 +08:00
|
|
|
};
|
|
|
|
unsigned EltIdx = Log2_32(SrcEltSize / 8);
|
|
|
|
unsigned NumEltsIdx = Log2_32(NumElts / 2);
|
|
|
|
unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
|
|
|
|
if (!Opc) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
|
|
|
|
const TargetRegisterClass *SrcRC =
|
|
|
|
getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
|
|
|
|
if (!SrcRC) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
|
|
|
|
if (SrcTy.getSizeInBits() == 128)
|
|
|
|
NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
|
|
|
|
|
|
|
|
if (SwapOperands)
|
|
|
|
std::swap(SrcReg, Src2Reg);
|
|
|
|
|
|
|
|
MachineIRBuilder MIB(I);
|
|
|
|
auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
|
|
|
|
constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
|
|
|
|
|
|
|
|
// Invert if we had a 'ne' cc.
|
|
|
|
if (NotOpc) {
|
|
|
|
Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
|
|
|
|
constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
|
|
|
|
} else {
|
|
|
|
MIB.buildCopy(DstReg, Cmp.getReg(0));
|
|
|
|
}
|
|
|
|
RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-02-26 02:52:54 +08:00
|
|
|
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
|
2019-06-25 00:16:12 +08:00
|
|
|
unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
|
2019-02-26 02:52:54 +08:00
|
|
|
MachineIRBuilder &MIRBuilder) const {
|
|
|
|
auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
|
2018-12-11 02:44:58 +08:00
|
|
|
|
|
|
|
auto BuildFn = [&](unsigned SubregIndex) {
|
2019-02-26 02:52:54 +08:00
|
|
|
auto Ins =
|
|
|
|
MIRBuilder
|
|
|
|
.buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
|
|
|
|
.addImm(SubregIndex);
|
|
|
|
constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
|
|
|
|
constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
|
|
|
|
return &*Ins;
|
2018-12-11 02:44:58 +08:00
|
|
|
};
|
|
|
|
|
2019-03-05 03:16:00 +08:00
|
|
|
switch (EltSize) {
|
2019-01-25 06:00:41 +08:00
|
|
|
case 16:
|
|
|
|
return BuildFn(AArch64::hsub);
|
2018-12-11 02:44:58 +08:00
|
|
|
case 32:
|
|
|
|
return BuildFn(AArch64::ssub);
|
|
|
|
case 64:
|
|
|
|
return BuildFn(AArch64::dsub);
|
|
|
|
default:
|
2019-02-26 02:52:54 +08:00
|
|
|
return nullptr;
|
2018-12-11 02:44:58 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-12-20 09:11:04 +08:00
|
|
|
bool AArch64InstructionSelector::selectMergeValues(
|
|
|
|
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
|
|
|
assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
|
|
|
|
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
|
|
|
|
const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
|
|
|
|
assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
|
2019-07-24 06:05:13 +08:00
|
|
|
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
|
2018-12-20 09:11:04 +08:00
|
|
|
|
|
|
|
if (I.getNumOperands() != 3)
|
|
|
|
return false;
|
2019-07-24 06:05:13 +08:00
|
|
|
|
|
|
|
// Merging 2 s64s into an s128.
|
|
|
|
if (DstTy == LLT::scalar(128)) {
|
|
|
|
if (SrcTy.getSizeInBits() != 64)
|
|
|
|
return false;
|
|
|
|
MachineIRBuilder MIB(I);
|
|
|
|
Register DstReg = I.getOperand(0).getReg();
|
|
|
|
Register Src1Reg = I.getOperand(1).getReg();
|
|
|
|
Register Src2Reg = I.getOperand(2).getReg();
|
|
|
|
auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
|
|
|
|
MachineInstr *InsMI =
|
|
|
|
emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
|
|
|
|
if (!InsMI)
|
|
|
|
return false;
|
|
|
|
MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
|
|
|
|
Src2Reg, /* LaneIdx */ 1, RB, MIB);
|
|
|
|
if (!Ins2MI)
|
|
|
|
return false;
|
|
|
|
constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
|
|
|
|
constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-12-20 09:11:04 +08:00
|
|
|
if (RB.getID() != AArch64::GPRRegBankID)
|
|
|
|
return false;
|
|
|
|
|
2019-07-24 06:05:13 +08:00
|
|
|
if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
|
|
|
|
return false;
|
|
|
|
|
2018-12-20 09:11:04 +08:00
|
|
|
auto *DstRC = &AArch64::GPR64RegClass;
|
2019-06-25 00:16:12 +08:00
|
|
|
Register SubToRegDef = MRI.createVirtualRegister(DstRC);
|
2018-12-20 09:11:04 +08:00
|
|
|
MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
|
|
|
|
TII.get(TargetOpcode::SUBREG_TO_REG))
|
|
|
|
.addDef(SubToRegDef)
|
|
|
|
.addImm(0)
|
|
|
|
.addUse(I.getOperand(1).getReg())
|
|
|
|
.addImm(AArch64::sub_32);
|
2019-06-25 00:16:12 +08:00
|
|
|
Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
|
2018-12-20 09:11:04 +08:00
|
|
|
// Need to anyext the second scalar before we can use bfm
|
|
|
|
MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
|
2020-10-02 05:15:57 +08:00
|
|
|
TII.get(TargetOpcode::SUBREG_TO_REG))
|
|
|
|
.addDef(SubToRegDef2)
|
|
|
|
.addImm(0)
|
|
|
|
.addUse(I.getOperand(2).getReg())
|
|
|
|
.addImm(AArch64::sub_32);
|
2018-12-20 09:11:04 +08:00
|
|
|
MachineInstr &BFM =
|
|
|
|
*BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
|
2018-12-20 11:27:42 +08:00
|
|
|
.addDef(I.getOperand(0).getReg())
|
2018-12-20 09:11:04 +08:00
|
|
|
.addUse(SubToRegDef)
|
|
|
|
.addUse(SubToRegDef2)
|
|
|
|
.addImm(32)
|
|
|
|
.addImm(31);
|
|
|
|
constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
|
|
|
|
constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
|
|
|
|
constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-03-12 06:18:01 +08:00
|
|
|
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
|
|
|
|
const unsigned EltSize) {
|
|
|
|
// Choose a lane copy opcode and subregister based off of the size of the
|
|
|
|
// vector's elements.
|
|
|
|
switch (EltSize) {
|
|
|
|
case 16:
|
|
|
|
CopyOpc = AArch64::CPYi16;
|
|
|
|
ExtractSubReg = AArch64::hsub;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
CopyOpc = AArch64::CPYi32;
|
|
|
|
ExtractSubReg = AArch64::ssub;
|
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
CopyOpc = AArch64::CPYi64;
|
|
|
|
ExtractSubReg = AArch64::dsub;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// Unknown size, bail out.
|
|
|
|
LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-03-15 06:48:18 +08:00
|
|
|
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
|
2019-06-25 00:16:12 +08:00
|
|
|
Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
|
|
|
|
Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
|
2019-03-15 06:48:18 +08:00
|
|
|
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
|
|
|
|
unsigned CopyOpc = 0;
|
|
|
|
unsigned ExtractSubReg = 0;
|
|
|
|
if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
|
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
const TargetRegisterClass *DstRC =
|
|
|
|
getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
|
|
|
|
if (!DstRC) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
|
|
|
|
const LLT &VecTy = MRI.getType(VecReg);
|
|
|
|
const TargetRegisterClass *VecRC =
|
|
|
|
getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
|
|
|
|
if (!VecRC) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The register that we're going to copy into.
|
2019-06-25 00:16:12 +08:00
|
|
|
Register InsertReg = VecReg;
|
2019-03-15 06:48:18 +08:00
|
|
|
if (!DstReg)
|
|
|
|
DstReg = MRI.createVirtualRegister(DstRC);
|
|
|
|
// If the lane index is 0, we just use a subregister COPY.
|
|
|
|
if (LaneIdx == 0) {
|
2019-03-19 03:20:10 +08:00
|
|
|
auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
|
|
|
|
.addReg(VecReg, 0, ExtractSubReg);
|
2019-03-15 06:48:18 +08:00
|
|
|
RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
|
2019-03-16 05:59:50 +08:00
|
|
|
return &*Copy;
|
2019-03-15 06:48:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Lane copies require 128-bit wide registers. If we're dealing with an
|
|
|
|
// unpacked vector, then we need to move up to that width. Insert an implicit
|
|
|
|
// def and a subregister insert to get us there.
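// Sketch (illustrative): extracting lane 1 of an unpacked <2 x s32> first
// widens the source, roughly
//   %undef:fpr128 = IMPLICIT_DEF
//   %full:fpr128  = INSERT_SUBREG %undef, %vec, %subreg.dsub
//   %dst:fpr32    = CPYi32 %full, 1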
|
|
|
|
if (VecTy.getSizeInBits() != 128) {
|
|
|
|
MachineInstr *ScalarToVector = emitScalarToVector(
|
|
|
|
VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
|
|
|
|
if (!ScalarToVector)
|
|
|
|
return nullptr;
|
|
|
|
InsertReg = ScalarToVector->getOperand(0).getReg();
|
|
|
|
}
|
|
|
|
|
|
|
|
MachineInstr *LaneCopyMI =
|
|
|
|
MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
|
|
|
|
constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
|
|
|
|
|
|
|
|
// Make sure that we actually constrain the initial copy.
|
|
|
|
RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
|
|
|
|
return LaneCopyMI;
|
|
|
|
}
|
|
|
|
|
2019-03-12 06:18:01 +08:00
|
|
|
bool AArch64InstructionSelector::selectExtractElt(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(DstReg);
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(SrcReg);
  (void)WideTy;
  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
         "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");

  // Need the lane index to determine the correct copy opcode.
  MachineOperand &LaneIdxOp = I.getOperand(2);
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");

  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
    return false;
  }

  // Find the index to extract from.
  auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();

  MachineIRBuilder MIRBuilder(I);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
                                               LaneIdx, MIRBuilder);
  if (!Extract)
    return false;

  I.eraseFromParent();
  return true;
}

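/// Select an unmerge whose results are themselves vectors by extracting each
/// sub-vector from the wide source as if it were a scalar lane.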
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
  if (SrcTy.getSizeInBits() > 128) {
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
    return false;
  }

  MachineIRBuilder MIB(I);

  // We implement a split vector operation by treating the sub-vectors as
  // scalars and extracting them.
  const RegisterBank &DstRB =
      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
    Register Dst = I.getOperand(OpIdx).getReg();
    MachineInstr *Extract =
        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
    if (!Extract)
      return false;
  }
  I.eraseFromParent();
  return true;
}

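/// Select a G_UNMERGE_VALUES whose source lives on the FPR bank. Scalar
/// results are handled here with lane copies; vector results are forwarded to
/// selectSplitVectorUnmerge.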
bool AArch64InstructionSelector::selectUnmergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  // TODO: Handle unmerging into GPRs and from scalars to scalars.
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }

  // The last operand is the vector source register, and every other operand is
  // a register to unpack into.
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);
  (void)WideTy;
  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
         "can only unmerge from vector or s128 types!");
  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
         "source register size too small!");

  if (!NarrowTy.isScalar())
    return selectSplitVectorUnmerge(I, MRI);

  MachineIRBuilder MIB(I);

  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
    return false;

  // Set up for the lane copies.
  MachineBasicBlock &MBB = *I.getParent();

  // Stores the registers we'll be copying from.
  SmallVector<Register, 4> InsertRegs;

  // We'll use the first register twice, so we only need NumElts-1 registers.
  unsigned NumInsertRegs = NumElts - 1;

  // If our elements fit into exactly 128 bits, then we can copy from the source
  // directly. Otherwise, we need to do a bit of setup with some subregister
  // inserts.
  if (NarrowTy.getSizeInBits() * NumElts == 128) {
    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
  } else {
    // No. We have to perform subregister inserts. For each insert, create an
    // implicit def and a subregister insert, and save the register we create.
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
                   ImpDefReg);

      // Now, create the subregister insert from SrcReg.
      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &InsMI =
          *BuildMI(MBB, I, I.getDebugLoc(),
                   TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
               .addUse(ImpDefReg)
               .addUse(SrcReg)
               .addImm(AArch64::dsub);

      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);

      // Save the register so that we can copy from it after.
      InsertRegs.push_back(InsertReg);
    }
  }

  // Now that we've created any necessary subregister inserts, we can
  // create the copies.
  //
  // Perform the first copy separately as a subregister copy.
  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);
  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);

  // Now, perform the remaining copies as vector lane copies.
  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
    MachineInstr &CopyInst =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
             .addUse(InsReg)
             .addImm(LaneIdx);
    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
    ++LaneIdx;
  }

  // Separately constrain the first copy's destination. Because of the
  // limitation in constrainOperandRegClass, we can't guarantee that this will
  // actually be constrained. So, do it ourselves using the second operand.
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}

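/// Select a G_CONCAT_VECTORS by delegating to emitVectorConcat.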
bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineIRBuilder MIRBuilder(I);
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}

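/// Add \p CPVal to the function's constant pool, using the preferred alignment
/// for its type, and return the resulting constant pool index.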
unsigned
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
                                                  MachineFunction &MF) const {
  Type *CPTy = CPVal->getType();
  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);

  MachineConstantPool *MCP = MF.getConstantPool();
  return MCP->getConstantPoolIndex(CPVal, Alignment);
}

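/// Materialize \p CPVal with an ADRP plus a constant pool load. Only 8 and 16
/// byte constants are handled; other sizes return nullptr.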
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());

  auto Adrp =
      MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
          .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

  MachineInstr *LoadMI = nullptr;
  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
  case 16:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  case 8:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }
  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}

/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
/// size and RB.
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 16) {
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  }
  return std::make_pair(Opc, SubregIdx);
}

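/// Build and constrain a single already-selected instruction with the given
/// destination and source operands, optionally applying a set of complex
/// operand renderers (as produced by the addressing-mode selectors).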
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  assert(!isPreISelGenericOpcode(Opcode) &&
         "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns)
    for (auto &Fn : *RenderFns)
      Fn(MI);
  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
  return &*MI;
}

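/// Emit an add/sub style instruction, picking among the ri/rs/rr/rx forms in
/// \p AddrModeAndSizeToOpcode based on what folds into the RHS. Negative
/// arithmetic immediates are folded by switching to the opposite opcode with a
/// positive immediate, e.g. an ADDS of -10 is emitted as a SUBS of 10.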
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(LHS.getReg());
  assert(!Ty.isVector() && "Expected a scalar or pointer?");
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
                   MIRBuilder);
}

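/// Emit an ADD of \p LHS and \p RHS. The opcode table passed to emitAddSub is
/// laid out as {ri, rs, rr, negated ri, rx}, each row holding the {64-bit,
/// 32-bit} variants; emitADDS and emitSUBS below use the same layout.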
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDXri, AArch64::ADDWri},
       {AArch64::ADDXrs, AArch64::ADDWrs},
       {AArch64::ADDXrr, AArch64::ADDWrr},
       {AArch64::SUBXri, AArch64::SUBWri},
       {AArch64::ADDXrx, AArch64::ADDWrx}}};
  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::ADDSXrs, AArch64::ADDSWrs},
       {AArch64::ADDSXrr, AArch64::ADDSWrr},
       {AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::SUBSXrs, AArch64::SUBSWrs},
       {AArch64::SUBSXrr, AArch64::SUBSWrr},
       {AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}

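/// Emit a CMN (compare negative), i.e. an ADDS into a scratch register that is
/// only used to set NZCV.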
MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}

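/// Emit a TST, i.e. an ANDS whose result is only used for its flags, folding a
/// logical immediate or a logical shifted register into the RHS if possible.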
MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT Ty = MRI.getType(LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
  // ANDS needs a logical immediate for its immediate form. Check if we can
  // fold one in.
  if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();

    if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
      TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  if (auto Fns = selectLogicalShiftedRegister(RHS))
    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}

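/// Emit the integer compare for a G_ICMP. The compare is emitted as a SUBS, or
/// folded into a CMN/TST when tryFoldIntegerCompare succeeds.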
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  assert(Predicate.isPredicate() && "Expected predicate?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(LHS.getReg());
  assert(!CmpTy.isVector() && "Expected scalar or pointer");
  unsigned Size = CmpTy.getSizeInBits();
  (void)Size;
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  // Fold the compare into a cmn or tst if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}

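/// Materialize the boolean result of a floating point compare into \p Dst
/// using one CSINC, or two CSINCs ORed together when the predicate needs two
/// condition codes.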
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
#endif
  const Register ZeroReg = AArch64::WZR;
  auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
    auto CSet =
        MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
            .addImm(getInvertedCondCode(CC));
    constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
    return &*CSet;
  };

  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
  if (CC2 == AArch64CC::AL)
    return EmitCSet(Dst, CC1);

  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  Register Def1Reg = MRI.createVirtualRegister(RC);
  Register Def2Reg = MRI.createVirtualRegister(RC);
  EmitCSet(Def1Reg, CC1);
  EmitCSet(Def2Reg, CC2);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}

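/// Emit an FCMP for scalar f32/f64 operands, using the immediate form when the
/// RHS is a +0.0 constant. Returns nullptr for unsupported types.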
MachineInstr *
AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
                                          MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(LHS);
  if (Ty.isVector())
    return nullptr;
  unsigned OpSize = Ty.getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return nullptr;

  // If this is a compare against +0.0, then we don't have
  // to explicitly materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];

  // Partially build the compare. Decide if we need to add a use for the
  // third operand based off whether or not we're comparing against 0.0.
  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
  if (!ShouldUseImm)
    CmpMI.addUse(RHS);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}

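/// Concatenate two 64-bit vectors into a 128-bit result by widening the first
/// operand with scalar_to_vector and inserting the second into the upper lane.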
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    Optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
    return nullptr;
  }

  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);

  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}

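/// Try to turn a G_FCONSTANT into an FMOV immediate in place. Returns nullptr
/// if the value cannot be encoded as an FMOV immediate.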
MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
         "Expected a G_FCONSTANT!");
  MachineOperand &ImmOp = I.getOperand(1);
  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();

  // Only handle 32 and 64 bit defs for now.
  if (DefSize != 32 && DefSize != 64)
    return nullptr;

  // Don't handle null values using FMOV.
  if (ImmOp.getFPImm()->isNullValue())
    return nullptr;

  // Get the immediate representation for the FMOV.
  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
                          : AArch64_AM::getFP64Imm(ImmValAPF);

  // If this is -1, it means the immediate can't be represented as the requested
  // floating point value. Bail.
  if (Imm == -1)
    return nullptr;

  // Update MI to represent the new FMOV instruction, constrain it, and return.
  ImmOp.ChangeToImmediate(Imm);
  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
  I.setDesc(TII.get(MovOpc));
  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  return &I;
}

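/// Materialize the boolean result of an integer compare into \p DefReg with a
/// CSINC of WZR against the inverted condition code.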
MachineInstr *
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
                                            MachineIRBuilder &MIRBuilder) const {
  // CSINC increments the result when the predicate is false. Invert it.
  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
      CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
  auto I =
      MIRBuilder
          .buildInstr(AArch64::CSINCWr, {DefReg},
                      {Register(AArch64::WZR), Register(AArch64::WZR)})
          .addImm(InvCC);
  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
  return &*I;
}

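/// Emit the ADDS/SUBS for an overflow-checking operation and return it
/// together with the condition code that signals overflow.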
std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
                                           MachineOperand &LHS,
                                           MachineOperand &RHS,
                                           MachineIRBuilder &MIRBuilder) const {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  }
}

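/// Try to fold a G_ICMP/G_FCMP feeding a G_SELECT into a single compare plus
/// conditional select, instead of materializing the compare result first.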
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
  MachineIRBuilder MIB(I);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between.)
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
  while (CondDef) {
    // We can only fold if all of the defs have one use.
    Register CondDefReg = CondDef->getOperand(0).getReg();
    if (!MRI.hasOneNonDBGUse(CondDefReg)) {
      // Unless it's another select.
      for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
        if (CondDef == &UI)
          continue;
        if (UI.getOpcode() != TargetOpcode::G_SELECT)
          return false;
      }
    }

    // We can skip over G_TRUNC since the condition is 1-bit.
    // Truncating/extending can have no impact on the value.
    unsigned Opc = CondDef->getOpcode();
    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
      break;

    // Can't see past copies from physregs.
    if (Opc == TargetOpcode::COPY &&
        Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
      return false;

    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
  }

  // Is the condition defined by a compare?
  if (!CondDef)
    return false;

  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
    return false;

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
    CondCode = changeICMPPredToAArch64CC(Pred);
    emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
                       CondDef->getOperand(1), MIB);
  } else {
    // Get the condition code for the select.
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);

    // changeFCMPPredToAArch64CC leaves CondCode2 as AL when a single condition
    // code is enough. Bail out if the predicate requires two condition codes,
    // and so two instructions, to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    if (!emitFPCompare(CondDef->getOperand(2).getReg(),
                       CondDef->getOperand(3).getReg(), MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }
  }

  // Emit the select.
  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
             I.getOperand(3).getReg(), CondCode, MIB);
  I.eraseFromParent();
  return true;
}

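/// Try to fold the operands of an integer compare into a flag-setting
/// instruction that does the work directly: a G_SUB from zero becomes a CMN,
/// and a G_AND compared (signed) against zero becomes a TST. Returns the
/// emitted instruction, or nullptr if no fold applies.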
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g:
  //
  // cmn z, y

  // Helper lambda to detect the subtract followed by the compare.
  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
    // We want to match against SUBs.
    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
      return false;

    // Need to make sure NZCV is the same at the end of the transformation.
    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
      return false;

    // Make sure that we're getting
    // x = G_SUB 0, y
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return false;

    // This can safely be represented as a CMN.
    return true;
  };

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);

  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP x, z
  //
  // Produce this:
  //
  // cmn y, z
  if (IsCMN(LHSDef, CC))
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);

  // Same idea here, but with the RHS of the compare instead:
  //
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // Produce this:
  //
  // cmn z, y
  if (IsCMN(RHSDef, CC))
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);

  // Given this:
  //
  // z = G_AND x, y
  // G_ICMP z, 0
  //
  // Produce this if the compare is signed:
  //
  // tst x, y
  if (!CmpInst::isUnsigned(P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    return emitTST(LHSDef->getOperand(1),
                   LHSDef->getOperand(2), MIRBuilder);
  }

  return nullptr;
}

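/// Select a G_SHUFFLE_VECTOR by loading the shuffle mask as a byte index
/// vector from the constant pool and feeding it to a TBL1 (64-bit result) or
/// TBL2 (128-bit result) instruction.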
bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  Register Src1Reg = I.getOperand(1).getReg();
  const LLT Src1Ty = MRI.getType(Src1Reg);
  Register Src2Reg = I.getOperand(2).getReg();
  const LLT Src2Ty = MRI.getType(Src2Reg);
  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  LLVMContext &Ctx = MF.getFunction().getContext();

  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
  // it's originated from a <1 x T> type. Those should have been lowered into
  // G_BUILD_VECTOR earlier.
  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
    return false;
  }

  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;

  SmallVector<Constant *, 64> CstIdxs;
  for (int Val : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
    }
  }

  MachineIRBuilder MIRBuilder(I);

  // Use a constant pool to load the index vector for TBL.
  Constant *CPVal = ConstantVector::get(CstIdxs);
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
  if (!IndexLoad) {
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
    return false;
  }

  if (DstTy.getSizeInBits() != 128) {
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
    // This case can be done with TBL1.
    MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
    if (!Concat) {
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
      return false;
    }

    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
    IndexLoad =
        emitScalarToVector(64, &AArch64::FPR128RegClass,
                           IndexLoad->getOperand(0).getReg(), MIRBuilder);

    auto TBL1 = MIRBuilder.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);

    auto Copy =
        MIRBuilder
            .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  // Q registers for regalloc.
  auto RegSeq = MIRBuilder
                    .buildInstr(TargetOpcode::REG_SEQUENCE,
                                {&AArch64::QQRegClass}, {Src1Reg})
                    .addImm(AArch64::qsub0)
                    .addUse(Src2Reg)
                    .addImm(AArch64::qsub1);

  auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
                                    {RegSeq, IndexLoad->getOperand(0)});
  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

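/// Emit a vector lane insert of \p EltReg into lane \p LaneIdx of \p SrcReg,
/// creating a new FPR128 destination register if none is given.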
MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    Optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  MachineInstr *InsElt = nullptr;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Create a register to define with the insert if one wasn't passed in.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);

  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;

  if (RB.getID() == AArch64::FPRRegBankID) {
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(InsSub->getOperand(0).getReg())
                 .addImm(0);
  } else {
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(EltReg);
  }

  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return InsElt;
}

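/// Select a G_INSERT_VECTOR_ELT with a constant lane index: widen the vector
/// to 128 bits if needed, perform the lane insert, then narrow the result back
/// to the original size.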
bool AArch64InstructionSelector::selectInsertElt(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  // Get information on the destination.
  Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  unsigned VecSize = DstTy.getSizeInBits();

  // Get information on the element we want to insert into the destination.
  Register EltReg = I.getOperand(2).getReg();
  const LLT EltTy = MRI.getType(EltReg);
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.

  // Find the definition of the index. Bail out if it's not defined by a
  // G_CONSTANT.
  Register IdxReg = I.getOperand(3).getReg();
  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();

  // Perform the lane insert.
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  MachineIRBuilder MIRBuilder(I);

  if (VecSize < 128) {
    // If the vector we're inserting into is smaller than 128 bits, widen it
    // to 128 to do the insert.
    MachineInstr *ScalarToVec = emitScalarToVector(
        VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
    if (!ScalarToVec)
      return false;
    SrcReg = ScalarToVec->getOperand(0).getReg();
  }

  // Create an insert into a new FPR128 register.
  // Note that if our vector is already 128 bits, we end up emitting an extra
  // register.
  MachineInstr *InsMI =
      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);

  if (VecSize < 128) {
    // If we had to widen to perform the insert, then we have to demote back to
    // the original size to get the result we want.
    Register DemoteVec = InsMI->getOperand(0).getReg();
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }
    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
                        << "\n");
      return false;
    }
    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DemoteVec, 0, SubReg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // No widening needed.
    InsMI->getOperand(0).setReg(DstReg);
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

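/// If every operand of this G_BUILD_VECTOR is a G_CONSTANT or G_FCONSTANT,
/// select it as a constant pool load (or a zero move for all-zero vectors)
/// instead of a chain of lane inserts.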
bool AArch64InstructionSelector::tryOptConstantBuildVec(
|
|
|
|
MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
|
|
|
|
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
|
2020-09-19 02:33:16 +08:00
|
|
|
unsigned DstSize = DstTy.getSizeInBits();
|
|
|
|
assert(DstSize <= 128 && "Unexpected build_vec type!");
|
|
|
|
if (DstSize < 32)
|
2020-06-23 07:25:49 +08:00
|
|
|
return false;
|
|
|
|
// Check if we're building a constant vector, in which case we want to
|
|
|
|
// generate a constant pool load instead of a vector insert sequence.
|
|
|
|
SmallVector<Constant *, 16> Csts;
|
|
|
|
for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
|
|
|
|
// Try to find G_CONSTANT or G_FCONSTANT
|
|
|
|
auto *OpMI =
|
|
|
|
getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
|
|
|
|
if (OpMI)
|
|
|
|
Csts.emplace_back(
|
|
|
|
const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
|
|
|
|
else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
|
|
|
|
I.getOperand(Idx).getReg(), MRI)))
|
|
|
|
Csts.emplace_back(
|
|
|
|
const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
Constant *CV = ConstantVector::get(Csts);
|
|
|
|
MachineIRBuilder MIB(I);
|
2020-09-19 02:33:16 +08:00
|
|
|
if (CV->isNullValue()) {
|
|
|
|
// Until the importer can support immAllZerosV in pattern leaf nodes,
|
|
|
|
// select a zero move manually here.
|
|
|
|
Register DstReg = I.getOperand(0).getReg();
|
|
|
|
if (DstSize == 128) {
|
|
|
|
auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
|
|
|
|
} else if (DstSize == 64) {
|
|
|
|
auto Mov =
|
|
|
|
MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
|
|
|
|
.addImm(0);
|
|
|
|
MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
|
|
|
|
.addReg(Mov.getReg(0), 0, AArch64::dsub);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
|
|
|
|
}
|
|
|
|
}
|
2020-06-23 07:25:49 +08:00
|
|
|
auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
|
|
|
|
if (!CPLoad) {
|
|
|
|
    LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
|
|
|
|
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
|
|
|
|
*MRI.getRegClass(CPLoad->getOperand(0).getReg()),
|
|
|
|
MRI);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-12-11 02:44:58 +08:00
|
|
|
bool AArch64InstructionSelector::selectBuildVector(
|
|
|
|
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
|
|
|
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
|
|
|
|
// Until we port more of the optimized selections, for now just use a vector
|
|
|
|
// insert sequence.
|
|
|
|
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
|
|
|
|
const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
|
|
|
|
unsigned EltSize = EltTy.getSizeInBits();
|
2020-06-23 07:25:49 +08:00
|
|
|
|
|
|
|
if (tryOptConstantBuildVec(I, DstTy, MRI))
|
|
|
|
return true;
|
2019-01-25 06:00:41 +08:00
|
|
|
if (EltSize < 16 || EltSize > 64)
|
2018-12-11 02:44:58 +08:00
|
|
|
return false; // Don't support all element types yet.
|
|
|
|
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
|
2019-02-26 02:52:54 +08:00
|
|
|
MachineIRBuilder MIRBuilder(I);
|
2019-01-25 06:00:41 +08:00
|
|
|
|
|
|
|
const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
|
2019-02-26 02:52:54 +08:00
|
|
|
MachineInstr *ScalarToVec =
|
2019-03-05 03:16:00 +08:00
|
|
|
emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
|
|
|
|
I.getOperand(1).getReg(), MIRBuilder);
|
2019-02-26 02:52:54 +08:00
|
|
|
if (!ScalarToVec)
|
2019-01-25 06:00:41 +08:00
|
|
|
return false;
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstVec = ScalarToVec->getOperand(0).getReg();
|
2019-01-25 06:00:41 +08:00
|
|
|
unsigned DstSize = DstTy.getSizeInBits();
|
|
|
|
|
|
|
|
// Keep track of the last MI we inserted. Later on, we might be able to save
|
|
|
|
// a copy using it.
|
|
|
|
MachineInstr *PrevMI = nullptr;
|
|
|
|
for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
|
2019-03-14 07:22:23 +08:00
|
|
|
// Note that if we don't do a subregister copy, we can end up making an
|
|
|
|
// extra register.
|
|
|
|
PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
|
|
|
|
MIRBuilder);
|
|
|
|
DstVec = PrevMI->getOperand(0).getReg();
|
2018-12-11 02:44:58 +08:00
|
|
|
}
|
2019-01-25 06:00:41 +08:00
|
|
|
|
|
|
|
// If DstTy's size in bits is less than 128, then emit a subregister copy
|
|
|
|
// from DstVec to the last register we've defined.
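  // Illustrative example (not from the original source): for a 64-bit
  // build_vector such as <2 x s32>, the inserts above happen on an FPR128
  // register and the final value is extracted by the dsub subregister copy
  // built below.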
|
|
|
|
if (DstSize < 128) {
|
2019-03-14 07:29:54 +08:00
|
|
|
// Force this to be FPR using the destination vector.
|
|
|
|
const TargetRegisterClass *RC =
|
|
|
|
getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
|
2019-01-25 06:00:41 +08:00
|
|
|
if (!RC)
|
|
|
|
return false;
|
2019-03-14 07:29:54 +08:00
|
|
|
if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned SubReg = 0;
|
|
|
|
if (!getSubRegForClass(RC, TRI, SubReg))
|
|
|
|
return false;
|
|
|
|
if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
|
|
|
|
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        << ")\n");
|
|
|
|
return false;
|
|
|
|
}
|
2019-01-25 06:00:41 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Reg = MRI.createVirtualRegister(RC);
|
|
|
|
Register DstReg = I.getOperand(0).getReg();
|
2019-01-25 06:00:41 +08:00
|
|
|
|
2019-03-19 03:20:10 +08:00
|
|
|
MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
|
|
|
|
.addReg(DstVec, 0, SubReg);
|
2019-01-25 06:00:41 +08:00
|
|
|
MachineOperand &RegOp = I.getOperand(1);
|
|
|
|
RegOp.setReg(Reg);
|
|
|
|
RBI.constrainGenericRegister(DstReg, *RC, MRI);
|
|
|
|
} else {
|
|
|
|
// We don't need a subregister copy. Save a copy by re-using the
|
|
|
|
// destination register on the final insert.
|
|
|
|
assert(PrevMI && "PrevMI was null?");
|
|
|
|
PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
|
|
|
|
constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2018-12-11 02:44:58 +08:00
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-04-30 04:58:17 +08:00
|
|
|
/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
|
|
|
|
/// ID if it exists, and 0 otherwise.
|
|
|
|
static unsigned findIntrinsicID(MachineInstr &I) {
|
|
|
|
auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
|
|
|
|
return Op.isIntrinsicID();
|
|
|
|
});
|
|
|
|
if (IntrinOp == I.operands_end())
|
|
|
|
return 0;
|
|
|
|
return IntrinOp->getIntrinsicID();
|
|
|
|
}
|
|
|
|
|
2019-04-03 03:57:26 +08:00
|
|
|
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
|
|
|
|
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
|
|
|
// Find the intrinsic ID.
|
2019-04-30 04:58:17 +08:00
|
|
|
unsigned IntrinID = findIntrinsicID(I);
|
|
|
|
if (!IntrinID)
|
2019-04-03 03:57:26 +08:00
|
|
|
return false;
|
|
|
|
MachineIRBuilder MIRBuilder(I);
|
|
|
|
|
|
|
|
// Select the instruction.
|
|
|
|
switch (IntrinID) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case Intrinsic::trap:
|
|
|
|
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
|
|
|
|
break;
|
2019-06-22 07:38:05 +08:00
|
|
|
case Intrinsic::debugtrap:
|
|
|
|
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
|
|
|
|
break;
|
2020-10-21 17:11:25 +08:00
|
|
|
case Intrinsic::ubsantrap:
|
|
|
|
MIRBuilder.buildInstr(AArch64::BRK, {}, {})
|
2020-12-09 18:13:36 +08:00
|
|
|
.addImm(I.getOperand(1).getImm() | ('U' << 8));
|
2020-10-21 17:11:25 +08:00
|
|
|
break;
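    // E.g. (illustrative) llvm.ubsantrap(1) is emitted as "brk #0x5501",
    // since 'U' is 0x55 and the check kind occupies the low byte.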
|
2019-04-03 03:57:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-01-22 14:38:02 +08:00
|
|
|
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
|
|
|
|
MachineRegisterInfo &MRI) {
|
2019-04-30 04:58:17 +08:00
|
|
|
unsigned IntrinID = findIntrinsicID(I);
|
|
|
|
if (!IntrinID)
|
|
|
|
return false;
|
|
|
|
MachineIRBuilder MIRBuilder(I);
|
|
|
|
|
|
|
|
switch (IntrinID) {
|
|
|
|
default:
|
|
|
|
break;
|
2020-01-16 00:49:22 +08:00
|
|
|
case Intrinsic::aarch64_crypto_sha1h: {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstReg = I.getOperand(0).getReg();
|
|
|
|
Register SrcReg = I.getOperand(2).getReg();
|
2019-04-30 04:58:17 +08:00
|
|
|
|
|
|
|
// FIXME: Should this be an assert?
|
|
|
|
if (MRI.getType(DstReg).getSizeInBits() != 32 ||
|
|
|
|
MRI.getType(SrcReg).getSizeInBits() != 32)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// The operation has to happen on FPRs. Set up some new FPR registers for
|
|
|
|
// the source and destination if they are on GPRs.
|
|
|
|
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
|
|
|
|
SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
|
|
|
|
MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
|
|
|
|
|
|
|
|
// Make sure the copy ends up getting constrained properly.
|
|
|
|
RBI.constrainGenericRegister(I.getOperand(2).getReg(),
|
|
|
|
AArch64::GPR32RegClass, MRI);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
|
|
|
|
DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
|
|
|
|
|
|
|
|
// Actually insert the instruction.
|
|
|
|
auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
|
|
|
|
constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
|
|
|
|
|
|
|
|
// Did we create a new register for the destination?
|
|
|
|
if (DstReg != I.getOperand(0).getReg()) {
|
|
|
|
// Yep. Copy the result of the instruction back into the original
|
|
|
|
// destination.
|
|
|
|
MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
|
|
|
|
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
|
|
|
|
AArch64::GPR32RegClass, MRI);
|
|
|
|
}
|
|
|
|
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2020-01-16 00:49:22 +08:00
|
|
|
case Intrinsic::frameaddress:
|
|
|
|
case Intrinsic::returnaddress: {
|
|
|
|
MachineFunction &MF = *I.getParent()->getParent();
|
|
|
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
|
|
|
|
|
|
unsigned Depth = I.getOperand(2).getImm();
|
|
|
|
Register DstReg = I.getOperand(0).getReg();
|
|
|
|
RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
|
|
|
|
|
|
|
|
if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
|
2020-09-24 23:34:27 +08:00
|
|
|
if (!MFReturnAddr) {
|
|
|
|
// Insert the copy from LR/X30 into the entry block, before it can be
|
|
|
|
// clobbered by anything.
|
|
|
|
MFI.setReturnAddressIsTaken(true);
|
|
|
|
MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
|
|
|
|
AArch64::GPR64RegClass);
|
2020-01-22 14:38:02 +08:00
|
|
|
}
|
2020-07-20 20:26:33 +08:00
|
|
|
|
2020-09-24 23:34:27 +08:00
|
|
|
if (STI.hasV8_3aOps()) {
|
|
|
|
MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
|
|
|
|
} else {
|
|
|
|
MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
|
|
|
|
MIRBuilder.buildInstr(AArch64::XPACLRI);
|
|
|
|
MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
|
|
|
|
}
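      // Note (illustrative): XPACLRI strips the pointer authentication code
      // from LR in place, which is why the saved return address is copied
      // into LR first and the stripped value copied out into DstReg.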
|
2020-07-20 20:26:33 +08:00
|
|
|
|
2020-01-16 00:49:22 +08:00
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
MFI.setFrameAddressIsTaken(true);
|
|
|
|
Register FrameAddr(AArch64::FP);
|
|
|
|
while (Depth--) {
|
|
|
|
Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
|
|
|
|
auto Ldr =
|
|
|
|
MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
|
|
|
|
.addImm(0);
|
|
|
|
constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
|
|
|
|
FrameAddr = NextFrame;
|
|
|
|
}
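    // Illustrative example: llvm.frameaddress(2) performs two chained loads
    // of the saved frame pointer, roughly "ldr x, [fp]; ldr x, [x]".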
|
|
|
|
|
|
|
|
if (IntrinID == Intrinsic::frameaddress)
|
|
|
|
MIRBuilder.buildCopy({DstReg}, {FrameAddr});
|
|
|
|
else {
|
|
|
|
MFI.setReturnAddressIsTaken(true);
|
2020-09-24 23:34:27 +08:00
|
|
|
|
|
|
|
if (STI.hasV8_3aOps()) {
|
|
|
|
Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
|
|
|
|
MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
|
|
|
|
MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
|
|
|
|
} else {
|
2020-10-02 05:15:57 +08:00
|
|
|
MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
|
2020-09-24 23:34:27 +08:00
|
|
|
MIRBuilder.buildInstr(AArch64::XPACLRI);
|
|
|
|
MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
|
|
|
|
}
|
2020-01-16 00:49:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2019-04-30 04:58:17 +08:00
|
|
|
return false;
|
|
|
|
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = 31 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = 63 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
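
// Arithmetic check (illustrative, not in the original source): for a shift
// amount of 3, selectShiftA_32 renders (32 - 3) & 0x1f = 29 and
// selectShiftB_32 renders 31 - 3 = 28; the 64-bit variants do the same with
// 64/63 and a 6-bit mask.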

/// Helper to select an immediate value that can be represented as a 12-bit
/// value shifted left by either 0 or 12. If it is possible to do so, return
/// the immediate and shift value. If not, return None.
///
/// Used by selectArithImmed and selectNegArithImmed.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::select12BitValueWithLeftShift(
    uint64_t Immed) const {
  unsigned ShiftAmt;
  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return None;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  }};
}
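
// Worked example (illustrative, not in the original source):
//   0x0000fff -> imm = 0xfff, shift = 0
//   0x0fff000 -> imm = 0xfff, shift = 12
//   0x0fff001 -> None (needs bits both below and at/above bit 12)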

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  return select12BitValueWithLeftShift(*MaybeImmed);
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
  // We need a register here, because we need to know if we have a 64 or 32
  // bit immediate.
  if (!Root.isReg())
    return None;
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  uint64_t Immed = *MaybeImmed;

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match
  // under those circumstances.
  if (Immed == 0)
    return None;

  // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
  // the root.
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;

  if (Immed & 0xFFFFFFFFFF000000ULL)
    return None;

  Immed &= 0xFFFFFFULL;
  return select12BitValueWithLeftShift(Immed);
}
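
// Worked example (illustrative, not in the original source): a 32-bit
// constant of -16 (0xfffffff0) is negated here to 16 and rendered as
// imm = 16, shift = 0; a negated value that does not fit in the low 24 bits
// is rejected.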

/// Return true if it is worth folding MI into an extended register. That is,
/// if it's safe to pull it into the addressing mode of a load or store as a
/// shift.
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  // Always fold if there is one use, or if we're optimizing for size.
  Register DefReg = MI.getOperand(0).getReg();
  if (MRI.hasOneNonDBGUse(DefReg) ||
      MI.getParent()->getParent()->getFunction().hasMinSize())
    return true;

  // It's better to avoid folding and recomputing shifts when we don't have a
  // fastpath.
  if (!STI.hasLSLFast())
    return false;

  // We have a fastpath, so folding a shift in and potentially computing it
  // many times may be beneficial. Check if this is only used in memory ops.
  // If it is, then we should fold.
  return all_of(MRI.use_nodbg_instructions(DefReg),
                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}

static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
  switch (Type) {
  case AArch64_AM::SXTB:
  case AArch64_AM::SXTH:
  case AArch64_AM::SXTW:
    return true;
  default:
    return false;
  }
}
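
// Note (illustrative): with a single non-debug use (or when optimizing for
// minimum size) the computation is always folded; otherwise folding is only
// done on subtargets with the LSLFast feature, and only when every user of
// the value is a load or store.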
|
|
|
|
|
2019-07-24 00:09:42 +08:00
|
|
|
InstructionSelector::ComplexRendererFns
|
2020-10-02 05:15:57 +08:00
|
|
|
AArch64InstructionSelector::selectExtendedSHL(
|
|
|
|
MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
|
|
|
|
unsigned SizeInBytes, bool WantsExt) const {
|
2020-01-09 02:57:44 +08:00
|
|
|
assert(Base.isReg() && "Expected base to be a register operand");
|
|
|
|
assert(Offset.isReg() && "Expected offset to be a register operand");
|
2019-07-24 00:09:42 +08:00
|
|
|
|
2020-01-09 02:57:44 +08:00
|
|
|
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
|
|
|
|
MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
|
2019-07-25 06:49:42 +08:00
|
|
|
if (!OffsetInst)
|
2019-07-24 00:09:42 +08:00
|
|
|
return None;
|
|
|
|
|
2019-07-25 06:49:42 +08:00
|
|
|
unsigned OffsetOpc = OffsetInst->getOpcode();
|
2020-11-14 15:08:47 +08:00
|
|
|
bool LookedThroughZExt = false;
|
|
|
|
if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
|
|
|
|
// Try to look through a ZEXT.
|
|
|
|
if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
|
|
|
|
return None;
|
|
|
|
|
|
|
|
OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
|
|
|
|
OffsetOpc = OffsetInst->getOpcode();
|
|
|
|
LookedThroughZExt = true;
|
2019-07-24 00:09:42 +08:00
|
|
|
|
2020-11-14 15:08:47 +08:00
|
|
|
if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
|
|
|
|
return None;
|
|
|
|
}
|
2020-01-09 02:57:44 +08:00
|
|
|
// Make sure that the memory op is a valid size.
|
|
|
|
int64_t LegalShiftVal = Log2_32(SizeInBytes);
|
|
|
|
if (LegalShiftVal == 0)
|
|
|
|
return None;
|
2019-07-25 06:49:42 +08:00
|
|
|
if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
|
|
|
|
return None;
|
|
|
|
|
|
|
|
// Now, try to find the specific G_CONSTANT. Start by assuming that the
|
|
|
|
// register we will offset is the LHS, and the register containing the
|
|
|
|
// constant is the RHS.
|
|
|
|
Register OffsetReg = OffsetInst->getOperand(1).getReg();
|
|
|
|
Register ConstantReg = OffsetInst->getOperand(2).getReg();
|
|
|
|
auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
|
|
|
|
if (!ValAndVReg) {
|
|
|
|
// We didn't get a constant on the RHS. If the opcode is a shift, then
|
|
|
|
// we're done.
|
|
|
|
if (OffsetOpc == TargetOpcode::G_SHL)
|
|
|
|
return None;
|
|
|
|
|
|
|
|
// If we have a G_MUL, we can use either register. Try looking at the RHS.
|
|
|
|
std::swap(OffsetReg, ConstantReg);
|
|
|
|
ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
|
|
|
|
if (!ValAndVReg)
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
2019-07-24 00:09:42 +08:00
|
|
|
// The value must fit into 3 bits, and must be positive. Make sure that is
|
|
|
|
// true.
|
2020-11-03 22:50:17 +08:00
|
|
|
int64_t ImmVal = ValAndVReg->Value.getSExtValue();
|
2019-07-25 06:49:42 +08:00
|
|
|
|
|
|
|
// Since we're going to pull this into a shift, the constant value must be
|
|
|
|
// a power of 2. If we got a multiply, then we need to check this.
|
|
|
|
if (OffsetOpc == TargetOpcode::G_MUL) {
|
|
|
|
if (!isPowerOf2_32(ImmVal))
|
|
|
|
return None;
|
|
|
|
|
|
|
|
// Got a power of 2. So, the amount we'll shift is the log base-2 of that.
|
|
|
|
ImmVal = Log2_32(ImmVal);
|
|
|
|
}
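  // E.g. (illustrative) a G_MUL of the offset by 8 feeding an 8-byte access
  // is treated here as a shift by Log2_32(8) = 3, which then has to equal
  // LegalShiftVal below for the fold to apply.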
|
|
|
|
|
2019-07-24 00:09:42 +08:00
|
|
|
if ((ImmVal & 0x7) != ImmVal)
|
|
|
|
return None;
|
|
|
|
|
|
|
|
// We are only allowed to shift by LegalShiftVal. This shift value is built
|
|
|
|
// into the instruction, so we can't just use whatever we want.
|
|
|
|
if (ImmVal != LegalShiftVal)
|
|
|
|
return None;
|
|
|
|
|
2020-01-09 02:57:44 +08:00
|
|
|
unsigned SignExtend = 0;
|
|
|
|
if (WantsExt) {
|
2020-11-14 15:08:47 +08:00
|
|
|
// Check if the offset is defined by an extend, unless we looked through a
|
|
|
|
// G_ZEXT earlier.
|
|
|
|
if (!LookedThroughZExt) {
|
|
|
|
MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
|
|
|
|
auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
|
|
|
|
if (Ext == AArch64_AM::InvalidShiftExtend)
|
|
|
|
return None;
|
|
|
|
|
|
|
|
SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
|
|
|
|
// We only support SXTW for signed extension here.
|
|
|
|
if (SignExtend && Ext != AArch64_AM::SXTW)
|
|
|
|
return None;
|
|
|
|
OffsetReg = ExtInst->getOperand(1).getReg();
|
|
|
|
}
|
2020-01-09 02:57:44 +08:00
|
|
|
|
|
|
|
// Need a 32-bit wide register here.
|
|
|
|
MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
|
2020-12-05 07:51:44 +08:00
|
|
|
OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
|
2020-01-09 02:57:44 +08:00
|
|
|
}
|
|
|
|
|
2019-07-24 00:09:42 +08:00
|
|
|
// We can use the LHS of the GEP as the base, and the LHS of the shift as an
|
|
|
|
// offset. Signify that we are shifting by setting the shift flag to 1.
|
2020-01-09 02:57:44 +08:00
|
|
|
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
|
2019-08-24 04:31:34 +08:00
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
|
|
|
|
[=](MachineInstrBuilder &MIB) {
|
|
|
|
// Need to add both immediates here to make sure that they are both
|
|
|
|
// added to the instruction.
|
2020-01-09 02:57:44 +08:00
|
|
|
MIB.addImm(SignExtend);
|
2019-08-24 04:31:34 +08:00
|
|
|
MIB.addImm(1);
|
|
|
|
}}};
|
2019-07-24 00:09:42 +08:00
|
|
|
}
|
|
|
|
|
2020-01-09 02:57:44 +08:00
|
|
|
/// This is used for computing addresses like this:
|
|
|
|
///
|
|
|
|
/// ldr x1, [x2, x3, lsl #3]
|
|
|
|
///
|
|
|
|
/// Where x2 is the base register, and x3 is an offset register. The shift-left
|
|
|
|
/// is a constant value specific to this load instruction. That is, we'll never
|
|
|
|
/// see anything other than a 3 here (which corresponds to the size of the
|
|
|
|
/// element being loaded.)
|
|
|
|
InstructionSelector::ComplexRendererFns
|
|
|
|
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
|
|
|
|
MachineOperand &Root, unsigned SizeInBytes) const {
|
|
|
|
if (!Root.isReg())
|
|
|
|
return None;
|
|
|
|
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
|
|
|
|
|
|
|
|
// We want to find something like this:
|
|
|
|
//
|
|
|
|
// val = G_CONSTANT LegalShiftVal
|
|
|
|
// shift = G_SHL off_reg val
|
|
|
|
// ptr = G_PTR_ADD base_reg shift
|
|
|
|
// x = G_LOAD ptr
|
|
|
|
//
|
|
|
|
// And fold it into this addressing mode:
|
|
|
|
//
|
|
|
|
// ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
|
|
|
|
|
|
|
|
// Check if we can find the G_PTR_ADD.
|
|
|
|
MachineInstr *PtrAdd =
|
|
|
|
getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
|
|
|
|
if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
|
|
|
|
return None;
|
|
|
|
|
|
|
|
// Now, try to match an opcode which will match our specific offset.
|
|
|
|
// We want a G_SHL or a G_MUL.
|
|
|
|
MachineInstr *OffsetInst =
|
|
|
|
getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
|
|
|
|
return selectExtendedSHL(Root, PtrAdd->getOperand(1),
|
|
|
|
OffsetInst->getOperand(0), SizeInBytes,
|
|
|
|
/*WantsExt=*/false);
|
|
|
|
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
/// calculation, this will do so. Otherwise, it will return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
    return None;

  // If this is used more than once, let's not bother folding.
  // TODO: Check if they are memory ops. If they are, then we can still fold
  // without having to recompute anything.
  if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
    return None;

  // Base is the GEP's LHS, offset is its RHS.
  return {{[=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(1).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(2).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are
             // both added to the instruction.
             MIB.addImm(0);
             MIB.addImm(0);
           }}};
}
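
// Illustrative example (not from the original source): for
//   %ptr = G_PTR_ADD %base, %off
//   %val = G_LOAD %ptr
// with %ptr used only by the load, the base and offset registers are folded
// straight into the register-offset form, e.g. "ldr x0, [base, off]", and the
// two trailing immediate operands (extend, shift) are rendered as 0.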

/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (!Root.isReg())
    return None;
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd)
    return None;

  // Check for immediates which cannot be encoded in the [base + imm]
  // addressing mode, and can't be encoded in an add/sub. If this happens,
  // we'll end up with code like:
  //
  // mov x0, wide
  // add x1, base, x0
  // ldr x2, [x1, 0]
  //
  // In this situation, we can use the [base, xreg] addressing mode to save an
  // add/sub:
  //
  // mov x0, wide
  // ldr x2, [base, x0]
  auto ValAndVReg =
      getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
  if (ValAndVReg) {
    unsigned Scale = Log2_32(SizeInBytes);
    int64_t ImmOff = ValAndVReg->Value.getSExtValue();

    // Skip immediates that can be selected in the load/store addressing
    // mode.
    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
        ImmOff < (0x1000 << Scale))
      return None;

    // Helper lambda to decide whether or not it is preferable to emit an add.
    auto isPreferredADD = [](int64_t ImmOff) {
      // Constants in [0x0, 0xfff] can be encoded in an add.
      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
        return true;

      // Can it be encoded in an add lsl #12?
      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
        return false;

      // It can be encoded in an add lsl #12, but we may not want to. If it is
      // possible to select this as a single movz, then prefer that. A single
      // movz is faster than an add with a shift.
      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
    };

    // If the immediate can be encoded in a single add/sub, then bail out.
    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return None;
  }

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}
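
// Illustrative example (not from the original source): a G_PTR_ADD whose
// constant offset is 0x123456 fits neither the scaled [base + imm] form nor a
// single add/movz, so the constant is materialized once (movz+movk) and the
// access is selected roughly as "ldr x0, [base, xOff]".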
|
|
|
|
|
2020-01-09 02:57:44 +08:00
|
|
|
/// This is used for computing addresses like this:
|
|
|
|
///
|
|
|
|
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
|
|
|
|
///
|
|
|
|
/// Where we have a 64-bit base register, a 32-bit offset register, and an
|
|
|
|
/// extend (which may or may not be signed).
|
|
|
|
InstructionSelector::ComplexRendererFns
|
|
|
|
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
|
|
|
|
unsigned SizeInBytes) const {
|
|
|
|
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
|
|
|
|
|
|
|
|
MachineInstr *PtrAdd =
|
|
|
|
getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
|
|
|
|
if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
|
|
|
|
return None;
|
|
|
|
|
|
|
|
MachineOperand &LHS = PtrAdd->getOperand(1);
|
|
|
|
MachineOperand &RHS = PtrAdd->getOperand(2);
|
|
|
|
MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
|
|
|
|
|
|
|
|
// The first case is the same as selectAddrModeXRO, except we need an extend.
|
|
|
|
// In this case, we try to find a shift and extend, and fold them into the
|
|
|
|
// addressing mode.
|
|
|
|
//
|
|
|
|
// E.g.
|
|
|
|
//
|
|
|
|
// off_reg = G_Z/S/ANYEXT ext_reg
|
|
|
|
// val = G_CONSTANT LegalShiftVal
|
|
|
|
// shift = G_SHL off_reg val
|
|
|
|
// ptr = G_PTR_ADD base_reg shift
|
|
|
|
// x = G_LOAD ptr
|
|
|
|
//
|
|
|
|
// In this case we can get a load like this:
|
|
|
|
//
|
|
|
|
// ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
|
|
|
|
auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
|
|
|
|
SizeInBytes, /*WantsExt=*/true);
|
|
|
|
if (ExtendedShl)
|
|
|
|
return ExtendedShl;
|
|
|
|
|
|
|
|
// There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
|
|
|
|
//
|
|
|
|
// e.g.
|
|
|
|
// ldr something, [base_reg, ext_reg, sxtw]
|
|
|
|
if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
|
|
|
|
return None;
|
|
|
|
|
|
|
|
// Check if this is an extend. We'll get an extend type if it is.
|
|
|
|
AArch64_AM::ShiftExtendType Ext =
|
|
|
|
getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
|
|
|
|
if (Ext == AArch64_AM::InvalidShiftExtend)
|
|
|
|
return None;
|
|
|
|
|
|
|
|
// Need a 32-bit wide register.
|
|
|
|
MachineIRBuilder MIB(*PtrAdd);
|
2020-12-05 07:51:44 +08:00
|
|
|
Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
|
|
|
|
AArch64::GPR32RegClass, MIB);
|
2020-01-09 02:57:44 +08:00
|
|
|
unsigned SignExtend = Ext == AArch64_AM::SXTW;
|
|
|
|
|
|
|
|
// Base is LHS, offset is ExtReg.
|
|
|
|
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
|
|
|
|
[=](MachineInstrBuilder &MIB) {
|
|
|
|
MIB.addImm(SignExtend);
|
|
|
|
MIB.addImm(0);
|
|
|
|
}}};
|
|
|
|
}
|
|
|
|
|
2017-10-16 11:36:29 +08:00
|
|
|
/// Select a "register plus unscaled signed 9-bit immediate" address. This
|
|
|
|
/// should only match when there is an offset that is not valid for a scaled
|
|
|
|
/// immediate addressing mode. The "Size" argument is the size in bytes of the
|
|
|
|
/// memory reference, which is needed here to know what is valid for a scaled
|
|
|
|
/// immediate.
|
2017-10-21 04:55:29 +08:00
|
|
|
InstructionSelector::ComplexRendererFns
|
2017-10-16 11:36:29 +08:00
|
|
|
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
|
|
|
|
unsigned Size) const {
|
|
|
|
MachineRegisterInfo &MRI =
|
|
|
|
Root.getParent()->getParent()->getParent()->getRegInfo();
|
|
|
|
|
|
|
|
if (!Root.isReg())
|
|
|
|
return None;
|
|
|
|
|
|
|
|
if (!isBaseWithConstantOffset(Root, MRI))
|
|
|
|
return None;
|
|
|
|
|
|
|
|
MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
|
|
|
|
if (!RootDef)
|
|
|
|
return None;
|
|
|
|
|
|
|
|
MachineOperand &OffImm = RootDef->getOperand(2);
|
|
|
|
if (!OffImm.isReg())
|
|
|
|
return None;
|
|
|
|
MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
|
|
|
|
if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
|
|
|
|
return None;
|
|
|
|
int64_t RHSC;
|
|
|
|
MachineOperand &RHSOp1 = RHS->getOperand(1);
|
|
|
|
if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
|
|
|
|
return None;
|
|
|
|
RHSC = RHSOp1.getCImm()->getSExtValue();
|
|
|
|
|
|
|
|
// If the offset is valid as a scaled immediate, don't match here.
|
|
|
|
if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
|
|
|
|
return None;
|
|
|
|
if (RHSC >= -256 && RHSC < 256) {
|
|
|
|
MachineOperand &Base = RootDef->getOperand(1);
|
|
|
|
return {{
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.add(Base); },
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
2020-05-30 03:35:36 +08:00
|
|
|
InstructionSelector::ComplexRendererFns
|
2020-10-02 05:15:57 +08:00
|
|
|
AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
|
|
|
|
unsigned Size,
|
|
|
|
MachineRegisterInfo &MRI) const {
|
2020-05-30 03:35:36 +08:00
|
|
|
if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
|
|
|
|
return None;
|
|
|
|
MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
|
|
|
|
if (Adrp.getOpcode() != AArch64::ADRP)
|
|
|
|
return None;
|
|
|
|
|
|
|
|
// TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
|
2020-10-02 05:15:57 +08:00
|
|
|
// TODO: Need to check GV's offset % size if doing offset folding into globals.
|
2020-05-30 03:35:36 +08:00
|
|
|
assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
|
|
|
|
auto GV = Adrp.getOperand(1).getGlobal();
|
|
|
|
if (GV->isThreadLocal())
|
|
|
|
return None;
|
|
|
|
|
|
|
|
auto &MF = *RootDef.getParent()->getParent();
|
2020-05-19 11:38:13 +08:00
|
|
|
if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
|
2020-05-30 03:35:36 +08:00
|
|
|
return None;
|
|
|
|
|
|
|
|
unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
|
|
|
|
MachineIRBuilder MIRBuilder(RootDef);
|
|
|
|
Register AdrpReg = Adrp.getOperand(0).getReg();
|
|
|
|
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
|
|
|
|
[=](MachineInstrBuilder &MIB) {
|
|
|
|
MIB.addGlobalAddress(GV, /* Offset */ 0,
|
|
|
|
OpFlags | AArch64II::MO_PAGEOFF |
|
|
|
|
AArch64II::MO_NC);
|
|
|
|
}}};
|
|
|
|
}
|
|
|
|
|
2017-10-16 11:36:29 +08:00
|
|
|
/// Select a "register plus scaled unsigned 12-bit immediate" address. The
|
|
|
|
/// "Size" argument is the size in bytes of the memory reference, which
|
|
|
|
/// determines the scale.
|
2017-10-21 04:55:29 +08:00
|
|
|
InstructionSelector::ComplexRendererFns
|
2017-10-16 11:36:29 +08:00
|
|
|
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
|
|
|
|
unsigned Size) const {
|
2020-05-30 03:35:36 +08:00
|
|
|
MachineFunction &MF = *Root.getParent()->getParent()->getParent();
|
|
|
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
2017-10-16 11:36:29 +08:00
|
|
|
|
|
|
|
if (!Root.isReg())
|
|
|
|
return None;
|
|
|
|
|
|
|
|
MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
|
|
|
|
if (!RootDef)
|
|
|
|
return None;
|
|
|
|
|
|
|
|
if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
|
|
|
|
return {{
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
|
2020-05-30 03:35:36 +08:00
|
|
|
CodeModel::Model CM = MF.getTarget().getCodeModel();
|
|
|
|
// Check if we can fold in the ADD of small code model ADRP + ADD address.
|
|
|
|
if (CM == CodeModel::Small) {
|
|
|
|
auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
|
|
|
|
if (OpFns)
|
|
|
|
return OpFns;
|
|
|
|
}
|
|
|
|
|
2017-10-16 11:36:29 +08:00
|
|
|
if (isBaseWithConstantOffset(Root, MRI)) {
|
|
|
|
MachineOperand &LHS = RootDef->getOperand(1);
|
|
|
|
MachineOperand &RHS = RootDef->getOperand(2);
|
|
|
|
MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
|
|
|
|
MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
|
|
|
|
if (LHSDef && RHSDef) {
|
|
|
|
int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
|
|
|
|
unsigned Scale = Log2_32(Size);
|
|
|
|
if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
|
|
|
|
if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
|
2017-10-16 13:39:30 +08:00
|
|
|
return {{
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
|
|
|
|
}};
|
|
|
|
|
2017-10-16 11:36:29 +08:00
|
|
|
return {{
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Before falling back to our general case, check if the unscaled
|
|
|
|
// instructions can handle this. If so, that's preferable.
|
|
|
|
if (selectAddrModeUnscaled(Root, Size).hasValue())
|
|
|
|
return None;
|
|
|
|
|
|
|
|
return {{
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.add(Root); },
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
|
2019-08-21 06:18:06 +08:00
|
|
|
/// Given a shift instruction, return the correct shift type for that
|
|
|
|
/// instruction.
|
|
|
|
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
|
|
|
|
// TODO: Handle AArch64_AM::ROR
|
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
default:
|
|
|
|
return AArch64_AM::InvalidShiftExtend;
|
|
|
|
case TargetOpcode::G_SHL:
|
|
|
|
return AArch64_AM::LSL;
|
|
|
|
case TargetOpcode::G_LSHR:
|
|
|
|
return AArch64_AM::LSR;
|
|
|
|
case TargetOpcode::G_ASHR:
|
|
|
|
return AArch64_AM::ASR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Select a "shifted register" operand. If the value is not shifted, set the
|
|
|
|
/// shift operand to a default value of "lsl 0".
|
|
|
|
///
|
|
|
|
/// TODO: Allow shifted register to be rotated in logical instructions.
|
|
|
|
InstructionSelector::ComplexRendererFns
|
|
|
|
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
|
|
|
|
if (!Root.isReg())
|
|
|
|
return None;
|
|
|
|
MachineRegisterInfo &MRI =
|
|
|
|
Root.getParent()->getParent()->getParent()->getRegInfo();
|
|
|
|
|
|
|
|
// Check if the operand is defined by an instruction which corresponds to
|
|
|
|
// a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
|
|
|
|
//
|
|
|
|
// TODO: Handle AArch64_AM::ROR for logical instructions.
|
|
|
|
MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
|
|
|
|
if (!ShiftInst)
|
|
|
|
return None;
|
|
|
|
AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
|
|
|
|
if (ShType == AArch64_AM::InvalidShiftExtend)
|
|
|
|
return None;
|
|
|
|
if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
|
|
|
|
return None;
|
|
|
|
|
|
|
|
// Need an immediate on the RHS.
|
|
|
|
MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
|
|
|
|
auto Immed = getImmedFromMO(ShiftRHS);
|
|
|
|
if (!Immed)
|
|
|
|
return None;
|
|
|
|
|
|
|
|
// We have something that we can fold. Fold in the shift's LHS and RHS into
|
|
|
|
// the instruction.
|
|
|
|
MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
|
|
|
|
Register ShiftReg = ShiftLHS.getReg();
|
|
|
|
|
|
|
|
unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
|
|
|
|
unsigned Val = *Immed & (NumBits - 1);
|
|
|
|
unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
|
|
|
|
|
|
|
|
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
|
|
|
|
[=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
|
|
|
|
}
|
|
|
|
|
2020-01-09 02:57:44 +08:00
|
|
|
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
|
|
|
|
MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
|
2019-08-30 05:53:58 +08:00
|
|
|
unsigned Opc = MI.getOpcode();
|
|
|
|
|
|
|
|
// Handle explicit extend instructions first.
|
|
|
|
if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
|
2020-06-17 13:11:41 +08:00
|
|
|
unsigned Size;
|
|
|
|
if (Opc == TargetOpcode::G_SEXT)
|
|
|
|
Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
|
|
|
|
else
|
|
|
|
Size = MI.getOperand(2).getImm();
|
2019-08-30 05:53:58 +08:00
|
|
|
assert(Size != 64 && "Extend from 64 bits?");
|
|
|
|
switch (Size) {
|
|
|
|
case 8:
|
|
|
|
return AArch64_AM::SXTB;
|
|
|
|
case 16:
|
|
|
|
return AArch64_AM::SXTH;
|
|
|
|
case 32:
|
|
|
|
return AArch64_AM::SXTW;
|
|
|
|
default:
|
|
|
|
return AArch64_AM::InvalidShiftExtend;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
|
|
|
|
unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
|
|
|
|
assert(Size != 64 && "Extend from 64 bits?");
|
|
|
|
switch (Size) {
|
|
|
|
case 8:
|
|
|
|
return AArch64_AM::UXTB;
|
|
|
|
case 16:
|
|
|
|
return AArch64_AM::UXTH;
|
|
|
|
case 32:
|
|
|
|
return AArch64_AM::UXTW;
|
|
|
|
default:
|
|
|
|
return AArch64_AM::InvalidShiftExtend;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Don't have an explicit extend. Try to handle a G_AND with a constant mask
|
|
|
|
// on the RHS.
|
|
|
|
if (Opc != TargetOpcode::G_AND)
|
|
|
|
return AArch64_AM::InvalidShiftExtend;
|
|
|
|
|
|
|
|
Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
|
|
|
|
if (!MaybeAndMask)
|
|
|
|
return AArch64_AM::InvalidShiftExtend;
|
|
|
|
uint64_t AndMask = *MaybeAndMask;
|
|
|
|
switch (AndMask) {
|
|
|
|
default:
|
|
|
|
return AArch64_AM::InvalidShiftExtend;
|
|
|
|
case 0xFF:
|
2020-01-09 02:57:44 +08:00
|
|
|
return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
|
2019-08-30 05:53:58 +08:00
|
|
|
case 0xFFFF:
|
2020-01-09 02:57:44 +08:00
|
|
|
return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
|
2019-08-30 05:53:58 +08:00
|
|
|
case 0xFFFFFFFF:
|
|
|
|
return AArch64_AM::UXTW;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-05 07:51:44 +08:00
|
|
|
Register AArch64InstructionSelector::moveScalarRegClass(
|
|
|
|
Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
|
2019-08-30 05:53:58 +08:00
|
|
|
MachineRegisterInfo &MRI = *MIB.getMRI();
|
2020-12-05 07:51:44 +08:00
|
|
|
auto Ty = MRI.getType(Reg);
|
|
|
|
assert(!Ty.isVector() && "Expected scalars only!");
|
|
|
|
if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
|
|
|
|
return Reg;
|
2019-08-30 05:53:58 +08:00
|
|
|
|
2020-12-05 07:51:44 +08:00
|
|
|
// Create a copy and immediately select it.
|
|
|
|
// FIXME: We should have an emitCopy function?
|
|
|
|
auto Copy = MIB.buildCopy({&RC}, {Reg});
|
2019-08-30 05:53:58 +08:00
|
|
|
selectCopy(*Copy, TII, MRI, TRI, RBI);
|
|
|
|
return Copy.getReg(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Select an "extended register" operand. This operand folds in an extend
|
|
|
|
/// followed by an optional left shift.
|
|
|
|
InstructionSelector::ComplexRendererFns
|
|
|
|
AArch64InstructionSelector::selectArithExtendedRegister(
|
|
|
|
MachineOperand &Root) const {
|
|
|
|
if (!Root.isReg())
|
|
|
|
return None;
|
|
|
|
MachineRegisterInfo &MRI =
|
|
|
|
Root.getParent()->getParent()->getParent()->getRegInfo();
|
|
|
|
|
|
|
|
uint64_t ShiftVal = 0;
|
|
|
|
Register ExtReg;
|
|
|
|
AArch64_AM::ShiftExtendType Ext;
|
|
|
|
MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
|
|
|
|
if (!RootDef)
|
|
|
|
return None;
|
|
|
|
|
|
|
|
if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
|
|
|
|
return None;
|
|
|
|
|
|
|
|
// Check if we can fold a shift and an extend.
|
|
|
|
if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
|
|
|
|
// Look for a constant on the RHS of the shift.
|
|
|
|
MachineOperand &RHS = RootDef->getOperand(2);
|
|
|
|
Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
|
|
|
|
if (!MaybeShiftVal)
|
|
|
|
return None;
|
|
|
|
ShiftVal = *MaybeShiftVal;
|
|
|
|
if (ShiftVal > 4)
|
|
|
|
return None;
|
|
|
|
// Look for a valid extend instruction on the LHS of the shift.
|
|
|
|
MachineOperand &LHS = RootDef->getOperand(1);
|
|
|
|
MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
|
|
|
|
if (!ExtDef)
|
|
|
|
return None;
|
|
|
|
Ext = getExtendTypeForInst(*ExtDef, MRI);
|
|
|
|
if (Ext == AArch64_AM::InvalidShiftExtend)
|
|
|
|
return None;
|
|
|
|
ExtReg = ExtDef->getOperand(1).getReg();
|
|
|
|
} else {
|
|
|
|
// Didn't get a shift. Try just folding an extend.
|
|
|
|
Ext = getExtendTypeForInst(*RootDef, MRI);
|
|
|
|
if (Ext == AArch64_AM::InvalidShiftExtend)
|
|
|
|
return None;
|
|
|
|
ExtReg = RootDef->getOperand(1).getReg();
|
|
|
|
|
|
|
|
// If we have a 32 bit instruction which zeroes out the high half of a
|
|
|
|
// register, we get an implicit zero extend for free. Check if we have one.
|
|
|
|
// FIXME: We actually emit the extend right now even though we don't have
|
|
|
|
// to.
|
|
|
|
if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
|
|
|
|
MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
|
|
|
|
if (ExtInst && isDef32(*ExtInst))
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// We require a GPR32 here. Narrow the ExtReg if needed using a subregister
|
|
|
|
// copy.
|
|
|
|
MachineIRBuilder MIB(*RootDef);
|
2020-12-05 07:51:44 +08:00
|
|
|
ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
|
2019-08-30 05:53:58 +08:00
|
|
|
|
|
|
|
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
|
|
|
|
[=](MachineInstrBuilder &MIB) {
|
|
|
|
MIB.addImm(getArithExtendImm(Ext, ShiftVal));
|
|
|
|
}}};
|
|
|
|
}
|
|
|
|
|
2018-01-17 02:44:05 +08:00
|
|
|
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
|
TableGen/GlobalISel: Add way for SDNodeXForm to work on timm
The current implementation assumes there is an instruction associated
with the transform, but this is not the case for
timm/TargetConstant/immarg values. These transforms should directly
operate on a specific MachineOperand in the source
instruction. TableGen would assert if you attempted to define an
equivalent GISDNodeXFormEquiv using timm when it failed to find the
instruction matcher.
Specially recognize SDNodeXForms on timm, and pass the operand index
to the render function.
Ideally this would be a separate render function type that looks like
void renderFoo(MachineInstrBuilder, const MachineOperand&), but this
proved to be somewhat mechanically painful. Add an optional operand
index which will only be passed if the transform should only look at
the one source operand.
Theoretically it would also be possible to only ever pass the
MachineOperand, and the existing renderers would check the parent. I
think that would be somewhat ugly for the standard usage which may
want to inspect other operands, and I also think MachineOperand should
eventually not carry a pointer to the parent instruction.
Use it in one sample pattern. This isn't a great example, since the
transform exists to satisfy DAG type constraints. This could also be
avoided by just changing the MachineInstr's arbitrary choice of
operand type from i16 to i32. Other patterns have nontrivial uses, but
this serves as the simplest example.
One flaw this still has is if you try to use an SDNodeXForm defined
for imm, but the source pattern uses timm, you still see the "Failed
to lookup instruction" assert. However, there is now a way to avoid
it.
2020-01-09 01:53:15 +08:00
|
|
|
const MachineInstr &MI,
|
|
|
|
int OpIdx) const {
|
2018-01-17 02:44:05 +08:00
|
|
|
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
|
TableGen/GlobalISel: Add way for SDNodeXForm to work on timm
The current implementation assumes there is an instruction associated
with the transform, but this is not the case for
timm/TargetConstant/immarg values. These transforms should directly
operate on a specific MachineOperand in the source
instruction. TableGen would assert if you attempted to define an
equivalent GISDNodeXFormEquiv using timm when it failed to find the
instruction matcher.
Specially recognize SDNodeXForms on timm, and pass the operand index
to the render function.
Ideally this would be a separate render function type that looks like
void renderFoo(MachineInstrBuilder, const MachineOperand&), but this
proved to be somewhat mechanically painful. Add an optional operand
index which will only be passed if the transform should only look at
the one source operand.
Theoretically it would also be possible to only ever pass the
MachineOperand, and the existing renderers would check the parent. I
think that would be somewhat ugly for the standard usage which may
want to inspect other operands, and I also think MachineOperand should
eventually not carry a pointer to the parent instruction.
Use it in one sample pattern. This isn't a great example, since the
transform exists to satisfy DAG type constraints. This could also be
avoided by just changing the MachineInstr's arbitrary choice of
operand type from i16 to i32. Other patterns have nontrivial uses, but
this serves as the simplest example.
One flaw this still has is if you try to use an SDNodeXForm defined
for imm, but the source pattern uses timm, you still see the "Failed
to lookup instruction" assert. However, there is now a way to avoid
it.
2020-01-09 01:53:15 +08:00
|
|
|
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
|
|
|
|
"Expected G_CONSTANT");
|
2020-11-03 22:50:17 +08:00
|
|
|
Optional<int64_t> CstVal =
|
|
|
|
getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
|
2018-01-17 02:44:05 +08:00
|
|
|
assert(CstVal && "Expected constant value");
|
|
|
|
MIB.addImm(CstVal.getValue());
|
|
|
|
}
|
|
|
|
|
2020-10-02 05:15:57 +08:00
|
|
|
void AArch64InstructionSelector::renderLogicalImm32(
|
|
|
|
MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
|
TableGen/GlobalISel: Add way for SDNodeXForm to work on timm
The current implementation assumes there is an instruction associated
with the transform, but this is not the case for
timm/TargetConstant/immarg values. These transforms should directly
operate on a specific MachineOperand in the source
instruction. TableGen would assert if you attempted to define an
equivalent GISDNodeXFormEquiv using timm when it failed to find the
instruction matcher.
Specially recognize SDNodeXForms on timm, and pass the operand index
to the render function.
Ideally this would be a separate render function type that looks like
void renderFoo(MachineInstrBuilder, const MachineOperand&), but this
proved to be somewhat mechanically painful. Add an optional operand
index which will only be passed if the transform should only look at
the one source operand.
Theoretically it would also be possible to only ever pass the
MachineOperand, and the existing renderers would check the parent. I
think that would be somewhat ugly for the standard usage which may
want to inspect other operands, and I also think MachineOperand should
eventually not carry a pointer to the parent instruction.
Use it in one sample pattern. This isn't a great example, since the
transform exists to satisfy DAG type constraints. This could also be
avoided by just changing the MachineInstr's arbitrary choice of
operand type from i16 to i32. Other patterns have nontrivial uses, but
this serves as the simplest example.
One flaw this still has is if you try to use an SDNodeXForm defined
for imm, but the source pattern uses timm, you still see the "Failed
to lookup instruction" assert. However, there is now a way to avoid
it.
2020-01-09 01:53:15 +08:00
|
|
|
assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
|
|
|
|
"Expected G_CONSTANT");
|
2019-08-21 06:31:25 +08:00
|
|
|
uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
|
|
|
|
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
|
|
|
|
MIB.addImm(Enc);
|
|
|
|
}
|
|
|
|
|
2020-10-02 05:15:57 +08:00
|
|
|
void AArch64InstructionSelector::renderLogicalImm64(
|
|
|
|
MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
|
2020-01-09 01:53:15 +08:00
|
|
|
assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
|
|
|
|
"Expected G_CONSTANT");
|
2019-08-21 06:31:25 +08:00
|
|
|
uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
|
|
|
|
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
|
|
|
|
MIB.addImm(Enc);
|
|
|
|
}
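// Sketch (hypothetical helper, not part of the upstream file): the two
// renderers above rely on the selected pattern having already proven the
// constant encodable; encodeLogicalImmediate will assert otherwise. A more
// defensive caller could guard the call, assuming the isLogicalImmediate
// predicate from AArch64AddressingModes.h:
static bool tryAddLogicalImm(MachineInstrBuilder &MIB, uint64_t Val,
                             unsigned RegSize) {
  if (!AArch64_AM::isLogicalImmediate(Val, RegSize))
    return false;
  MIB.addImm(AArch64_AM::encodeLogicalImmediate(Val, RegSize));
  return true;
}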
|
|
|
|
|
2019-08-30 00:16:38 +08:00
|
|
|
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
|
|
|
|
const MachineInstr &MI, unsigned NumBytes) const {
|
|
|
|
if (!MI.mayLoadOrStore())
|
|
|
|
return false;
|
|
|
|
assert(MI.hasOneMemOperand() &&
|
|
|
|
"Expected load/store to have only one mem op!");
|
|
|
|
return (*MI.memoperands_begin())->getSize() == NumBytes;
|
|
|
|
}
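// Usage sketch (illustrative only, values are hypothetical): for a load such
// as
//   %val:gpr(s32) = G_LOAD %addr(p0) :: (load 4)
// isLoadStoreOfNumBytes(MI, 4) returns true, while querying the same
// instruction with NumBytes == 8 returns false.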
|
|
|
|
|
2019-08-30 05:53:58 +08:00
|
|
|
bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
|
|
|
|
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
|
|
|
|
if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Only return true if we know the operation will zero-out the high half of
|
|
|
|
// the 64-bit register. Truncates can be subregister copies, which don't
|
|
|
|
// zero out the high bits. Copies and other copy-like instructions can be
|
|
|
|
// fed by truncates, or could be lowered as subregister copies.
|
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
default:
|
|
|
|
return true;
|
|
|
|
case TargetOpcode::COPY:
|
|
|
|
case TargetOpcode::G_BITCAST:
|
|
|
|
case TargetOpcode::G_TRUNC:
|
|
|
|
case TargetOpcode::G_PHI:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
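// Usage sketch (illustrative, not taken from this file): isDef32 guards folds
// that treat a 32-bit def as already zeroing the full 64-bit register, e.g.
// turning
//   %w:gpr(s32) = G_ADD %a, %b
//   %x:gpr(s64) = G_ZEXT %w(s32)
// into a SUBREG_TO_REG of %w, which is only sound when the defining
// instruction is known to clear the upper 32 bits.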
|
|
|
|
|
2020-10-02 05:15:57 +08:00
|
|
|
|
[AArch64][GlobalISel] Fixup <32b heterogeneous regbanks of G_PHIs just before selection.
Since all types <32b on gpr end up being assigned gpr32 regclasses, we can end
up with PHIs here which try to select between a gpr32 and an fpr16. Ideally RBS
shouldn't be selecting heterogeneous regbanks for operands if possible, but we
still need to be able to deal with it here.
To fix this, if we have a gpr-bank operand < 32b in size and at least one other
operand is on the fpr bank, then we add cross-bank copies to homogenize the
operand banks. For simplicity the bank that we choose to settle on is whatever
bank the def operand has. For example:
%endbb:
%dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
=>
%bb2:
...
%in2_copy:gpr(s16) = COPY %in2:fpr(s16)
...
%endbb:
%dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
Differential Revision: https://reviews.llvm.org/D75086
2020-02-25 06:27:32 +08:00
|
|
|
// Perform fixups on the given PHI instruction's operands to force them all
|
|
|
|
// to use the same regbank as the destination.
|
|
|
|
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
|
|
|
|
const AArch64RegisterBankInfo &RBI) {
|
|
|
|
assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
|
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
|
|
|
const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
|
|
|
|
assert(DstRB && "Expected PHI dst to have regbank assigned");
|
|
|
|
MachineIRBuilder MIB(MI);
|
|
|
|
|
|
|
|
// Go through each operand and ensure it has the same regbank.
|
|
|
|
for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
|
|
|
|
MachineOperand &MO = MI.getOperand(OpIdx);
|
|
|
|
if (!MO.isReg())
|
|
|
|
continue;
|
|
|
|
Register OpReg = MO.getReg();
|
|
|
|
const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
|
|
|
|
if (RB != DstRB) {
|
|
|
|
// Insert a cross-bank copy.
|
|
|
|
auto *OpDef = MRI.getVRegDef(OpReg);
|
|
|
|
const LLT &Ty = MRI.getType(OpReg);
|
|
|
|
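// Place the copy immediately after the defining instruction so the new
// vreg is available everywhere the original value was, in particular on
// the predecessor edge this PHI operand arrives from.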
MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
|
|
|
|
auto Copy = MIB.buildCopy(Ty, OpReg);
|
|
|
|
MRI.setRegBank(Copy.getReg(0), *DstRB);
|
|
|
|
MO.setReg(Copy.getReg(0));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
|
|
|
|
// We're looking for PHIs; build a list so we don't invalidate iterators.
|
|
|
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
|
|
SmallVector<MachineInstr *, 32> Phis;
|
|
|
|
for (auto &BB : MF) {
|
|
|
|
for (auto &MI : BB) {
|
|
|
|
if (MI.getOpcode() == TargetOpcode::G_PHI)
|
|
|
|
Phis.emplace_back(&MI);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto *MI : Phis) {
|
|
|
|
// We need to do some work here if the operand types are < 32 bit and they
|
|
|
|
// are split across fpr/gpr banks. Since all types <32b on gpr
|
|
|
|
// end up being assigned gpr32 regclasses, we can end up with PHIs here
|
|
|
|
// which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
|
|
|
|
// be selecting heterogeneous regbanks for operands if possible, but we
|
|
|
|
// still need to be able to deal with it here.
|
|
|
|
//
|
|
|
|
// To fix this, if we have a gpr-bank operand < 32b in size and at least
|
|
|
|
// one other operand is on the fpr bank, then we add cross-bank copies
|
|
|
|
// to homogenize the operand banks. For simplicity the bank that we choose
|
|
|
|
// to settle on is whatever bank the def operand has. For example:
|
|
|
|
//
|
|
|
|
// %endbb:
|
|
|
|
// %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
|
|
|
|
// =>
|
|
|
|
// %bb2:
|
|
|
|
// ...
|
|
|
|
// %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
|
|
|
|
// ...
|
|
|
|
// %endbb:
|
|
|
|
// %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
|
|
|
|
bool HasGPROp = false, HasFPROp = false;
|
|
|
|
for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
|
|
|
|
const auto &MO = MI->getOperand(OpIdx);
|
|
|
|
if (!MO.isReg())
|
|
|
|
continue;
|
|
|
|
const LLT &Ty = MRI.getType(MO.getReg());
|
|
|
|
if (!Ty.isValid() || !Ty.isScalar())
|
|
|
|
break;
|
|
|
|
if (Ty.getSizeInBits() >= 32)
|
|
|
|
break;
|
|
|
|
const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
|
|
|
|
// If for some reason we don't have a regbank yet, don't try anything.
|
|
|
|
if (!RB)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (RB->getID() == AArch64::GPRRegBankID)
|
|
|
|
HasGPROp = true;
|
|
|
|
else
|
|
|
|
HasFPROp = true;
|
|
|
|
}
|
|
|
|
// We have heterogeneous regbanks, so fix them up.
|
|
|
|
if (HasGPROp && HasFPROp)
|
|
|
|
fixupPHIOpBanks(*MI, MRI, RBI);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-06 17:49:34 +08:00
|
|
|
namespace llvm {
|
|
|
|
InstructionSelector *
|
|
|
|
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
|
|
|
|
AArch64Subtarget &Subtarget,
|
|
|
|
AArch64RegisterBankInfo &RBI) {
|
|
|
|
return new AArch64InstructionSelector(TM, Subtarget, RBI);
|
|
|
|
}
|
2020-10-02 05:15:57 +08:00
|
|
|
}
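// Usage sketch (an assumption about the surrounding target code, which is
// not part of this file): the factory above is typically called once while
// the subtarget sets up its GlobalISel components, along the lines of
//
//   InstSelector.reset(createAArch64InstructionSelector(TM, Subtarget, *RBI));
//
// where TM, Subtarget and RBI are the target machine, subtarget and register
// bank info owned by that code.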
|