//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  bool emitScalarToVector(unsigned &Dst, const LLT DstTy,
                          const TargetRegisterClass *DstRC, unsigned Scalar,
                          MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned char OpFlags) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}
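
// Illustration of the mapping above (not exhaustive): an s32 value on the GPR
// bank is constrained to GPR32 (or GPR32all when GetAllRegSet is requested),
// while an s64 on the FPR bank picks FPR64. Sizes with no matching class
// return nullptr, and callers such as the PHI handling in select() bail out
// in that case.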

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}
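
// Unlike getRegClassForTypeOnBank above, this helper keys purely off a bit
// width, e.g. (FPR bank, 16 bits) -> FPR16 and (GPR bank, 64 bits) -> GPR64.
// selectCopy below uses it to pick the narrowest class that still covers the
// value being copied.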

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}
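
// For example, FPR64 maps to dsub and GPR32 maps to sub_32; selectCopy feeds
// the returned index to selectSubregisterCopy to form the CopyReg:SubReg
// operand of the intermediate copy.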

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - the operands are not all on the same register bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}
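
// Concretely, a generic binary op is rejected here if, say, one operand has
// been assigned to the GPR bank and another to the FPR bank, or if any
// operand is a physical register; a true return means "don't try to select
// this instruction".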

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}
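
// A few sample rows from the table above: a 32-bit G_SHL on the GPR bank
// selects LSLVWr, a 64-bit G_GEP selects ADDXrr, and a 64-bit G_FADD on the
// FPR bank selects FADDDrr. Any combination not listed falls through and
// returns the generic opcode unchanged, which callers treat as unsupported.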

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}
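
// For example, a 4-byte G_LOAD whose value lives on the GPR bank selects
// LDRWui, while an 8-byte G_STORE on the FPR bank selects STRDui; both use
// the base + scaled unsigned-immediate addressing mode.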

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, const TargetInstrInfo &TII,
                                  MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, unsigned SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  unsigned CopyReg = MRI.createVirtualRegister(From);
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::COPY), CopyReg)
      .addUse(SrcReg);
  unsigned SubRegCopy = MRI.createVirtualRegister(To);
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
          SubRegCopy)
      .addUse(CopyReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy);

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *DstRC = getMinClassForRegBank(
      DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple of helpers below, for making sure that the copy we produce is
  // valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert(
        (I.isCopy() ||
         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
        "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    const TargetRegisterClass *SrcRC = getMinClassForRegBank(
        SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same bank
        // as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, TII, MRI, RBI, SrcReg, SubregRC, DstRC,
                              SubReg);
        return CheckCopy();
      } else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
                 SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}
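
// Rough walk-through of the cross-bank path above: copying a 64-bit GPR
// value into a 32-bit FPR destination first copies onto the destination bank
// at the source width (FPR64 here) and then takes the ssub subregister into
// FPR32, while the FPR16 -> GPR32 direction is widened through a
// SUBREG_TO_REG into FPR32 instead. This is only a sketch of the code above,
// not a complete enumeration of the cases it handles.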

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}
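
// Sample entries from the table above: G_SITOFP from s32 to s32 selects
// SCVTFUWSri, and G_FPTOUI from a 64-bit FP source to a 32-bit integer
// selects FCVTZUUWDr. Vector conversions are rejected up front and fall back
// to returning the generic opcode.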

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}
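
// Note that FCMP_ONE and FCMP_UEQ have no single AArch64 condition code, so
// they are expressed as a pair (MI then GT, and EQ then VS respectively) via
// CondCode2; callers are presumably expected to emit a second conditional
// check whenever CondCode2 != AL.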

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const unsigned CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  unsigned LHS = CCMI->getOperand(2).getReg();
  unsigned RHS = CCMI->getOperand(3).getReg();
  if (!getConstantVRegVal(RHS, MRI))
    std::swap(RHS, LHS);

  const auto RHSImm = getConstantVRegVal(RHS, MRI);
  if (!RHSImm || *RHSImm != 0)
    return false;

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}
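
// Net effect (sketch): a pattern such as
//   %c:gpr(s1) = G_ICMP intpred(eq), %x(s64), 0
//   G_BRCOND %c, %bb.target
// collapses into a single CBZX %x, %bb.target, with CBZW/CBNZW/CBNZX chosen
// from the predicate and the width of the compared value.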

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  unsigned ListReg = I.getOperand(0).getReg();

  unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
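
// In MIR terms, G_VASTART on Darwin expands into roughly
//   %addr:gpr64 = ADDXri %stack.<vararg index>, 0, 0
//   STRXui %addr, %list, 0
// i.e. the address of the first vararg stack slot is stored into the va_list
// object the instruction points at. (Sketch only; the frame index comes from
// AArch64FunctionInfo::getVarArgsStackIndex().)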

void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned char OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
                       unsigned ForceDstReg) {
    unsigned DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  unsigned DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}
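
// The emitted sequence for a 64-bit symbol address looks roughly like
//   %0 = MOVZXi @sym (g0, nc), 0
//   %1 = MOVKXi %0, @sym (g1, nc), 16
//   %2 = MOVKXi %1, @sym (g2, nc), 32
//   %dst = MOVKXi %2, @sym (g3), 48
// i.e. the address is built 16 bits at a time. This is a sketch of the MIR,
// not its exact printed form.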

bool AArch64InstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires same handling as PHI
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const unsigned DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const TargetRegisterClass *DefRC = nullptr;
      if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
        DefRC = TRI.getRegClass(DefReg);
      } else {
        const RegClassOrRegBank &RegClassOrBank =
            MRI.getRegClassOrRegBank(DefReg);

        DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
        if (!DefRC) {
          if (!DefTy.isValid()) {
            LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
            return false;
          }
          const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
          DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
          if (!DefRC) {
            LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
            return false;
          }
        }
      }
      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }
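
  // Only generic opcodes remain at this point: try the TableGen-erated
  // matcher first (selectImpl below), and fall back to the hand-written
  // selection code in the switch that follows.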
|
|
|
|
|
[globalisel][tablegen] Generate rule coverage and use it to identify untested rules
Summary:
This patch adds a LLVM_ENABLE_GISEL_COV which, like LLVM_ENABLE_DAGISEL_COV,
causes TableGen to instrument the generated table to collect rule coverage
information. However, LLVM_ENABLE_GISEL_COV goes a bit further than
LLVM_ENABLE_DAGISEL_COV. The information is written to files
(${CMAKE_BINARY_DIR}/gisel-coverage-* by default). These files can then be
concatenated into ${LLVM_GISEL_COV_PREFIX}-all after which TableGen will
read this information and use it to emit warnings about untested rules.
This technique could also be used by SelectionDAG and can be further
extended to detect hot rules and give them priority over colder rules.
Usage:
* Enable LLVM_ENABLE_GISEL_COV in CMake
* Build the compiler and run some tests
* cat gisel-coverage-[0-9]* > gisel-coverage-all
* Delete lib/Target/*/*GenGlobalISel.inc*
* Build the compiler
Known issues:
* ${LLVM_GISEL_COV_PREFIX}-all must be generated as a manual
step due to a lack of a portable 'cat' command. It should be the
concatenation of all ${LLVM_GISEL_COV_PREFIX}-[0-9]* files.
* There's no mechanism to discard coverage information when the ruleset
changes
Depends on D39742
Reviewers: ab, qcolombet, t.p.northover, aditya_nandakumar, rovka
Reviewed By: rovka
Subscribers: vsk, arsenm, nhaehnle, mgorny, kristof.beyls, javed.absar, igorb, llvm-commits
Differential Revision: https://reviews.llvm.org/D39747
llvm-svn: 318356
2017-11-16 08:46:35 +08:00
|
|
|
if (selectImpl(I, CoverageInfo))
|
2016-12-22 07:26:20 +08:00
|
|
|
return true;
|
|
|
|
|
2016-09-15 18:09:59 +08:00
|
|
|
LLT Ty =
|
|
|
|
I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
|
2016-07-27 22:31:55 +08:00
|
|
|
|
2016-10-13 06:49:11 +08:00
|
|
|
switch (Opcode) {
|
2016-10-13 06:49:01 +08:00
|
|
|
case TargetOpcode::G_BRCOND: {
|
|
|
|
if (Ty.getSizeInBits() > 32) {
|
|
|
|
// We shouldn't need this on AArch64, but it would be implemented as an
|
|
|
|
// EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
|
|
|
|
// bit being tested is < 32.
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
|
|
|
|
<< ", expected at most 32-bits");
|
2016-10-13 06:49:01 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
const unsigned CondReg = I.getOperand(0).getReg();
|
|
|
|
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
|
|
|
|
|
Introduce control flow speculation tracking pass for AArch64
The pass implements tracking of control flow miss-speculation into a "taint"
register. That taint register can then be used to mask off registers with
sensitive data when executing under miss-speculation, a.k.a. "transient
execution".
This pass is aimed at mitigating against SpectreV1-style vulnarabilities.
At the moment, it implements the tracking of miss-speculation of control
flow into a taint register, but doesn't implement a mechanism yet to then
use that taint register to mask off vulnerable data in registers (something
for a follow-on improvement). Possible strategies to mask out vulnerable
data that can be implemented on top of this are:
- speculative load hardening to automatically mask of data loaded
in registers.
- using intrinsics to mask of data in registers as indicated by the
programmer (see https://lwn.net/Articles/759423/).
For AArch64, the following implementation choices are made.
Some of these are different than the implementation choices made in
the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
the instruction set characteristics result in different trade-offs.
- The speculation hardening is done after register allocation. With a
relative abundance of registers, one register is reserved (X16) to be
the taint register. X16 is expected to not clash with other register
reservation mechanisms with very high probability because:
. The AArch64 ABI doesn't guarantee X16 to be retained across any call.
. The only way to request X16 to be used as a programmer is through
inline assembly. In the rare case a function explicitly demands to
use X16/W16, this pass falls back to hardening against speculation
by inserting a DSB SYS/ISB barrier pair which will prevent control
flow speculation.
- It is easy to insert mask operations at this late stage as we have
mask operations available that don't set flags.
- The taint variable contains all-ones when no miss-speculation is detected,
and contains all-zeros when miss-speculation is detected. Therefore, when
masking, an AND instruction (which only changes the register to be masked,
no other side effects) can easily be inserted anywhere that's needed.
- The tracking of miss-speculation is done by using a data-flow conditional
select instruction (CSEL) to evaluate the flags that were also used to
make conditional branch direction decisions. Speculation of the CSEL
instruction can be limited with a CSDB instruction - so the combination of
CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
aren't speculated. When conditional branch direction gets miss-speculated,
the semantics of the inserted CSEL instruction is such that the taint
register will contain all zero bits.
One key requirement for this to work is that the conditional branch is
followed by an execution of the CSEL instruction, where the CSEL
instruction needs to use the same flags status as the conditional branch.
This means that the conditional branches must not be implemented as one
of the AArch64 conditional branches that do not use the flags as input
(CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
selectors to not produce these instructions when speculation hardening
is enabled. This pass will assert if it does encounter such an instruction.
- On function call boundaries, the miss-speculation state is transferred from
the taint register X16 to be encoded in the SP register as value 0.
Future extensions/improvements could be:
- Implement this functionality using full speculation barriers, akin to the
x86-slh-lfence option. This may be more useful for the intrinsics-based
approach than for the SLH approach to masking.
Note that this pass already inserts the full speculation barriers if the
function for some niche reason makes use of X16/W16.
- no indirect branch misprediction gets protected/instrumented; but this
could be done for some indirect branches, such as switch jump tables.
Differential Revision: https://reviews.llvm.org/D54896
llvm-svn: 349456
2018-12-18 16:50:02 +08:00
|
|
|
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
|
|
|
|
// instructions will not be produced, as they are conditional branch
|
|
|
|
// instructions that do not set flags.
|
|
|
|
bool ProduceNonFlagSettingCondBr =
|
|
|
|
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
|
|
|
|
if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
|
2017-03-28 00:35:31 +08:00
|
|
|
return true;
|
|
|
|
|
2018-12-18 16:50:02 +08:00
|
|
|
if (ProduceNonFlagSettingCondBr) {
|
|
|
|
auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
|
|
|
|
.addUse(CondReg)
|
|
|
|
.addImm(/*bit offset=*/0)
|
|
|
|
.addMBB(DestMBB);
|
2016-10-13 06:49:01 +08:00
|
|
|
|
2018-12-18 16:50:02 +08:00
|
|
|
I.eraseFromParent();
|
|
|
|
return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
|
|
|
|
} else {
|
|
|
|
auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
|
|
|
|
.addDef(AArch64::WZR)
|
|
|
|
.addUse(CondReg)
|
|
|
|
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
|
|
|
|
constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
|
|
|
|
auto Bcc =
|
|
|
|
BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
|
|
|
|
.addImm(AArch64CC::NE)
|
|
|
|
.addMBB(DestMBB);
|
|
|
|
|
|
|
|
I.eraseFromParent();
|
|
|
|
return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
|
|
|
|
}
|
2016-10-13 06:49:01 +08:00
|
|
|
}
|
|
|
|
|
2017-01-30 17:13:18 +08:00
|
|
|
case TargetOpcode::G_BRINDIRECT: {
|
|
|
|
I.setDesc(TII.get(AArch64::BR));
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2016-10-19 03:47:57 +08:00
|
|
|
case TargetOpcode::G_FCONSTANT:
|
2016-10-11 05:49:42 +08:00
|
|
|
case TargetOpcode::G_CONSTANT: {
|
2016-10-19 03:47:57 +08:00
|
|
|
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
|
|
|
|
|
|
|
|
const LLT s32 = LLT::scalar(32);
|
|
|
|
const LLT s64 = LLT::scalar(64);
|
|
|
|
const LLT p0 = LLT::pointer(0, 64);
|
|
|
|
|
|
|
|
const unsigned DefReg = I.getOperand(0).getReg();
|
|
|
|
const LLT DefTy = MRI.getType(DefReg);
|
|
|
|
const unsigned DefSize = DefTy.getSizeInBits();
|
|
|
|
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
|
|
|
|
|
|
|
|
// FIXME: Redundant check, but even less readable when factored out.
|
|
|
|
if (isFP) {
|
|
|
|
if (Ty != s32 && Ty != s64) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
|
|
|
|
<< " constant, expected: " << s32 << " or " << s64
|
|
|
|
<< '\n');
|
2016-10-19 03:47:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (RB.getID() != AArch64::FPRRegBankID) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
|
|
|
|
<< " constant on bank: " << RB
|
|
|
|
<< ", expected: FPR\n");
|
2016-10-19 03:47:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
[globalisel][tablegen] Add support for fpimm and import of APInt/APFloat based ImmLeaf.
Summary:
There's only a tablegen testcase for IntImmLeaf and not a CodeGen one
because the relevant rules are rejected for other reasons at the moment.
On AArch64, it's because there's an SDNodeXForm attached to the operand.
On X86, it's because the rule either emits multiple instructions or has
another predicate using PatFrag which cannot easily be supported at the
same time.
Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar
Reviewed By: qcolombet
Subscribers: aemerson, javed.absar, igorb, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D36569
llvm-svn: 315761
2017-10-14 05:28:03 +08:00
|
|
|
|
|
|
|
// The case when we have 0.0 is covered by tablegen. Reject it here so we
|
|
|
|
// can be sure tablegen works correctly and isn't rescued by this code.
|
|
|
|
if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
|
|
|
|
return false;
|
2016-10-19 03:47:57 +08:00
|
|
|
} else {
|
2017-08-08 18:44:31 +08:00
|
|
|
// s32 and s64 are covered by tablegen.
|
|
|
|
if (Ty != p0) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
|
|
|
|
<< " constant, expected: " << s32 << ", " << s64
|
|
|
|
<< ", or " << p0 << '\n');
|
2016-10-19 03:47:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (RB.getID() != AArch64::GPRRegBankID) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
|
|
|
|
<< " constant on bank: " << RB
|
|
|
|
<< ", expected: GPR\n");
|
2016-10-19 03:47:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
const unsigned MovOpc =
|
|
|
|
DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
|
|
|
|
|
|
|
|
I.setDesc(TII.get(MovOpc));
|
|
|
|
|
|
|
|
if (isFP) {
|
|
|
|
const TargetRegisterClass &GPRRC =
|
|
|
|
DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
|
|
|
|
const TargetRegisterClass &FPRRC =
|
|
|
|
DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
|
|
|
|
|
|
|
|
const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
|
|
|
|
MachineOperand &RegOp = I.getOperand(0);
|
|
|
|
RegOp.setReg(DefGPRReg);
|
|
|
|
|
|
|
|
BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(),
|
|
|
|
TII.get(AArch64::COPY))
|
|
|
|
.addDef(DefReg)
|
|
|
|
.addUse(DefGPRReg);
|
|
|
|
|
|
|
|
if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
|
2016-10-19 03:47:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
MachineOperand &ImmOp = I.getOperand(1);
|
|
|
|
// FIXME: Is going through int64_t always correct?
|
|
|
|
ImmOp.ChangeToImmediate(
|
|
|
|
ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
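// E.g. a non-zero s64 G_FCONSTANT on the FPR bank becomes, schematically:
//   %gpr:gpr64 = MOVi64imm <bit pattern of the APFloat>
//   %dst:fpr64 = COPY %gpr
// (Illustrative only; the 32-bit case uses MOVi32imm/FPR32 instead.)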
|
[globalisel] Decouple src pattern operands from dst pattern operands.
Summary:
This isn't testable for AArch64 by itself so this patch also adds
support for constant immediates in the pattern and physical
register uses in the result.
The new IntOperandMatcher matches the constant in patterns such as
'(set $rd:GPR32, (G_XOR $rs:GPR32, -1))'. It's always safe to fold
immediates into an instruction so this is the first rule that will match
across multiple BB's.
The Renderer hierarchy is responsible for adding operands to the result
instruction. Renderers can copy operands (CopyRenderer) or add physical
registers (in particular %wzr and %xzr) to the result instruction
in any order (OperandMatchers now import the operand names from
SelectionDAG to allow renderers to access any operand). This allows us to
emit the result instruction for:
%1 = G_XOR %0, -1 --> %1 = ORNWrr %wzr, %0
%1 = G_XOR -1, %0 --> %1 = ORNWrr %wzr, %0
although the latter is untested since the matcher/importer has not been
taught about commutativity yet.
Added BuildMIAction which can build new instructions and mutate them where
possible. W.r.t the mutation aspect, MatchActions are now told the name of
an instruction they can recycle and BuildMIAction will emit mutation code
when the renderers are appropriate. They are appropriate when all operands
are rendered using CopyRenderer and the indices are the same as the matcher.
This currently assumes that all operands have at least one matcher.
Finally, this change also fixes a crash in
AArch64InstructionSelector::select() caused by an immediate operand
passing isImm() rather than isCImm(). This was uncovered by the other
changes and was detected by existing tests.
Depends on D29711
Reviewers: t.p.northover, ab, qcolombet, rovka, aditya_nandakumar, javed.absar
Reviewed By: rovka
Subscribers: aemerson, dberris, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D29712
llvm-svn: 296131
2017-02-24 23:43:30 +08:00
|
|
|
} else if (I.getOperand(1).isCImm()) {
|
2016-12-06 05:47:07 +08:00
|
|
|
uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
|
|
|
|
I.getOperand(1).ChangeToImmediate(Val);
|
2017-02-24 23:43:30 +08:00
|
|
|
} else if (I.getOperand(1).isImm()) {
|
|
|
|
uint64_t Val = I.getOperand(1).getImm();
|
|
|
|
I.getOperand(1).ChangeToImmediate(Val);
|
2016-10-19 03:47:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
return true;
|
2016-10-11 05:49:42 +08:00
|
|
|
}
|
2017-07-21 06:58:38 +08:00
|
|
|
case TargetOpcode::G_EXTRACT: {
|
|
|
|
LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
|
2018-02-19 01:03:02 +08:00
|
|
|
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
|
2018-02-19 01:28:34 +08:00
|
|
|
(void)DstTy;
|
2018-02-19 01:03:02 +08:00
|
|
|
unsigned SrcSize = SrcTy.getSizeInBits();
|
2017-07-21 06:58:38 +08:00
|
|
|
// Larger extracts are vectors; same-size extracts should be something else
|
|
|
|
// by now (either split up or simplified to a COPY).
|
|
|
|
if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
|
|
|
|
return false;
|
|
|
|
|
2018-02-19 01:03:02 +08:00
|
|
|
I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
|
2017-07-21 06:58:38 +08:00
|
|
|
MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
|
|
|
|
Ty.getSizeInBits() - 1);
|
|
|
|
|
2018-02-19 01:03:02 +08:00
|
|
|
if (SrcSize < 64) {
|
|
|
|
assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
|
|
|
|
"unexpected G_EXTRACT types");
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2017-07-21 06:58:38 +08:00
|
|
|
unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
|
|
|
|
BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(),
|
|
|
|
TII.get(AArch64::COPY))
|
|
|
|
.addDef(I.getOperand(0).getReg())
|
|
|
|
.addUse(DstReg, 0, AArch64::sub_32);
|
|
|
|
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
|
|
|
|
AArch64::GPR32RegClass, MRI);
|
|
|
|
I.getOperand(0).setReg(DstReg);
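// Sketch of the 64-bit source case: extracting a 32-bit value at bit Idx
// ends up as
//   %tmp:gpr64 = UBFMXri %src, Idx, Idx + 31
//   %dst:gpr32 = COPY %tmp.sub_32
// (Illustrative; register names are arbitrary.)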
|
|
|
|
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
|
|
|
case TargetOpcode::G_INSERT: {
|
|
|
|
LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
|
2018-02-19 01:03:02 +08:00
|
|
|
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
|
|
|
|
unsigned DstSize = DstTy.getSizeInBits();
|
2017-07-21 06:58:38 +08:00
|
|
|
// Larger inserts are vectors; same-size ones should be something else by
|
|
|
|
// now (split up or turned into COPYs).
|
|
|
|
if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
|
|
|
|
return false;
|
|
|
|
|
2018-02-19 01:03:02 +08:00
|
|
|
I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
|
2017-07-21 06:58:38 +08:00
|
|
|
unsigned LSB = I.getOperand(3).getImm();
|
|
|
|
unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
|
2018-02-19 01:03:02 +08:00
|
|
|
I.getOperand(3).setImm((DstSize - LSB) % DstSize);
|
2017-07-21 06:58:38 +08:00
|
|
|
MachineInstrBuilder(MF, I).addImm(Width - 1);
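// BFM immediates: immr = (DstSize - LSB) % DstSize and imms = Width - 1.
// E.g. inserting an s16 at bit 8 of an s32 gives BFMWri ..., 24, 15.
// (Worked example only; the values follow from the two lines above.)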
|
|
|
|
|
2018-02-19 01:03:02 +08:00
|
|
|
if (DstSize < 64) {
|
|
|
|
assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
|
|
|
|
"unexpected G_INSERT types");
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2017-07-21 06:58:38 +08:00
|
|
|
unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
|
|
|
|
BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
|
|
|
|
TII.get(AArch64::SUBREG_TO_REG))
|
|
|
|
.addDef(SrcReg)
|
|
|
|
.addImm(0)
|
|
|
|
.addUse(I.getOperand(2).getReg())
|
|
|
|
.addImm(AArch64::sub_32);
|
|
|
|
RBI.constrainGenericRegister(I.getOperand(2).getReg(),
|
|
|
|
AArch64::GPR32RegClass, MRI);
|
|
|
|
I.getOperand(2).setReg(SrcReg);
|
|
|
|
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
2016-08-16 22:02:42 +08:00
|
|
|
case TargetOpcode::G_FRAME_INDEX: {
|
|
|
|
// allocas and G_FRAME_INDEX are only supported in addrspace(0).
|
2016-09-15 17:20:34 +08:00
|
|
|
if (Ty != LLT::pointer(0, 64)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
|
|
|
|
<< ", expected: " << LLT::pointer(0, 64) << '\n');
|
2016-08-16 22:02:42 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
I.setDesc(TII.get(AArch64::ADDXri));
|
|
|
|
|
|
|
|
// MOs for a #0 shifted immediate.
|
|
|
|
I.addOperand(MachineOperand::CreateImm(0));
|
|
|
|
I.addOperand(MachineOperand::CreateImm(0));
|
|
|
|
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
2016-10-11 05:50:00 +08:00
|
|
|
|
|
|
|
case TargetOpcode::G_GLOBAL_VALUE: {
|
|
|
|
auto GV = I.getOperand(1).getGlobal();
|
|
|
|
if (GV->isThreadLocal()) {
|
|
|
|
// FIXME: we don't support TLS yet.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
|
2016-12-14 02:25:38 +08:00
|
|
|
if (OpFlags & AArch64II::MO_GOT) {
|
2016-10-11 05:50:00 +08:00
|
|
|
I.setDesc(TII.get(AArch64::LOADgot));
|
2016-12-14 02:25:38 +08:00
|
|
|
I.getOperand(1).setTargetFlags(OpFlags);
|
2018-01-19 03:21:27 +08:00
|
|
|
} else if (TM.getCodeModel() == CodeModel::Large) {
|
|
|
|
// Materialize the global using movz/movk instructions.
|
2018-07-31 08:09:02 +08:00
|
|
|
materializeLargeCMVal(I, GV, OpFlags);
|
2018-01-19 03:21:27 +08:00
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
2018-08-22 19:31:39 +08:00
|
|
|
} else if (TM.getCodeModel() == CodeModel::Tiny) {
|
|
|
|
I.setDesc(TII.get(AArch64::ADR));
|
|
|
|
I.getOperand(1).setTargetFlags(OpFlags);
|
2016-12-14 02:25:38 +08:00
|
|
|
} else {
|
2016-10-11 05:50:00 +08:00
|
|
|
I.setDesc(TII.get(AArch64::MOVaddr));
|
|
|
|
I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
|
|
|
|
MachineInstrBuilder MIB(MF, I);
|
|
|
|
MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
|
|
|
|
OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
|
|
|
|
}
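// In summary: GOT-indirect globals use LOADgot, the large code model
// builds the address with MOVZ/MOVK (materializeLargeCMVal), the tiny
// code model uses a single ADR, and the default case is the usual
// ADRP + ADD pair (MOVaddr) with PAGE/PAGEOFF flags.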
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
2016-07-30 00:56:16 +08:00
|
|
|
case TargetOpcode::G_LOAD:
|
|
|
|
case TargetOpcode::G_STORE: {
|
2016-09-09 19:46:34 +08:00
|
|
|
LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
|
2016-07-30 00:56:16 +08:00
|
|
|
|
2016-09-15 17:20:34 +08:00
|
|
|
if (PtrTy != LLT::pointer(0, 64)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
|
|
|
|
<< ", expected: " << LLT::pointer(0, 64) << '\n');
|
2016-07-30 00:56:16 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-12-05 13:52:07 +08:00
|
|
|
auto &MemOp = **I.memoperands_begin();
|
|
|
|
if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
|
2017-12-05 13:52:07 +08:00
|
|
|
return false;
|
|
|
|
}
|
[globalisel] Update GlobalISel emitter to match new representation of extending loads
Summary:
Previously, an extending load was represented as (G_*EXT (G_LOAD x)).
This had a few drawbacks:
* G_LOAD had to be legal for all sizes you could extend from, even if
registers didn't naturally hold those sizes.
* All sizes you could extend from had to be allocatable just in case the
extend went missing (e.g. by optimization).
* At minimum, G_*EXT and G_TRUNC had to be legal for these sizes. As we
improve optimization of extends and truncates, this legality requirement
would spread without considerable care w.r.t when certain combines were
permitted.
* The SelectionDAG importer required some ugly and fragile pattern
rewriting to translate patterns into this style.
This patch changes the representation to:
* (G_[SZ]EXTLOAD x)
* (G_LOAD x) any-extends when MMO.getSize() * 8 < ResultTy.getSizeInBits()
which resolves these issues by allowing targets to work entirely in their
native register sizes, and by having a more direct translation from
SelectionDAG patterns.
Each extending load can be lowered by the legalizer into separate extends
and loads; however, a target that supports s1 will need the any-extending
load to extend to at least s8 since LLVM does not represent memory accesses
smaller than 8 bits. The legalizer can widenScalar G_LOAD into an
any-extending load but sign/zero-extending loads need help from something
else like a combiner pass. A follow-up patch that adds combiner helpers
for this will follow.
The new representation requires that the MMO correctly reflect the memory
access so this has been corrected in a couple tests. I've also moved the
extending loads to their own tests since they are (mostly) separate opcodes
now. Additionally, the re-write appears to have invalidated two tests from
select-with-no-legality-check.mir since the matcher table no longer contains
loads that result in s1's and they aren't legal in AArch64 anymore.
Depends on D45540
Reviewers: ab, aditya_nandakumar, bogner, rtereshin, volkan, rovka, javed.absar
Reviewed By: rtereshin
Subscribers: javed.absar, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D45541
llvm-svn: 331601
2018-05-06 04:53:24 +08:00
|
|
|
unsigned MemSizeInBits = MemOp.getSize() * 8;
|
2017-12-05 13:52:07 +08:00
|
|
|
|
2016-07-30 00:56:16 +08:00
|
|
|
const unsigned PtrReg = I.getOperand(1).getReg();
|
2017-03-28 02:14:20 +08:00
|
|
|
#ifndef NDEBUG
|
2016-07-30 00:56:16 +08:00
|
|
|
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
|
2017-03-28 02:14:20 +08:00
|
|
|
// Sanity-check the pointer register.
|
2016-07-30 00:56:16 +08:00
|
|
|
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
|
|
|
|
"Load/Store pointer operand isn't a GPR");
|
2016-09-09 19:46:34 +08:00
|
|
|
assert(MRI.getType(PtrReg).isPointer() &&
|
|
|
|
"Load/Store pointer operand isn't a pointer");
|
2016-07-30 00:56:16 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
const unsigned ValReg = I.getOperand(0).getReg();
|
|
|
|
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
|
|
|
|
|
|
|
|
const unsigned NewOpc =
|
2018-05-06 04:53:24 +08:00
|
|
|
selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
|
2016-07-30 00:56:16 +08:00
|
|
|
if (NewOpc == I.getOpcode())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
I.setDesc(TII.get(NewOpc));
|
|
|
|
|
2017-03-28 01:31:52 +08:00
|
|
|
uint64_t Offset = 0;
|
|
|
|
auto *PtrMI = MRI.getVRegDef(PtrReg);
|
|
|
|
|
|
|
|
// Try to fold a GEP into our unsigned immediate addressing mode.
|
|
|
|
if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
|
|
|
|
if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
|
|
|
|
int64_t Imm = *COff;
|
2018-05-06 04:53:24 +08:00
|
|
|
const unsigned Size = MemSizeInBits / 8;
|
2017-03-28 01:31:52 +08:00
|
|
|
const unsigned Scale = Log2_32(Size);
|
|
|
|
if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
|
|
|
|
unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
|
|
|
|
I.getOperand(1).setReg(Ptr2Reg);
|
|
|
|
PtrMI = MRI.getVRegDef(Ptr2Reg);
|
|
|
|
Offset = Imm / Size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
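// E.g. for a 4-byte load, a G_GEP with constant offset 8 is folded into
// the scaled unsigned-immediate field (e.g. of LDRWui) as offset 2, i.e.
// 8 / 4. The fold only fires when the offset is size-aligned and fits in
// the unsigned 12-bit scaled range checked above.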
|
|
|
|
|
2017-03-28 01:31:56 +08:00
|
|
|
// If we haven't folded anything into our addressing mode yet, try to fold
|
|
|
|
// a frame index into the base+offset.
|
|
|
|
if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
|
|
|
|
I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
|
|
|
|
|
2017-03-28 01:31:52 +08:00
|
|
|
I.addOperand(MachineOperand::CreateImm(Offset));
|
2017-03-28 01:31:48 +08:00
|
|
|
|
|
|
|
// If we're storing a 0, use WZR/XZR.
|
|
|
|
if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
|
|
|
|
if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
|
|
|
|
if (I.getOpcode() == AArch64::STRWui)
|
|
|
|
I.getOperand(0).setReg(AArch64::WZR);
|
|
|
|
else if (I.getOpcode() == AArch64::STRXui)
|
|
|
|
I.getOperand(0).setReg(AArch64::XZR);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-30 00:56:16 +08:00
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
2016-08-16 22:37:46 +08:00
|
|
|
}
|
|
|
|
|
2017-02-09 05:22:25 +08:00
|
|
|
case TargetOpcode::G_SMULH:
|
|
|
|
case TargetOpcode::G_UMULH: {
|
|
|
|
// Reject the various things we don't support yet.
|
|
|
|
if (unsupportedBinOp(I, RBI, MRI, TRI))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const unsigned DefReg = I.getOperand(0).getReg();
|
|
|
|
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
|
|
|
|
|
|
|
|
if (RB.getID() != AArch64::GPRRegBankID) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
|
2017-02-09 05:22:25 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Ty != LLT::scalar(64)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
|
|
|
|
<< ", expected: " << LLT::scalar(64) << '\n');
|
2017-02-09 05:22:25 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
|
|
|
|
: AArch64::UMULHrr;
|
|
|
|
I.setDesc(TII.get(NewOpc));
|
|
|
|
|
|
|
|
// Now that we selected an opcode, we need to constrain the register
|
|
|
|
// operands to use appropriate classes.
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
2016-08-19 00:05:11 +08:00
|
|
|
case TargetOpcode::G_FADD:
|
|
|
|
case TargetOpcode::G_FSUB:
|
|
|
|
case TargetOpcode::G_FMUL:
|
|
|
|
case TargetOpcode::G_FDIV:
|
|
|
|
|
2016-07-27 22:31:55 +08:00
|
|
|
case TargetOpcode::G_OR:
|
2016-08-16 22:02:47 +08:00
|
|
|
case TargetOpcode::G_SHL:
|
|
|
|
case TargetOpcode::G_LSHR:
|
|
|
|
case TargetOpcode::G_ASHR:
|
2016-10-11 05:49:49 +08:00
|
|
|
case TargetOpcode::G_GEP: {
|
2016-08-16 22:37:40 +08:00
|
|
|
// Reject the various things we don't support yet.
|
|
|
|
if (unsupportedBinOp(I, RBI, MRI, TRI))
|
2016-07-29 01:15:15 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
const unsigned OpSize = Ty.getSizeInBits();
|
|
|
|
|
2016-07-27 22:31:55 +08:00
|
|
|
const unsigned DefReg = I.getOperand(0).getReg();
|
|
|
|
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
|
|
|
|
|
|
|
|
const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
|
|
|
|
if (NewOpc == I.getOpcode())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
I.setDesc(TII.get(NewOpc));
|
|
|
|
// FIXME: Should the type always be reset in setDesc?
|
|
|
|
|
|
|
|
// Now that we selected an opcode, we need to constrain the register
|
|
|
|
// operands to use appropriate classes.
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
2016-10-12 04:50:21 +08:00
|
|
|
|
2017-02-15 04:56:29 +08:00
|
|
|
case TargetOpcode::G_PTR_MASK: {
|
|
|
|
uint64_t Align = I.getOperand(2).getImm();
|
|
|
|
if (Align >= 64 || Align == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
uint64_t Mask = ~((1ULL << Align) - 1);
|
|
|
|
I.setDesc(TII.get(AArch64::ANDXri));
|
|
|
|
I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
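// E.g. Align == 4 clears the low four bits: Mask == ~0xFULL, so this
// becomes ANDXri %ptr, <encoded ~0xF> (the operand stores the encoded
// logical immediate, not the raw mask).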
|
|
|
|
|
|
|
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
}
|
2016-11-01 02:31:09 +08:00
|
|
|
case TargetOpcode::G_PTRTOINT:
|
2016-10-13 06:49:15 +08:00
|
|
|
case TargetOpcode::G_TRUNC: {
|
|
|
|
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
|
|
|
|
const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
|
|
|
|
|
|
|
|
const unsigned DstReg = I.getOperand(0).getReg();
|
|
|
|
const unsigned SrcReg = I.getOperand(1).getReg();
|
|
|
|
|
|
|
|
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
|
|
|
|
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
|
|
|
|
|
|
|
|
if (DstRB.getID() != SrcRB.getID()) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
|
2016-10-13 06:49:15 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DstRB.getID() == AArch64::GPRRegBankID) {
|
|
|
|
const TargetRegisterClass *DstRC =
|
|
|
|
getRegClassForTypeOnBank(DstTy, DstRB, RBI);
|
|
|
|
if (!DstRC)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const TargetRegisterClass *SrcRC =
|
|
|
|
getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
|
|
|
|
if (!SrcRC)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
|
|
|
|
!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
|
2016-10-13 06:49:15 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DstRC == SrcRC) {
|
|
|
|
// Nothing to be done
|
2017-06-27 18:11:39 +08:00
|
|
|
} else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
|
|
|
|
SrcTy == LLT::scalar(64)) {
|
|
|
|
llvm_unreachable("TableGen can import this case");
|
|
|
|
return false;
|
2016-10-13 06:49:15 +08:00
|
|
|
} else if (DstRC == &AArch64::GPR32RegClass &&
|
|
|
|
SrcRC == &AArch64::GPR64RegClass) {
|
|
|
|
I.getOperand(1).setSubReg(AArch64::sub_32);
|
|
|
|
} else {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
|
2016-10-13 06:49:15 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
I.setDesc(TII.get(TargetOpcode::COPY));
|
|
|
|
return true;
|
|
|
|
} else if (DstRB.getID() == AArch64::FPRRegBankID) {
|
|
|
|
if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
|
|
|
|
I.setDesc(TII.get(AArch64::XTNv4i16));
|
|
|
|
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-10-12 04:50:21 +08:00
|
|
|
case TargetOpcode::G_ANYEXT: {
|
|
|
|
const unsigned DstReg = I.getOperand(0).getReg();
|
|
|
|
const unsigned SrcReg = I.getOperand(1).getReg();
|
|
|
|
|
2016-10-12 11:57:49 +08:00
|
|
|
const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
|
|
|
|
if (RBDst.getID() != AArch64::GPRRegBankID) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
|
|
|
|
<< ", expected: GPR\n");
|
2016-10-12 11:57:49 +08:00
|
|
|
return false;
|
|
|
|
}
|
2016-10-12 04:50:21 +08:00
|
|
|
|
2016-10-12 11:57:49 +08:00
|
|
|
const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
|
|
|
|
if (RBSrc.getID() != AArch64::GPRRegBankID) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
|
|
|
|
<< ", expected: GPR\n");
|
2016-10-12 04:50:21 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
|
|
|
|
|
|
|
|
if (DstSize == 0) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
|
2016-10-12 04:50:21 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-10-12 11:57:49 +08:00
|
|
|
if (DstSize != 64 && DstSize > 32) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
|
|
|
|
<< ", expected: 32 or 64\n");
|
2016-10-12 04:50:21 +08:00
|
|
|
return false;
|
|
|
|
}
|
2016-10-12 11:57:49 +08:00
|
|
|
// At this point G_ANYEXT is just like a plain COPY, but we need
|
|
|
|
// to explicitly form the 64-bit value if any.
|
|
|
|
if (DstSize > 32) {
|
|
|
|
unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
|
|
|
|
BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
|
|
|
|
.addDef(ExtSrc)
|
|
|
|
.addImm(0)
|
|
|
|
.addUse(SrcReg)
|
|
|
|
.addImm(AArch64::sub_32);
|
|
|
|
I.getOperand(1).setReg(ExtSrc);
|
2016-10-12 04:50:21 +08:00
|
|
|
}
|
2016-10-12 11:57:49 +08:00
|
|
|
return selectCopy(I, TII, MRI, TRI, RBI);
|
2016-10-12 04:50:21 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
case TargetOpcode::G_ZEXT:
|
|
|
|
case TargetOpcode::G_SEXT: {
|
|
|
|
unsigned Opcode = I.getOpcode();
|
|
|
|
const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
|
|
|
|
SrcTy = MRI.getType(I.getOperand(1).getReg());
|
|
|
|
const bool isSigned = Opcode == TargetOpcode::G_SEXT;
|
|
|
|
const unsigned DefReg = I.getOperand(0).getReg();
|
|
|
|
const unsigned SrcReg = I.getOperand(1).getReg();
|
|
|
|
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
|
|
|
|
|
|
|
|
if (RB.getID() != AArch64::GPRRegBankID) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
|
|
|
|
<< ", expected: GPR\n");
|
2016-10-12 04:50:21 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
MachineInstr *ExtI;
|
|
|
|
if (DstTy == LLT::scalar(64)) {
|
|
|
|
// FIXME: Can we avoid manually doing this?
|
|
|
|
if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
|
|
|
|
<< " operand\n");
|
2016-10-12 04:50:21 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
const unsigned SrcXReg =
|
|
|
|
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
|
|
|
|
BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
|
|
|
|
.addDef(SrcXReg)
|
|
|
|
.addImm(0)
|
|
|
|
.addUse(SrcReg)
|
|
|
|
.addImm(AArch64::sub_32);
|
|
|
|
|
|
|
|
const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
|
|
|
|
ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
|
|
|
|
.addDef(DefReg)
|
|
|
|
.addUse(SrcXReg)
|
|
|
|
.addImm(0)
|
|
|
|
.addImm(SrcTy.getSizeInBits() - 1);
|
2016-11-10 06:39:54 +08:00
|
|
|
} else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
|
2016-10-12 04:50:21 +08:00
|
|
|
const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
|
|
|
|
ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
|
|
|
|
.addDef(DefReg)
|
|
|
|
.addUse(SrcReg)
|
|
|
|
.addImm(0)
|
|
|
|
.addImm(SrcTy.getSizeInBits() - 1);
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
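// Both paths reduce the extension to a bitfield move: e.g. a zero-extend
// from s8 is UBFM(W|X)ri %src, 0, 7 and a sign-extend is the SBFM form,
// with the 64-bit destination case first widening the source through
// SUBREG_TO_REG as above.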
|
|
|
|
|
|
|
|
constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
|
|
|
|
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2016-10-12 06:29:23 +08:00
|
|
|
|
2016-10-13 06:49:11 +08:00
|
|
|
case TargetOpcode::G_SITOFP:
|
|
|
|
case TargetOpcode::G_UITOFP:
|
|
|
|
case TargetOpcode::G_FPTOSI:
|
|
|
|
case TargetOpcode::G_FPTOUI: {
|
|
|
|
const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
|
|
|
|
SrcTy = MRI.getType(I.getOperand(1).getReg());
|
|
|
|
const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
|
|
|
|
if (NewOpc == Opcode)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
I.setDesc(TII.get(NewOpc));
|
|
|
|
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-10-12 06:29:23 +08:00
|
|
|
case TargetOpcode::G_INTTOPTR:
|
Re-commit: [globalisel][tablegen] Support zero-instruction emission.
Summary:
Support the case where an operand of a pattern is also the whole of the
result pattern. In this case the original result and all its uses must be
replaced by the operand. However, register class restrictions can require
a COPY. This patch handles both cases by always emitting the copy and
leaving it for the register allocator to optimize.
The previous commit failed on Windows machines due to a flaw in the sort
predicate which allowed both A < B < C and B == C to be satisfied
simultaneously. The cause of this was some sloppiness in the priority order of
G_CONSTANT instructions compared to other instructions. These had equal priority
because it makes no difference; however, there were operands that had higher priority
than G_CONSTANT but lower priority than any other instruction. As a result, a
priority order between G_CONSTANT and other instructions must be enforced to
ensure the predicate defines a strict weak order.
Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar
Subscribers: javed.absar, kristof.beyls, igorb, llvm-commits
Differential Revision: https://reviews.llvm.org/D36084
llvm-svn: 311076
2017-08-17 17:26:14 +08:00
|
|
|
// The importer is currently unable to import pointer types since they
|
|
|
|
// didn't exist in SelectionDAG.
|
2017-08-15 23:10:31 +08:00
|
|
|
return selectCopy(I, TII, MRI, TRI, RBI);
|
2017-08-15 21:50:09 +08:00
|
|
|
|
2017-08-17 17:26:14 +08:00
|
|
|
case TargetOpcode::G_BITCAST:
|
|
|
|
// Imported SelectionDAG rules can handle every bitcast except those that
|
|
|
|
// bitcast from a type to the same type. Ideally, these shouldn't occur
|
|
|
|
// but we might not run an optimizer that deletes them.
|
|
|
|
if (MRI.getType(I.getOperand(0).getReg()) ==
|
|
|
|
MRI.getType(I.getOperand(1).getReg()))
|
|
|
|
return selectCopy(I, TII, MRI, TRI, RBI);
|
|
|
|
return false;
|
|
|
|
|
2016-11-08 08:45:29 +08:00
|
|
|
case TargetOpcode::G_SELECT: {
|
|
|
|
if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
|
|
|
|
<< ", expected: " << LLT::scalar(1) << '\n');
|
2016-11-08 08:45:29 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
const unsigned CondReg = I.getOperand(1).getReg();
|
|
|
|
const unsigned TReg = I.getOperand(2).getReg();
|
|
|
|
const unsigned FReg = I.getOperand(3).getReg();
|
|
|
|
|
|
|
|
unsigned CSelOpc = 0;
|
|
|
|
|
|
|
|
if (Ty == LLT::scalar(32)) {
|
|
|
|
CSelOpc = AArch64::CSELWr;
|
2017-01-19 21:32:14 +08:00
|
|
|
} else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
|
2016-11-08 08:45:29 +08:00
|
|
|
CSelOpc = AArch64::CSELXr;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
MachineInstr &TstMI =
|
|
|
|
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
|
|
|
|
.addDef(AArch64::WZR)
|
|
|
|
.addUse(CondReg)
|
|
|
|
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
|
|
|
|
|
|
|
|
MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
|
|
|
|
.addDef(I.getOperand(0).getReg())
|
|
|
|
.addUse(TReg)
|
|
|
|
.addUse(FReg)
|
|
|
|
.addImm(AArch64CC::NE);
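// Schematically this becomes:
//   ANDSWri wzr, %cond, <encoded #1>   ; test bit 0, set flags
//   %dst = CSEL(W|X)r %t, %f, NE       ; pick %t when the condition is set
// (Register names illustrative.)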
|
|
|
|
|
|
|
|
constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
|
|
|
|
constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
|
|
|
|
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2016-10-13 06:49:04 +08:00
|
|
|
case TargetOpcode::G_ICMP: {
|
2017-08-01 01:00:16 +08:00
|
|
|
if (Ty != LLT::scalar(32)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
|
|
|
|
<< ", expected: " << LLT::scalar(32) << '\n');
|
2016-10-13 06:49:04 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned CmpOpc = 0;
|
|
|
|
unsigned ZReg = 0;
|
|
|
|
|
|
|
|
LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
|
|
|
|
if (CmpTy == LLT::scalar(32)) {
|
|
|
|
CmpOpc = AArch64::SUBSWrr;
|
|
|
|
ZReg = AArch64::WZR;
|
|
|
|
} else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
|
|
|
|
CmpOpc = AArch64::SUBSXrr;
|
|
|
|
ZReg = AArch64::XZR;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-01-05 18:16:08 +08:00
|
|
|
// CSINC increments the result by one when the condition code is false.
|
|
|
|
// Therefore, we have to invert the predicate to get an increment by 1 when
|
|
|
|
// the predicate is true.
|
|
|
|
const AArch64CC::CondCode invCC =
|
|
|
|
changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
|
|
|
|
(CmpInst::Predicate)I.getOperand(1).getPredicate()));
|
2016-10-13 06:49:04 +08:00
|
|
|
|
|
|
|
MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
|
|
|
|
.addDef(ZReg)
|
|
|
|
.addUse(I.getOperand(2).getReg())
|
|
|
|
.addUse(I.getOperand(3).getReg());
|
|
|
|
|
|
|
|
MachineInstr &CSetMI =
|
|
|
|
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
|
|
|
|
.addDef(I.getOperand(0).getReg())
|
|
|
|
.addUse(AArch64::WZR)
|
|
|
|
.addUse(AArch64::WZR)
|
2017-01-05 18:16:08 +08:00
|
|
|
.addImm(invCC);
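// E.g. an s32 'icmp eq' becomes SUBSWrr wzr, %lhs, %rhs followed by
// CSINCWr %dst, wzr, wzr, NE: the inverted condition makes CSINC produce
// 1 exactly when the original predicate (EQ) holds.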
|
2016-10-13 06:49:04 +08:00
|
|
|
|
|
|
|
constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
|
|
|
|
constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
|
|
|
|
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-10-13 06:49:07 +08:00
|
|
|
case TargetOpcode::G_FCMP: {
|
2017-08-01 01:00:16 +08:00
|
|
|
if (Ty != LLT::scalar(32)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
|
|
|
|
<< ", expected: " << LLT::scalar(32) << '\n');
|
2016-10-13 06:49:07 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned CmpOpc = 0;
|
|
|
|
LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
|
|
|
|
if (CmpTy == LLT::scalar(32)) {
|
|
|
|
CmpOpc = AArch64::FCMPSrr;
|
|
|
|
} else if (CmpTy == LLT::scalar(64)) {
|
|
|
|
CmpOpc = AArch64::FCMPDrr;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: regbank
|
|
|
|
|
|
|
|
AArch64CC::CondCode CC1, CC2;
|
|
|
|
changeFCMPPredToAArch64CC(
|
|
|
|
(CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
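// Some FP predicates need two AArch64 condition codes; e.g. an unordered-
// or-equal compare is true for either EQ or VS (unordered). CC2 stays
// AArch64CC::AL when a single condition code is enough.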
|
|
|
|
|
|
|
|
MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
|
|
|
|
.addUse(I.getOperand(2).getReg())
|
|
|
|
.addUse(I.getOperand(3).getReg());
|
|
|
|
|
|
|
|
const unsigned DefReg = I.getOperand(0).getReg();
|
|
|
|
unsigned Def1Reg = DefReg;
|
|
|
|
if (CC2 != AArch64CC::AL)
|
|
|
|
Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
|
|
|
|
|
|
|
|
MachineInstr &CSetMI =
|
|
|
|
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
|
|
|
|
.addDef(Def1Reg)
|
|
|
|
.addUse(AArch64::WZR)
|
|
|
|
.addUse(AArch64::WZR)
|
2017-01-18 07:04:01 +08:00
|
|
|
.addImm(getInvertedCondCode(CC1));
|
2016-10-13 06:49:07 +08:00
|
|
|
|
|
|
|
if (CC2 != AArch64CC::AL) {
|
|
|
|
unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
|
|
|
|
MachineInstr &CSet2MI =
|
|
|
|
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
|
|
|
|
.addDef(Def2Reg)
|
|
|
|
.addUse(AArch64::WZR)
|
|
|
|
.addUse(AArch64::WZR)
|
2017-01-18 07:04:01 +08:00
|
|
|
.addImm(getInvertedCondCode(CC2));
|
2016-10-13 06:49:07 +08:00
|
|
|
MachineInstr &OrMI =
|
|
|
|
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
|
|
|
|
.addDef(DefReg)
|
|
|
|
.addUse(Def1Reg)
|
|
|
|
.addUse(Def2Reg);
|
|
|
|
constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
|
|
|
|
constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
|
|
|
|
constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
|
|
|
|
constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
|
|
|
|
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2017-02-09 01:57:27 +08:00
|
|
|
case TargetOpcode::G_VASTART:
|
|
|
|
return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
|
|
|
|
: selectVaStartAAPCS(I, MF, MRI);
|
2018-04-25 22:43:59 +08:00
|
|
|
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
|
|
|
|
if (!I.getOperand(0).isIntrinsicID())
|
|
|
|
return false;
|
|
|
|
if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
|
|
|
|
return false;
|
|
|
|
BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::BRK))
|
|
|
|
.addImm(1);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
2018-07-31 08:09:02 +08:00
|
|
|
case TargetOpcode::G_IMPLICIT_DEF: {
|
2017-07-13 01:32:32 +08:00
|
|
|
I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
|
2018-02-02 09:44:43 +08:00
|
|
|
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
|
|
|
|
const unsigned DstReg = I.getOperand(0).getReg();
|
|
|
|
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
|
|
|
|
const TargetRegisterClass *DstRC =
|
|
|
|
getRegClassForTypeOnBank(DstTy, DstRB, RBI);
|
|
|
|
RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
|
2017-07-13 01:32:32 +08:00
|
|
|
return true;
|
2016-07-27 22:31:55 +08:00
|
|
|
}
|
2018-07-31 08:09:02 +08:00
|
|
|
case TargetOpcode::G_BLOCK_ADDR: {
|
|
|
|
if (TM.getCodeModel() == CodeModel::Large) {
|
|
|
|
materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
I.setDesc(TII.get(AArch64::MOVaddrBA));
|
|
|
|
auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
|
|
|
|
I.getOperand(0).getReg())
|
|
|
|
.addBlockAddress(I.getOperand(1).getBlockAddress(),
|
|
|
|
/* Offset */ 0, AArch64II::MO_PAGE)
|
|
|
|
.addBlockAddress(
|
|
|
|
I.getOperand(1).getBlockAddress(), /* Offset */ 0,
|
|
|
|
AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
|
|
|
|
I.eraseFromParent();
|
|
|
|
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
|
|
|
|
}
|
|
|
|
}
|
2018-12-11 02:44:58 +08:00
|
|
|
case TargetOpcode::G_BUILD_VECTOR:
|
|
|
|
return selectBuildVector(I, MRI);
|
2018-12-20 09:11:04 +08:00
|
|
|
case TargetOpcode::G_MERGE_VALUES:
|
|
|
|
return selectMergeValues(I, MRI);
|
2019-01-25 06:00:41 +08:00
|
|
|
case TargetOpcode::G_UNMERGE_VALUES:
|
|
|
|
return selectUnmergeValues(I, MRI);
|
2018-07-31 08:09:02 +08:00
|
|
|
}
|
2016-07-27 22:31:55 +08:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
2017-03-15 05:32:08 +08:00
|
|
|
|
2018-12-11 02:44:58 +08:00
|
|
|
bool AArch64InstructionSelector::emitScalarToVector(
|
|
|
|
unsigned &Dst, const LLT DstTy, const TargetRegisterClass *DstRC,
|
|
|
|
unsigned Scalar, MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator MBBI, MachineRegisterInfo &MRI) const {
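// Sketch of what this emits: an IMPLICIT_DEF of the destination vector
// class followed by an INSERT_SUBREG that drops the scalar into lane 0
// via hsub/ssub/dsub, chosen from the element size below.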
|
|
|
|
Dst = MRI.createVirtualRegister(DstRC);
|
|
|
|
|
|
|
|
unsigned UndefVec = MRI.createVirtualRegister(DstRC);
|
|
|
|
MachineInstr &UndefMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
|
|
|
|
TII.get(TargetOpcode::IMPLICIT_DEF))
|
|
|
|
.addDef(UndefVec);
|
|
|
|
|
|
|
|
auto BuildFn = [&](unsigned SubregIndex) {
|
|
|
|
MachineInstr &InsMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
|
|
|
|
TII.get(TargetOpcode::INSERT_SUBREG))
|
|
|
|
.addDef(Dst)
|
|
|
|
.addUse(UndefVec)
|
|
|
|
.addUse(Scalar)
|
|
|
|
.addImm(SubregIndex);
|
|
|
|
constrainSelectedInstRegOperands(UndefMI, TII, TRI, RBI);
|
|
|
|
return constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
|
|
|
|
};
|
|
|
|
|
|
|
|
switch (DstTy.getElementType().getSizeInBits()) {
|
2019-01-25 06:00:41 +08:00
|
|
|
case 16:
|
|
|
|
return BuildFn(AArch64::hsub);
|
2018-12-11 02:44:58 +08:00
|
|
|
case 32:
|
|
|
|
return BuildFn(AArch64::ssub);
|
|
|
|
case 64:
|
|
|
|
return BuildFn(AArch64::dsub);
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");

  // At the moment we only support merging two s32s into an s64.
  if (I.getNumOperands() != 3)
    return false;
  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
    return false;
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  auto *DstRC = &AArch64::GPR64RegClass;
  unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
                                .addDef(SubToRegDef)
                                .addImm(0)
                                .addUse(I.getOperand(1).getReg())
                                .addImm(AArch64::sub_32);
  unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  // Need to anyext the second scalar before we can use bfm
  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                 .addDef(SubToRegDef2)
                                 .addImm(0)
                                 .addUse(I.getOperand(2).getReg())
                                 .addImm(AArch64::sub_32);
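  // With immr = 32 and imms = 31, BFMXri acts as a bitfield insert: it places
  // bits [31:0] of the second source into bits [63:32] of the result while
  // keeping bits [31:0] from the first source.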
  MachineInstr &BFM =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
           .addUse(SubToRegDef)
           .addUse(SubToRegDef2)
           .addImm(32)
           .addImm(31);
  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
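
/// Select a G_UNMERGE_VALUES from an FPR vector source into scalar FPR
/// destinations by emitting one lane copy per element.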
bool AArch64InstructionSelector::selectUnmergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  // TODO: Handle unmerging into GPRs and from scalars to scalars.
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }

  // The last operand is the vector source register, and every other operand is
  // a register to unpack into.
  unsigned NumElts = I.getNumOperands() - 1;
  unsigned SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);
  (void)WideTy;
  assert(WideTy.isVector() && "can only unmerge from vector types!");
  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
         "source register size too small!");

  // TODO: Handle unmerging into vectors.
  if (!NarrowTy.isScalar()) {
    LLVM_DEBUG(dbgs() << "Vector-to-vector unmerges not supported yet.\n");
    return false;
  }

  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  switch (NarrowTy.getSizeInBits()) {
  case 16:
    CopyOpc = AArch64::CPYi16;
    ExtractSubReg = AArch64::hsub;
    break;
  case 32:
    CopyOpc = AArch64::CPYi32;
    ExtractSubReg = AArch64::ssub;
    break;
  case 64:
    CopyOpc = AArch64::CPYi64;
    ExtractSubReg = AArch64::dsub;
    break;
  default:
    // Unknown size, bail out.
    LLVM_DEBUG(dbgs() << "NarrowTy had unsupported size.\n");
    return false;
  }

  // Set up for the lane copies.
  MachineBasicBlock &MBB = *I.getParent();

  // Stores the registers we'll be copying from.
  SmallVector<unsigned, 4> InsertRegs;

  // We'll use the first register twice, so we only need NumElts-1 registers.
  unsigned NumInsertRegs = NumElts - 1;

  // If our elements fit into exactly 128 bits, then we can copy from the
  // source directly. Otherwise, we need to do a bit of setup with some
  // subregister inserts.
  if (NarrowTy.getSizeInBits() * NumElts == 128) {
    InsertRegs = SmallVector<unsigned, 4>(NumInsertRegs, SrcReg);
  } else {
    // No. We have to perform subregister inserts. For each insert, create an
    // implicit def and a subregister insert, and save the register we create.
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      unsigned ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
                   ImpDefReg);

      // Now, create the subregister insert from SrcReg.
      unsigned InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &InsMI =
          *BuildMI(MBB, I, I.getDebugLoc(),
                   TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
               .addUse(ImpDefReg)
               .addUse(SrcReg)
               .addImm(AArch64::dsub);

      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);

      // Save the register so that we can copy from it after.
      InsertRegs.push_back(InsertReg);
    }
  }

  // Now that we've created any necessary subregister inserts, we can
  // create the copies.
  //
  // Perform the first copy separately as a subregister copy.
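  // Lane 0 lives in the low bits of the vector register, so reading the
  // ExtractSubReg subregister with a plain COPY is enough here; the remaining
  // lanes need CPYi* lane copies.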
  unsigned CopyTo = I.getOperand(0).getReg();
  MachineInstr &FirstCopy =
      *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), CopyTo)
           .addUse(InsertRegs[0], 0, ExtractSubReg);
  constrainSelectedInstRegOperands(FirstCopy, TII, TRI, RBI);

  // Now, perform the remaining copies as vector lane copies.
  unsigned LaneIdx = 1;
  for (unsigned InsReg : InsertRegs) {
    unsigned CopyTo = I.getOperand(LaneIdx).getReg();
    MachineInstr &CopyInst =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
             .addUse(InsReg)
             .addImm(LaneIdx);
    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
    ++LaneIdx;
  }

  // Separately constrain the first copy's destination. Because of the
  // limitation in constrainOperandRegClass, we can't guarantee that this will
  // actually be constrained. So, do it ourselves using the second operand.
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}
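
/// Select a G_BUILD_VECTOR by moving the first element into a vector register
/// with emitScalarToVector and then inserting the remaining elements one lane
/// at a time.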
bool AArch64InstructionSelector::selectBuildVector(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
  unsigned Opc;
  unsigned SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    }
  } else {
    if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    }
  }

  unsigned DstVec = 0;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  if (!emitScalarToVector(DstVec, DstTy, DstRC, I.getOperand(1).getReg(),
                          *I.getParent(), I.getIterator(), MRI))
    return false;

  unsigned DstSize = DstTy.getSizeInBits();

  // Keep track of the last MI we inserted. Later on, we might be able to save
  // a copy using it.
  MachineInstr *PrevMI = nullptr;
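  // Operand 1 already went into lane 0 via emitScalarToVector, so operands
  // 2 .. NumElts are inserted into lanes 1 .. NumElts-1.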
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    unsigned InsDef;
    // Note that if we don't do a subregister copy, we end up making one more
    // of these than we need.
    InsDef = MRI.createVirtualRegister(DstRC);
    unsigned LaneIdx = i - 1;
    if (RB.getID() == AArch64::FPRRegBankID) {
      unsigned ImpDef = MRI.createVirtualRegister(DstRC);
      MachineInstr &ImpDefMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                        TII.get(TargetOpcode::IMPLICIT_DEF))
                                   .addDef(ImpDef);
      unsigned InsSubDef = MRI.createVirtualRegister(DstRC);
      MachineInstr &InsSubMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                        TII.get(TargetOpcode::INSERT_SUBREG))
                                    .addDef(InsSubDef)
                                    .addUse(ImpDef)
                                    .addUse(I.getOperand(i).getReg())
                                    .addImm(SubregIdx);
      MachineInstr &InsEltMI =
          *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
               .addDef(InsDef)
               .addUse(DstVec)
               .addImm(LaneIdx)
               .addUse(InsSubDef)
               .addImm(0);
      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsSubMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsEltMI, TII, TRI, RBI);
      DstVec = InsDef;
      PrevMI = &InsEltMI;
    } else {
      MachineInstr &InsMI =
          *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
               .addDef(InsDef)
               .addUse(DstVec)
               .addImm(LaneIdx)
               .addUse(I.getOperand(i).getReg());
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
      DstVec = InsDef;
      PrevMI = &InsMI;
    }
  }

  // If DstTy's size in bits is less than 128, then emit a subregister copy
  // from DstVec to the last register we've defined.
  if (DstSize < 128) {
    unsigned SubReg = 0;

    // Helper lambda to decide on a register class and subregister for the
    // subregister copy.
    auto GetRegInfoForCopy = [&SubReg,
                              &DstSize]() -> const TargetRegisterClass * {
      switch (DstSize) {
      default:
        LLVM_DEBUG(dbgs() << "Unknown destination size (" << DstSize << ")\n");
        return nullptr;
      case 32:
        SubReg = AArch64::ssub;
        return &AArch64::FPR32RegClass;
      case 64:
        SubReg = AArch64::dsub;
        return &AArch64::FPR64RegClass;
      }
    };

    const TargetRegisterClass *RC = GetRegInfoForCopy();
    if (!RC)
      return false;

    unsigned Reg = MRI.createVirtualRegister(RC);
    unsigned DstReg = I.getOperand(0).getReg();

    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addUse(DstVec, 0, SubReg);
    MachineOperand &RegOp = I.getOperand(1);
    RegOp.setReg(Reg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // We don't need a subregister copy. Save a copy by re-using the
    // destination register on the final insert.
    assert(PrevMI && "PrevMI was null?");
    PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return renderer
/// functions that add the 12-bit value and the shifter operand to the
/// instruction being built; otherwise return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  MachineInstr &MI = *Root.getParent();
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in. However,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    MachineInstr *Def = MRI.getVRegDef(Root.getReg());
    if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
      return None;
    MachineOperand &Op1 = Def->getOperand(1);
    if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
      return None;
    Immed = Op1.getCImm()->getZExtValue();
  } else
    return None;

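  // An arithmetic immediate is a 12-bit unsigned value optionally shifted
  // left by 12, so e.g. 0x123 is encoded as {0x123, LSL #0} and 0x123000 as
  // {0x123, LSL #12}.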
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return None;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  }};
}

/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
/// memory reference, which is needed here to know what is valid for a scaled
/// immediate.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return None;

  if (!isBaseWithConstantOffset(Root, MRI))
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  MachineOperand &OffImm = RootDef->getOperand(2);
  if (!OffImm.isReg())
    return None;
  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
    return None;
  int64_t RHSC;
  MachineOperand &RHSOp1 = RHS->getOperand(1);
  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
    return None;
  RHSC = RHSOp1.getCImm()->getSExtValue();

  // If the offset is valid as a scaled immediate, don't match here.
  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
    return None;
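  // Unscaled (LDUR/STUR-style) addressing takes a signed 9-bit byte offset,
  // i.e. an offset in [-256, 255] with no scaling applied.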
  if (RHSC >= -256 && RHSC < 256) {
    MachineOperand &Base = RootDef->getOperand(1);
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
    }};
  }
  return None;
}

/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineOperand &RHS = RootDef->getOperand(2);
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
      unsigned Scale = Log2_32(Size);
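      // A scaled 12-bit offset must be an unsigned multiple of the access
      // size no larger than 4095 * Size; e.g. for an 8-byte access the
      // representable offsets are 0, 8, ..., 32760.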
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
          return {{
              [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
              [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
          }};

        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size).hasValue())
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}
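
/// Add the 64-bit value of a G_CONSTANT as an immediate operand on the
/// instruction being rendered, for imported patterns that want the constant's
/// value rather than its virtual register.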
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(CstVal.getValue());
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
}