2016-10-15 06:18:18 +08:00
|
|
|
//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
|
2016-07-23 04:03:43 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2016-07-23 04:03:43 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2016-10-15 06:18:18 +08:00
|
|
|
/// \file This file implements the LegalizerHelper class to legalize
|
2016-07-23 04:03:43 +08:00
|
|
|
/// individual instructions and the LegalizeMachineIR wrapper pass for the
|
|
|
|
/// primary legalization.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-10-15 06:18:18 +08:00
|
|
|
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
|
2016-08-30 03:07:16 +08:00
|
|
|
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
|
2018-12-06 04:14:52 +08:00
|
|
|
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
|
2016-10-15 06:18:18 +08:00
|
|
|
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
|
2016-07-23 04:03:43 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
2019-08-28 03:54:27 +08:00
|
|
|
#include "llvm/CodeGen/TargetFrameLowering.h"
|
2018-08-22 01:30:31 +08:00
|
|
|
#include "llvm/CodeGen/TargetInstrInfo.h"
|
2017-11-17 09:07:10 +08:00
|
|
|
#include "llvm/CodeGen/TargetLowering.h"
|
|
|
|
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
2016-07-23 04:03:43 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2018-08-22 01:30:31 +08:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2016-07-23 04:03:43 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
|
2017-04-20 23:46:12 +08:00
|
|
|
#define DEBUG_TYPE "legalizer"
|
2016-07-23 04:03:43 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
2018-01-30 01:37:29 +08:00
|
|
|
using namespace LegalizeActions;
|
2016-07-23 04:03:43 +08:00
|
|
|
|
2019-02-08 01:38:00 +08:00
|
|
|
/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
|
|
|
|
///
|
|
|
|
/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
|
|
|
|
/// with any leftover piece as type \p LeftoverTy
|
|
|
|
///
|
2019-02-28 08:16:32 +08:00
|
|
|
/// Returns -1 in the first element of the pair if the breakdown is not
|
|
|
|
/// satisfiable.
|
|
|
|
static std::pair<int, int>
|
|
|
|
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
|
2019-02-08 01:38:00 +08:00
|
|
|
assert(!LeftoverTy.isValid() && "this is an out argument");
|
|
|
|
|
|
|
|
unsigned Size = OrigTy.getSizeInBits();
|
|
|
|
unsigned NarrowSize = NarrowTy.getSizeInBits();
|
|
|
|
unsigned NumParts = Size / NarrowSize;
|
|
|
|
unsigned LeftoverSize = Size - NumParts * NarrowSize;
|
|
|
|
assert(Size > NarrowSize);
|
|
|
|
|
|
|
|
if (LeftoverSize == 0)
|
2019-02-28 08:16:32 +08:00
|
|
|
return {NumParts, 0};
|
2019-02-08 01:38:00 +08:00
|
|
|
|
|
|
|
if (NarrowTy.isVector()) {
|
|
|
|
unsigned EltSize = OrigTy.getScalarSizeInBits();
|
|
|
|
if (LeftoverSize % EltSize != 0)
|
2019-02-28 08:16:32 +08:00
|
|
|
return {-1, -1};
|
2019-02-08 01:38:00 +08:00
|
|
|
LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
|
|
|
|
} else {
|
|
|
|
LeftoverTy = LLT::scalar(LeftoverSize);
|
|
|
|
}
|
|
|
|
|
2019-02-28 08:16:32 +08:00
|
|
|
int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
|
|
|
|
return std::make_pair(NumParts, NumLeftover);
|
2019-02-08 01:38:00 +08:00
|
|
|
}
|
|
|
|
|
2020-01-10 23:41:29 +08:00
|
|
|
/// Compute a type that evenly divides both \p OrigTy and \p TargetTy.
///
/// - vector / vector: the element types are asserted equal; the result has
///   gcd(element counts) elements of that element type.
/// - vector / non-vector: \p TargetTy is asserted to be the element type of
///   \p OrigTy and is returned unchanged.
/// - otherwise: a scalar whose width is the gcd of the two bit widths. A
///   non-vector \p OrigTy paired with a vector \p TargetTy is asserted against.
static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
  if (OrigTy.isVector()) {
    if (TargetTy.isVector()) {
      assert(OrigTy.getElementType() == TargetTy.getElementType());
      const int CommonElts = greatestCommonDivisor(OrigTy.getNumElements(),
                                                   TargetTy.getNumElements());
      return LLT::scalarOrVector(CommonElts, OrigTy.getElementType());
    }

    assert(OrigTy.getElementType() == TargetTy);
    return TargetTy;
  }

  assert(!OrigTy.isVector() && !TargetTy.isVector() &&
         "GCD type of vector and scalar not implemented");

  const int CommonBits = greatestCommonDivisor(OrigTy.getSizeInBits(),
                                               TargetTy.getSizeInBits());
  return LLT::scalar(CommonBits);
}
|
|
|
|
|
|
|
|
/// Compute a type that both \p Ty0 and \p Ty1 evenly divide.
///
/// - vector / vector: element types are asserted equal; the result is a vector
///   with lcm(element counts) elements.
/// - exactly one vector: the other type is asserted to be its element type and
///   the vector type is returned.
/// - neither is a vector: a scalar whose width is the lcm of the bit widths.
static LLT getLCMType(LLT Ty0, LLT Ty1) {
  const bool IsVec0 = Ty0.isVector();
  const bool IsVec1 = Ty1.isVector();

  if (IsVec0 && IsVec1) {
    assert(Ty0.getElementType() == Ty1.getElementType() && "not yet handled");

    const int CommonElts = greatestCommonDivisor(Ty0.getNumElements(),
                                                 Ty1.getNumElements());
    const int EltProduct = Ty0.getNumElements() * Ty1.getNumElements();
    return LLT::vector(EltProduct / CommonElts, Ty0.getElementType());
  }

  if (IsVec0 && !IsVec1) {
    assert(Ty0.getElementType() == Ty1 && "not yet handled");
    return Ty0;
  }

  if (IsVec1 && !IsVec0) {
    assert(Ty1.getElementType() == Ty0 && "not yet handled");
    return Ty1;
  }

  if (!IsVec0 && !IsVec1) {
    // lcm(a, b) == a * b / gcd(a, b)
    const unsigned BitProduct = Ty0.getSizeInBits() * Ty1.getSizeInBits();
    const int CommonBits = greatestCommonDivisor(Ty0.getSizeInBits(),
                                                 Ty1.getSizeInBits());
    return LLT::scalar(BitProduct / CommonBits);
  }

  llvm_unreachable("not yet handled");
}
|
|
|
|
|
2020-02-07 02:01:57 +08:00
|
|
|
/// Map a scalar \p Ty of 16, 32, 64 or 128 bits to the IR half, float, double
/// or fp128 type in \p Ctx. Returns nullptr for non-scalar types and for any
/// other width.
static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
  if (Ty.isScalar()) {
    switch (Ty.getSizeInBits()) {
    case 16:
      return Type::getHalfTy(Ctx);
    case 32:
      return Type::getFloatTy(Ctx);
    case 64:
      return Type::getDoubleTy(Ctx);
    case 128:
      return Type::getFP128Ty(Ctx);
    default:
      break;
    }
  }

  return nullptr;
}
|
|
|
|
|
2018-12-06 04:14:52 +08:00
|
|
|
/// Construct a helper for \p MF that uses the LegalizerInfo provided by the
/// function's subtarget. Forwards to the overload taking an explicit
/// LegalizerInfo.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : LegalizerHelper(MF, *MF.getSubtarget().getLegalizerInfo(), Observer,
                      Builder) {}

/// Construct a helper for \p MF with an explicitly supplied LegalizerInfo
/// \p LI. The builder \p B is pointed at \p MF and told to report changes to
/// \p Observer.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}
|
2016-10-15 06:18:18 +08:00
|
|
|
LegalizerHelper::LegalizeResult
|
[GlobalISel] Make LegalizerInfo accessible in LegalizerHelper
Summary:
We don’t actually use LegalizerInfo in Legalizer pass, it’s just passed
as an argument.
In order to check if an instruction is legal or not, we need to get LegalizerInfo
by calling `MI.getParent()->getParent()->getSubtarget().getLegalizerInfo()`.
Instead, make LegalizerInfo accessible in LegalizerHelper.
Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, kristof.beyls
Reviewed By: qcolombet
Subscribers: dberris, llvm-commits, rovka
Differential Revision: https://reviews.llvm.org/D30838
llvm-svn: 297491
2017-03-11 02:34:57 +08:00
|
|
|
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
|
2017-04-20 23:46:12 +08:00
|
|
|
|
2019-07-02 01:53:50 +08:00
|
|
|
if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
|
|
|
|
MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
|
2020-01-28 04:50:55 +08:00
|
|
|
return LI.legalizeIntrinsic(MI, MIRBuilder, Observer) ? Legalized
|
|
|
|
: UnableToLegalize;
|
[globalisel] Introduce LegalityQuery to better encapsulate the legalizer decisions. NFC.
Summary:
`getAction(const InstrAspect &) const` breaks encapsulation by exposing
the smaller components that are used to decide how to legalize an
instruction.
This is a problem because we need to change the implementation of
LegalizerInfo so that it's able to describe particular type combinations
rather than just cartesian products of types.
For example, declaring the following
setAction({..., 0, s32}, Legal)
setAction({..., 0, s64}, Legal)
setAction({..., 1, s32}, Legal)
setAction({..., 1, s64}, Legal)
currently declares these type combinations as legal:
{s32, s32}
{s64, s32}
{s32, s64}
{s64, s64}
but we currently have no means to say that, for example, {s64, s32} is
not legal. Some operations such as G_INSERT/G_EXTRACT/G_MERGE_VALUES/
G_UNMERGE_VALUES has relationships between the types that are currently
described incorrectly.
Additionally, G_LOAD/G_STORE currently have no means to legalize non-atomics
differently to atomics. The necessary information is in the MMO but we have no
way to use this in the legalizer. Similarly, there is currently no way for the
register type and the memory type to differ so there is no way to cleanly
represent extending-load/truncating-store in a way that can't be broken by
optimizers (resulting in illegal MIR).
This patch introduces LegalityQuery which provides all the information
needed by the legalizer to make a decision on whether something is legal
and how to legalize it.
Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, reames, bogner
Reviewed By: bogner
Subscribers: bogner, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D42244
llvm-svn: 323342
2018-01-25 01:17:46 +08:00
|
|
|
auto Step = LI.getAction(MI, MRI);
|
|
|
|
switch (Step.Action) {
|
2018-01-30 01:37:29 +08:00
|
|
|
case Legal:
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ".. Already legal\n");
|
2016-07-23 04:03:43 +08:00
|
|
|
return AlreadyLegal;
|
2018-01-30 01:37:29 +08:00
|
|
|
case Libcall:
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
|
2016-08-30 03:07:16 +08:00
|
|
|
return libcall(MI);
|
2018-01-30 01:37:29 +08:00
|
|
|
case NarrowScalar:
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
|
[globalisel] Introduce LegalityQuery to better encapsulate the legalizer decisions. NFC.
Summary:
`getAction(const InstrAspect &) const` breaks encapsulation by exposing
the smaller components that are used to decide how to legalize an
instruction.
This is a problem because we need to change the implementation of
LegalizerInfo so that it's able to describe particular type combinations
rather than just cartesian products of types.
For example, declaring the following
setAction({..., 0, s32}, Legal)
setAction({..., 0, s64}, Legal)
setAction({..., 1, s32}, Legal)
setAction({..., 1, s64}, Legal)
currently declares these type combinations as legal:
{s32, s32}
{s64, s32}
{s32, s64}
{s64, s64}
but we currently have no means to say that, for example, {s64, s32} is
not legal. Some operations such as G_INSERT/G_EXTRACT/G_MERGE_VALUES/
G_UNMERGE_VALUES has relationships between the types that are currently
described incorrectly.
Additionally, G_LOAD/G_STORE currently have no means to legalize non-atomics
differently to atomics. The necessary information is in the MMO but we have no
way to use this in the legalizer. Similarly, there is currently no way for the
register type and the memory type to differ so there is no way to cleanly
represent extending-load/truncating-store in a way that can't be broken by
optimizers (resulting in illegal MIR).
This patch introduces LegalityQuery which provides all the information
needed by the legalizer to make a decision on whether something is legal
and how to legalize it.
Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, reames, bogner
Reviewed By: bogner
Subscribers: bogner, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D42244
llvm-svn: 323342
2018-01-25 01:17:46 +08:00
|
|
|
return narrowScalar(MI, Step.TypeIdx, Step.NewType);
|
2018-01-30 01:37:29 +08:00
|
|
|
case WidenScalar:
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
|
[globalisel] Introduce LegalityQuery to better encapsulate the legalizer decisions. NFC.
Summary:
`getAction(const InstrAspect &) const` breaks encapsulation by exposing
the smaller components that are used to decide how to legalize an
instruction.
This is a problem because we need to change the implementation of
LegalizerInfo so that it's able to describe particular type combinations
rather than just cartesian products of types.
For example, declaring the following
setAction({..., 0, s32}, Legal)
setAction({..., 0, s64}, Legal)
setAction({..., 1, s32}, Legal)
setAction({..., 1, s64}, Legal)
currently declares these type combinations as legal:
{s32, s32}
{s64, s32}
{s32, s64}
{s64, s64}
but we currently have no means to say that, for example, {s64, s32} is
not legal. Some operations such as G_INSERT/G_EXTRACT/G_MERGE_VALUES/
G_UNMERGE_VALUES has relationships between the types that are currently
described incorrectly.
Additionally, G_LOAD/G_STORE currently have no means to legalize non-atomics
differently to atomics. The necessary information is in the MMO but we have no
way to use this in the legalizer. Similarly, there is currently no way for the
register type and the memory type to differ so there is no way to cleanly
represent extending-load/truncating-store in a way that can't be broken by
optimizers (resulting in illegal MIR).
This patch introduces LegalityQuery which provides all the information
needed by the legalizer to make a decision on whether something is legal
and how to legalize it.
Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, reames, bogner
Reviewed By: bogner
Subscribers: bogner, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D42244
llvm-svn: 323342
2018-01-25 01:17:46 +08:00
|
|
|
return widenScalar(MI, Step.TypeIdx, Step.NewType);
|
2018-01-30 01:37:29 +08:00
|
|
|
case Lower:
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ".. Lower\n");
|
[globalisel] Introduce LegalityQuery to better encapsulate the legalizer decisions. NFC.
Summary:
`getAction(const InstrAspect &) const` breaks encapsulation by exposing
the smaller components that are used to decide how to legalize an
instruction.
This is a problem because we need to change the implementation of
LegalizerInfo so that it's able to describe particular type combinations
rather than just cartesian products of types.
For example, declaring the following
setAction({..., 0, s32}, Legal)
setAction({..., 0, s64}, Legal)
setAction({..., 1, s32}, Legal)
setAction({..., 1, s64}, Legal)
currently declares these type combinations as legal:
{s32, s32}
{s64, s32}
{s32, s64}
{s64, s64}
but we currently have no means to say that, for example, {s64, s32} is
not legal. Some operations such as G_INSERT/G_EXTRACT/G_MERGE_VALUES/
G_UNMERGE_VALUES has relationships between the types that are currently
described incorrectly.
Additionally, G_LOAD/G_STORE currently have no means to legalize non-atomics
differently to atomics. The necessary information is in the MMO but we have no
way to use this in the legalizer. Similarly, there is currently no way for the
register type and the memory type to differ so there is no way to cleanly
represent extending-load/truncating-store in a way that can't be broken by
optimizers (resulting in illegal MIR).
This patch introduces LegalityQuery which provides all the information
needed by the legalizer to make a decision on whether something is legal
and how to legalize it.
Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, reames, bogner
Reviewed By: bogner
Subscribers: bogner, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D42244
llvm-svn: 323342
2018-01-25 01:17:46 +08:00
|
|
|
return lower(MI, Step.TypeIdx, Step.NewType);
|
2018-01-30 01:37:29 +08:00
|
|
|
case FewerElements:
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
|
[globalisel] Introduce LegalityQuery to better encapsulate the legalizer decisions. NFC.
Summary:
`getAction(const InstrAspect &) const` breaks encapsulation by exposing
the smaller components that are used to decide how to legalize an
instruction.
This is a problem because we need to change the implementation of
LegalizerInfo so that it's able to describe particular type combinations
rather than just cartesian products of types.
For example, declaring the following
setAction({..., 0, s32}, Legal)
setAction({..., 0, s64}, Legal)
setAction({..., 1, s32}, Legal)
setAction({..., 1, s64}, Legal)
currently declares these type combinations as legal:
{s32, s32}
{s64, s32}
{s32, s64}
{s64, s64}
but we currently have no means to say that, for example, {s64, s32} is
not legal. Some operations such as G_INSERT/G_EXTRACT/G_MERGE_VALUES/
G_UNMERGE_VALUES has relationships between the types that are currently
described incorrectly.
Additionally, G_LOAD/G_STORE currently have no means to legalize non-atomics
differently to atomics. The necessary information is in the MMO but we have no
way to use this in the legalizer. Similarly, there is currently no way for the
register type and the memory type to differ so there is no way to cleanly
represent extending-load/truncating-store in a way that can't be broken by
optimizers (resulting in illegal MIR).
This patch introduces LegalityQuery which provides all the information
needed by the legalizer to make a decision on whether something is legal
and how to legalize it.
Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, reames, bogner
Reviewed By: bogner
Subscribers: bogner, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D42244
llvm-svn: 323342
2018-01-25 01:17:46 +08:00
|
|
|
return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
|
2019-02-12 06:00:39 +08:00
|
|
|
case MoreElements:
|
|
|
|
LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
|
|
|
|
return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
|
2018-01-30 01:37:29 +08:00
|
|
|
case Custom:
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
|
2018-12-06 04:14:52 +08:00
|
|
|
return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
|
|
|
|
: UnableToLegalize;
|
2016-07-23 04:03:43 +08:00
|
|
|
default:
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
|
2016-07-23 04:03:43 +08:00
|
|
|
return UnableToLegalize;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-24 23:50:29 +08:00
|
|
|
/// Append \p NumParts fresh generic virtual registers of type \p Ty to
/// \p VRegs, then emit a single unmerge of \p Reg into all of \p VRegs.
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int Remaining = NumParts; Remaining > 0; --Remaining) {
    Register Piece = MRI.createGenericVirtualRegister(Ty);
    VRegs.push_back(Piece);
  }

  MIRBuilder.buildUnmerge(VRegs, Reg);
}
|
|
|
|
|
2019-06-24 23:50:29 +08:00
|
|
|
bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
|
2019-01-31 10:46:05 +08:00
|
|
|
LLT MainTy, LLT &LeftoverTy,
|
2019-06-24 23:50:29 +08:00
|
|
|
SmallVectorImpl<Register> &VRegs,
|
|
|
|
SmallVectorImpl<Register> &LeftoverRegs) {
|
2019-01-31 10:46:05 +08:00
|
|
|
assert(!LeftoverTy.isValid() && "this is an out argument");
|
|
|
|
|
|
|
|
unsigned RegSize = RegTy.getSizeInBits();
|
|
|
|
unsigned MainSize = MainTy.getSizeInBits();
|
|
|
|
unsigned NumParts = RegSize / MainSize;
|
|
|
|
unsigned LeftoverSize = RegSize - NumParts * MainSize;
|
|
|
|
|
|
|
|
// Use an unmerge when possible.
|
|
|
|
if (LeftoverSize == 0) {
|
|
|
|
for (unsigned I = 0; I < NumParts; ++I)
|
|
|
|
VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
|
|
|
|
MIRBuilder.buildUnmerge(VRegs, Reg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (MainTy.isVector()) {
|
|
|
|
unsigned EltSize = MainTy.getScalarSizeInBits();
|
|
|
|
if (LeftoverSize % EltSize != 0)
|
|
|
|
return false;
|
|
|
|
LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
|
|
|
|
} else {
|
|
|
|
LeftoverTy = LLT::scalar(LeftoverSize);
|
|
|
|
}
|
|
|
|
|
|
|
|
// For irregular sizes, extract the individual parts.
|
|
|
|
for (unsigned I = 0; I != NumParts; ++I) {
|
2019-06-24 23:50:29 +08:00
|
|
|
Register NewReg = MRI.createGenericVirtualRegister(MainTy);
|
2019-01-31 10:46:05 +08:00
|
|
|
VRegs.push_back(NewReg);
|
|
|
|
MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
|
|
|
|
Offset += LeftoverSize) {
|
2019-06-24 23:50:29 +08:00
|
|
|
Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
|
2019-01-31 10:46:05 +08:00
|
|
|
LeftoverRegs.push_back(NewReg);
|
|
|
|
MIRBuilder.buildExtract(NewReg, Reg, Offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-06-24 23:50:29 +08:00
|
|
|
void LegalizerHelper::insertParts(Register DstReg,
|
2019-01-31 10:46:05 +08:00
|
|
|
LLT ResultTy, LLT PartTy,
|
2019-06-24 23:50:29 +08:00
|
|
|
ArrayRef<Register> PartRegs,
|
2019-01-31 10:46:05 +08:00
|
|
|
LLT LeftoverTy,
|
2019-06-24 23:50:29 +08:00
|
|
|
ArrayRef<Register> LeftoverRegs) {
|
2019-01-31 10:46:05 +08:00
|
|
|
if (!LeftoverTy.isValid()) {
|
|
|
|
assert(LeftoverRegs.empty());
|
|
|
|
|
2019-02-05 08:13:44 +08:00
|
|
|
if (!ResultTy.isVector()) {
|
|
|
|
MIRBuilder.buildMerge(DstReg, PartRegs);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-01-31 10:46:05 +08:00
|
|
|
if (PartTy.isVector())
|
|
|
|
MIRBuilder.buildConcatVectors(DstReg, PartRegs);
|
|
|
|
else
|
|
|
|
MIRBuilder.buildBuildVector(DstReg, PartRegs);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned PartSize = PartTy.getSizeInBits();
|
|
|
|
unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
|
|
|
|
|
2019-06-28 09:47:44 +08:00
|
|
|
Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
|
2019-01-31 10:46:05 +08:00
|
|
|
MIRBuilder.buildUndef(CurResultReg);
|
|
|
|
|
|
|
|
unsigned Offset = 0;
|
2019-06-28 09:47:44 +08:00
|
|
|
for (Register PartReg : PartRegs) {
|
|
|
|
Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
|
2019-01-31 10:46:05 +08:00
|
|
|
MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
|
|
|
|
CurResultReg = NewResultReg;
|
|
|
|
Offset += PartSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
|
|
|
|
// Use the original output register for the final insert to avoid a copy.
|
2019-06-28 09:47:44 +08:00
|
|
|
Register NewResultReg = (I + 1 == E) ?
|
2019-01-31 10:46:05 +08:00
|
|
|
DstReg : MRI.createGenericVirtualRegister(ResultTy);
|
|
|
|
|
|
|
|
MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
|
|
|
|
CurResultReg = NewResultReg;
|
|
|
|
Offset += LeftoverPartSize;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-10 23:41:29 +08:00
|
|
|
/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
|
|
|
|
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
|
|
|
|
const MachineInstr &MI) {
|
|
|
|
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
|
|
|
|
|
|
|
|
const int NumResults = MI.getNumOperands() - 1;
|
|
|
|
Regs.resize(NumResults);
|
|
|
|
for (int I = 0; I != NumResults; ++I)
|
|
|
|
Regs[I] = MI.getOperand(I).getReg();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Break \p SrcReg into pieces of the GCD type of \p DstTy, the type of
/// \p SrcReg and \p NarrowTy, placing them in \p Parts.
///
/// \returns the GCD type that was used.
LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                                    LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  LLT GCDTy = getGCDType(DstTy, getGCDType(SrcTy, NarrowTy));

  // The source is already a single GCD-typed piece; no unmerge is needed.
  if (SrcTy == GCDTy) {
    Parts.push_back(SrcReg);
    return GCDTy;
  }

  // Otherwise split the source into GCD-typed pieces.
  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  getUnmergeResults(Parts, *Unmerge);
  return GCDTy;
}
|
|
|
|
|
2020-01-12 08:05:06 +08:00
|
|
|
/// Regroup the \p GCDTy typed registers in \p VRegs into \p NarrowTy typed
/// registers that together span the LCM type of \p DstTy and \p NarrowTy.
///
/// Positions past the end of the incoming registers are filled according to
/// \p PadStrategy: zero for G_ZEXT, undef for G_ANYEXT, and for G_SEXT the
/// last incoming register arithmetically shifted right by its width minus one.
/// On return \p VRegs holds the \p NarrowTy registers.
///
/// \returns the LCM type.
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumNarrowPieces = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int GCDPiecesPerNarrow = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumSrcPieces = VRegs.size();

  // A GCD-typed filler, only needed when the sources do not cover everything.
  Register GCDPad;
  if (NumSrcPieces < NumNarrowPieces * GCDPiecesPerNarrow) {
    switch (PadStrategy) {
    case TargetOpcode::G_ZEXT:
      GCDPad = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
      break;
    case TargetOpcode::G_ANYEXT:
      GCDPad = MIRBuilder.buildUndef(GCDTy).getReg(0);
      break;
    default: {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      GCDPad = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
      break;
    }
    }
  }

  // One entry per NarrowTy register of the final result.
  SmallVector<Register, 4> NarrowRegs(NumNarrowPieces);

  // Scratch list of the GCD-typed inputs for the NarrowTy register currently
  // being assembled.
  SmallVector<Register, 4> GroupRegs(GCDPiecesPerNarrow);

  // Set once a NarrowTy register consisting only of padding exists; every
  // later all-padding position reuses it.
  Register NarrowPad;

  for (int Piece = 0; Piece != NumNarrowPieces; ++Piece) {
    bool OnlyPadding = true;

    for (int Sub = 0; Sub != GCDPiecesPerNarrow; ++Sub) {
      const int SrcIdx = Piece * GCDPiecesPerNarrow + Sub;
      if (SrcIdx < NumSrcPieces) {
        GroupRegs[Sub] = VRegs[SrcIdx];
        // There are meaningful bits here we can't reuse later.
        OnlyPadding = false;
      } else {
        GroupRegs[Sub] = GCDPad;
      }
    }

    // For undef and zero padding a NarrowTy sized value can be emitted
    // directly instead of merging smaller ones. Sign extension has no such
    // trivial constant and falls through to the merge below.
    if (OnlyPadding && !NarrowPad) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        NarrowPad = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        NarrowPad = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
    }

    // Once an all-padding register exists, reuse it without new instructions.
    if (NarrowPad) {
      NarrowRegs[Piece] = NarrowPad;
      continue;
    }

    if (GCDPiecesPerNarrow == 1)
      NarrowRegs[Piece] = GroupRegs[0];
    else
      NarrowRegs[Piece] = MIRBuilder.buildMerge(NarrowTy, GroupRegs).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (OnlyPadding && !NarrowPad)
      NarrowPad = NarrowRegs[Piece];
  }

  VRegs = std::move(NarrowRegs);
  return LCMTy;
}
|
|
|
|
|
|
|
|
/// Merge \p RemergeRegs into a value of type \p LCMTy and write the part that
/// corresponds to \p DstReg.
///
/// If the type of \p DstReg equals \p LCMTy the merge defines \p DstReg
/// directly. Otherwise the merged value is truncated (scalar to scalar) or the
/// bits starting at offset 0 are extracted (\p LCMTy a vector).
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Same type: the merge itself is the result.
  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.
  auto Widened = MIRBuilder.buildMerge(LCMTy, RemergeRegs);

  const bool BothScalar = DstTy.isScalar() && LCMTy.isScalar();
  if (BothScalar) {
    MIRBuilder.buildTrunc(DstReg, Widened);
    return;
  }

  if (LCMTy.isVector()) {
    MIRBuilder.buildExtract(DstReg, Widened, 0);
    return;
  }

  llvm_unreachable("unhandled case");
}
|
|
|
|
|
2017-02-09 07:23:39 +08:00
|
|
|
/// Select the runtime library call that implements generic opcode \p Opcode
/// for operands \p Size bits wide.
///
/// Every case asserts the sizes it has a libcall for. Reaching the end of the
/// switch with an opcode that has no case is unreachable.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    switch (Size) {
    case 32:
      return RTLIB::SDIV_I32;
    case 64:
      return RTLIB::SDIV_I64;
    case 128:
      return RTLIB::SDIV_I128;
    default:
      llvm_unreachable("unexpected size");
    }
  case TargetOpcode::G_UDIV:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    switch (Size) {
    case 32:
      return RTLIB::UDIV_I32;
    case 64:
      return RTLIB::UDIV_I64;
    case 128:
      return RTLIB::UDIV_I128;
    default:
      llvm_unreachable("unexpected size");
    }
  case TargetOpcode::G_SREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
  case TargetOpcode::G_UREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::CTLZ_I32;
  case TargetOpcode::G_FADD:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
  case TargetOpcode::G_FSUB:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
  case TargetOpcode::G_FMUL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
  case TargetOpcode::G_FDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
  case TargetOpcode::G_FEXP:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
  case TargetOpcode::G_FEXP2:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
  case TargetOpcode::G_FREM:
    // Without this check any size other than 64 would silently pick the
    // 32-bit libcall.
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
  case TargetOpcode::G_FPOW:
    // Same guard as G_FREM: only 32 and 64 bit variants are selected here.
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
  case TargetOpcode::G_FMA:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
  case TargetOpcode::G_FSIN:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::SIN_F128
                       : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
  case TargetOpcode::G_FCOS:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::COS_F128
                       : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
  case TargetOpcode::G_FLOG10:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG10_F128
                       : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
  case TargetOpcode::G_FLOG:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG_F128
                       : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
  case TargetOpcode::G_FLOG2:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG2_F128
                       : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
  case TargetOpcode::G_FCEIL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
  case TargetOpcode::G_FFLOOR:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
  }
  llvm_unreachable("Unknown libcall function");
}
|
|
|
|
|
2019-09-14 04:25:58 +08:00
|
|
|
/// True if an instruction is in tail position in its caller. Intended for
|
|
|
|
/// legalizing libcalls as tail calls when possible.
|
|
|
|
static bool isLibCallInTailPosition(MachineInstr &MI) {
|
|
|
|
const Function &F = MI.getParent()->getParent()->getFunction();
|
|
|
|
|
|
|
|
// Conservatively require the attributes of the call to match those of
|
|
|
|
// the return. Ignore NoAlias and NonNull because they don't affect the
|
|
|
|
// call sequence.
|
|
|
|
AttributeList CallerAttrs = F.getAttributes();
|
|
|
|
if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
|
|
|
|
.removeAttribute(Attribute::NoAlias)
|
|
|
|
.removeAttribute(Attribute::NonNull)
|
|
|
|
.hasAttributes())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// It's not safe to eliminate the sign / zero extension of the return value.
|
|
|
|
if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
|
|
|
|
CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Only tail call if the following instruction is a standard return.
|
|
|
|
auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
|
|
|
|
MachineInstr *Next = MI.getNextNode();
|
|
|
|
if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-07-05 20:57:24 +08:00
|
|
|
/// Emit, through \p MIRBuilder, a call to the runtime library routine
/// \p Libcall.
///
/// \param MIRBuilder Builder handed to the target's call lowering.
/// \param Libcall    Runtime library routine to call.
/// \param Result     Register(s) and IR type that receive the return value.
/// \param Args       Registers and IR types of the call arguments.
/// \returns Legalized when the target's CallLowering lowered the call;
///          UnableToLegalize when the target provides no routine name for
///          \p Libcall or when call lowering fails.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();

  // A target may have no routine registered for this libcall, in which case
  // the name is null. Do not build an external-symbol operand from a null
  // name; report the failure to the caller instead.
  const char *Name = TLI.getLibcallName(Libcall);
  if (!Name)
    return LegalizerHelper::UnableToLegalize;

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(Libcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}
|
|
|
|
|
2018-01-17 21:34:10 +08:00
|
|
|
// Useful for libcalls where all operands have the same type.
|
2017-06-15 18:53:31 +08:00
|
|
|
static LegalizerHelper::LegalizeResult
|
|
|
|
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
|
|
|
|
Type *OpType) {
|
|
|
|
auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
|
2018-01-12 19:30:45 +08:00
|
|
|
|
|
|
|
SmallVector<CallLowering::ArgInfo, 3> Args;
|
|
|
|
for (unsigned i = 1; i < MI.getNumOperands(); i++)
|
|
|
|
Args.push_back({MI.getOperand(i).getReg(), OpType});
|
2017-07-05 20:57:24 +08:00
|
|
|
return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
|
2018-01-12 19:30:45 +08:00
|
|
|
Args);
|
2017-06-15 18:53:31 +08:00
|
|
|
}
|
|
|
|
|
2019-07-19 08:24:45 +08:00
|
|
|
/// Lower a memcpy / memset / memmove G_INTRINSIC_W_SIDE_EFFECTS instruction
/// to a call to the matching runtime library routine.
///
/// Operand 0 of \p MI holds the intrinsic ID and the last operand is an
/// immediate denoting 'tail'; the register operands in between become the
/// call arguments. The call is requested as a tail call only when that
/// immediate is 1 and \p MI is in tail position. If call lowering reports
/// that it produced a tail call, the return following \p MI is erased.
/// \p MI itself is not erased by this function.
///
/// \param MIRBuilder Builder handed to the target's call lowering; its
///                   insertion point is set to \p MI.
/// \param MRI        Used to look up the low-level type of each argument.
/// \param MI         The intrinsic instruction to lower.
/// \returns Legalized on success; UnableToLegalize for any other intrinsic,
///          when the target provides no routine name for the libcall, or
///          when call lowering fails.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
    Register Reg = MI.getOperand(i).getReg();

    // Call lowering needs an IR type: pointers become i8* in their address
    // space, everything else an integer of the same bit width.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
  RTLIB::Libcall RTLibcall;
  switch (ID) {
  case Intrinsic::memcpy:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case Intrinsic::memset:
    RTLibcall = RTLIB::MEMSET;
    break;
  case Intrinsic::memmove:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  default:
    return LegalizerHelper::UnableToLegalize;
  }

  // A target may have no routine registered for this libcall, in which case
  // the name is null. Bail out before touching any MIR rather than building
  // an external-symbol operand from a null name.
  const char *Name = TLI.getLibcallName(RTLibcall);
  if (!Name)
    return LegalizerHelper::UnableToLegalize;

  MIRBuilder.setInstr(MI);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
                    isLibCallInTailPosition(MI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    // We must have a return following the call to get past
    // isLibCallInTailPosition.
    assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
           "Expected instr following MI to be a return?");

    // We lowered a tail call, so the call is now the return from the block.
    // Delete the old return.
    MI.getNextNode()->eraseFromParent();
  }

  return LegalizerHelper::Legalized;
}
|
|
|
|
|
2018-01-17 21:34:10 +08:00
|
|
|
/// Select the runtime library conversion routine for \p Opcode, converting a
/// value of IR type \p FromType into one of IR type \p ToType.
///
/// Only the FP extend / truncate and FP <-> integer conversion opcodes listed
/// in the switch are handled; any other opcode is treated as unreachable.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  const MVT DstVT = MVT::getVT(ToType);
  const MVT SrcVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(SrcVT, DstVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(SrcVT, DstVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(SrcVT, DstVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(SrcVT, DstVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(SrcVT, DstVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(SrcVT, DstVT);
  default:
    llvm_unreachable("Unsupported libcall function");
  }
}
|
|
|
|
|
|
|
|
/// Lower the conversion instruction \p MI to a runtime library call taking
/// operand 1 (of IR type \p FromType) as its only argument and writing the
/// result (of IR type \p ToType) to operand 0.
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  const RTLIB::Libcall Routine =
      getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  const Register DstReg = MI.getOperand(0).getReg();
  const Register SrcReg = MI.getOperand(1).getReg();
  return createLibcall(MIRBuilder, Routine, {DstReg, ToType},
                       {{SrcReg, FromType}});
}
|
|
|
|
|
2016-10-15 06:18:18 +08:00
|
|
|
/// Legalize \p MI by replacing it with a call to a runtime library routine.
///
/// Handled are integer division / remainder and G_CTLZ_ZERO_UNDEF, the
/// floating-point arithmetic and math opcodes listed below, and the FP <-> FP
/// and FP <-> integer conversions. Any other opcode, or an operand size that
/// is not supported here, yields UnableToLegalize. On success \p MI is erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  const LLT DstLLT = MRI.getType(MI.getOperand(0).getReg());
  const unsigned DstSize = DstLLT.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  // The size checks below only accept 32- and 64-bit values.
  const auto Is32Or64 = [](unsigned Bits) { return Bits == 32 || Bits == 64; };

  // Each supported case stores the outcome of its libcall here; the result is
  // inspected once after the switch.
  LegalizeResult Status = UnableToLegalize;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    // Result and operands are integers of the destination width.
    Status = simpleLibcall(MI, MIRBuilder, DstSize,
                           IntegerType::get(Ctx, DstSize));
    break;

  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR: {
    // NOTE(review): getRTLibDesc has 128-bit entries for some of these
    // opcodes (e.g. G_FSIN), which this check rejects -- confirm whether that
    // is intended.
    Type *FloatTy = getFloatTypeForLLT(Ctx, DstLLT);
    if (!FloatTy || !Is32Or64(DstSize)) {
      LLVM_DEBUG(dbgs() << "No libcall available for size " << DstSize << ".\n");
      return UnableToLegalize;
    }
    Status = simpleLibcall(MI, MIRBuilder, DstSize, FloatTy);
    break;
  }

  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    // Both sides must map onto an IR floating-point type.
    Type *SrcTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *DstTy = getFloatTypeForLLT(Ctx, DstLLT);
    if (!(SrcTy && DstTy))
      return UnableToLegalize;
    Status = conversionLibcall(MI, MIRBuilder, DstTy, SrcTy);
    break;
  }

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    const unsigned SrcSize =
        MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (!Is32Or64(DstSize) || !Is32Or64(SrcSize))
      return UnableToLegalize;
    Type *IntTy = DstSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx);
    Type *FPTy = SrcSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    Status = conversionLibcall(MI, MIRBuilder, IntTy, FPTy);
    break;
  }

  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    const unsigned SrcSize =
        MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (!Is32Or64(SrcSize) || !Is32Or64(DstSize))
      return UnableToLegalize;
    Type *FPTy = DstSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    Type *IntTy = SrcSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx);
    Status = conversionLibcall(MI, MIRBuilder, FPTy, IntTy);
    break;
  }

  default:
    return UnableToLegalize;
  }

  // Keep the original instruction unless the libcall was actually emitted.
  if (Status != Legalized)
    return Status;

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2016-10-15 06:18:18 +08:00
|
|
|
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
|
|
|
|
unsigned TypeIdx,
|
|
|
|
LLT NarrowTy) {
|
2017-01-19 01:29:54 +08:00
|
|
|
MIRBuilder.setInstr(MI);
|
|
|
|
|
2018-04-28 03:48:53 +08:00
|
|
|
uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
|
|
|
|
uint64_t NarrowSize = NarrowTy.getSizeInBits();
|
[GlobalISel] Enable legalizing non-power-of-2 sized types.
This changes the interface of how targets describe how to legalize, see
the below description.
1. Interface for targets to describe how to legalize.
In GlobalISel, the API in the LegalizerInfo class is the main interface
for targets to specify which types are legal for which operations, and
what to do to turn illegal type/operation combinations into legal ones.
For each operation the type sizes that can be legalized without having
to change the size of the type are specified with a call to setAction.
This isn't different to how GlobalISel worked before. For example, for a
target that supports 32 and 64 bit adds natively:
for (auto Ty : {s32, s64})
setAction({G_ADD, 0, Ty}, Legal);
or for a target that needs a library call for a 32 bit division:
setAction({G_SDIV, s32}, Libcall);
The main conceptual change to the LegalizerInfo API, is in specifying
how to legalize the type sizes for which a change of size is needed. For
example, in the above example, how to specify how all types from i1 to
i8388607 (apart from s32 and s64 which are legal) need to be legalized
and expressed in terms of operations on the available legal sizes
(again, i32 and i64 in this case). Before, the implementation only
allowed specifying power-of-2-sized types (e.g. setAction({G_ADD, 0,
s128}, NarrowScalar). A worse limitation was that if you'd wanted to
specify how to legalize all the sized types as allowed by the LLVM-IR
LangRef, i1 to i8388607, you'd have to call setAction 8388607-3 times
and probably would need a lot of memory to store all of these
specifications.
Instead, the legalization actions that need to change the size of the
type are specified now using a "SizeChangeStrategy". For example:
setLegalizeScalarToDifferentSizeStrategy(
G_ADD, 0, widenToLargerAndNarrowToLargest);
This example indicates that for type sizes for which there is a larger
size that can be legalized towards, do it by Widening the size.
For example, G_ADD on s17 will be legalized by first doing WidenScalar
to make it s32, after which it's legal.
The "NarrowToLargest" indicates what to do if there is no larger size
that can be legalized towards. E.g. G_ADD on s92 will be legalized by
doing NarrowScalar to s64.
Another example, taken from the ARM backend is:
for (unsigned Op : {G_SDIV, G_UDIV}) {
setLegalizeScalarToDifferentSizeStrategy(Op, 0,
widenToLargerTypesUnsupportedOtherwise);
if (ST.hasDivideInARMMode())
setAction({Op, s32}, Legal);
else
setAction({Op, s32}, Libcall);
}
For this example, G_SDIV on s8, on a target without a divide
instruction, would be legalized by first doing action (WidenScalar,
s32), followed by (Libcall, s32).
The same principle is also followed for when the number of vector lanes
on vector data types need to be changed, e.g.:
setAction({G_ADD, LLT::vector(8, 8)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(16, 8)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(4, 16)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(8, 16)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(2, 32)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(4, 32)}, LegalizerInfo::Legal);
setLegalizeVectorElementToDifferentSizeStrategy(
G_ADD, 0, widenToLargerTypesUnsupportedOtherwise);
As currently implemented here, vector types are legalized by first
making the vector element size legal, followed by then making the number
of lanes legal. The strategy to follow in the first step is set by a
call to setLegalizeVectorElementToDifferentSizeStrategy, see example
above. The strategy followed in the second step
"moreToWiderTypesAndLessToWidest" (see code for its definition),
indicating that vectors are widened to more elements so they map to
natively supported vector widths, or when there isn't a legal wider
vector, split the vector to map it to the widest vector supported.
Therefore, for the above specification, some example legalizations are:
* getAction({G_ADD, LLT::vector(3, 3)})
returns {WidenScalar, LLT::vector(3, 8)}
* getAction({G_ADD, LLT::vector(3, 8)})
then returns {MoreElements, LLT::vector(8, 8)}
* getAction({G_ADD, LLT::vector(20, 8)})
returns {FewerElements, LLT::vector(16, 8)}
2. Key implementation aspects.
How to legalize a specific (operation, type index, size) tuple is
represented by mapping intervals of integers representing a range of
size types to an action to take, e.g.:
setScalarAction({G_ADD, LLT::scalar(1)},
{{1, WidenScalar}, // bit sizes [ 1, 32[
{32, Legal}, // bit sizes [32, 33[
{33, WidenScalar}, // bit sizes [33, 64[
{64, Legal}, // bit sizes [64, 65[
{65, NarrowScalar} // bit sizes [65, +inf[
});
Please note that most of the code to do the actual lowering of
non-power-of-2 sized types is currently missing, this is just trying to
make it possible for targets to specify what is legal, and how non-legal
types should be legalized. Probably quite a bit of further work is
needed in the actual legalizing and the other passes in GlobalISel to
support non-power-of-2 sized types.
I hope the documentation in LegalizerInfo.h and the examples provided in the
various {Target}LegalizerInfo.cpp and LegalizerInfoTest.cpp explains well
enough how this is meant to be used.
This drops the need for LLT::{half,double}...Size().
Differential Revision: https://reviews.llvm.org/D30529
llvm-svn: 317560
2017-11-07 18:34:34 +08:00
|
|
|
|
2016-08-05 04:54:13 +08:00
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
default:
|
|
|
|
return UnableToLegalize;
|
2017-07-01 04:27:36 +08:00
|
|
|
case TargetOpcode::G_IMPLICIT_DEF: {
|
[GlobalISel] Enable legalizing non-power-of-2 sized types.
This changes the interface of how targets describe how to legalize, see
the below description.
1. Interface for targets to describe how to legalize.
In GlobalISel, the API in the LegalizerInfo class is the main interface
for targets to specify which types are legal for which operations, and
what to do to turn illegal type/operation combinations into legal ones.
For each operation the type sizes that can be legalized without having
to change the size of the type are specified with a call to setAction.
This isn't different to how GlobalISel worked before. For example, for a
target that supports 32 and 64 bit adds natively:
for (auto Ty : {s32, s64})
setAction({G_ADD, 0, Ty}, Legal);
or for a target that needs a library call for a 32 bit division:
setAction({G_SDIV, s32}, Libcall);
The main conceptual change to the LegalizerInfo API, is in specifying
how to legalize the type sizes for which a change of size is needed. For
example, in the above example, how to specify how all types from i1 to
i8388607 (apart from s32 and s64 which are legal) need to be legalized
and expressed in terms of operations on the available legal sizes
(again, i32 and i64 in this case). Before, the implementation only
allowed specifying power-of-2-sized types (e.g. setAction({G_ADD, 0,
s128}, NarrowScalar). A worse limitation was that if you'd wanted to
specify how to legalize all the sized types as allowed by the LLVM-IR
LangRef, i1 to i8388607, you'd have to call setAction 8388607-3 times
and probably would need a lot of memory to store all of these
specifications.
Instead, the legalization actions that need to change the size of the
type are specified now using a "SizeChangeStrategy". For example:
setLegalizeScalarToDifferentSizeStrategy(
G_ADD, 0, widenToLargerAndNarrowToLargest);
This example indicates that for type sizes for which there is a larger
size that can be legalized towards, do it by Widening the size.
For example, G_ADD on s17 will be legalized by first doing WidenScalar
to make it s32, after which it's legal.
The "NarrowToLargest" indicates what to do if there is no larger size
that can be legalized towards. E.g. G_ADD on s92 will be legalized by
doing NarrowScalar to s64.
Another example, taken from the ARM backend is:
for (unsigned Op : {G_SDIV, G_UDIV}) {
setLegalizeScalarToDifferentSizeStrategy(Op, 0,
widenToLargerTypesUnsupportedOtherwise);
if (ST.hasDivideInARMMode())
setAction({Op, s32}, Legal);
else
setAction({Op, s32}, Libcall);
}
For this example, G_SDIV on s8, on a target without a divide
instruction, would be legalized by first doing action (WidenScalar,
s32), followed by (Libcall, s32).
The same principle is also followed for when the number of vector lanes
on vector data types need to be changed, e.g.:
setAction({G_ADD, LLT::vector(8, 8)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(16, 8)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(4, 16)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(8, 16)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(2, 32)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(4, 32)}, LegalizerInfo::Legal);
setLegalizeVectorElementToDifferentSizeStrategy(
G_ADD, 0, widenToLargerTypesUnsupportedOtherwise);
As currently implemented here, vector types are legalized by first
making the vector element size legal, followed by then making the number
of lanes legal. The strategy to follow in the first step is set by a
call to setLegalizeVectorElementToDifferentSizeStrategy, see example
above. The strategy followed in the second step
"moreToWiderTypesAndLessToWidest" (see code for its definition),
indicating that vectors are widened to more elements so they map to
natively supported vector widths, or when there isn't a legal wider
vector, split the vector to map it to the widest vector supported.
Therefore, for the above specification, some example legalizations are:
* getAction({G_ADD, LLT::vector(3, 3)})
returns {WidenScalar, LLT::vector(3, 8)}
* getAction({G_ADD, LLT::vector(3, 8)})
then returns {MoreElements, LLT::vector(8, 8)}
* getAction({G_ADD, LLT::vector(20, 8)})
returns {FewerElements, LLT::vector(16, 8)}
2. Key implementation aspects.
How to legalize a specific (operation, type index, size) tuple is
represented by mapping intervals of integers representing a range of
size types to an action to take, e.g.:
setScalarAction({G_ADD, LLT::scalar(1)},
{{1, WidenScalar}, // bit sizes [ 1, 32[
{32, Legal}, // bit sizes [32, 33[
{33, WidenScalar}, // bit sizes [33, 64[
{64, Legal}, // bit sizes [64, 65[
{65, NarrowScalar} // bit sizes [65, +inf[
});
Please note that most of the code to do the actual lowering of
non-power-of-2 sized types is currently missing, this is just trying to
make it possible for targets to specify what is legal, and how non-legal
types should be legalized. Probably quite a bit of further work is
needed in the actual legalizing and the other passes in GlobalISel to
support non-power-of-2 sized types.
I hope the documentation in LegalizerInfo.h and the examples provided in the
various {Target}LegalizerInfo.cpp and LegalizerInfoTest.cpp explains well
enough how this is meant to be used.
This drops the need for LLT::{half,double}...Size().
Differential Revision: https://reviews.llvm.org/D30529
llvm-svn: 317560
2017-11-07 18:34:34 +08:00
|
|
|
// FIXME: add support for when SizeOp0 isn't an exact multiple of
|
|
|
|
// NarrowSize.
|
|
|
|
if (SizeOp0 % NarrowSize != 0)
|
|
|
|
return UnableToLegalize;
|
|
|
|
int NumParts = SizeOp0 / NarrowSize;
|
2017-07-01 04:27:36 +08:00
|
|
|
|
2019-06-24 23:50:29 +08:00
|
|
|
SmallVector<Register, 2> DstRegs;
|
2018-02-15 03:58:36 +08:00
|
|
|
for (int i = 0; i < NumParts; ++i)
|
|
|
|
DstRegs.push_back(
|
2020-01-23 19:51:35 +08:00
|
|
|
MIRBuilder.buildUndef(NarrowTy).getReg(0));
|
2018-12-11 02:44:58 +08:00
|
|
|
|
2019-06-28 09:47:44 +08:00
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
2018-12-11 02:44:58 +08:00
|
|
|
if(MRI.getType(DstReg).isVector())
|
|
|
|
MIRBuilder.buildBuildVector(DstReg, DstRegs);
|
|
|
|
else
|
|
|
|
MIRBuilder.buildMerge(DstReg, DstRegs);
|
2017-07-01 04:27:36 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-04-11 01:27:53 +08:00
|
|
|
case TargetOpcode::G_CONSTANT: {
|
|
|
|
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
|
|
|
|
const APInt &Val = MI.getOperand(1).getCImm()->getValue();
|
|
|
|
unsigned TotalSize = Ty.getSizeInBits();
|
|
|
|
unsigned NarrowSize = NarrowTy.getSizeInBits();
|
|
|
|
int NumParts = TotalSize / NarrowSize;
|
|
|
|
|
2019-06-24 23:50:29 +08:00
|
|
|
SmallVector<Register, 4> PartRegs;
|
2019-04-11 01:27:53 +08:00
|
|
|
for (int I = 0; I != NumParts; ++I) {
|
|
|
|
unsigned Offset = I * NarrowSize;
|
|
|
|
auto K = MIRBuilder.buildConstant(NarrowTy,
|
|
|
|
Val.lshr(Offset).trunc(NarrowSize));
|
|
|
|
PartRegs.push_back(K.getReg(0));
|
|
|
|
}
|
|
|
|
|
|
|
|
LLT LeftoverTy;
|
|
|
|
unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
|
2019-06-24 23:50:29 +08:00
|
|
|
SmallVector<Register, 1> LeftoverRegs;
|
2019-04-11 01:27:53 +08:00
|
|
|
if (LeftoverBits != 0) {
|
|
|
|
LeftoverTy = LLT::scalar(LeftoverBits);
|
|
|
|
auto K = MIRBuilder.buildConstant(
|
|
|
|
LeftoverTy,
|
|
|
|
Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
|
|
|
|
LeftoverRegs.push_back(K.getReg(0));
|
|
|
|
}
|
|
|
|
|
|
|
|
insertParts(MI.getOperand(0).getReg(),
|
|
|
|
Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
|
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2020-01-10 23:07:24 +08:00
|
|
|
case TargetOpcode::G_SEXT:
|
2020-01-10 22:47:17 +08:00
|
|
|
case TargetOpcode::G_ZEXT:
|
2020-01-11 00:02:18 +08:00
|
|
|
case TargetOpcode::G_ANYEXT:
|
|
|
|
return narrowScalarExt(MI, TypeIdx, NarrowTy);
|
2019-08-21 17:26:39 +08:00
|
|
|
case TargetOpcode::G_TRUNC: {
|
|
|
|
if (TypeIdx != 1)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
|
|
|
|
if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
|
|
|
|
LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
|
|
|
|
return UnableToLegalize;
|
|
|
|
}
|
|
|
|
|
2020-01-16 20:37:00 +08:00
|
|
|
auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
|
|
|
|
MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
|
2019-08-21 17:26:39 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-07-27 07:46:38 +08:00
|
|
|
|
2016-08-05 04:54:13 +08:00
|
|
|
case TargetOpcode::G_ADD: {
|
[GlobalISel] Enable legalizing non-power-of-2 sized types.
This changes the interface of how targets describe how to legalize, see
the below description.
1. Interface for targets to describe how to legalize.
In GlobalISel, the API in the LegalizerInfo class is the main interface
for targets to specify which types are legal for which operations, and
what to do to turn illegal type/operation combinations into legal ones.
For each operation the type sizes that can be legalized without having
to change the size of the type are specified with a call to setAction.
This isn't different to how GlobalISel worked before. For example, for a
target that supports 32 and 64 bit adds natively:
for (auto Ty : {s32, s64})
setAction({G_ADD, 0, Ty}, Legal);
or for a target that needs a library call for a 32 bit division:
setAction({G_SDIV, s32}, Libcall);
The main conceptual change to the LegalizerInfo API, is in specifying
how to legalize the type sizes for which a change of size is needed. For
example, in the above example, how to specify how all types from i1 to
i8388607 (apart from s32 and s64 which are legal) need to be legalized
and expressed in terms of operations on the available legal sizes
(again, i32 and i64 in this case). Before, the implementation only
allowed specifying power-of-2-sized types (e.g. setAction({G_ADD, 0,
s128}, NarrowScalar). A worse limitation was that if you'd wanted to
specify how to legalize all the sized types as allowed by the LLVM-IR
LangRef, i1 to i8388607, you'd have to call setAction 8388607-3 times
and probably would need a lot of memory to store all of these
specifications.
Instead, the legalization actions that need to change the size of the
type are specified now using a "SizeChangeStrategy". For example:
setLegalizeScalarToDifferentSizeStrategy(
G_ADD, 0, widenToLargerAndNarrowToLargest);
This example indicates that for type sizes for which there is a larger
size that can be legalized towards, do it by Widening the size.
For example, G_ADD on s17 will be legalized by first doing WidenScalar
to make it s32, after which it's legal.
The "NarrowToLargest" indicates what to do if there is no larger size
that can be legalized towards. E.g. G_ADD on s92 will be legalized by
doing NarrowScalar to s64.
Another example, taken from the ARM backend is:
for (unsigned Op : {G_SDIV, G_UDIV}) {
setLegalizeScalarToDifferentSizeStrategy(Op, 0,
widenToLargerTypesUnsupportedOtherwise);
if (ST.hasDivideInARMMode())
setAction({Op, s32}, Legal);
else
setAction({Op, s32}, Libcall);
}
For this example, G_SDIV on s8, on a target without a divide
instruction, would be legalized by first doing action (WidenScalar,
s32), followed by (Libcall, s32).
The same principle is also followed for when the number of vector lanes
on vector data types need to be changed, e.g.:
setAction({G_ADD, LLT::vector(8, 8)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(16, 8)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(4, 16)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(8, 16)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(2, 32)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(4, 32)}, LegalizerInfo::Legal);
setLegalizeVectorElementToDifferentSizeStrategy(
G_ADD, 0, widenToLargerTypesUnsupportedOtherwise);
As currently implemented here, vector types are legalized by first
making the vector element size legal, followed by then making the number
of lanes legal. The strategy to follow in the first step is set by a
call to setLegalizeVectorElementToDifferentSizeStrategy, see example
above. The strategy followed in the second step
"moreToWiderTypesAndLessToWidest" (see code for its definition),
indicating that vectors are widened to more elements so they map to
natively supported vector widths, or when there isn't a legal wider
vector, split the vector to map it to the widest vector supported.
Therefore, for the above specification, some example legalizations are:
* getAction({G_ADD, LLT::vector(3, 3)})
returns {WidenScalar, LLT::vector(3, 8)}
* getAction({G_ADD, LLT::vector(3, 8)})
then returns {MoreElements, LLT::vector(8, 8)}
* getAction({G_ADD, LLT::vector(20, 8)})
returns {FewerElements, LLT::vector(16, 8)}
2. Key implementation aspects.
How to legalize a specific (operation, type index, size) tuple is
represented by mapping intervals of integers representing a range of
size types to an action to take, e.g.:
setScalarAction({G_ADD, LLT::scalar(1)},
{{1, WidenScalar}, // bit sizes [ 1, 32[
{32, Legal}, // bit sizes [32, 33[
{33, WidenScalar}, // bit sizes [33, 64[
{64, Legal}, // bit sizes [64, 65[
{65, NarrowScalar} // bit sizes [65, +inf[
});
Please note that most of the code to do the actual lowering of
non-power-of-2 sized types is currently missing, this is just trying to
make it possible for targets to specify what is legal, and how non-legal
types should be legalized. Probably quite a bit of further work is
needed in the actual legalizing and the other passes in GlobalISel to
support non-power-of-2 sized types.
I hope the documentation in LegalizerInfo.h and the examples provided in the
various {Target}LegalizerInfo.cpp and LegalizerInfoTest.cpp explains well
enough how this is meant to be used.
This drops the need for LLT::{half,double}...Size().
Differential Revision: https://reviews.llvm.org/D30529
llvm-svn: 317560
2017-11-07 18:34:34 +08:00
|
|
|
// FIXME: add support for when SizeOp0 isn't an exact multiple of
|
|
|
|
// NarrowSize.
|
|
|
|
if (SizeOp0 % NarrowSize != 0)
|
|
|
|
return UnableToLegalize;
|
2016-08-05 04:54:13 +08:00
|
|
|
// Expand in terms of carry-setting/consuming G_ADDE instructions.
|
[GlobalISel] Enable legalizing non-power-of-2 sized types.
This changes the interface of how targets describe how to legalize, see
the below description.
1. Interface for targets to describe how to legalize.
In GlobalISel, the API in the LegalizerInfo class is the main interface
for targets to specify which types are legal for which operations, and
what to do to turn illegal type/operation combinations into legal ones.
For each operation the type sizes that can be legalized without having
to change the size of the type are specified with a call to setAction.
This isn't different to how GlobalISel worked before. For example, for a
target that supports 32 and 64 bit adds natively:
for (auto Ty : {s32, s64})
setAction({G_ADD, 0, Ty}, Legal);
or for a target that needs a library call for a 32 bit division:
setAction({G_SDIV, s32}, Libcall);
The main conceptual change to the LegalizerInfo API, is in specifying
how to legalize the type sizes for which a change of size is needed. For
example, in the above example, how to specify how all types from i1 to
i8388607 (apart from s32 and s64 which are legal) need to be legalized
and expressed in terms of operations on the available legal sizes
(again, i32 and i64 in this case). Before, the implementation only
allowed specifying power-of-2-sized types (e.g. setAction({G_ADD, 0,
s128}, NarrowScalar)). A worse limitation was that if you wanted to
specify how to legalize all the sized types as allowed by the LLVM-IR
LangRef, i1 to i8388607, you'd have to call setAction 8388607-3 times
and probably would need a lot of memory to store all of these
specifications.
Instead, the legalization actions that need to change the size of the
type are specified now using a "SizeChangeStrategy". For example:
setLegalizeScalarToDifferentSizeStrategy(
G_ADD, 0, widenToLargerAndNarrowToLargest);
This example indicates that for type sizes for which there is a larger
size that can be legalized towards, do it by Widening the size.
For example, G_ADD on s17 will be legalized by first doing WidenScalar
to make it s32, after which it's legal.
The "NarrowToLargest" indicates what to do if there is no larger size
that can be legalized towards. E.g. G_ADD on s92 will be legalized by
doing NarrowScalar to s64.
Another example, taken from the ARM backend is:
for (unsigned Op : {G_SDIV, G_UDIV}) {
setLegalizeScalarToDifferentSizeStrategy(Op, 0,
widenToLargerTypesUnsupportedOtherwise);
if (ST.hasDivideInARMMode())
setAction({Op, s32}, Legal);
else
setAction({Op, s32}, Libcall);
}
For this example, G_SDIV on s8, on a target without a divide
instruction, would be legalized by first doing action (WidenScalar,
s32), followed by (Libcall, s32).
The same principle is also followed for when the number of vector lanes
on vector data types needs to be changed, e.g.:
setAction({G_ADD, LLT::vector(8, 8)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(16, 8)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(4, 16)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(8, 16)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(2, 32)}, LegalizerInfo::Legal);
setAction({G_ADD, LLT::vector(4, 32)}, LegalizerInfo::Legal);
setLegalizeVectorElementToDifferentSizeStrategy(
G_ADD, 0, widenToLargerTypesUnsupportedOtherwise);
As currently implemented here, vector types are legalized by first
making the vector element size legal, followed by then making the number
of lanes legal. The strategy to follow in the first step is set by a
call to setLegalizeVectorElementToDifferentSizeStrategy, see example
above. The strategy followed in the second step is
"moreToWiderTypesAndLessToWidest" (see code for its definition),
indicating that vectors are widened to more elements so they map to
natively supported vector widths, or when there isn't a legal wider
vector, split the vector to map it to the widest vector supported.
Therefore, for the above specification, some example legalizations are:
* getAction({G_ADD, LLT::vector(3, 3)})
returns {WidenScalar, LLT::vector(3, 8)}
* getAction({G_ADD, LLT::vector(3, 8)})
then returns {MoreElements, LLT::vector(8, 8)}
* getAction({G_ADD, LLT::vector(20, 8)})
returns {FewerElements, LLT::vector(16, 8)}
2. Key implementation aspects.
How to legalize a specific (operation, type index, size) tuple is
represented by mapping intervals of integers representing a range of
size types to an action to take, e.g.:
setScalarAction({G_ADD, LLT::scalar(1)},
{{1, WidenScalar}, // bit sizes [ 1, 32[
{32, Legal}, // bit sizes [32, 33[
{33, WidenScalar}, // bit sizes [33, 64[
{64, Legal}, // bit sizes [64, 65[
{65, NarrowScalar} // bit sizes [65, +inf[
});
Please note that most of the code to do the actual lowering of
non-power-of-2 sized types is currently missing, this is just trying to
make it possible for targets to specify what is legal, and how non-legal
types should be legalized. Probably quite a bit of further work is
needed in the actual legalizing and the other passes in GlobalISel to
support non-power-of-2 sized types.
I hope the documentation in LegalizerInfo.h and the examples provided in the
various {Target}LegalizerInfo.cpp and LegalizerInfoTest.cpp explain well
enough how this is meant to be used.
This drops the need for LLT::{half,double}...Size().
Differential Revision: https://reviews.llvm.org/D30529
llvm-svn: 317560
2017-11-07 18:34:34 +08:00
|
|
|
int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
|
2016-08-05 04:54:13 +08:00
|
|
|
|
2019-06-24 23:50:29 +08:00
|
|
|
SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
|
2016-08-05 04:54:13 +08:00
|
|
|
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
|
|
|
|
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
|
|
|
|
|
2019-08-23 01:29:17 +08:00
|
|
|
Register CarryIn;
|
2016-08-05 04:54:13 +08:00
|
|
|
for (int i = 0; i < NumParts; ++i) {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
|
|
|
|
Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
|
2016-08-05 04:54:13 +08:00
|
|
|
|
2019-08-23 01:29:17 +08:00
|
|
|
if (i == 0)
|
|
|
|
MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
|
|
|
|
else {
|
|
|
|
MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
|
|
|
|
Src2Regs[i], CarryIn);
|
|
|
|
}
|
2016-08-05 04:54:13 +08:00
|
|
|
|
|
|
|
DstRegs.push_back(DstReg);
|
|
|
|
CarryIn = CarryOut;
|
|
|
|
}
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
2018-12-11 02:44:58 +08:00
|
|
|
if(MRI.getType(DstReg).isVector())
|
|
|
|
MIRBuilder.buildBuildVector(DstReg, DstRegs);
|
|
|
|
else
|
|
|
|
MIRBuilder.buildMerge(DstReg, DstRegs);
|
2016-08-05 04:54:13 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-01-28 20:10:17 +08:00
|
|
|
case TargetOpcode::G_SUB: {
|
|
|
|
// FIXME: add support for when SizeOp0 isn't an exact multiple of
|
|
|
|
// NarrowSize.
|
|
|
|
if (SizeOp0 % NarrowSize != 0)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
|
|
|
|
|
2019-06-24 23:50:29 +08:00
|
|
|
SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
|
2019-01-28 20:10:17 +08:00
|
|
|
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
|
|
|
|
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
|
|
|
|
Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
|
2019-01-28 20:10:17 +08:00
|
|
|
MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
|
|
|
|
{Src1Regs[0], Src2Regs[0]});
|
|
|
|
DstRegs.push_back(DstReg);
|
2019-06-25 00:16:12 +08:00
|
|
|
Register BorrowIn = BorrowOut;
|
2019-01-28 20:10:17 +08:00
|
|
|
for (int i = 1; i < NumParts; ++i) {
|
|
|
|
DstReg = MRI.createGenericVirtualRegister(NarrowTy);
|
|
|
|
BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
|
|
|
|
|
|
|
|
MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
|
|
|
|
{Src1Regs[i], Src2Regs[i], BorrowIn});
|
|
|
|
|
|
|
|
DstRegs.push_back(DstReg);
|
|
|
|
BorrowIn = BorrowOut;
|
|
|
|
}
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
|
2019-01-28 20:10:17 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-01-27 08:52:51 +08:00
|
|
|
case TargetOpcode::G_MUL:
|
2019-03-11 18:08:44 +08:00
|
|
|
case TargetOpcode::G_UMULH:
|
2019-03-11 18:00:17 +08:00
|
|
|
return narrowScalarMul(MI, NarrowTy);
|
2019-02-12 22:54:52 +08:00
|
|
|
case TargetOpcode::G_EXTRACT:
|
|
|
|
return narrowScalarExtract(MI, TypeIdx, NarrowTy);
|
|
|
|
case TargetOpcode::G_INSERT:
|
|
|
|
return narrowScalarInsert(MI, TypeIdx, NarrowTy);
|
2017-01-19 09:05:48 +08:00
|
|
|
case TargetOpcode::G_LOAD: {
|
2018-04-28 03:48:53 +08:00
|
|
|
const auto &MMO = **MI.memoperands_begin();
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
2019-01-30 02:13:02 +08:00
|
|
|
LLT DstTy = MRI.getType(DstReg);
|
2019-02-05 08:26:12 +08:00
|
|
|
if (DstTy.isVector())
|
2019-01-30 10:35:38 +08:00
|
|
|
return UnableToLegalize;
|
2019-01-30 02:13:02 +08:00
|
|
|
|
|
|
|
if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
|
2019-01-30 02:13:02 +08:00
|
|
|
auto &MMO = **MI.memoperands_begin();
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
|
2019-01-30 02:13:02 +08:00
|
|
|
MIRBuilder.buildAnyExt(DstReg, TmpReg);
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
|
2019-02-05 08:26:12 +08:00
|
|
|
return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
|
2017-01-19 09:05:48 +08:00
|
|
|
}
|
2019-01-23 03:02:10 +08:00
|
|
|
case TargetOpcode::G_ZEXTLOAD:
|
|
|
|
case TargetOpcode::G_SEXTLOAD: {
|
|
|
|
bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
|
|
|
Register PtrReg = MI.getOperand(1).getReg();
|
2019-01-23 03:02:10 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
|
2019-01-23 03:02:10 +08:00
|
|
|
auto &MMO = **MI.memoperands_begin();
|
2019-04-18 06:21:05 +08:00
|
|
|
if (MMO.getSizeInBits() == NarrowSize) {
|
2019-01-23 03:02:10 +08:00
|
|
|
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
|
|
|
|
} else {
|
2020-01-16 20:09:48 +08:00
|
|
|
MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
|
2019-01-23 03:02:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (ZExt)
|
|
|
|
MIRBuilder.buildZExt(DstReg, TmpReg);
|
|
|
|
else
|
|
|
|
MIRBuilder.buildSExt(DstReg, TmpReg);
|
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2017-01-19 01:29:54 +08:00
|
|
|
case TargetOpcode::G_STORE: {
|
2018-04-28 03:48:53 +08:00
|
|
|
const auto &MMO = **MI.memoperands_begin();
|
2019-01-30 02:13:02 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register SrcReg = MI.getOperand(0).getReg();
|
2019-01-30 02:13:02 +08:00
|
|
|
LLT SrcTy = MRI.getType(SrcReg);
|
2019-02-05 08:26:12 +08:00
|
|
|
if (SrcTy.isVector())
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
int NumParts = SizeOp0 / NarrowSize;
|
|
|
|
unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
|
|
|
|
unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
|
|
|
|
if (SrcTy.isVector() && LeftoverBits != 0)
|
|
|
|
return UnableToLegalize;
|
2019-01-30 02:13:02 +08:00
|
|
|
|
|
|
|
if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
|
2019-01-30 02:13:02 +08:00
|
|
|
auto &MMO = **MI.memoperands_begin();
|
|
|
|
MIRBuilder.buildTrunc(TmpReg, SrcReg);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
|
2019-01-30 02:13:02 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
|
2019-02-05 08:26:12 +08:00
|
|
|
return reduceLoadStoreWidth(MI, 0, NarrowTy);
|
2017-01-19 01:29:54 +08:00
|
|
|
}
|
2019-02-05 08:13:44 +08:00
|
|
|
case TargetOpcode::G_SELECT:
|
|
|
|
return narrowScalarSelect(MI, TypeIdx, NarrowTy);
|
2018-12-18 19:36:14 +08:00
|
|
|
case TargetOpcode::G_AND:
|
|
|
|
case TargetOpcode::G_OR:
|
|
|
|
case TargetOpcode::G_XOR: {
|
2017-10-03 12:53:56 +08:00
|
|
|
// Legalize bitwise operation:
|
|
|
|
// A = BinOp<Ty> B, C
|
|
|
|
// into:
|
|
|
|
// B1, ..., BN = G_UNMERGE_VALUES B
|
|
|
|
// C1, ..., CN = G_UNMERGE_VALUES C
|
|
|
|
// A1 = BinOp<Ty/N> B1, C1
|
|
|
|
// ...
|
|
|
|
// AN = BinOp<Ty/N> BN, CN
|
|
|
|
// A = G_MERGE_VALUES A1, ..., AN
|
2019-04-11 01:07:56 +08:00
|
|
|
return narrowScalarBasic(MI, TypeIdx, NarrowTy);
|
2017-04-07 22:41:59 +08:00
|
|
|
}
|
2019-01-23 05:42:11 +08:00
|
|
|
case TargetOpcode::G_SHL:
|
|
|
|
case TargetOpcode::G_LSHR:
|
2019-02-08 03:37:44 +08:00
|
|
|
case TargetOpcode::G_ASHR:
|
|
|
|
return narrowScalarShift(MI, TypeIdx, NarrowTy);
|
2019-01-31 10:09:57 +08:00
|
|
|
case TargetOpcode::G_CTLZ:
|
|
|
|
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
|
|
|
|
case TargetOpcode::G_CTTZ:
|
|
|
|
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
|
|
|
|
case TargetOpcode::G_CTPOP:
|
2020-01-27 16:43:38 +08:00
|
|
|
if (TypeIdx == 1)
|
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
case TargetOpcode::G_CTLZ:
|
|
|
|
return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
|
2020-01-27 16:51:06 +08:00
|
|
|
case TargetOpcode::G_CTTZ:
|
|
|
|
return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
|
2020-01-27 16:59:50 +08:00
|
|
|
case TargetOpcode::G_CTPOP:
|
|
|
|
return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
|
2020-01-27 16:43:38 +08:00
|
|
|
default:
|
|
|
|
return UnableToLegalize;
|
|
|
|
}
|
2019-01-31 10:09:57 +08:00
|
|
|
|
2019-02-03 07:29:55 +08:00
|
|
|
Observer.changingInstr(MI);
|
|
|
|
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
|
|
|
case TargetOpcode::G_INTTOPTR:
|
|
|
|
if (TypeIdx != 1)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
narrowScalarSrc(MI, NarrowTy, 1);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
|
|
|
case TargetOpcode::G_PTRTOINT:
|
|
|
|
if (TypeIdx != 0)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
2019-01-31 10:09:57 +08:00
|
|
|
Observer.changingInstr(MI);
|
|
|
|
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
2019-07-09 22:36:17 +08:00
|
|
|
case TargetOpcode::G_PHI: {
|
|
|
|
unsigned NumParts = SizeOp0 / NarrowSize;
|
2020-02-04 23:34:22 +08:00
|
|
|
SmallVector<Register, 2> DstRegs(NumParts);
|
|
|
|
SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
|
2019-07-09 22:36:17 +08:00
|
|
|
Observer.changingInstr(MI);
|
|
|
|
for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
|
|
|
|
MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
|
|
|
|
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
|
|
|
|
extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
|
|
|
|
SrcRegs[i / 2]);
|
|
|
|
}
|
|
|
|
MachineBasicBlock &MBB = *MI.getParent();
|
|
|
|
MIRBuilder.setInsertPt(MBB, MI);
|
|
|
|
for (unsigned i = 0; i < NumParts; ++i) {
|
|
|
|
DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
|
|
|
|
MachineInstrBuilder MIB =
|
|
|
|
MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
|
|
|
|
for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
|
|
|
|
MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
|
|
|
|
}
|
2019-09-14 05:49:24 +08:00
|
|
|
MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
|
2019-07-09 22:36:17 +08:00
|
|
|
Observer.changedInstr(MI);
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-07-16 03:37:34 +08:00
|
|
|
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
|
|
|
|
case TargetOpcode::G_INSERT_VECTOR_ELT: {
|
|
|
|
if (TypeIdx != 2)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
narrowScalarSrc(MI, NarrowTy, OpIdx);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-07-17 20:08:01 +08:00
|
|
|
case TargetOpcode::G_ICMP: {
|
|
|
|
uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
|
|
|
|
if (NarrowSize * 2 != SrcSize)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
|
|
|
|
Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
|
2019-07-17 20:08:01 +08:00
|
|
|
|
|
|
|
Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
|
|
|
|
Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
|
2019-07-17 20:08:01 +08:00
|
|
|
|
|
|
|
CmpInst::Predicate Pred =
|
|
|
|
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
|
2019-07-25 04:46:42 +08:00
|
|
|
LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
|
2019-07-17 20:08:01 +08:00
|
|
|
|
|
|
|
if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
|
|
|
|
MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
|
|
|
|
MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
|
|
|
|
MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
|
|
|
|
MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
|
2019-07-17 20:08:01 +08:00
|
|
|
} else {
|
2019-07-25 04:46:42 +08:00
|
|
|
MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
|
2019-07-17 20:08:01 +08:00
|
|
|
MachineInstrBuilder CmpHEQ =
|
2019-07-25 04:46:42 +08:00
|
|
|
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
|
2019-07-17 20:08:01 +08:00
|
|
|
MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
|
2019-07-25 04:46:42 +08:00
|
|
|
ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
|
2019-07-17 20:08:01 +08:00
|
|
|
}
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
[globalisel] Add G_SEXT_INREG
Summary:
Targets often have instructions that can sign-extend certain cases faster
than the equivalent shift-left/arithmetic-shift-right. Such cases can be
identified by matching a shift-left/shift-right pair but there are some
issues with this in the context of combines. For example, suppose you can
sign-extend 8-bit up to 32-bit with a target extend instruction.
%1:_(s32) = G_SHL %0:_(s32), i32 24 # (I've inlined the G_CONSTANT for brevity)
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_ASHR %2:_(s32), i32 1
would reasonably combine to:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 25
which no longer matches the special case. If your shifts and extend are
equal cost, this would break even as a pair of shifts but if your shift is
more expensive than the extend then it's cheaper as:
%2:_(s32) = G_SEXT_INREG %0:_(s32), i32 8
%3:_(s32) = G_ASHR %2:_(s32), i32 1
It's possible to match the shift-pair in ISel and emit an extend and ashr.
However, this is far from the only way to break this shift pair and make
it hard to match the extends. Another example is that with the right
known-zeros, this:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_MUL %2:_(s32), i32 2
can become:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 23
All upstream targets have been configured to lower it to the current
G_SHL,G_ASHR pair but will likely want to make it legal in some cases to
handle their faster cases.
To follow-up: Provide a way to legalize based on the constant. At the
moment, I'm thinking that the best way to achieve this is to provide the
MI in LegalityQuery but that opens the door to breaking core principles
of the legalizer (legality is not context sensitive). That said, it's
worth noting that looking at other instructions and acting on that
information doesn't violate this principle in itself. It's only a
violation if, at the end of legalization, a pass that checks legality
without being able to see the context would say an instruction might not be
legal. That's a fairly subtle distinction so to give a concrete example,
saying %2 in:
%1 = G_CONSTANT 16
%2 = G_SEXT_INREG %0, %1
is legal is in violation of that principle if the legality of %2 depends
on %1 being constant and/or being 16. However, legalizing to either:
%2 = G_SEXT_INREG %0, 16
or:
%1 = G_CONSTANT 16
%2:_(s32) = G_SHL %0, %1
%3:_(s32) = G_ASHR %2, %1
depending on whether %1 is constant and 16 does not violate that principle
since both outputs are genuinely legal.
Reviewers: bogner, aditya_nandakumar, volkan, aemerson, paquette, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, kristof.beyls, javed.absar, hiraditya, jrtc27, atanasyan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61289
llvm-svn: 368487
2019-08-10 05:11:20 +08:00
|
|
|
case TargetOpcode::G_SEXT_INREG: {
|
|
|
|
if (TypeIdx != 0)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
int64_t SizeInBits = MI.getOperand(2).getImm();
|
|
|
|
|
|
|
|
// So long as the new type has more bits than the bits we're extending we
|
|
|
|
// don't need to break it apart.
|
|
|
|
if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
// We don't lose any non-extension bits by truncating the src and
|
|
|
|
// sign-extending the dst.
|
|
|
|
MachineOperand &MO1 = MI.getOperand(1);
|
2020-01-16 20:37:00 +08:00
|
|
|
auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
|
2020-01-23 19:51:35 +08:00
|
|
|
MO1.setReg(TruncMIB.getReg(0));
|
[globalisel] Add G_SEXT_INREG
Summary:
Targets often have instructions that can sign-extend certain cases faster
than the equivalent shift-left/arithmetic-shift-right. Such cases can be
identified by matching a shift-left/shift-right pair but there are some
issues with this in the context of combines. For example, suppose you can
sign-extend 8-bit up to 32-bit with a target extend instruction.
%1:_(s32) = G_SHL %0:_(s32), i32 24 # (I've inlined the G_CONSTANT for brevity)
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_ASHR %2:_(s32), i32 1
would reasonably combine to:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 25
which no longer matches the special case. If your shifts and extend are
equal cost, this would break even as a pair of shifts but if your shift is
more expensive than the extend then it's cheaper as:
%2:_(s32) = G_SEXT_INREG %0:_(s32), i32 8
%3:_(s32) = G_ASHR %2:_(s32), i32 1
It's possible to match the shift-pair in ISel and emit an extend and ashr.
However, this is far from the only way to break this shift pair and make
it hard to match the extends. Another example is that with the right
known-zeros, this:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_MUL %2:_(s32), i32 2
can become:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 23
All upstream targets have been configured to lower it to the current
G_SHL,G_ASHR pair but will likely want to make it legal in some cases to
handle their faster cases.
To follow-up: Provide a way to legalize based on the constant. At the
moment, I'm thinking that the best way to achieve this is to provide the
MI in LegalityQuery but that opens the door to breaking core principles
of the legalizer (legality is not context sensitive). That said, it's
worth noting that looking at other instructions and acting on that
information doesn't violate this principle in itself. It's only a
violation if, at the end of legalization, a pass that checks legality
without being able to see the context would say an instruction might not be
legal. That's a fairly subtle distinction so to give a concrete example,
saying %2 in:
%1 = G_CONSTANT 16
%2 = G_SEXT_INREG %0, %1
is legal is in violation of that principle if the legality of %2 depends
on %1 being constant and/or being 16. However, legalizing to either:
%2 = G_SEXT_INREG %0, 16
or:
%1 = G_CONSTANT 16
%2:_(s32) = G_SHL %0, %1
%3:_(s32) = G_ASHR %2, %1
depending on whether %1 is constant and 16 does not violate that principle
since both outputs are genuinely legal.
Reviewers: bogner, aditya_nandakumar, volkan, aemerson, paquette, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, kristof.beyls, javed.absar, hiraditya, jrtc27, atanasyan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61289
llvm-svn: 368487
2019-08-10 05:11:20 +08:00
|
|
|
|
|
|
|
MachineOperand &MO2 = MI.getOperand(0);
|
|
|
|
Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
|
|
|
|
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildSExt(MO2, DstExt);
|
[globalisel] Add G_SEXT_INREG
Summary:
Targets often have instructions that can sign-extend certain cases faster
than the equivalent shift-left/arithmetic-shift-right. Such cases can be
identified by matching a shift-left/shift-right pair but there are some
issues with this in the context of combines. For example, suppose you can
sign-extend 8-bit up to 32-bit with a target extend instruction.
%1:_(s32) = G_SHL %0:_(s32), i32 24 # (I've inlined the G_CONSTANT for brevity)
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_ASHR %2:_(s32), i32 1
would reasonably combine to:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 25
which no longer matches the special case. If your shifts and extend are
equal cost, this would break even as a pair of shifts but if your shift is
more expensive than the extend then it's cheaper as:
%2:_(s32) = G_SEXT_INREG %0:_(s32), i32 8
%3:_(s32) = G_ASHR %2:_(s32), i32 1
It's possible to match the shift-pair in ISel and emit an extend and ashr.
However, this is far from the only way to break this shift pair and make
it hard to match the extends. Another example is that with the right
known-zeros, this:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_MUL %2:_(s32), i32 2
can become:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 23
All upstream targets have been configured to lower it to the current
G_SHL,G_ASHR pair but will likely want to make it legal in some cases to
handle their faster cases.
To follow-up: Provide a way to legalize based on the constant. At the
moment, I'm thinking that the best way to achieve this is to provide the
MI in LegalityQuery but that opens the door to breaking core principles
of the legalizer (legality is not context sensitive). That said, it's
worth noting that looking at other instructions and acting on that
information doesn't violate this principle in itself. It's only a
violation if, at the end of legalization, a pass that checks legality
without being able to see the context would say an instruction might not be
legal. That's a fairly subtle distinction so to give a concrete example,
saying %2 in:
%1 = G_CONSTANT 16
%2 = G_SEXT_INREG %0, %1
is legal is in violation of that principle if the legality of %2 depends
on %1 being constant and/or being 16. However, legalizing to either:
%2 = G_SEXT_INREG %0, 16
or:
%1 = G_CONSTANT 16
%2:_(s32) = G_SHL %0, %1
%3:_(s32) = G_ASHR %2, %1
depending on whether %1 is constant and 16 does not violate that principle
since both outputs are genuinely legal.
Reviewers: bogner, aditya_nandakumar, volkan, aemerson, paquette, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, kristof.beyls, javed.absar, hiraditya, jrtc27, atanasyan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61289
llvm-svn: 368487
2019-08-10 05:11:20 +08:00
|
|
|
MO2.setReg(DstExt);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Break it apart. Components below the extension point are unmodified. The
|
|
|
|
// component containing the extension point becomes a narrower SEXT_INREG.
|
|
|
|
// Components above it are ashr'd from the component containing the
|
|
|
|
// extension point.
|
|
|
|
if (SizeOp0 % NarrowSize != 0)
|
|
|
|
return UnableToLegalize;
|
|
|
|
int NumParts = SizeOp0 / NarrowSize;
|
|
|
|
|
|
|
|
// List the registers where the destination will be scattered.
|
|
|
|
SmallVector<Register, 2> DstRegs;
|
|
|
|
// List the registers where the source will be split.
|
|
|
|
SmallVector<Register, 2> SrcRegs;
|
|
|
|
|
|
|
|
// Create all the temporary registers.
|
|
|
|
for (int i = 0; i < NumParts; ++i) {
|
|
|
|
Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
|
|
|
|
|
|
|
|
SrcRegs.push_back(SrcReg);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Explode the big arguments into smaller chunks.
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
|
[globalisel] Add G_SEXT_INREG
Summary:
Targets often have instructions that can sign-extend certain cases faster
than the equivalent shift-left/arithmetic-shift-right. Such cases can be
identified by matching a shift-left/shift-right pair but there are some
issues with this in the context of combines. For example, suppose you can
sign-extend 8-bit up to 32-bit with a target extend instruction.
%1:_(s32) = G_SHL %0:_(s32), i32 24 # (I've inlined the G_CONSTANT for brevity)
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_ASHR %2:_(s32), i32 1
would reasonably combine to:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 25
which no longer matches the special case. If your shifts and extend are
equal cost, this would break even as a pair of shifts but if your shift is
more expensive than the extend then it's cheaper as:
%2:_(s32) = G_SEXT_INREG %0:_(s32), i32 8
%3:_(s32) = G_ASHR %2:_(s32), i32 1
It's possible to match the shift-pair in ISel and emit an extend and ashr.
However, this is far from the only way to break this shift pair and make
it hard to match the extends. Another example is that with the right
known-zeros, this:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_MUL %2:_(s32), i32 2
can become:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 23
All upstream targets have been configured to lower it to the current
G_SHL,G_ASHR pair but will likely want to make it legal in some cases to
handle their faster cases.
To follow-up: Provide a way to legalize based on the constant. At the
moment, I'm thinking that the best way to achieve this is to provide the
MI in LegalityQuery but that opens the door to breaking core principles
of the legalizer (legality is not context sensitive). That said, it's
worth noting that looking at other instructions and acting on that
information doesn't violate this principle in itself. It's only a
violation if, at the end of legalization, a pass that checks legality
without being able to see the context would say an instruction might not be
legal. That's a fairly subtle distinction so to give a concrete example,
saying %2 in:
%1 = G_CONSTANT 16
%2 = G_SEXT_INREG %0, %1
is legal is in violation of that principle if the legality of %2 depends
on %1 being constant and/or being 16. However, legalizing to either:
%2 = G_SEXT_INREG %0, 16
or:
%1 = G_CONSTANT 16
%2:_(s32) = G_SHL %0, %1
%3:_(s32) = G_ASHR %2, %1
depending on whether %1 is constant and 16 does not violate that principle
since both outputs are genuinely legal.
Reviewers: bogner, aditya_nandakumar, volkan, aemerson, paquette, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, kristof.beyls, javed.absar, hiraditya, jrtc27, atanasyan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61289
llvm-svn: 368487
2019-08-10 05:11:20 +08:00
|
|
|
|
|
|
|
Register AshrCstReg =
|
|
|
|
MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
|
2020-01-23 19:51:35 +08:00
|
|
|
.getReg(0);
|
[globalisel] Add G_SEXT_INREG
Summary:
Targets often have instructions that can sign-extend certain cases faster
than the equivalent shift-left/arithmetic-shift-right. Such cases can be
identified by matching a shift-left/shift-right pair but there are some
issues with this in the context of combines. For example, suppose you can
sign-extend 8-bit up to 32-bit with a target extend instruction.
%1:_(s32) = G_SHL %0:_(s32), i32 24 # (I've inlined the G_CONSTANT for brevity)
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_ASHR %2:_(s32), i32 1
would reasonably combine to:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 25
which no longer matches the special case. If your shifts and extend are
equal cost, this would break even as a pair of shifts but if your shift is
more expensive than the extend then it's cheaper as:
%2:_(s32) = G_SEXT_INREG %0:_(s32), i32 8
%3:_(s32) = G_ASHR %2:_(s32), i32 1
It's possible to match the shift-pair in ISel and emit an extend and ashr.
However, this is far from the only way to break this shift pair and make
it hard to match the extends. Another example is that with the right
known-zeros, this:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_MUL %2:_(s32), i32 2
can become:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 23
All upstream targets have been configured to lower it to the current
G_SHL,G_ASHR pair but will likely want to make it legal in some cases to
handle their faster cases.
To follow-up: Provide a way to legalize based on the constant. At the
moment, I'm thinking that the best way to achieve this is to provide the
MI in LegalityQuery but that opens the door to breaking core principles
of the legalizer (legality is not context sensitive). That said, it's
worth noting that looking at other instructions and acting on that
information doesn't violate this principle in itself. It's only a
violation if, at the end of legalization, a pass that checks legality
without being able to see the context would say an instruction might not be
legal. That's a fairly subtle distinction so to give a concrete example,
saying %2 in:
%1 = G_CONSTANT 16
%2 = G_SEXT_INREG %0, %1
is legal is in violation of that principle if the legality of %2 depends
on %1 being constant and/or being 16. However, legalizing to either:
%2 = G_SEXT_INREG %0, 16
or:
%1 = G_CONSTANT 16
%2:_(s32) = G_SHL %0, %1
%3:_(s32) = G_ASHR %2, %1
depending on whether %1 is constant and 16 does not violate that principle
since both outputs are genuinely legal.
Reviewers: bogner, aditya_nandakumar, volkan, aemerson, paquette, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, kristof.beyls, javed.absar, hiraditya, jrtc27, atanasyan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61289
llvm-svn: 368487
2019-08-10 05:11:20 +08:00
|
|
|
Register FullExtensionReg = 0;
|
|
|
|
Register PartialExtensionReg = 0;
|
|
|
|
|
|
|
|
// Do the operation on each small part.
|
|
|
|
for (int i = 0; i < NumParts; ++i) {
|
|
|
|
if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
|
|
|
|
DstRegs.push_back(SrcRegs[i]);
|
|
|
|
else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
|
|
|
|
assert(PartialExtensionReg &&
|
|
|
|
"Expected to visit partial extension before full");
|
|
|
|
if (FullExtensionReg) {
|
|
|
|
DstRegs.push_back(FullExtensionReg);
|
|
|
|
continue;
|
|
|
|
}
|
2020-01-16 20:09:48 +08:00
|
|
|
DstRegs.push_back(
|
|
|
|
MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
|
2020-01-23 19:51:35 +08:00
|
|
|
.getReg(0));
|
[globalisel] Add G_SEXT_INREG
Summary:
Targets often have instructions that can sign-extend certain cases faster
than the equivalent shift-left/arithmetic-shift-right. Such cases can be
identified by matching a shift-left/shift-right pair but there are some
issues with this in the context of combines. For example, suppose you can
sign-extend 8-bit up to 32-bit with a target extend instruction.
%1:_(s32) = G_SHL %0:_(s32), i32 24 # (I've inlined the G_CONSTANT for brevity)
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_ASHR %2:_(s32), i32 1
would reasonably combine to:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 25
which no longer matches the special case. If your shifts and extend are
equal cost, this would break even as a pair of shifts but if your shift is
more expensive than the extend then it's cheaper as:
%2:_(s32) = G_SEXT_INREG %0:_(s32), i32 8
%3:_(s32) = G_ASHR %2:_(s32), i32 1
It's possible to match the shift-pair in ISel and emit an extend and ashr.
However, this is far from the only way to break this shift pair and make
it hard to match the extends. Another example is that with the right
known-zeros, this:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_MUL %2:_(s32), i32 2
can become:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 23
All upstream targets have been configured to lower it to the current
G_SHL,G_ASHR pair but will likely want to make it legal in some cases to
handle their faster cases.
To follow-up: Provide a way to legalize based on the constant. At the
moment, I'm thinking that the best way to achieve this is to provide the
MI in LegalityQuery but that opens the door to breaking core principles
of the legalizer (legality is not context sensitive). That said, it's
worth noting that looking at other instructions and acting on that
information doesn't violate this principle in itself. It's only a
violation if, at the end of legalization, a pass that checks legality
without being able to see the context would say an instruction might not be
legal. That's a fairly subtle distinction so to give a concrete example,
saying %2 in:
%1 = G_CONSTANT 16
%2 = G_SEXT_INREG %0, %1
is legal is in violation of that principle if the legality of %2 depends
on %1 being constant and/or being 16. However, legalizing to either:
%2 = G_SEXT_INREG %0, 16
or:
%1 = G_CONSTANT 16
%2:_(s32) = G_SHL %0, %1
%3:_(s32) = G_ASHR %2, %1
depending on whether %1 is constant and 16 does not violate that principle
since both outputs are genuinely legal.
Reviewers: bogner, aditya_nandakumar, volkan, aemerson, paquette, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, kristof.beyls, javed.absar, hiraditya, jrtc27, atanasyan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61289
llvm-svn: 368487
2019-08-10 05:11:20 +08:00
|
|
|
FullExtensionReg = DstRegs.back();
|
|
|
|
} else {
|
|
|
|
DstRegs.push_back(
|
|
|
|
MIRBuilder
|
|
|
|
.buildInstr(
|
|
|
|
TargetOpcode::G_SEXT_INREG, {NarrowTy},
|
|
|
|
{SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
|
2020-01-23 19:51:35 +08:00
|
|
|
.getReg(0));
|
[globalisel] Add G_SEXT_INREG
Summary:
Targets often have instructions that can sign-extend certain cases faster
than the equivalent shift-left/arithmetic-shift-right. Such cases can be
identified by matching a shift-left/shift-right pair but there are some
issues with this in the context of combines. For example, suppose you can
sign-extend 8-bit up to 32-bit with a target extend instruction.
%1:_(s32) = G_SHL %0:_(s32), i32 24 # (I've inlined the G_CONSTANT for brevity)
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_ASHR %2:_(s32), i32 1
would reasonably combine to:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 25
which no longer matches the special case. If your shifts and extend are
equal cost, this would break even as a pair of shifts but if your shift is
more expensive than the extend then it's cheaper as:
%2:_(s32) = G_SEXT_INREG %0:_(s32), i32 8
%3:_(s32) = G_ASHR %2:_(s32), i32 1
It's possible to match the shift-pair in ISel and emit an extend and ashr.
However, this is far from the only way to break this shift pair and make
it hard to match the extends. Another example is that with the right
known-zeros, this:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_MUL %2:_(s32), i32 2
can become:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 23
All upstream targets have been configured to lower it to the current
G_SHL,G_ASHR pair but will likely want to make it legal in some cases to
handle their faster cases.
To follow-up: Provide a way to legalize based on the constant. At the
moment, I'm thinking that the best way to achieve this is to provide the
MI in LegalityQuery but that opens the door to breaking core principles
of the legalizer (legality is not context sensitive). That said, it's
worth noting that looking at other instructions and acting on that
information doesn't violate this principle in itself. It's only a
violation if, at the end of legalization, a pass that checks legality
without being able to see the context would say an instruction might not be
legal. That's a fairly subtle distinction so to give a concrete example,
saying %2 in:
%1 = G_CONSTANT 16
%2 = G_SEXT_INREG %0, %1
is legal is in violation of that principle if the legality of %2 depends
on %1 being constant and/or being 16. However, legalizing to either:
%2 = G_SEXT_INREG %0, 16
or:
%1 = G_CONSTANT 16
%2:_(s32) = G_SHL %0, %1
%3:_(s32) = G_ASHR %2, %1
depending on whether %1 is constant and 16 does not violate that principle
since both outputs are genuinely legal.
Reviewers: bogner, aditya_nandakumar, volkan, aemerson, paquette, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, kristof.beyls, javed.absar, hiraditya, jrtc27, atanasyan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61289
llvm-svn: 368487
2019-08-10 05:11:20 +08:00
|
|
|
PartialExtensionReg = DstRegs.back();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Gather the destination registers into the final destination.
|
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
|
|
|
MIRBuilder.buildMerge(DstReg, DstRegs);
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-12-31 01:06:29 +08:00
|
|
|
case TargetOpcode::G_BSWAP:
|
|
|
|
case TargetOpcode::G_BITREVERSE: {
|
2019-12-30 18:13:22 +08:00
|
|
|
if (SizeOp0 % NarrowSize != 0)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
SmallVector<Register, 2> SrcRegs, DstRegs;
|
|
|
|
unsigned NumParts = SizeOp0 / NarrowSize;
|
|
|
|
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < NumParts; ++i) {
|
|
|
|
auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
|
|
|
|
{SrcRegs[NumParts - 1 - i]});
|
|
|
|
DstRegs.push_back(DstPart.getReg(0));
|
|
|
|
}
|
|
|
|
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
|
2019-12-30 18:13:22 +08:00
|
|
|
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2016-08-05 04:54:13 +08:00
|
|
|
}
|
2016-07-23 04:03:43 +08:00
|
|
|
}
|
|
|
|
|
2018-05-10 01:28:18 +08:00
|
|
|
/// Rewrite operand \p OpIdx of \p MI so that it reads a \p WideTy value.
///
/// A new \p ExtOpcode instruction is built that takes the operand's current
/// value as input and produces a \p WideTy result; the operand is then
/// redirected to that result.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &SrcOperand = MI.getOperand(OpIdx);
  // Build the extension first: it must read the operand's original register.
  Register WideReg =
      MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {SrcOperand}).getReg(0);
  SrcOperand.setReg(WideReg);
}
|
|
|
|
|
2019-01-23 05:42:11 +08:00
|
|
|
/// Rewrite operand \p OpIdx of \p MI so that it reads a \p NarrowTy value.
///
/// A truncate of the operand's current value to \p NarrowTy is built and the
/// operand is redirected to the truncated result.
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &SrcOperand = MI.getOperand(OpIdx);
  // Build the truncate first: it must read the operand's original register.
  Register NarrowReg = MIRBuilder.buildTrunc(NarrowTy, SrcOperand).getReg(0);
  SrcOperand.setReg(NarrowReg);
}
|
|
|
|
|
2018-05-10 01:28:18 +08:00
|
|
|
/// Make \p MI define a fresh \p WideTy register in operand \p OpIdx instead of
/// its original destination.
///
/// The original destination is re-defined by a new \p TruncOpcode instruction
/// that reads the wide register. That instruction is emitted one position past
/// the builder's current insertion point.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &DstOperand = MI.getOperand(OpIdx);
  Register WideDst = MRI.createGenericVirtualRegister(WideTy);

  // Advance the insertion point by one before emitting the conversion.
  auto InsertAfter = MIRBuilder.getInsertPt();
  ++InsertAfter;
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), InsertAfter);

  // The conversion defines the original destination (still held by the
  // operand at this point) from the new wide register.
  MIRBuilder.buildInstr(TruncOpcode, {DstOperand}, {WideDst});
  DstOperand.setReg(WideDst);
}
|
|
|
|
|
2019-01-31 10:09:57 +08:00
|
|
|
/// Make \p MI define a fresh \p NarrowTy register in operand \p OpIdx instead
/// of its original destination.
///
/// The original destination is re-defined by a new \p ExtOpcode instruction
/// that reads the narrow register. That instruction is emitted one position
/// past the builder's current insertion point.
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &DstOperand = MI.getOperand(OpIdx);
  Register NarrowDst = MRI.createGenericVirtualRegister(NarrowTy);

  // Advance the insertion point by one before emitting the extension.
  auto InsertAfter = MIRBuilder.getInsertPt();
  ++InsertAfter;
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), InsertAfter);

  // The extension defines the original destination (still held by the operand
  // at this point) from the new narrow register.
  MIRBuilder.buildInstr(ExtOpcode, {DstOperand}, {NarrowDst});
  DstOperand.setReg(NarrowDst);
}
|
|
|
|
|
2019-02-12 06:00:39 +08:00
|
|
|
/// Make \p MI define a fresh \p WideTy register in operand \p OpIdx instead of
/// its original destination.
///
/// The original destination is re-defined by a G_EXTRACT at offset 0 of the
/// new register. That extract is emitted one position past the builder's
/// current insertion point.
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &DstOperand = MI.getOperand(OpIdx);
  Register WideDst = MRI.createGenericVirtualRegister(WideTy);

  // Advance the insertion point by one before emitting the extract.
  auto InsertAfter = MIRBuilder.getInsertPt();
  ++InsertAfter;
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), InsertAfter);

  // The extract defines the original destination (still held by the operand
  // at this point) from the low bits of the new wide register.
  MIRBuilder.buildExtract(DstOperand, WideDst, 0);
  DstOperand.setReg(WideDst);
}
|
|
|
|
|
2019-02-20 00:30:19 +08:00
|
|
|
/// Pad the vector read by operand \p OpIdx of \p MI out to \p MoreTy and make
/// the operand read the padded value. The added elements are undefined.
///
/// When the element count of \p MoreTy is a whole multiple of the original
/// element count, the padded value is a G_CONCAT_VECTORS of the original
/// followed by undef vectors of the original type. Otherwise the original is
/// inserted at offset 0 into an undef value of type \p MoreTy.
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                            unsigned OpIdx) {
  MachineOperand &SrcOperand = MI.getOperand(OpIdx);
  const Register NarrowReg = SrcOperand.getReg();

  LLT NarrowVecTy = MRI.getType(NarrowReg);
  const unsigned NarrowElts = NarrowVecTy.getNumElements();
  const unsigned WideElts = MoreTy.getNumElements();

  if (WideElts % NarrowElts != 0) {
    // Not an even multiple: insert the original into an undef wide value.
    Register PaddedReg = MRI.createGenericVirtualRegister(MoreTy);
    Register UndefWide = MIRBuilder.buildUndef(MoreTy).getReg(0);
    MIRBuilder.buildInsert(PaddedReg, UndefWide, NarrowReg, 0);
    SrcOperand.setReg(PaddedReg);
    return;
  }

  // Use concat_vectors if the result is a multiple of the number of elements:
  // the original vector first, then one shared undef for every other piece.
  const unsigned NumPieces = WideElts / NarrowElts;
  SmallVector<Register, 8> Pieces;
  Pieces.push_back(NarrowReg);

  Register UndefNarrow = MIRBuilder.buildUndef(NarrowVecTy).getReg(0);
  Pieces.append(NumPieces - 1, UndefNarrow);

  SrcOperand.setReg(MIRBuilder.buildConcatVectors(MoreTy, Pieces).getReg(0));
}
|
|
|
|
|
2019-02-03 08:07:33 +08:00
|
|
|
/// Widen the source type (type index 1) of a G_MERGE_VALUES whose result is
/// not a vector, using \p WideTy.
///
/// Two strategies are used:
///  - If \p WideTy is at least as large as the result, every source is
///    zero-extended to \p WideTy, shifted to its bit offset and OR'd together.
///    The packed value is then truncated to the result, cast with
///    G_INTTOPTR for a pointer result, or written directly to the result.
///  - Otherwise the sources are split into pieces of the GCD of the source
///    and wide sizes, padded with undef pieces, regrouped into \p WideTy
///    merges, and merged (plus truncated if needed) into the result.
///
/// \returns UnableToLegalize if \p TypeIdx is not 1 or the result is a vector;
/// otherwise \p MI is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy values needed to cover the result (rounded up).
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = NumOps - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      // The final OR can define the result directly when no conversion from
      // WideTy to the result type is required.
      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Number of GCD-sized pieces that make up one WideTy value, and the total
  // number of pieces consumed by the NumMerge merges built below.
  const int PartsPerGCD = WideSize / GCD;
  const int NumPiecesNeeded = NumMerge * PartsPerGCD;

  // Pad with undef to the next size that is a multiple of the requested size.
  // The target is counted in GCD-sized pieces (not bits), so no undef is
  // created when the pieces already fill the wide merges exactly.
  if (static_cast<int>(Unmerges.size()) < NumPiecesNeeded) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I < NumPiecesNeeded; ++I)
      Unmerges.push_back(UndefReg);
  }

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide the
  // original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
|
|
|
/// Widen the result type (type index 0) of a G_UNMERGE_VALUES with scalar
/// results and a non-vector source, using \p WideTy.
///
/// If \p WideTy has the same size as the source, each result is extracted
/// directly with a logical shift right and a truncate (a pointer source in an
/// integral address space is first cast with G_PTRTOINT). If \p WideTy is
/// narrower than the source, the source is unmerged into \p WideTy pieces
/// (after any-extending it to the LCM type if the sizes differ) and every
/// piece is unmerged again into the original results, with extra dead
/// definitions for pieces that have no original result.
///
/// \returns UnableToLegalize for any other type index, vector sources,
/// non-scalar results, a \p WideTy larger than the source, or pointer sources
/// that cannot be handled; otherwise \p MI is erased and Legalized returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  // The source is the last operand; every operand before it is a result.
  const int NumResults = MI.getNumOperands() - 1;
  Register Source = MI.getOperand(NumResults).getReg();
  LLT SourceTy = MRI.getType(Source);
  if (SourceTy.isVector())
    return UnableToLegalize;

  Register FirstResult = MI.getOperand(0).getReg();
  LLT ResultTy = MRI.getType(FirstResult);
  if (!ResultTy.isScalar())
    return UnableToLegalize;

  const unsigned WideBits = WideTy.getSizeInBits();
  const unsigned SourceBits = SourceTy.getSizeInBits();
  const unsigned ResultBits = ResultTy.getSizeInBits();

  if (WideBits == SourceBits) {
    if (SourceTy.isPointer()) {
      const DataLayout &Layout = MIRBuilder.getDataLayout();
      if (Layout.isNonIntegralAddressSpace(SourceTy.getAddressSpace())) {
        LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      // Continue with an integer of the same width as the pointer.
      SourceTy = LLT::scalar(SourceBits);
      Source = MIRBuilder.buildPtrToInt(SourceTy, Source).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type: result 0 is a plain truncate, result I is the source
    // shifted right by I result-widths and then truncated.
    MIRBuilder.buildTrunc(FirstResult, Source);
    for (int I = 1; I != NumResults; ++I) {
      auto Amount = MIRBuilder.buildConstant(SourceTy, ResultBits * I);
      auto Shifted = MIRBuilder.buildLShr(SourceTy, Source, Amount);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shifted);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // TODO
  if (WideBits > SourceBits)
    return UnableToLegalize;

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SourceTy, WideTy);

  Register ExtendedSource = Source;
  if (LCMTy.getSizeInBits() != SourceBits) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SourceTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    ExtendedSource = MIRBuilder.buildAnyExt(LCMTy, ExtendedSource).getReg(0);
  }

  auto WideUnmerge = MIRBuilder.buildUnmerge(WideTy, ExtendedSource);

  // Create a sequence of unmerges to the original results. Since we may have
  // widened the source, we will need to pad the results with dead defs to cover
  // the source register.
  // e.g. widen s16 to s32:
  // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48)
  //
  // =>
  //  %4:_(s64) = G_ANYEXT %0:_(s48)
  //  %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs
  //  %3:_(s16), dead %7 = G_UNMERGE_VALUES %6 ; original reg + extra dead def

  const int NumWidePieces = WideUnmerge->getNumOperands() - 1;
  const int ResultsPerPiece = WideBits / ResultBits;

  for (int Piece = 0; Piece != NumWidePieces; ++Piece) {
    auto PieceUnmerge = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int Slot = 0; Slot != ResultsPerPiece; ++Slot) {
      const int ResultIdx = Piece * ResultsPerPiece + Slot;
      // Use the original result if there is one for this slot; otherwise
      // create a dead def for the excess component.
      Register Def = ResultIdx < NumResults
                         ? MI.getOperand(ResultIdx).getReg()
                         : MRI.createGenericVirtualRegister(ResultTy);
      PieceUnmerge.addDef(Def);
    }

    PieceUnmerge.addUse(WideUnmerge.getReg(Piece));
  }

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2016-10-15 06:18:18 +08:00
|
|
|
/// Widen a G_EXTRACT using \p WideTy.
///
/// For type index 0 (the result) with scalar or pointer source and a scalar
/// result, the extract is replaced by a logical shift right by the extract
/// offset followed by a truncate to the result. A pointer source in an
/// integral address space is first cast with G_PTRTOINT. An offset of 0 needs
/// no shift and becomes an any-extend-or-truncate followed by a truncate.
///
/// For any other type index, a scalar source is simply any-extended to
/// \p WideTy. A vector source is handled only when the result is its element
/// type and the offset is a multiple of the element size: the source is
/// any-extended, the offset is rescaled, and the result is widened to the
/// scalar type of \p WideTy.
///
/// \returns UnableToLegalize for the unsupported combinations, Legalized
/// otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type, or in WideTy when that is larger (the
    // source is any-extended first in that case). A second, identical
    // "WideTy larger than SrcTy" check used to follow as an else-branch; it
    // could never be taken and has been removed.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    auto LShr = MIRBuilder.buildLShr(
      ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Rescale the bit offset by the ratio of the widened to the original source
  // size so it addresses the same element in the widened vector.
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
|
2019-02-03 07:56:00 +08:00
|
|
|
|
2019-02-12 22:54:52 +08:00
|
|
|
/// Widen type index 0 of a G_INSERT to \p WideTy.
///
/// Operand 1 is any-extended to \p WideTy and the result is widened to
/// \p WideTy, with the change observer notified before and after the
/// in-place modification.
///
/// \returns UnableToLegalize for any other type index, Legalized otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx == 0) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }

  // Only the container/result type can be widened here.
  return UnableToLegalize;
}
|
2019-02-03 07:56:00 +08:00
|
|
|
|
2019-02-12 22:54:52 +08:00
|
|
|
LegalizerHelper::LegalizeResult
|
|
|
|
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
|
|
|
|
MIRBuilder.setInstr(MI);
|
|
|
|
|
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
default:
|
|
|
|
return UnableToLegalize;
|
|
|
|
case TargetOpcode::G_EXTRACT:
|
|
|
|
return widenScalarExtract(MI, TypeIdx, WideTy);
|
|
|
|
case TargetOpcode::G_INSERT:
|
|
|
|
return widenScalarInsert(MI, TypeIdx, WideTy);
|
2019-02-03 08:07:33 +08:00
|
|
|
case TargetOpcode::G_MERGE_VALUES:
|
|
|
|
return widenScalarMergeValues(MI, TypeIdx, WideTy);
|
|
|
|
case TargetOpcode::G_UNMERGE_VALUES:
|
|
|
|
return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
|
2018-08-29 11:17:08 +08:00
|
|
|
case TargetOpcode::G_UADDO:
|
|
|
|
case TargetOpcode::G_USUBO: {
|
|
|
|
if (TypeIdx == 1)
|
|
|
|
return UnableToLegalize; // TODO
|
2020-01-16 20:37:00 +08:00
|
|
|
auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
|
|
|
|
auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));
|
2018-08-29 11:17:08 +08:00
|
|
|
unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
|
|
|
|
? TargetOpcode::G_ADD
|
|
|
|
: TargetOpcode::G_SUB;
|
|
|
|
// Do the arithmetic in the larger type.
|
[GISel]: Refactor MachineIRBuilder to allow passing additional parameters to build Instrs
https://reviews.llvm.org/D55294
Previously MachineIRBuilder::buildInstr used to accept variadic
arguments for sources (which were either unsigned or
MachineInstrBuilder). While this worked well in common cases, it doesn't
allow us to build instructions that have multiple destinations.
Additionally passing in other optional parameters in the end (such as
flags) is not possible trivially. Also a trivial call such as
B.buildInstr(Opc, Reg1, Reg2, Reg3)
can be interpreted differently based on the opcode (2defs + 1 src for
unmerge vs 1 def + 2srcs).
This patch refactors the buildInstr to
buildInstr(Opc, ArrayRef<DstOps>, ArrayRef<SrcOps>)
where DstOps and SrcOps are typed unions that know how to add itself to
MachineInstrBuilder.
After this patch, most invocations would look like
B.buildInstr(Opc, {s32, DstReg}, {SrcRegs..., SrcMIBs..});
Now all the other calls (such as buildAdd, buildSub etc) forward to
buildInstr. It also makes it possible to build instructions with
multiple defs.
Additionally in a subsequent patch, we should make it possible to add
flags directly while building instructions.
Additionally, the main buildInstr method is now virtual, so other
builders only have to override buildInstr (for, say, constant
folding/CSEing), which is straightforward.
Also attached here (https://reviews.llvm.org/F7675680) is a clang-tidy
patch that should upgrade the API calls if necessary.
llvm-svn: 348815
2018-12-11 08:48:50 +08:00
|
|
|
auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
|
2018-08-29 11:17:08 +08:00
|
|
|
LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
|
2020-01-16 22:36:41 +08:00
|
|
|
APInt Mask =
|
|
|
|
APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());
|
2020-01-16 20:09:48 +08:00
|
|
|
auto AndOp = MIRBuilder.buildAnd(
|
2020-01-16 22:36:41 +08:00
|
|
|
WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));
|
2018-08-29 11:17:08 +08:00
|
|
|
// There is no overflow if the AndOp is the same as NewOp.
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);
|
2018-08-29 11:17:08 +08:00
|
|
|
// Now trunc the NewOp to the original result.
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
|
2018-08-29 11:17:08 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2018-08-23 01:59:18 +08:00
|
|
|
case TargetOpcode::G_CTTZ:
|
|
|
|
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
|
|
|
|
case TargetOpcode::G_CTLZ:
|
|
|
|
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
|
|
|
|
case TargetOpcode::G_CTPOP: {
|
2019-01-31 10:09:57 +08:00
|
|
|
if (TypeIdx == 0) {
|
2019-02-05 06:26:33 +08:00
|
|
|
Observer.changingInstr(MI);
|
2019-01-31 10:09:57 +08:00
|
|
|
widenScalarDst(MI, WideTy, 0);
|
2019-02-05 06:26:33 +08:00
|
|
|
Observer.changedInstr(MI);
|
2019-01-31 10:09:57 +08:00
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register SrcReg = MI.getOperand(1).getReg();
|
2019-02-05 06:26:33 +08:00
|
|
|
|
2018-08-23 01:59:18 +08:00
|
|
|
// First ZEXT the input.
|
2019-02-05 06:26:33 +08:00
|
|
|
auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
|
|
|
|
LLT CurTy = MRI.getType(SrcReg);
|
2018-08-23 01:59:18 +08:00
|
|
|
if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
|
|
|
|
// The count is the same in the larger type except if the original
|
|
|
|
// value was zero. This can be handled by setting the bit just off
|
|
|
|
// the top of the original type.
|
|
|
|
auto TopBit =
|
|
|
|
APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
|
2019-02-05 06:26:33 +08:00
|
|
|
MIBSrc = MIRBuilder.buildOr(
|
|
|
|
WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
|
2018-08-23 01:59:18 +08:00
|
|
|
}
|
2019-02-05 06:26:33 +08:00
|
|
|
|
2018-08-23 01:59:18 +08:00
|
|
|
// Perform the operation at the larger size.
|
[GISel]: Refactor MachineIRBuilder to allow passing additional parameters to build Instrs
https://reviews.llvm.org/D55294
Previously MachineIRBuilder::buildInstr used to accept variadic
arguments for sources (which were either unsigned or
MachineInstrBuilder). While this worked well in common cases, it doesn't
allow us to build instructions that have multiple destinations.
Additionally passing in other optional parameters in the end (such as
flags) is not possible trivially. Also a trivial call such as
B.buildInstr(Opc, Reg1, Reg2, Reg3)
can be interpreted differently based on the opcode (2defs + 1 src for
unmerge vs 1 def + 2srcs).
This patch refactors the buildInstr to
buildInstr(Opc, ArrayRef<DstOps>, ArrayRef<SrcOps>)
where DstOps and SrcOps are typed unions that know how to add itself to
MachineInstrBuilder.
After this patch, most invocations would look like
B.buildInstr(Opc, {s32, DstReg}, {SrcRegs..., SrcMIBs..});
Now all the other calls (such as buildAdd, buildSub etc) forward to
buildInstr. It also makes it possible to build instructions with
multiple defs.
Additionally in a subsequent patch, we should make it possible to add
flags directly while building instructions.
Additionally, the main buildInstr method is now virtual, so other
builders only have to override buildInstr (for, say, constant
folding/CSEing), which is straightforward.
Also attached here (https://reviews.llvm.org/F7675680) is a clang-tidy
patch that should upgrade the API calls if necessary.
llvm-svn: 348815
2018-12-11 08:48:50 +08:00
|
|
|
auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
|
2018-08-23 01:59:18 +08:00
|
|
|
// This is already the correct result for CTPOP and CTTZs
|
|
|
|
if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
|
|
|
|
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
|
|
|
|
// The correct result is NewOp - (Difference in widety and current ty).
|
|
|
|
unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
|
2020-01-16 20:09:48 +08:00
|
|
|
MIBNewOp = MIRBuilder.buildSub(
|
|
|
|
WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
|
2018-08-23 01:59:18 +08:00
|
|
|
}
|
2019-02-05 06:26:33 +08:00
|
|
|
|
|
|
|
MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
|
|
|
|
MI.eraseFromParent();
|
2018-08-23 01:59:18 +08:00
|
|
|
return Legalized;
|
|
|
|
}
|
2019-01-31 10:34:03 +08:00
|
|
|
case TargetOpcode::G_BSWAP: {
|
|
|
|
Observer.changingInstr(MI);
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
2019-01-31 10:34:03 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
|
|
|
|
Register DstExt = MRI.createGenericVirtualRegister(WideTy);
|
|
|
|
Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
|
2019-01-31 10:34:03 +08:00
|
|
|
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
|
|
|
|
|
|
|
|
MI.getOperand(0).setReg(DstExt);
|
2018-05-10 01:28:18 +08:00
|
|
|
|
2019-01-31 10:34:03 +08:00
|
|
|
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
|
|
|
|
|
|
|
|
LLT Ty = MRI.getType(DstReg);
|
|
|
|
unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
|
|
|
|
MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
|
2020-01-16 20:09:48 +08:00
|
|
|
MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
|
2019-01-31 10:34:03 +08:00
|
|
|
|
|
|
|
MIRBuilder.buildTrunc(DstReg, ShrReg);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-09-05 04:46:15 +08:00
|
|
|
case TargetOpcode::G_BITREVERSE: {
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
|
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
|
|
|
LLT Ty = MRI.getType(DstReg);
|
|
|
|
unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
|
|
|
|
|
|
|
|
Register DstExt = MRI.createGenericVirtualRegister(WideTy);
|
|
|
|
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
|
|
|
|
MI.getOperand(0).setReg(DstExt);
|
|
|
|
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
|
|
|
|
|
|
|
|
auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
|
|
|
|
auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
|
|
|
|
MIRBuilder.buildTrunc(DstReg, Shift);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
|
|
|
}
|
2016-08-05 05:39:49 +08:00
|
|
|
case TargetOpcode::G_ADD:
|
|
|
|
case TargetOpcode::G_AND:
|
|
|
|
case TargetOpcode::G_MUL:
|
|
|
|
case TargetOpcode::G_OR:
|
|
|
|
case TargetOpcode::G_XOR:
|
2017-01-19 15:51:17 +08:00
|
|
|
case TargetOpcode::G_SUB:
|
2019-02-12 22:54:52 +08:00
|
|
|
// Perform operation at larger width (any extension is fine here, high bits
|
2016-08-05 02:35:11 +08:00
|
|
|
// don't affect the result) and then truncate the result back to the
|
|
|
|
// original type.
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
|
|
|
|
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
|
|
|
|
widenScalarDst(MI, WideTy);
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2016-08-05 02:35:11 +08:00
|
|
|
return Legalized;
|
2018-05-10 01:28:18 +08:00
|
|
|
|
[GlobalISel][Legalizer] Widening the second src op of shifts bug fix
The second source operand of G_SHL, G_ASHR, and G_LSHR must preserve its
value as a (small) unsigned integer, therefore it's incorrect to widen it
in any way but by zero extending it.
G_SHL was using G_ANYEXT and G_ASHR - G_SEXT (which is correct for their
destination and first source operands, but not the "number of bits to
shift" operand).
Generally, shifts aren't as similar to regular binary operations as it
might seem, for instance, they aren't commutative nor associative and
the second source operand usually requires a special treatment.
Reviewers: bogner, javed.absar, aivchenk, rovka
Reviewed By: bogner
Subscribers: igorb, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D46413
llvm-svn: 331926
2018-05-10 05:43:30 +08:00
|
|
|
case TargetOpcode::G_SHL:
|
2019-05-16 12:08:46 +08:00
|
|
|
Observer.changingInstr(MI);
|
2019-01-23 05:42:11 +08:00
|
|
|
|
|
|
|
if (TypeIdx == 0) {
|
|
|
|
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
|
|
|
|
widenScalarDst(MI, WideTy);
|
|
|
|
} else {
|
|
|
|
assert(TypeIdx == 1);
|
|
|
|
// The "number of bits to shift" operand must preserve its value as an
|
|
|
|
// unsigned integer:
|
|
|
|
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
|
|
|
|
}
|
|
|
|
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
[GlobalISel][Legalizer] Widening the second src op of shifts bug fix
The second source operand of G_SHL, G_ASHR, and G_LSHR must preserve its
value as a (small) unsigned integer, therefore it's incorrect to widen it
in any way but by zero extending it.
G_SHL was using G_ANYEXT and G_ASHR - G_SEXT (which is correct for their
destination and first source operands, but not the "number of bits to
shift" operand).
Generally, shifts aren't as similar to regular binary operations as it
might seem, for instance, they aren't commutative nor associative and
the second source operand usually requires a special treatment.
Reviewers: bogner, javed.absar, aivchenk, rovka
Reviewed By: bogner
Subscribers: igorb, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D46413
llvm-svn: 331926
2018-05-10 05:43:30 +08:00
|
|
|
return Legalized;
|
|
|
|
|
2016-08-27 01:46:06 +08:00
|
|
|
case TargetOpcode::G_SDIV:
|
2018-05-09 09:43:12 +08:00
|
|
|
case TargetOpcode::G_SREM:
|
2019-05-24 01:58:48 +08:00
|
|
|
case TargetOpcode::G_SMIN:
|
|
|
|
case TargetOpcode::G_SMAX:
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
|
|
|
|
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
|
|
|
|
widenScalarDst(MI, WideTy);
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
[GlobalISel][Legalizer] More concise and faster widenScalar, NFC
Refactoring LegalizerHelper::widenScalar member function reducing its
size by approximately a factor of 2 and (hopefully) making it more
straightforward and regular by introducing widenScalarSrc and
widenScalarDst helper methods.
The new widenScalar* methods mutate the instructions in place instead
of recreating them from scratch and removing the originals. The
compile time implications of this were measured on sqlite3
amalgamation, targeting AArch64 in -O0:
LegalizerHelper::widenScalar: > 25% faster
Legalizer::runOnMachineFunction: ~ 4.0 - 4.5% faster
Also adding MachineOperand::setCImm and refactoring out
MachineIRBuilder::recordInsertion methods to make the change possible.
Reviewers: aditya_nandakumar, bogner, javed.absar, t.p.northover, ab, dsanders, arsenm
Reviewed By: aditya_nandakumar
Subscribers: wdng, rovka, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D46414
llvm-svn: 331819
2018-05-09 06:53:09 +08:00
|
|
|
return Legalized;
|
2018-05-10 01:28:18 +08:00
|
|
|
|
[GlobalISel][Legalizer] Widening the second src op of shifts bug fix
The second source operand of G_SHL, G_ASHR, and G_LSHR must preserve its
value as a (small) unsigned integer, therefore it's incorrect to widen it
in any way but by zero extending it.
G_SHL was using G_ANYEXT and G_ASHR - G_SEXT (which is correct for their
destination and first source operands, but not the "number of bits to
shift" operand).
Generally, shifts aren't as similar to regular binary operations as it
might seem, for instance, they aren't commutative nor associative and
the second source operand usually requires a special treatment.
Reviewers: bogner, javed.absar, aivchenk, rovka
Reviewed By: bogner
Subscribers: igorb, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D46413
llvm-svn: 331926
2018-05-10 05:43:30 +08:00
|
|
|
case TargetOpcode::G_ASHR:
|
2019-01-23 05:42:11 +08:00
|
|
|
case TargetOpcode::G_LSHR:
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2019-01-23 05:42:11 +08:00
|
|
|
|
|
|
|
if (TypeIdx == 0) {
|
|
|
|
unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
|
|
|
|
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
|
|
|
|
|
|
|
|
widenScalarSrc(MI, WideTy, 1, CvtOp);
|
|
|
|
widenScalarDst(MI, WideTy);
|
|
|
|
} else {
|
|
|
|
assert(TypeIdx == 1);
|
|
|
|
// The "number of bits to shift" operand must preserve its value as an
|
|
|
|
// unsigned integer:
|
|
|
|
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
|
|
|
|
}
|
|
|
|
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
[GlobalISel][Legalizer] Widening the second src op of shifts bug fix
The second source operand of G_SHL, G_ASHR, and G_LSHR must preserve its
value as a (small) unsigned integer, therefore it's incorrect to widen it
in any way but by zero extending it.
G_SHL was using G_ANYEXT and G_ASHR - G_SEXT (which is correct for their
destination and first source operands, but not the "number of bits to
shift" operand).
Generally, shifts aren't as similar to regular binary operations as it
might seem, for instance, they aren't commutative nor associative and
the second source operand usually requires a special treatment.
Reviewers: bogner, javed.absar, aivchenk, rovka
Reviewed By: bogner
Subscribers: igorb, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D46413
llvm-svn: 331926
2018-05-10 05:43:30 +08:00
|
|
|
return Legalized;
|
2018-05-10 01:28:18 +08:00
|
|
|
case TargetOpcode::G_UDIV:
|
|
|
|
case TargetOpcode::G_UREM:
|
2019-05-24 01:58:48 +08:00
|
|
|
case TargetOpcode::G_UMIN:
|
|
|
|
case TargetOpcode::G_UMAX:
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
|
|
|
|
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
|
|
|
|
widenScalarDst(MI, WideTy);
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
return Legalized;
|
|
|
|
|
|
|
|
case TargetOpcode::G_SELECT:
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-12-25 22:42:30 +08:00
|
|
|
if (TypeIdx == 0) {
|
|
|
|
// Perform operation at larger width (any extension is fine here, high
|
|
|
|
// bits don't affect the result) and then truncate the result back to the
|
|
|
|
// original type.
|
|
|
|
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
|
|
|
|
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
|
|
|
|
widenScalarDst(MI, WideTy);
|
|
|
|
} else {
|
2019-01-30 10:57:43 +08:00
|
|
|
bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
|
2018-12-25 22:42:30 +08:00
|
|
|
// Explicit extension is required here since high bits affect the result.
|
2019-01-30 10:57:43 +08:00
|
|
|
widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
|
2018-12-25 22:42:30 +08:00
|
|
|
}
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2017-02-07 07:41:27 +08:00
|
|
|
return Legalized;
|
2018-05-10 01:28:18 +08:00
|
|
|
|
2017-01-24 05:10:14 +08:00
|
|
|
case TargetOpcode::G_FPTOSI:
|
2018-05-10 01:28:18 +08:00
|
|
|
case TargetOpcode::G_FPTOUI:
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2019-10-01 09:06:48 +08:00
|
|
|
|
|
|
|
if (TypeIdx == 0)
|
|
|
|
widenScalarDst(MI, WideTy);
|
|
|
|
else
|
|
|
|
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
|
|
|
|
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2018-05-09 09:43:12 +08:00
|
|
|
return Legalized;
|
2017-01-20 09:37:24 +08:00
|
|
|
case TargetOpcode::G_SITOFP:
|
|
|
|
if (TypeIdx != 1)
|
|
|
|
return UnableToLegalize;
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
return Legalized;
|
2017-01-20 09:37:24 +08:00
|
|
|
|
2018-05-10 01:28:18 +08:00
|
|
|
case TargetOpcode::G_UITOFP:
|
|
|
|
if (TypeIdx != 1)
|
|
|
|
return UnableToLegalize;
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2018-05-09 09:43:12 +08:00
|
|
|
return Legalized;
|
2018-05-10 01:28:18 +08:00
|
|
|
|
[globalisel][legalizerinfo] Introduce dedicated extending loads and add lowerings for them
Summary:
Previously, an extending load was represented as (G_*EXT (G_LOAD x)).
This had a few drawbacks:
* G_LOAD had to be legal for all sizes you could extend from, even if
registers didn't naturally hold those sizes.
* All sizes you could extend from had to be allocatable just in case the
extend went missing (e.g. by optimization).
* At minimum, G_*EXT and G_TRUNC had to be legal for these sizes. As we
improve optimization of extends and truncates, this legality requirement
would spread without considerable care w.r.t when certain combines were
permitted.
* The SelectionDAG importer required some ugly and fragile pattern
rewriting to translate patterns into this style.
This patch begins changing the representation to:
* (G_[SZ]EXTLOAD x)
* (G_LOAD x) any-extends when MMO.getSize() * 8 < ResultTy.getSizeInBits()
which resolves these issues by allowing targets to work entirely in their
native register sizes, and by having a more direct translation from
SelectionDAG patterns.
This patch introduces the new generic instructions and new variation on
G_LOAD and adds lowering for them to convert back to the existing
representations.
Depends on D45466
Reviewers: ab, aditya_nandakumar, bogner, rtereshin, volkan, rovka, aemerson, javed.absar
Reviewed By: aemerson
Subscribers: aemerson, kristof.beyls, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D45540
llvm-svn: 331115
2018-04-29 02:14:50 +08:00
|
|
|
case TargetOpcode::G_LOAD:
|
|
|
|
case TargetOpcode::G_SEXTLOAD:
|
2018-05-10 01:28:18 +08:00
|
|
|
case TargetOpcode::G_ZEXTLOAD:
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
widenScalarDst(MI, WideTy);
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2016-08-24 02:20:09 +08:00
|
|
|
return Legalized;
|
2018-05-10 01:28:18 +08:00
|
|
|
|
2016-08-24 02:20:09 +08:00
|
|
|
case TargetOpcode::G_STORE: {
|
2019-01-30 10:04:31 +08:00
|
|
|
if (TypeIdx != 0)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
|
|
|
|
if (!isPowerOf2_32(Ty.getSizeInBits()))
|
2017-03-22 06:22:05 +08:00
|
|
|
return UnableToLegalize;
|
|
|
|
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2019-01-30 10:04:31 +08:00
|
|
|
|
|
|
|
unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
|
|
|
|
TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
|
|
|
|
widenScalarSrc(MI, WideTy, 0, ExtType);
|
|
|
|
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2016-08-24 02:20:09 +08:00
|
|
|
return Legalized;
|
|
|
|
}
|
2016-08-20 06:40:00 +08:00
|
|
|
case TargetOpcode::G_CONSTANT: {
|
2018-05-10 01:28:18 +08:00
|
|
|
MachineOperand &SrcMO = MI.getOperand(1);
|
|
|
|
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
|
2019-12-04 02:40:03 +08:00
|
|
|
unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
|
|
|
|
MRI.getType(MI.getOperand(0).getReg()));
|
|
|
|
assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
|
|
|
|
ExtOpc == TargetOpcode::G_ANYEXT) &&
|
|
|
|
"Illegal Extend");
|
|
|
|
const APInt &SrcVal = SrcMO.getCImm()->getValue();
|
|
|
|
const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
|
|
|
|
? SrcVal.sext(WideTy.getSizeInBits())
|
|
|
|
: SrcVal.zext(WideTy.getSizeInBits());
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
SrcMO.setCImm(ConstantInt::get(Ctx, Val));
|
|
|
|
|
|
|
|
widenScalarDst(MI, WideTy);
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2016-08-20 06:40:00 +08:00
|
|
|
return Legalized;
|
|
|
|
}
|
2016-08-20 06:40:08 +08:00
|
|
|
case TargetOpcode::G_FCONSTANT: {
|
2018-05-10 01:28:18 +08:00
|
|
|
MachineOperand &SrcMO = MI.getOperand(1);
|
2018-01-27 15:07:20 +08:00
|
|
|
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
|
2018-05-10 01:28:18 +08:00
|
|
|
APFloat Val = SrcMO.getFPImm()->getValueAPF();
|
2018-01-27 15:07:20 +08:00
|
|
|
bool LosesInfo;
|
2018-05-10 01:28:18 +08:00
|
|
|
switch (WideTy.getSizeInBits()) {
|
|
|
|
case 32:
|
2019-02-12 22:54:54 +08:00
|
|
|
Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
|
|
|
|
&LosesInfo);
|
2018-05-10 01:28:18 +08:00
|
|
|
break;
|
|
|
|
case 64:
|
2019-02-12 22:54:54 +08:00
|
|
|
Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
|
|
|
|
&LosesInfo);
|
2018-05-10 01:28:18 +08:00
|
|
|
break;
|
|
|
|
default:
|
2019-02-12 22:54:54 +08:00
|
|
|
return UnableToLegalize;
|
2018-05-10 01:28:18 +08:00
|
|
|
}
|
2019-02-12 22:54:54 +08:00
|
|
|
|
|
|
|
assert(!LosesInfo && "extend should always be lossless");
|
|
|
|
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
|
|
|
|
|
|
|
|
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2016-08-20 06:40:08 +08:00
|
|
|
return Legalized;
|
|
|
|
}
|
2019-01-09 15:34:14 +08:00
|
|
|
case TargetOpcode::G_IMPLICIT_DEF: {
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
widenScalarDst(MI, WideTy);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
|
|
|
}
|
2018-05-10 01:28:18 +08:00
|
|
|
case TargetOpcode::G_BRCOND:
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2019-02-14 19:39:53 +08:00
|
|
|
widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2016-08-24 05:01:20 +08:00
|
|
|
return Legalized;
|
2018-05-10 01:28:18 +08:00
|
|
|
|
|
|
|
case TargetOpcode::G_FCMP:
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2017-08-01 01:00:16 +08:00
|
|
|
if (TypeIdx == 0)
|
2018-05-10 01:28:18 +08:00
|
|
|
widenScalarDst(MI, WideTy);
|
|
|
|
else {
|
|
|
|
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
|
|
|
|
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
|
[GlobalISel][Legalizer] More concise and faster widenScalar, NFC
Refactoring LegalizerHelper::widenScalar member function reducing its
size by approximately a factor of 2 and (hopefully) making it more
straightforward and regular by introducing widenScalarSrc and
widenScalarDst helper methods.
The new widenScalar* methods mutate the instructions in place instead
of recreating them from scratch and removing the originals. The
compile time implications of this were measured on sqlite3
amalgamation, targeting AArch64 in -O0:
LegalizerHelper::widenScalar: > 25% faster
Legalizer::runOnMachineFunction: ~ 4.0 - 4.5% faster
Also adding MachineOperand::setCImm and refactoring out
MachineIRBuilder::recordInsertion methods to make the change possible.
Reviewers: aditya_nandakumar, bogner, javed.absar, t.p.northover, ab, dsanders, arsenm
Reviewed By: aditya_nandakumar
Subscribers: wdng, rovka, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D46414
llvm-svn: 331819
2018-05-09 06:53:09 +08:00
|
|
|
}
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
return Legalized;
|
|
|
|
|
|
|
|
case TargetOpcode::G_ICMP:
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-05-09 09:43:12 +08:00
|
|
|
if (TypeIdx == 0)
|
2018-05-10 01:28:18 +08:00
|
|
|
widenScalarDst(MI, WideTy);
|
|
|
|
else {
|
|
|
|
unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
|
|
|
|
MI.getOperand(1).getPredicate()))
|
|
|
|
? TargetOpcode::G_SEXT
|
|
|
|
: TargetOpcode::G_ZEXT;
|
|
|
|
widenScalarSrc(MI, WideTy, 2, ExtOpcode);
|
|
|
|
widenScalarSrc(MI, WideTy, 3, ExtOpcode);
|
|
|
|
}
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2016-08-27 01:46:17 +08:00
|
|
|
return Legalized;
|
2018-05-10 01:28:18 +08:00
|
|
|
|
[globalisel] Rename G_GEP to G_PTR_ADD
Summary:
G_GEP is rather poorly named. It's a simple pointer+scalar addition and
doesn't support any of the complexities of getelementptr. I therefore
propose that we rename it. There's a G_PTR_MASK so let's follow that
convention and go with G_PTR_ADD
Reviewers: volkan, aditya_nandakumar, bogner, rovka, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69734
2019-11-02 04:18:00 +08:00
|
|
|
case TargetOpcode::G_PTR_ADD:
|
|
|
|
assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2016-09-15 19:02:19 +08:00
|
|
|
return Legalized;
|
2018-05-10 01:28:18 +08:00
|
|
|
|
2017-08-25 12:57:27 +08:00
|
|
|
case TargetOpcode::G_PHI: {
|
|
|
|
assert(TypeIdx == 0 && "Expecting only Idx 0");
|
2018-05-10 01:28:18 +08:00
|
|
|
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-05-10 01:28:18 +08:00
|
|
|
for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
|
|
|
|
MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
|
|
|
|
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
|
|
|
|
widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
|
2017-08-25 12:57:27 +08:00
|
|
|
}
|
2018-05-10 01:28:18 +08:00
|
|
|
|
|
|
|
MachineBasicBlock &MBB = *MI.getParent();
|
2019-09-17 07:46:03 +08:00
|
|
|
MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
|
2018-05-10 01:28:18 +08:00
|
|
|
widenScalarDst(MI, WideTy);
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2017-08-25 12:57:27 +08:00
|
|
|
return Legalized;
|
|
|
|
}
|
2019-01-23 04:38:15 +08:00
|
|
|
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
|
|
|
|
if (TypeIdx == 0) {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register VecReg = MI.getOperand(1).getReg();
|
2019-01-23 04:38:15 +08:00
|
|
|
LLT VecTy = MRI.getType(VecReg);
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
|
|
|
|
widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
|
|
|
|
WideTy.getSizeInBits()),
|
|
|
|
1, TargetOpcode::G_SEXT);
|
|
|
|
|
|
|
|
widenScalarDst(MI, WideTy, 0);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
|
2018-10-25 22:04:54 +08:00
|
|
|
if (TypeIdx != 2)
|
|
|
|
return UnableToLegalize;
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2019-10-02 03:51:37 +08:00
|
|
|
// TODO: Probably should be zext
|
2018-10-25 22:04:54 +08:00
|
|
|
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2018-10-25 22:04:54 +08:00
|
|
|
return Legalized;
|
2019-01-23 04:38:15 +08:00
|
|
|
}
|
2019-10-02 03:51:37 +08:00
|
|
|
case TargetOpcode::G_INSERT_VECTOR_ELT: {
|
|
|
|
if (TypeIdx == 1) {
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
|
|
|
|
Register VecReg = MI.getOperand(1).getReg();
|
|
|
|
LLT VecTy = MRI.getType(VecReg);
|
|
|
|
LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
|
|
|
|
|
|
|
|
widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
|
|
|
|
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
|
|
|
|
widenScalarDst(MI, WideVecTy, 0);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (TypeIdx == 2) {
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
// TODO: Probably should be zext
|
|
|
|
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
}
|
|
|
|
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-01-21 03:10:31 +08:00
|
|
|
case TargetOpcode::G_FADD:
|
|
|
|
case TargetOpcode::G_FMUL:
|
|
|
|
case TargetOpcode::G_FSUB:
|
|
|
|
case TargetOpcode::G_FMA:
|
2019-09-07 04:49:10 +08:00
|
|
|
case TargetOpcode::G_FMAD:
|
2019-01-21 03:10:31 +08:00
|
|
|
case TargetOpcode::G_FNEG:
|
|
|
|
case TargetOpcode::G_FABS:
|
2019-02-12 01:05:20 +08:00
|
|
|
case TargetOpcode::G_FCANONICALIZE:
|
2019-07-11 00:31:19 +08:00
|
|
|
case TargetOpcode::G_FMINNUM:
|
|
|
|
case TargetOpcode::G_FMAXNUM:
|
|
|
|
case TargetOpcode::G_FMINNUM_IEEE:
|
|
|
|
case TargetOpcode::G_FMAXNUM_IEEE:
|
|
|
|
case TargetOpcode::G_FMINIMUM:
|
|
|
|
case TargetOpcode::G_FMAXIMUM:
|
2019-01-21 03:10:31 +08:00
|
|
|
case TargetOpcode::G_FDIV:
|
|
|
|
case TargetOpcode::G_FREM:
|
2018-12-22 01:05:26 +08:00
|
|
|
case TargetOpcode::G_FCEIL:
|
2019-02-12 01:22:58 +08:00
|
|
|
case TargetOpcode::G_FFLOOR:
|
2019-01-29 02:34:18 +08:00
|
|
|
case TargetOpcode::G_FCOS:
|
|
|
|
case TargetOpcode::G_FSIN:
|
2019-01-29 03:53:14 +08:00
|
|
|
case TargetOpcode::G_FLOG10:
|
2019-01-29 05:27:23 +08:00
|
|
|
case TargetOpcode::G_FLOG:
|
2019-01-31 05:16:04 +08:00
|
|
|
case TargetOpcode::G_FLOG2:
|
2019-04-20 07:41:52 +08:00
|
|
|
case TargetOpcode::G_FRINT:
|
2019-04-26 00:44:40 +08:00
|
|
|
case TargetOpcode::G_FNEARBYINT:
|
2019-01-31 05:03:52 +08:00
|
|
|
case TargetOpcode::G_FSQRT:
|
2019-01-31 07:46:15 +08:00
|
|
|
case TargetOpcode::G_FEXP:
|
2019-04-04 00:58:32 +08:00
|
|
|
case TargetOpcode::G_FEXP2:
|
2019-04-20 00:28:08 +08:00
|
|
|
case TargetOpcode::G_FPOW:
|
2019-04-24 02:20:44 +08:00
|
|
|
case TargetOpcode::G_INTRINSIC_TRUNC:
|
2019-04-24 05:11:57 +08:00
|
|
|
case TargetOpcode::G_INTRINSIC_ROUND:
|
2019-01-21 03:10:31 +08:00
|
|
|
assert(TypeIdx == 0);
|
2018-12-22 01:05:26 +08:00
|
|
|
Observer.changingInstr(MI);
|
2019-01-21 03:10:31 +08:00
|
|
|
|
|
|
|
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
|
|
|
|
widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
|
|
|
|
|
2018-12-22 01:05:26 +08:00
|
|
|
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
2019-02-03 07:29:55 +08:00
|
|
|
case TargetOpcode::G_INTTOPTR:
|
|
|
|
if (TypeIdx != 1)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
|
|
|
case TargetOpcode::G_PTRTOINT:
|
|
|
|
if (TypeIdx != 0)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
widenScalarDst(MI, WideTy, 0);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
2019-07-08 21:48:06 +08:00
|
|
|
case TargetOpcode::G_BUILD_VECTOR: {
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
|
|
|
|
const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
|
|
|
|
for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
|
|
|
|
widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
|
|
|
|
|
|
|
|
// Avoid changing the result vector type if the source element type was
|
|
|
|
// requested.
|
|
|
|
if (TypeIdx == 1) {
|
|
|
|
auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
|
|
|
|
MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
|
|
|
|
} else {
|
|
|
|
widenScalarDst(MI, WideTy, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
|
|
|
}
|
[globalisel] Add G_SEXT_INREG
Summary:
Targets often have instructions that can sign-extend certain cases faster
than the equivalent shift-left/arithmetic-shift-right. Such cases can be
identified by matching a shift-left/shift-right pair but there are some
issues with this in the context of combines. For example, suppose you can
sign-extend 8-bit up to 32-bit with a target extend instruction.
%1:_(s32) = G_SHL %0:_(s32), i32 24 # (I've inlined the G_CONSTANT for brevity)
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_ASHR %2:_(s32), i32 1
would reasonably combine to:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 25
which no longer matches the special case. If your shifts and extend are
equal cost, this would break even as a pair of shifts but if your shift is
more expensive than the extend then it's cheaper as:
%2:_(s32) = G_SEXT_INREG %0:_(s32), i32 8
%3:_(s32) = G_ASHR %2:_(s32), i32 1
It's possible to match the shift-pair in ISel and emit an extend and ashr.
However, this is far from the only way to break this shift pair and make
it hard to match the extends. Another example is that with the right
known-zeros, this:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_MUL %2:_(s32), i32 2
can become:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 23
All upstream targets have been configured to lower it to the current
G_SHL,G_ASHR pair but will likely want to make it legal in some cases to
handle their faster cases.
To follow-up: Provide a way to legalize based on the constant. At the
moment, I'm thinking that the best way to achieve this is to provide the
MI in LegalityQuery but that opens the door to breaking core principles
of the legalizer (legality is not context sensitive). That said, it's
worth noting that looking at other instructions and acting on that
information doesn't violate this principle in itself. It's only a
violation if, at the end of legalization, a pass that checks legality
without being able to see the context would say an instruction might not be
legal. That's a fairly subtle distinction so to give a concrete example,
saying %2 in:
%1 = G_CONSTANT 16
%2 = G_SEXT_INREG %0, %1
is legal is in violation of that principle if the legality of %2 depends
on %1 being constant and/or being 16. However, legalizing to either:
%2 = G_SEXT_INREG %0, 16
or:
%1 = G_CONSTANT 16
%2:_(s32) = G_SHL %0, %1
%3:_(s32) = G_ASHR %2, %1
depending on whether %1 is constant and 16 does not violate that principle
since both outputs are genuinely legal.
Reviewers: bogner, aditya_nandakumar, volkan, aemerson, paquette, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, kristof.beyls, javed.absar, hiraditya, jrtc27, atanasyan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61289
llvm-svn: 368487
2019-08-10 05:11:20 +08:00
|
|
|
case TargetOpcode::G_SEXT_INREG:
|
|
|
|
if (TypeIdx != 0)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
Observer.changingInstr(MI);
|
|
|
|
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
|
|
|
|
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
return Legalized;
|
2016-08-05 02:35:11 +08:00
|
|
|
}
|
2016-07-23 04:03:43 +08:00
|
|
|
}
|
|
|
|
|
2020-01-10 10:53:28 +08:00
|
|
|
/// Split \p Src into pieces of type \p Ty and append the resulting registers
/// to \p Pieces.
///
/// The split is emitted through \p B with buildUnmerge. Every operand of the
/// built instruction except the last one is appended, in operand order.
/// NOTE(review): the last operand is presumably the source register of the
/// unmerge, which is why it is excluded -- confirm against the builder.
static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
                             MachineIRBuilder &B, Register Src, LLT Ty) {
  auto UnmergeMIB = B.buildUnmerge(Ty, Src);

  // All operands but the trailing one are collected.
  const int NumPieces = UnmergeMIB->getNumOperands() - 1;
  for (int Idx = 0; Idx != NumPieces; ++Idx) {
    Register Piece = UnmergeMIB.getReg(Idx);
    Pieces.push_back(Piece);
  }
}
|
|
|
|
|
|
|
|
/// Lower a bitcast where exactly one of the source and destination types is a
/// vector.
///
/// The source register (operand 1) is unmerged into pieces whose type is the
/// element type of whichever side is the vector, and those pieces are merged
/// into the destination register (operand 0). The original instruction is
/// erased on success.
///
/// \returns Legalized if the lowering was performed, or UnableToLegalize when
/// both types are vectors or neither is.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  const Register DstReg = MI.getOperand(0).getReg();
  const Register SrcReg = MI.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const bool SrcIsVector = SrcTy.isVector();
  const bool DstIsVector = DstTy.isVector();

  // Only mixed vector/non-vector casts are handled here.
  if (SrcIsVector == DstIsVector)
    return UnableToLegalize;

  // The piece type always comes from the vector side of the cast.
  const LLT PieceTy = (SrcIsVector ? SrcTy : DstTy).getElementType();

  SmallVector<Register, 8> Parts;
  getUnmergePieces(Parts, MIRBuilder, SrcReg, PieceTy);
  MIRBuilder.buildMerge(DstReg, Parts);
  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2016-10-15 06:18:18 +08:00
|
|
|
LegalizerHelper::LegalizeResult
|
|
|
|
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
|
2016-08-27 01:46:13 +08:00
|
|
|
using namespace TargetOpcode;
|
|
|
|
MIRBuilder.setInstr(MI);
|
|
|
|
|
|
|
|
switch(MI.getOpcode()) {
|
|
|
|
default:
|
|
|
|
return UnableToLegalize;
|
2020-01-10 10:53:28 +08:00
|
|
|
case TargetOpcode::G_BITCAST:
|
|
|
|
return lowerBitcast(MI);
|
2016-08-27 01:46:13 +08:00
|
|
|
case TargetOpcode::G_SREM:
|
|
|
|
case TargetOpcode::G_UREM: {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register QuotReg = MRI.createGenericVirtualRegister(Ty);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {QuotReg},
|
|
|
|
{MI.getOperand(1), MI.getOperand(2)});
|
2016-08-27 01:46:13 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register ProdReg = MRI.createGenericVirtualRegister(Ty);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2));
|
|
|
|
MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), ProdReg);
|
2016-08-27 01:46:13 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-10-17 04:46:32 +08:00
|
|
|
case TargetOpcode::G_SADDO:
|
|
|
|
case TargetOpcode::G_SSUBO:
|
|
|
|
return lowerSADDO_SSUBO(MI);
|
2017-02-09 05:22:15 +08:00
|
|
|
case TargetOpcode::G_SMULO:
|
|
|
|
case TargetOpcode::G_UMULO: {
|
|
|
|
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
|
|
|
|
// result.
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Res = MI.getOperand(0).getReg();
|
|
|
|
Register Overflow = MI.getOperand(1).getReg();
|
|
|
|
Register LHS = MI.getOperand(2).getReg();
|
|
|
|
Register RHS = MI.getOperand(3).getReg();
|
2017-02-09 05:22:15 +08:00
|
|
|
|
|
|
|
unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
|
|
|
|
? TargetOpcode::G_SMULH
|
|
|
|
: TargetOpcode::G_UMULH;
|
|
|
|
|
[GlobalISel] Tweak lowering of G_SMULO/G_UMULO
Summary:
Applying this cleanup:
- MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
- .addDef(Shifted)
- .addUse(Res)
- .addUse(ShiftAmt);
+ MIRBuilder.buildAShr(Shifted, Res, ShiftAmt);
caused an assertion failure here:
llc: /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp:404: llvm::MachineInstr *llvm::MachineRegisterInfo::getVRegDef(unsigned int) const: Assertion `(I.atEnd() || std::next(I) == def_instr_end()) && "getVRegDef assumes a single definition or no definition"' failed.
#4 0x00000000050a6d96 in llvm::MachineRegisterInfo::getVRegDef (this=0x74606a0, Reg=2147483650) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp:403
#5 0x00000000066148f6 in llvm::getConstantVRegValWithLookThrough (VReg=2147483650, MRI=..., LookThroughInstrs=false, HandleFConstant=true) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp:244
#6 0x00000000066147da in llvm::getConstantVRegVal (VReg=2147483650, MRI=...) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp:210
#7 0x0000000006615367 in llvm::ConstantFoldBinOp (Opcode=101, Op1=2147483650, Op2=2147483656, MRI=...) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp:341
#8 0x000000000657eee0 in llvm::CSEMIRBuilder::buildInstr (this=0x7465010, Opc=101, DstOps=..., SrcOps=..., Flag=...) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp:160
#9 0x0000000003645958 in llvm::MachineIRBuilder::buildAShr (this=0x7465010, Dst=..., Src0=..., Src1=..., Flags=...) at /home/jayfoad2/git/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h:1298
#10 0x00000000065c35b1 in llvm::LegalizerHelper::lower (this=0x7fffffffb5f8, MI=..., TypeIdx=0, Ty=...) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp:2020
because at this point there are two instructions defining Res: the
original G_SMULO/G_UMULO and the new G_MUL that we built. The fix is
to modify the original mul in place, so that there is only ever one
definition of Res.
Reviewers: arsenm, aditya_nandakumar
Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D72842
2020-01-16 22:46:36 +08:00
|
|
|
Observer.changingInstr(MI);
|
|
|
|
const auto &TII = MIRBuilder.getTII();
|
|
|
|
MI.setDesc(TII.get(TargetOpcode::G_MUL));
|
|
|
|
MI.RemoveOperand(1);
|
|
|
|
Observer.changedInstr(MI);
|
|
|
|
|
|
|
|
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
|
|
|
|
|
|
|
|
auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
|
2017-02-09 05:22:15 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Zero = MRI.createGenericVirtualRegister(Ty);
|
2017-02-09 05:22:15 +08:00
|
|
|
MIRBuilder.buildConstant(Zero, 0);
|
2018-01-03 12:56:56 +08:00
|
|
|
|
|
|
|
// For *signed* multiply, overflow is detected by checking:
|
|
|
|
// (hi != (lo >> bitwidth-1))
|
|
|
|
if (Opcode == TargetOpcode::G_SMULH) {
|
[GlobalISel] Tweak lowering of G_SMULO/G_UMULO
Summary:
Applying this cleanup:
- MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
- .addDef(Shifted)
- .addUse(Res)
- .addUse(ShiftAmt);
+ MIRBuilder.buildAShr(Shifted, Res, ShiftAmt);
caused an assertion failure here:
llc: /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp:404: llvm::MachineInstr *llvm::MachineRegisterInfo::getVRegDef(unsigned int) const: Assertion `(I.atEnd() || std::next(I) == def_instr_end()) && "getVRegDef assumes a single definition or no definition"' failed.
#4 0x00000000050a6d96 in llvm::MachineRegisterInfo::getVRegDef (this=0x74606a0, Reg=2147483650) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp:403
#5 0x00000000066148f6 in llvm::getConstantVRegValWithLookThrough (VReg=2147483650, MRI=..., LookThroughInstrs=false, HandleFConstant=true) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp:244
#6 0x00000000066147da in llvm::getConstantVRegVal (VReg=2147483650, MRI=...) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp:210
#7 0x0000000006615367 in llvm::ConstantFoldBinOp (Opcode=101, Op1=2147483650, Op2=2147483656, MRI=...) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp:341
#8 0x000000000657eee0 in llvm::CSEMIRBuilder::buildInstr (this=0x7465010, Opc=101, DstOps=..., SrcOps=..., Flag=...) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp:160
#9 0x0000000003645958 in llvm::MachineIRBuilder::buildAShr (this=0x7465010, Dst=..., Src0=..., Src1=..., Flags=...) at /home/jayfoad2/git/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h:1298
#10 0x00000000065c35b1 in llvm::LegalizerHelper::lower (this=0x7fffffffb5f8, MI=..., TypeIdx=0, Ty=...) at /home/jayfoad2/git/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp:2020
because at this point there are two instructions defining Res: the
original G_SMULO/G_UMULO and the new G_MUL that we built. The fix is
to modify the original mul in place, so that there is only ever one
definition of Res.
Reviewers: arsenm, aditya_nandakumar
Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D72842
2020-01-16 22:46:36 +08:00
|
|
|
auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
|
|
|
|
auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
|
2018-01-03 12:56:56 +08:00
|
|
|
MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
|
|
|
|
} else {
|
|
|
|
MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
|
|
|
|
}
|
2017-02-09 05:22:15 +08:00
|
|
|
return Legalized;
|
|
|
|
}
|
2017-03-09 02:09:14 +08:00
|
|
|
case TargetOpcode::G_FNEG: {
|
|
|
|
// TODO: Handle vector types once we are able to
|
|
|
|
// represent them.
|
|
|
|
if (Ty.isVector())
|
|
|
|
return UnableToLegalize;
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Res = MI.getOperand(0).getReg();
|
2017-12-16 06:22:58 +08:00
|
|
|
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
|
2020-02-07 02:01:57 +08:00
|
|
|
Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty);
|
|
|
|
if (!ZeroTy)
|
|
|
|
return UnableToLegalize;
|
2017-03-09 02:09:14 +08:00
|
|
|
ConstantFP &ZeroForNegation =
|
|
|
|
*cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
|
2018-02-15 03:58:36 +08:00
|
|
|
auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
|
2019-06-25 00:16:12 +08:00
|
|
|
Register SubByReg = MI.getOperand(1).getReg();
|
2020-01-23 19:51:35 +08:00
|
|
|
Register ZeroReg = Zero.getReg(0);
|
2020-01-16 20:09:48 +08:00
|
|
|
MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
|
2017-03-09 02:09:14 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
[GlobalISel] LegalizerHelper: Lower (G_FSUB X, Y) to (G_FADD X, (G_FNEG Y))
Summary: No test case as none of the in-tree targets with GlobalISel support has this condition.
Reviewers: qcolombet, aditya_nandakumar, dsanders, t.p.northover, ab
Reviewed By: qcolombet
Subscribers: dberris, rovka, kristof.beyls, llvm-commits, igorb
Differential Revision: https://reviews.llvm.org/D30786
llvm-svn: 297512
2017-03-11 05:25:09 +08:00
|
|
|
case TargetOpcode::G_FSUB: {
|
|
|
|
// Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
|
|
|
|
// First, check if G_FNEG is marked as Lower. If so, we may
|
|
|
|
// end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
|
2018-01-30 01:37:29 +08:00
|
|
|
if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
|
[GlobalISel] LegalizerHelper: Lower (G_FSUB X, Y) to (G_FADD X, (G_FNEG Y))
Summary: No test case as none of the in-tree targets with GlobalISel support has this condition.
Reviewers: qcolombet, aditya_nandakumar, dsanders, t.p.northover, ab
Reviewed By: qcolombet
Subscribers: dberris, rovka, kristof.beyls, llvm-commits, igorb
Differential Revision: https://reviews.llvm.org/D30786
llvm-svn: 297512
2017-03-11 05:25:09 +08:00
|
|
|
return UnableToLegalize;
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Res = MI.getOperand(0).getReg();
|
|
|
|
Register LHS = MI.getOperand(1).getReg();
|
|
|
|
Register RHS = MI.getOperand(2).getReg();
|
|
|
|
Register Neg = MRI.createGenericVirtualRegister(Ty);
|
2020-01-16 20:09:48 +08:00
|
|
|
MIRBuilder.buildFNeg(Neg, RHS);
|
|
|
|
MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
|
[GlobalISel] LegalizerHelper: Lower (G_FSUB X, Y) to (G_FADD X, (G_FNEG Y))
Summary: No test case as none of the in-tree targets with GlobalISel support has this condition.
Reviewers: qcolombet, aditya_nandakumar, dsanders, t.p.northover, ab
Reviewed By: qcolombet
Subscribers: dberris, rovka, kristof.beyls, llvm-commits, igorb
Differential Revision: https://reviews.llvm.org/D30786
llvm-svn: 297512
2017-03-11 05:25:09 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-09-13 08:44:35 +08:00
|
|
|
case TargetOpcode::G_FMAD:
|
|
|
|
return lowerFMad(MI);
|
2019-12-25 03:49:31 +08:00
|
|
|
case TargetOpcode::G_INTRINSIC_ROUND:
|
|
|
|
return lowerIntrinsicRound(MI);
|
2017-12-01 04:11:42 +08:00
|
|
|
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register OldValRes = MI.getOperand(0).getReg();
|
|
|
|
Register SuccessRes = MI.getOperand(1).getReg();
|
|
|
|
Register Addr = MI.getOperand(2).getReg();
|
|
|
|
Register CmpVal = MI.getOperand(3).getReg();
|
|
|
|
Register NewVal = MI.getOperand(4).getReg();
|
2017-12-01 04:11:42 +08:00
|
|
|
MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
|
|
|
|
**MI.memoperands_begin());
|
|
|
|
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
[globalisel][legalizerinfo] Introduce dedicated extending loads and add lowerings for them
Summary:
Previously, an extending load was represented as (G_*EXT (G_LOAD x)).
This had a few drawbacks:
* G_LOAD had to be legal for all sizes you could extend from, even if
registers didn't naturally hold those sizes.
* All sizes you could extend from had to be allocatable just in case the
extend went missing (e.g. by optimization).
* At minimum, G_*EXT and G_TRUNC had to be legal for these sizes. As we
improve optimization of extends and truncates, this legality requirement
would spread without considerable care w.r.t when certain combines were
permitted.
* The SelectionDAG importer required some ugly and fragile pattern
rewriting to translate patterns into this style.
This patch begins changing the representation to:
* (G_[SZ]EXTLOAD x)
* (G_LOAD x) any-extends when MMO.getSize() * 8 < ResultTy.getSizeInBits()
which resolves these issues by allowing targets to work entirely in their
native register sizes, and by having a more direct translation from
SelectionDAG patterns.
This patch introduces the new generic instructions and new variation on
G_LOAD and adds lowering for them to convert back to the existing
representations.
Depends on D45466
Reviewers: ab, aditya_nandakumar, bogner, rtereshin, volkan, rovka, aemerson, javed.absar
Reviewed By: aemerson
Subscribers: aemerson, kristof.beyls, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D45540
llvm-svn: 331115
2018-04-29 02:14:50 +08:00
|
|
|
case TargetOpcode::G_LOAD:
|
|
|
|
case TargetOpcode::G_SEXTLOAD:
|
|
|
|
case TargetOpcode::G_ZEXTLOAD: {
|
|
|
|
// Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
|
2019-06-25 00:16:12 +08:00
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
|
|
|
Register PtrReg = MI.getOperand(1).getReg();
|
[globalisel][legalizerinfo] Introduce dedicated extending loads and add lowerings for them
Summary:
Previously, an extending load was represented as (G_*EXT (G_LOAD x)).
This had a few drawbacks:
* G_LOAD had to be legal for all sizes you could extend from, even if
registers didn't naturally hold those sizes.
* All sizes you could extend from had to be allocatable just in case the
extend went missing (e.g. by optimization).
* At minimum, G_*EXT and G_TRUNC had to be legal for these sizes. As we
improve optimization of extends and truncates, this legality requirement
would spread without considerable care w.r.t when certain combines were
permitted.
* The SelectionDAG importer required some ugly and fragile pattern
rewriting to translate patterns into this style.
This patch begins changing the representation to:
* (G_[SZ]EXTLOAD x)
* (G_LOAD x) any-extends when MMO.getSize() * 8 < ResultTy.getSizeInBits()
which resolves these issues by allowing targets to work entirely in their
native register sizes, and by having a more direct translation from
SelectionDAG patterns.
This patch introduces the new generic instructions and new variation on
G_LOAD and adds lowering for them to convert back to the existing
representations.
Depends on D45466
Reviewers: ab, aditya_nandakumar, bogner, rtereshin, volkan, rovka, aemerson, javed.absar
Reviewed By: aemerson
Subscribers: aemerson, kristof.beyls, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D45540
llvm-svn: 331115
2018-04-29 02:14:50 +08:00
|
|
|
LLT DstTy = MRI.getType(DstReg);
|
|
|
|
auto &MMO = **MI.memoperands_begin();
|
|
|
|
|
2019-08-03 07:44:24 +08:00
|
|
|
if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
|
|
|
|
if (MI.getOpcode() == TargetOpcode::G_LOAD) {
|
|
|
|
// This load needs splitting into power of 2 sized loads.
|
|
|
|
if (DstTy.isVector())
|
2018-05-01 01:20:01 +08:00
|
|
|
return UnableToLegalize;
|
2019-08-03 07:44:24 +08:00
|
|
|
if (isPowerOf2_32(DstTy.getSizeInBits()))
|
|
|
|
return UnableToLegalize; // Don't know what we're being asked to do.
|
|
|
|
|
|
|
|
// Our strategy here is to generate anyextending loads for the smaller
|
|
|
|
// types up to next power-2 result type, and then combine the two larger
|
|
|
|
// result values together, before truncating back down to the non-pow-2
|
|
|
|
// type.
|
|
|
|
// E.g. v1 = i24 load =>
|
2020-02-07 06:35:15 +08:00
|
|
|
// v2 = i32 zextload (2 byte)
|
2019-08-03 07:44:24 +08:00
|
|
|
// v3 = i32 load (1 byte)
|
|
|
|
// v4 = i32 shl v3, 16
|
|
|
|
// v5 = i32 or v4, v2
|
|
|
|
// v1 = i24 trunc v5
|
|
|
|
// By doing this we generate the correct truncate which should get
|
|
|
|
// combined away as an artifact with a matching extend.
|
|
|
|
uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
|
|
|
|
uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
|
|
|
|
|
|
|
|
MachineFunction &MF = MIRBuilder.getMF();
|
|
|
|
MachineMemOperand *LargeMMO =
|
|
|
|
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
|
|
|
|
MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
|
|
|
|
&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
|
|
|
|
|
|
|
|
LLT PtrTy = MRI.getType(PtrReg);
|
|
|
|
unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
|
|
|
|
LLT AnyExtTy = LLT::scalar(AnyExtSize);
|
|
|
|
Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
|
|
|
|
Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
|
2020-02-07 06:35:15 +08:00
|
|
|
auto LargeLoad = MIRBuilder.buildLoadInstr(
|
|
|
|
TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
|
2019-08-03 07:44:24 +08:00
|
|
|
|
2020-01-27 22:35:59 +08:00
|
|
|
auto OffsetCst = MIRBuilder.buildConstant(
|
|
|
|
LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
|
[globalisel] Rename G_GEP to G_PTR_ADD
Summary:
G_GEP is rather poorly named. It's a simple pointer+scalar addition and
doesn't support any of the complexities of getelementptr. I therefore
propose that we rename it. There's a G_PTR_MASK so let's follow that
convention and go with G_PTR_ADD
Reviewers: volkan, aditya_nandakumar, bogner, rovka, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69734
2019-11-02 04:18:00 +08:00
|
|
|
Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
|
|
|
|
auto SmallPtr =
|
|
|
|
MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
|
2019-08-03 07:44:24 +08:00
|
|
|
auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
|
|
|
|
*SmallMMO);
|
|
|
|
|
|
|
|
auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
|
|
|
|
auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
|
|
|
|
auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
|
|
|
|
MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
[globalisel][legalizerinfo] Introduce dedicated extending loads and add lowerings for them
Summary:
Previously, an extending load was represented as (G_*EXT (G_LOAD x)).
This had a few drawbacks:
* G_LOAD had to be legal for all sizes you could extend from, even if
registers didn't naturally hold those sizes.
* All sizes you could extend from had to be allocatable just in case the
extend went missing (e.g. by optimization).
* At minimum, G_*EXT and G_TRUNC had to be legal for these sizes. As we
improve optimization of extends and truncates, this legality requirement
would spread without considerable care w.r.t when certain combines were
permitted.
* The SelectionDAG importer required some ugly and fragile pattern
rewriting to translate patterns into this style.
This patch begins changing the representation to:
* (G_[SZ]EXTLOAD x)
* (G_LOAD x) any-extends when MMO.getSize() * 8 < ResultTy.getSizeInBits()
which resolves these issues by allowing targets to work entirely in their
native register sizes, and by having a more direct translation from
SelectionDAG patterns.
This patch introduces the new generic instructions and new variation on
G_LOAD and adds lowering for them to convert back to the existing
representations.
Depends on D45466
Reviewers: ab, aditya_nandakumar, bogner, rtereshin, volkan, rovka, aemerson, javed.absar
Reviewed By: aemerson
Subscribers: aemerson, kristof.beyls, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D45540
llvm-svn: 331115
2018-04-29 02:14:50 +08:00
|
|
|
MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DstTy.isScalar()) {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register TmpReg =
|
2019-04-18 06:21:05 +08:00
|
|
|
MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
|
[globalisel][legalizerinfo] Introduce dedicated extending loads and add lowerings for them
Summary:
Previously, an extending load was represented as (G_*EXT (G_LOAD x)).
This had a few drawbacks:
* G_LOAD had to be legal for all sizes you could extend from, even if
registers didn't naturally hold those sizes.
* All sizes you could extend from had to be allocatable just in case the
extend went missing (e.g. by optimization).
* At minimum, G_*EXT and G_TRUNC had to be legal for these sizes. As we
improve optimization of extends and truncates, this legality requirement
would spread without considerable care w.r.t when certain combines were
permitted.
* The SelectionDAG importer required some ugly and fragile pattern
rewriting to translate patterns into this style.
This patch begins changing the representation to:
* (G_[SZ]EXTLOAD x)
* (G_LOAD x) any-extends when MMO.getSize() * 8 < ResultTy.getSizeInBits()
which resolves these issues by allowing targets to work entirely in their
native register sizes, and by having a more direct translation from
SelectionDAG patterns.
This patch introduces the new generic instructions and new variation on
G_LOAD and adds lowering for them to convert back to the existing
representations.
Depends on D45466
Reviewers: ab, aditya_nandakumar, bogner, rtereshin, volkan, rovka, aemerson, javed.absar
Reviewed By: aemerson
Subscribers: aemerson, kristof.beyls, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D45540
llvm-svn: 331115
2018-04-29 02:14:50 +08:00
|
|
|
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
|
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unexpected opcode");
|
|
|
|
case TargetOpcode::G_LOAD:
|
2019-12-05 09:01:07 +08:00
|
|
|
MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
|
[globalisel][legalizerinfo] Introduce dedicated extending loads and add lowerings for them
Summary:
Previously, an extending load was represented as (G_*EXT (G_LOAD x)).
This had a few drawbacks:
* G_LOAD had to be legal for all sizes you could extend from, even if
registers didn't naturally hold those sizes.
* All sizes you could extend from had to be allocatable just in case the
extend went missing (e.g. by optimization).
* At minimum, G_*EXT and G_TRUNC had to be legal for these sizes. As we
improve optimization of extends and truncates, this legality requirement
would spread without considerable care w.r.t when certain combines were
permitted.
* The SelectionDAG importer required some ugly and fragile pattern
rewriting to translate patterns into this style.
This patch begins changing the representation to:
* (G_[SZ]EXTLOAD x)
* (G_LOAD x) any-extends when MMO.getSize() * 8 < ResultTy.getSizeInBits()
which resolves these issues by allowing targets to work entirely in their
native register sizes, and by having a more direct translation from
SelectionDAG patterns.
This patch introduces the new generic instructions and new variation on
G_LOAD and adds lowering for them to convert back to the existing
representations.
Depends on D45466
Reviewers: ab, aditya_nandakumar, bogner, rtereshin, volkan, rovka, aemerson, javed.absar
Reviewed By: aemerson
Subscribers: aemerson, kristof.beyls, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D45540
llvm-svn: 331115
2018-04-29 02:14:50 +08:00
|
|
|
break;
|
|
|
|
case TargetOpcode::G_SEXTLOAD:
|
|
|
|
MIRBuilder.buildSExt(DstReg, TmpReg);
|
|
|
|
break;
|
|
|
|
case TargetOpcode::G_ZEXTLOAD:
|
|
|
|
MIRBuilder.buildZExt(DstReg, TmpReg);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
|
|
|
|
return UnableToLegalize;
|
|
|
|
}
|
2019-08-03 07:44:24 +08:00
|
|
|
case TargetOpcode::G_STORE: {
|
|
|
|
// Lower a non-power of 2 store into multiple pow-2 stores.
|
|
|
|
// E.g. split an i24 store into an i16 store + i8 store.
|
|
|
|
// We do this by first extending the stored value to the next largest power
|
|
|
|
// of 2 type, and then using truncating stores to store the components.
|
|
|
|
// By doing this, likewise with G_LOAD, generate an extend that can be
|
|
|
|
// artifact-combined away instead of leaving behind extracts.
|
|
|
|
Register SrcReg = MI.getOperand(0).getReg();
|
|
|
|
Register PtrReg = MI.getOperand(1).getReg();
|
|
|
|
LLT SrcTy = MRI.getType(SrcReg);
|
|
|
|
MachineMemOperand &MMO = **MI.memoperands_begin();
|
|
|
|
if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
|
|
|
|
return UnableToLegalize;
|
|
|
|
if (SrcTy.isVector())
|
|
|
|
return UnableToLegalize;
|
|
|
|
if (isPowerOf2_32(SrcTy.getSizeInBits()))
|
|
|
|
return UnableToLegalize; // Don't know what we're being asked to do.
|
|
|
|
|
|
|
|
// Extend to the next pow-2.
|
|
|
|
const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
|
|
|
|
auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
|
|
|
|
|
|
|
|
// Obtain the smaller value by shifting away the larger value.
|
|
|
|
uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
|
|
|
|
uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
|
|
|
|
auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
|
|
|
|
auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
|
|
|
|
|
[globalisel] Rename G_GEP to G_PTR_ADD
Summary:
G_GEP is rather poorly named. It's a simple pointer+scalar addition and
doesn't support any of the complexities of getelementptr. I therefore
propose that we rename it. There's a G_PTR_MASK so let's follow that
convention and go with G_PTR_ADD
Reviewers: volkan, aditya_nandakumar, bogner, rovka, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69734
2019-11-02 04:18:00 +08:00
|
|
|
// Generate the PtrAdd and truncating stores.
|
2019-08-03 07:44:24 +08:00
|
|
|
LLT PtrTy = MRI.getType(PtrReg);
|
2020-01-30 21:25:10 +08:00
|
|
|
auto OffsetCst = MIRBuilder.buildConstant(
|
|
|
|
LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
|
[globalisel] Rename G_GEP to G_PTR_ADD
Summary:
G_GEP is rather poorly named. It's a simple pointer+scalar addition and
doesn't support any of the complexities of getelementptr. I therefore
propose that we rename it. There's a G_PTR_MASK so let's follow that
convention and go with G_PTR_ADD
Reviewers: volkan, aditya_nandakumar, bogner, rovka, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69734
2019-11-02 04:18:00 +08:00
|
|
|
Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
|
|
|
|
auto SmallPtr =
|
|
|
|
MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
|
2019-08-03 07:44:24 +08:00
|
|
|
|
|
|
|
MachineFunction &MF = MIRBuilder.getMF();
|
|
|
|
MachineMemOperand *LargeMMO =
|
|
|
|
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
|
|
|
|
MachineMemOperand *SmallMMO =
|
|
|
|
MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
|
|
|
|
MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
|
|
|
|
MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2018-08-22 01:30:31 +08:00
|
|
|
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
|
|
|
|
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
|
|
|
|
case TargetOpcode::G_CTLZ:
|
|
|
|
case TargetOpcode::G_CTTZ:
|
|
|
|
case TargetOpcode::G_CTPOP:
|
|
|
|
return lowerBitCount(MI, TypeIdx, Ty);
|
2019-02-27 01:22:42 +08:00
|
|
|
case G_UADDO: {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Res = MI.getOperand(0).getReg();
|
|
|
|
Register CarryOut = MI.getOperand(1).getReg();
|
|
|
|
Register LHS = MI.getOperand(2).getReg();
|
|
|
|
Register RHS = MI.getOperand(3).getReg();
|
2019-02-27 01:22:42 +08:00
|
|
|
|
|
|
|
MIRBuilder.buildAdd(Res, LHS, RHS);
|
|
|
|
MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
|
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2018-12-17 20:31:07 +08:00
|
|
|
case G_UADDE: {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Res = MI.getOperand(0).getReg();
|
|
|
|
Register CarryOut = MI.getOperand(1).getReg();
|
|
|
|
Register LHS = MI.getOperand(2).getReg();
|
|
|
|
Register RHS = MI.getOperand(3).getReg();
|
|
|
|
Register CarryIn = MI.getOperand(4).getReg();
|
2018-12-17 20:31:07 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register TmpRes = MRI.createGenericVirtualRegister(Ty);
|
|
|
|
Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
|
2018-12-17 20:31:07 +08:00
|
|
|
|
|
|
|
MIRBuilder.buildAdd(TmpRes, LHS, RHS);
|
|
|
|
MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
|
|
|
|
MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
|
|
|
|
MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
|
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-01-28 20:10:17 +08:00
|
|
|
case G_USUBO: {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Res = MI.getOperand(0).getReg();
|
|
|
|
Register BorrowOut = MI.getOperand(1).getReg();
|
|
|
|
Register LHS = MI.getOperand(2).getReg();
|
|
|
|
Register RHS = MI.getOperand(3).getReg();
|
2019-01-28 20:10:17 +08:00
|
|
|
|
|
|
|
MIRBuilder.buildSub(Res, LHS, RHS);
|
|
|
|
MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
|
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
case G_USUBE: {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Res = MI.getOperand(0).getReg();
|
|
|
|
Register BorrowOut = MI.getOperand(1).getReg();
|
|
|
|
Register LHS = MI.getOperand(2).getReg();
|
|
|
|
Register RHS = MI.getOperand(3).getReg();
|
|
|
|
Register BorrowIn = MI.getOperand(4).getReg();
|
2019-01-28 20:10:17 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register TmpRes = MRI.createGenericVirtualRegister(Ty);
|
|
|
|
Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
|
|
|
|
Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
|
|
|
|
Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
|
2019-01-28 20:10:17 +08:00
|
|
|
|
|
|
|
MIRBuilder.buildSub(TmpRes, LHS, RHS);
|
|
|
|
MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
|
|
|
|
MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
|
|
|
|
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
|
|
|
|
MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
|
|
|
|
MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
|
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-05-18 07:05:13 +08:00
|
|
|
case G_UITOFP:
|
|
|
|
return lowerUITOFP(MI, TypeIdx, Ty);
|
|
|
|
case G_SITOFP:
|
|
|
|
return lowerSITOFP(MI, TypeIdx, Ty);
|
2019-08-30 13:44:02 +08:00
|
|
|
case G_FPTOUI:
|
|
|
|
return lowerFPTOUI(MI, TypeIdx, Ty);
|
2020-01-05 06:09:48 +08:00
|
|
|
case G_FPTOSI:
|
|
|
|
return lowerFPTOSI(MI);
|
2019-07-02 01:18:03 +08:00
|
|
|
case G_SMIN:
|
|
|
|
case G_SMAX:
|
|
|
|
case G_UMIN:
|
|
|
|
case G_UMAX:
|
|
|
|
return lowerMinMax(MI, TypeIdx, Ty);
|
2019-07-10 07:34:29 +08:00
|
|
|
case G_FCOPYSIGN:
|
|
|
|
return lowerFCopySign(MI, TypeIdx, Ty);
|
2019-07-11 00:31:19 +08:00
|
|
|
case G_FMINNUM:
|
|
|
|
case G_FMAXNUM:
|
|
|
|
return lowerFMinNumMaxNum(MI);
|
2019-08-02 03:10:05 +08:00
|
|
|
case G_UNMERGE_VALUES:
|
|
|
|
return lowerUnmergeValues(MI);
|
[globalisel] Add G_SEXT_INREG
Summary:
Targets often have instructions that can sign-extend certain cases faster
than the equivalent shift-left/arithmetic-shift-right. Such cases can be
identified by matching a shift-left/shift-right pair but there are some
issues with this in the context of combines. For example, suppose you can
sign-extend 8-bit up to 32-bit with a target extend instruction.
%1:_(s32) = G_SHL %0:_(s32), i32 24 # (I've inlined the G_CONSTANT for brevity)
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_ASHR %2:_(s32), i32 1
would reasonably combine to:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 25
which no longer matches the special case. If your shifts and extend are
equal cost, this would break even as a pair of shifts but if your shift is
more expensive than the extend then it's cheaper as:
%2:_(s32) = G_SEXT_INREG %0:_(s32), i32 8
%3:_(s32) = G_ASHR %2:_(s32), i32 1
It's possible to match the shift-pair in ISel and emit an extend and ashr.
However, this is far from the only way to break this shift pair and make
it hard to match the extends. Another example is that with the right
known-zeros, this:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_MUL %2:_(s32), i32 2
can become:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 23
All upstream targets have been configured to lower it to the current
G_SHL,G_ASHR pair but will likely want to make it legal in some cases to
handle their faster cases.
To follow-up: Provide a way to legalize based on the constant. At the
moment, I'm thinking that the best way to achieve this is to provide the
MI in LegalityQuery but that opens the door to breaking core principles
of the legalizer (legality is not context sensitive). That said, it's
worth noting that looking at other instructions and acting on that
information doesn't violate this principle in itself. It's only a
violation if, at the end of legalization, a pass that checks legality
without being able to see the context would say an instruction might not be
legal. That's a fairly subtle distinction so to give a concrete example,
saying %2 in:
%1 = G_CONSTANT 16
%2 = G_SEXT_INREG %0, %1
is legal is in violation of that principle if the legality of %2 depends
on %1 being constant and/or being 16. However, legalizing to either:
%2 = G_SEXT_INREG %0, 16
or:
%1 = G_CONSTANT 16
%2:_(s32) = G_SHL %0, %1
%3:_(s32) = G_ASHR %2, %1
depending on whether %1 is constant and 16 does not violate that principle
since both outputs are genuinely legal.
Reviewers: bogner, aditya_nandakumar, volkan, aemerson, paquette, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, kristof.beyls, javed.absar, hiraditya, jrtc27, atanasyan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61289
llvm-svn: 368487
2019-08-10 05:11:20 +08:00
|
|
|
case TargetOpcode::G_SEXT_INREG: {
|
|
|
|
assert(MI.getOperand(2).isImm() && "Expected immediate");
|
|
|
|
int64_t SizeInBits = MI.getOperand(2).getImm();
|
|
|
|
|
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
|
|
|
Register SrcReg = MI.getOperand(1).getReg();
|
|
|
|
LLT DstTy = MRI.getType(DstReg);
|
|
|
|
Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
|
|
|
|
|
|
|
|
auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
|
|
|
|
MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
|
[globalisel] Add G_SEXT_INREG
Summary:
Targets often have instructions that can sign-extend certain cases faster
than the equivalent shift-left/arithmetic-shift-right. Such cases can be
identified by matching a shift-left/shift-right pair but there are some
issues with this in the context of combines. For example, suppose you can
sign-extend 8-bit up to 32-bit with a target extend instruction.
%1:_(s32) = G_SHL %0:_(s32), i32 24 # (I've inlined the G_CONSTANT for brevity)
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_ASHR %2:_(s32), i32 1
would reasonably combine to:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 25
which no longer matches the special case. If your shifts and extend are
equal cost, this would break even as a pair of shifts but if your shift is
more expensive than the extend then it's cheaper as:
%2:_(s32) = G_SEXT_INREG %0:_(s32), i32 8
%3:_(s32) = G_ASHR %2:_(s32), i32 1
It's possible to match the shift-pair in ISel and emit an extend and ashr.
However, this is far from the only way to break this shift pair and make
it hard to match the extends. Another example is that with the right
known-zeros, this:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_MUL %2:_(s32), i32 2
can become:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 23
All upstream targets have been configured to lower it to the current
G_SHL,G_ASHR pair but will likely want to make it legal in some cases to
handle their faster cases.
To follow-up: Provide a way to legalize based on the constant. At the
moment, I'm thinking that the best way to achieve this is to provide the
MI in LegalityQuery but that opens the door to breaking core principles
of the legalizer (legality is not context sensitive). That said, it's
worth noting that looking at other instructions and acting on that
information doesn't violate this principle in itself. It's only a
violation if, at the end of legalization, a pass that checks legality
without being able to see the context would say an instruction might not be
legal. That's a fairly subtle distinction so to give a concrete example,
saying %2 in:
%1 = G_CONSTANT 16
%2 = G_SEXT_INREG %0, %1
is legal is in violation of that principle if the legality of %2 depends
on %1 being constant and/or being 16. However, legalizing to either:
%2 = G_SEXT_INREG %0, 16
or:
%1 = G_CONSTANT 16
%2:_(s32) = G_SHL %0, %1
%3:_(s32) = G_ASHR %2, %1
depending on whether %1 is constant and 16 does not violate that principle
since both outputs are genuinely legal.
Reviewers: bogner, aditya_nandakumar, volkan, aemerson, paquette, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, kristof.beyls, javed.absar, hiraditya, jrtc27, atanasyan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61289
llvm-svn: 368487
2019-08-10 05:11:20 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2019-08-14 00:09:07 +08:00
|
|
|
case G_SHUFFLE_VECTOR:
|
|
|
|
return lowerShuffleVector(MI);
|
2019-08-28 03:54:27 +08:00
|
|
|
case G_DYN_STACKALLOC:
|
|
|
|
return lowerDynStackAlloc(MI);
|
2019-10-06 09:37:35 +08:00
|
|
|
case G_EXTRACT:
|
|
|
|
return lowerExtract(MI);
|
2019-10-08 03:13:27 +08:00
|
|
|
case G_INSERT:
|
|
|
|
return lowerInsert(MI);
|
2019-12-30 18:13:22 +08:00
|
|
|
case G_BSWAP:
|
|
|
|
return lowerBswap(MI);
|
2019-12-31 01:06:29 +08:00
|
|
|
case G_BITREVERSE:
|
|
|
|
return lowerBitreverse(MI);
|
2019-12-28 08:26:51 +08:00
|
|
|
case G_READ_REGISTER:
|
2020-01-13 02:29:44 +08:00
|
|
|
case G_WRITE_REGISTER:
|
|
|
|
return lowerReadWriteRegister(MI);
|
2016-08-27 01:46:13 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-01-28 05:53:09 +08:00
|
|
|
// Rebuild the result of \p MI (operand 0) out of undef pieces of type
// \p NarrowTy. One undef value is created per NarrowTy-sized slice of the
// destination; the slices are then recombined with buildConcatVectors when
// NarrowTy is a vector and with buildBuildVector otherwise, and \p MI is
// erased. \p TypeIdx is not consulted.
//
// Returns UnableToLegalize when the destination size is not a whole multiple
// of the NarrowTy size, Legalized otherwise.
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  const Register WideDst = MI.getOperand(0).getReg();
  const unsigned WideBits = MRI.getType(WideDst).getSizeInBits();
  const unsigned PieceBits = NarrowTy.getSizeInBits();

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (WideBits % PieceBits != 0)
    return UnableToLegalize;

  // Emit one undef per piece, in order.
  SmallVector<Register, 2> Pieces;
  for (unsigned Idx = 0, NumPieces = WideBits / PieceBits; Idx != NumPieces;
       ++Idx) {
    Register Piece = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUndef(Piece);
    Pieces.push_back(Piece);
  }

  // Vector pieces are concatenated; scalar pieces form the vector directly.
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(WideDst, Pieces);
  else
    MIRBuilder.buildBuildVector(WideDst, Pieces);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
|
|
|
// Split \p MI into several instructions of the same opcode (and the same MI
// flags) that each work on a \p NarrowTy sized piece of the result.
//
// Every source operand is split with the same NarrowTy as the result, so this
// presumably expects all operands to share the result type -- confirm against
// the callers. Two shapes are handled:
//  * The result divides evenly into NarrowTy pieces: the sources (one to three
//    of them) are split with extractParts and the partial results are
//    recombined with buildConcatVectors / buildBuildVector.
//  * Exactly one element is left over after the NarrowTy pieces: each piece is
//    extracted, operated on and inserted into an accumulator that starts out
//    undef, and the trailing element is handled the same way as a scalar.
// Any other leftover size yields UnableToLegalize. \p TypeIdx is not
// consulted.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  const unsigned Opcode = MI.getOpcode();
  const unsigned MIFlags = MI.getFlags();
  const unsigned NumSrcs = MI.getNumOperands() - 1;
  const Register WideDst = MI.getOperand(0).getReg();
  const LLT WideTy = MRI.getType(WideDst);
  const LLT ScalarTy = WideTy.getElementType();

  const unsigned PieceBits = NarrowTy.getSizeInBits();
  const unsigned WideBits = WideTy.getSizeInBits();
  const unsigned ScalarBits = ScalarTy.getSizeInBits();
  const int NumPieces = WideBits / PieceBits;
  const unsigned CoveredBits = PieceBits * NumPieces;
  const bool HasLeftover = CoveredBits != WideBits;

  // Check if we have any leftovers. If we do, then only handle the case where
  // the leftover is one element.
  if (HasLeftover && CoveredBits + ScalarBits != WideBits)
    return UnableToLegalize;

  if (HasLeftover) {
    // Extracts a PieceTy-typed slice at bit Offset from every source operand,
    // applies the original operation to the slices and returns the register
    // holding the partial result.
    auto BuildSlicedOp = [&](LLT PieceTy, unsigned Offset) -> Register {
      SmallVector<SrcOp, 4> Slices;
      for (unsigned OpIdx = 1, OpEnd = MI.getNumOperands(); OpIdx != OpEnd;
           ++OpIdx) {
        Register Slice = MRI.createGenericVirtualRegister(PieceTy);
        MIRBuilder.buildExtract(Slice, MI.getOperand(OpIdx), Offset);
        Slices.push_back(Slice);
      }

      Register SliceResult = MRI.createGenericVirtualRegister(PieceTy);
      MIRBuilder.buildInstr(Opcode, {SliceResult}, Slices, MIFlags);
      return SliceResult;
    };

    // The accumulator starts undef and receives one partial result at a time.
    Register Accum = MRI.createGenericVirtualRegister(WideTy);
    MIRBuilder.buildUndef(Accum);

    // Handle the pieces which evenly divide into the requested type with
    // extract/op/insert sequence.
    for (unsigned BitOffset = 0; BitOffset < CoveredBits;
         BitOffset += PieceBits) {
      Register PieceResult = BuildSlicedOp(NarrowTy, BitOffset);

      Register NextAccum = MRI.createGenericVirtualRegister(WideTy);
      MIRBuilder.buildInsert(NextAccum, Accum, PieceResult, BitOffset);
      Accum = NextAccum;
    }

    // Handle the remaining element sized leftover piece; its insert defines
    // the original destination.
    Register TailResult = BuildSlicedOp(ScalarTy, CoveredBits);
    MIRBuilder.buildInsert(WideDst, Accum, TailResult, CoveredBits);
    MI.eraseFromParent();

    return Legalized;
  }

  // Even breakdown: split up to three source operands up front.
  SmallVector<Register, 2> PieceDsts, Src0Pieces, Src1Pieces, Src2Pieces;

  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumPieces, Src0Pieces);

  if (NumSrcs >= 2)
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumPieces, Src1Pieces);

  if (NumSrcs >= 3)
    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumPieces, Src2Pieces);

  for (int Piece = 0; Piece < NumPieces; ++Piece) {
    Register PieceDst = MRI.createGenericVirtualRegister(NarrowTy);

    switch (NumSrcs) {
    case 1:
      MIRBuilder.buildInstr(Opcode, {PieceDst}, {Src0Pieces[Piece]}, MIFlags);
      break;
    case 2:
      MIRBuilder.buildInstr(Opcode, {PieceDst},
                            {Src0Pieces[Piece], Src1Pieces[Piece]}, MIFlags);
      break;
    case 3:
      MIRBuilder.buildInstr(
          Opcode, {PieceDst},
          {Src0Pieces[Piece], Src1Pieces[Piece], Src2Pieces[Piece]}, MIFlags);
      break;
    default:
      // Other operand counts build nothing; the register is still recorded.
      break;
    }

    PieceDsts.push_back(PieceDst);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(WideDst, PieceDsts);
  else
    MIRBuilder.buildBuildVector(WideDst, PieceDsts);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-02-08 01:38:00 +08:00
|
|
|
// Handle splitting vector operations which need to have the same number of
|
|
|
|
// elements in each type index, but each type index may have a different element
|
|
|
|
// type.
|
|
|
|
//
|
|
|
|
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
|
|
|
|
// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
|
|
|
|
// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
|
|
|
|
//
|
|
|
|
// Also handles some irregular breakdown cases, e.g.
|
|
|
|
// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
|
|
|
|
// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
|
|
|
|
// s64 = G_SHL s64, s32
|
|
|
|
LegalizerHelper::LegalizeResult
|
|
|
|
LegalizerHelper::fewerElementsVectorMultiEltType(
|
|
|
|
MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
|
|
|
|
if (TypeIdx != 0)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
const LLT NarrowTy0 = NarrowTyArg;
|
|
|
|
const unsigned NewNumElts =
|
|
|
|
NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
const Register DstReg = MI.getOperand(0).getReg();
|
2019-02-08 01:38:00 +08:00
|
|
|
LLT DstTy = MRI.getType(DstReg);
|
|
|
|
LLT LeftoverTy0;
|
|
|
|
|
|
|
|
// All of the operands need to have the same number of elements, so if we can
|
|
|
|
// determine a type breakdown for the result type, we can for all of the
|
|
|
|
// source types.
|
2019-07-12 22:58:15 +08:00
|
|
|
int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
|
2019-02-08 01:38:00 +08:00
|
|
|
if (NumParts < 0)
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
SmallVector<MachineInstrBuilder, 4> NewInsts;
|
|
|
|
|
2019-06-24 23:50:29 +08:00
|
|
|
SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
|
|
|
|
SmallVector<Register, 4> PartRegs, LeftoverRegs;
|
2019-02-08 01:38:00 +08:00
|
|
|
|
|
|
|
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
|
|
|
|
LLT LeftoverTy;
|
2019-06-25 00:16:12 +08:00
|
|
|
Register SrcReg = MI.getOperand(I).getReg();
|
2019-02-08 01:38:00 +08:00
|
|
|
LLT SrcTyI = MRI.getType(SrcReg);
|
|
|
|
LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
|
|
|
|
LLT LeftoverTyI;
|
|
|
|
|
|
|
|
// Split this operand into the requested typed registers, and any leftover
|
|
|
|
// required to reproduce the original type.
|
|
|
|
if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
|
|
|
|
LeftoverRegs))
|
|
|
|
return UnableToLegalize;
|
|
|
|
|
|
|
|
if (I == 1) {
|
|
|
|
// For the first operand, create an instruction for each part and setup
|
|
|
|
// the result.
|
2019-06-25 00:16:12 +08:00
|
|
|
for (Register PartReg : PartRegs) {
|
|
|
|
Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
|
2019-02-08 01:38:00 +08:00
|
|
|
NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
|
|
|
|
.addDef(PartDstReg)
|
|
|
|
.addUse(PartReg));
|
|
|
|
DstRegs.push_back(PartDstReg);
|
|
|
|
}
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
for (Register LeftoverReg : LeftoverRegs) {
|
|
|
|
Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
|
2019-02-08 01:38:00 +08:00
|
|
|
NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
|
|
|
|
.addDef(PartDstReg)
|
|
|
|
.addUse(LeftoverReg));
|
|
|
|
LeftoverDstRegs.push_back(PartDstReg);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
|
|
|
|
|
|
|
|
// Add the newly created operand splits to the existing instructions. The
|
|
|
|
// odd-sized pieces are ordered after the requested NarrowTyArg sized
|
|
|
|
// pieces.
|
|
|
|
unsigned InstCount = 0;
|
|
|
|
for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
|
|
|
|
NewInsts[InstCount++].addUse(PartRegs[J]);
|
|
|
|
for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
|
|
|
|
NewInsts[InstCount++].addUse(LeftoverRegs[J]);
|
|
|
|
}
|
|
|
|
|
|
|
|
PartRegs.clear();
|
|
|
|
LeftoverRegs.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert the newly built operations and rebuild the result register.
|
|
|
|
for (auto &MIB : NewInsts)
|
|
|
|
MIRBuilder.insertInstr(MIB);
|
|
|
|
|
|
|
|
insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
|
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
|
2019-01-25 10:36:32 +08:00
|
|
|
// Split a single-source conversion (operand 0 = result, operand 1 = source)
// into several conversions of the same opcode whose results have type
// \p NarrowTy. The source is split into pieces that have the same number of
// elements as NarrowTy but keep the source's element size; with a scalar
// NarrowTy the operation is fully scalarized. The partial results are
// recombined with buildConcatVectors / buildBuildVector and the MI flags of
// \p MI are copied onto every new instruction.
//
// Only type index 0 is supported, and a vector NarrowTy must evenly divide the
// result's element count; otherwise UnableToLegalize is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  LLT NarrowTy0 = NarrowTy; // Type of each result piece.
  LLT NarrowTy1;            // Type of each source piece.
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    const unsigned NarrowElts = NarrowTy.getNumElements();

    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowElts;
    if (NumParts * NarrowElts != DstTy.getNumElements())
      return UnableToLegalize;

    // A source piece has to carry exactly as many elements as a result piece.
    // The number of pieces is unrelated to that count, so it must not be used
    // as the element count here.
    NarrowTy1 = LLT::vector(NarrowElts, SrcTy.getElementType().getSizeInBits());
  } else {
    // Scalarize: one piece per element.
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<Register, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst =
        MIRBuilder.buildInstr(MI.getOpcode(), {PartDstReg}, {SrcRegs[I]});

    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(PartDstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
|
|
|
// Split a vector compare (operand 0 = result, operand 1 = predicate,
// operands 2 and 3 = the compared values) into compares on smaller pieces.
//
// \p NarrowTy describes the piece type for \p TypeIdx: the result when
// TypeIdx is 0, the compared values otherwise. The piece type of the other
// type index is derived so both sides keep the same number of elements per
// piece; a scalar NarrowTy scalarizes the compare. G_ICMP pieces are built
// with buildICmp; any other opcode is rebuilt with buildFCmp and receives the
// MI flags of \p MI. The partial results are recombined with
// buildConcatVectors / buildBuildVector.
//
// Returns UnableToLegalize when vector pieces do not evenly cover the result.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  // Elements per piece; a scalar NarrowTy means one element per piece.
  const bool SplitToVectors = NarrowTy.isVector();
  const unsigned NewElts = SplitToVectors ? NarrowTy.getNumElements() : 1;

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1; // Piece types of the result / compared values.

  if (TypeIdx == 0) {
    NumParts = DstTy.getNumElements() / NewElts;
    NarrowTy0 = NarrowTy;
    NarrowTy1 = SplitToVectors
                    ? LLT::vector(NewElts, SrcTy.getScalarSizeInBits())
                    : SrcTy.getElementType();
  } else {
    // Mirror of the branch above. NarrowTy.getNumElements() is only valid for
    // vector types, so a scalar NarrowTy must take the element-type path
    // instead of querying its element count.
    NumParts = SrcTy.getNumElements() / NewElts;
    NarrowTy0 = SplitToVectors
                    ? LLT::vector(NewElts, DstTy.getScalarSizeInBits())
                    : DstTy.getElementType();
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(PartDstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, PartDstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, PartDstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-01-30 12:19:31 +08:00
|
|
|
// Split a select (operand 0 = result, operand 1 = condition, operands 2 and 3
// = the selected values) into selects on smaller pieces.
//
// For type index 0, \p NarrowTy is the piece type of the result and selected
// values; a vector condition is split into matching pieces (down to its
// element type when there is one condition element per piece), while a scalar
// condition is reused unchanged for every piece. For any other type index,
// \p NarrowTy is the piece type of the (vector) condition, and only a scalar
// NarrowTy is handled, which scalarizes the select.
//
// Returns UnableToLegalize when the result size is not a multiple of the
// NarrowTy size (type index 0), or when a vector NarrowTy is requested for
// the condition.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1; // Piece types of the values / the condition.

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);

  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned Size = DstTy.getSizeInBits();
    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    // TODO: Handle splitting the condition into vector pieces, including
    // uneven breakdowns. No vector NarrowTy is supported here yet, so there
    // is nothing to gain from checking the breakdown first.
    if (NarrowTy.isVector())
      return UnableToLegalize;

    NumParts = CondTy.getNumElements();
    NarrowTy0 = DstTy.getElementType();
    NarrowTy1 = NarrowTy;
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    // A scalar condition applies to every piece as-is.
    MIRBuilder.buildSelect(PartDstReg,
                           CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(PartDstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-02-28 08:16:32 +08:00
|
|
|
// Split a phi into one G_PHI per \p NarrowTy piece of its result, plus one
// per leftover piece when the breakdown is uneven.
//
// The new phis are built at the current insertion point. The code that
// reassembles the original result from the pieces is placed at the first
// non-phi position of the phi's block, and the code that splits each incoming
// value is placed in front of the first terminator of the corresponding
// predecessor block. \p TypeIdx is not consulted.
//
// Returns UnableToLegalize when getNarrowTypeBreakDown or extractParts cannot
// break the type down.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register PhiDst = MI.getOperand(0).getReg();
  LLT WidePhiTy = MRI.getType(PhiDst);
  LLT RemainderTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  const auto Breakdown =
      getNarrowTypeBreakDown(WidePhiTy, NarrowTy, RemainderTy);
  const int NumPieces = Breakdown.first;
  const int NumRemainders = Breakdown.second;
  if (NumPieces < 0)
    return UnableToLegalize;

  const int NumNewPhis = NumPieces + NumRemainders;

  SmallVector<Register, 4> PieceDsts, RemainderDsts;
  SmallVector<MachineInstrBuilder, 4> NewPhis;

  // Insert the new phis in the result block first.
  for (int Idx = 0; Idx != NumNewPhis; ++Idx) {
    const bool IsRegularPiece = Idx < NumPieces;
    LLT PieceTy = IsRegularPiece ? NarrowTy : RemainderTy;
    Register PieceDst = MRI.createGenericVirtualRegister(PieceTy);
    NewPhis.push_back(
        MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(PieceDst));

    SmallVector<Register, 4> &Bucket =
        IsRegularPiece ? PieceDsts : RemainderDsts;
    Bucket.push_back(PieceDst);
  }

  // Reassemble the original value after the phis of the block.
  MachineBasicBlock *PhiBlock = MI.getParent();
  MIRBuilder.setInsertPt(*PhiBlock, PhiBlock->getFirstNonPHI());
  insertParts(PhiDst, WidePhiTy, NarrowTy, PieceDsts, RemainderTy,
              RemainderDsts);

  // Insert code to extract the incoming values in each predecessor block.
  // Phi operands come in (value, block) pairs, hence the stride of two.
  for (unsigned OpIdx = 1, OpEnd = MI.getNumOperands(); OpIdx != OpEnd;
       OpIdx += 2) {
    Register IncomingReg = MI.getOperand(OpIdx).getReg();
    MachineBasicBlock &PredBlock = *MI.getOperand(OpIdx + 1).getMBB();
    MIRBuilder.setInsertPt(PredBlock, PredBlock.getFirstTerminator());

    SmallVector<Register, 4> IncomingPieces, IncomingRemainders;
    LLT IgnoredTy;
    if (!extractParts(IncomingReg, WidePhiTy, NarrowTy, IgnoredTy,
                      IncomingPieces, IncomingRemainders))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int Idx = 0; Idx != NumNewPhis; ++Idx) {
      const Register IncomingPiece = Idx < NumPieces
                                         ? IncomingPieces[Idx]
                                         : IncomingRemainders[Idx - NumPieces];
      NewPhis[Idx].addUse(IncomingPiece).addMBB(&PredBlock);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-08-14 00:26:28 +08:00
|
|
|
/// Break up a G_UNMERGE_VALUES whose source type (type index 1) has to be
/// reduced towards \p NarrowTy.
///
/// The source register is first unmerged into pieces of the GCD type of the
/// source type and \p NarrowTy. Every piece is then unmerged again into its
/// share of the original result registers, and \p MI is erased.
///
/// \returns UnableToLegalize when \p TypeIdx is not 1, or when the result type
/// already equals \p NarrowTy or the GCD type; Legalized otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  // Only the source operand's type index is handled here.
  if (TypeIdx != 1)
    return UnableToLegalize;

  // Every operand but the last is a result; the last operand is the source.
  const int ResultCount = MI.getNumOperands() - 1;
  const Register Source = MI.getOperand(ResultCount).getReg();
  const LLT ResultTy = MRI.getType(MI.getOperand(0).getReg());

  // TODO: Create sequence of extracts.
  if (ResultTy == NarrowTy)
    return UnableToLegalize;

  const LLT PieceTy = getGCDType(MRI.getType(Source), NarrowTy);

  // This would just be a copy of the same unmerge.
  // TODO: Create extracts, pad with undef and create intermediate merges.
  if (ResultTy == PieceTy)
    return UnableToLegalize;

  auto TopUnmerge = MIRBuilder.buildUnmerge(PieceTy, Source);
  const int PieceCount = TopUnmerge->getNumOperands() - 1;
  const int ResultsPerPiece = ResultCount / PieceCount;

  // Hand the original results out to the per-piece unmerges in operand order.
  int NextResult = 0;
  for (int Piece = 0; Piece < PieceCount; ++Piece) {
    auto SubUnmerge = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int K = 0; K < ResultsPerPiece; ++K, ++NextResult)
      SubUnmerge.addDef(MI.getOperand(NextResult).getReg());
    SubUnmerge.addUse(TopUnmerge.getReg(Piece));
  }

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-10-10 06:44:43 +08:00
|
|
|
/// Split a G_BUILD_VECTOR into several \p NarrowTy sized G_BUILD_VECTORs that
/// are concatenated back together.
///
/// When the element count of the result is not a multiple of the element
/// count of \p NarrowTy, the last piece is padded with an undef element and
/// the result is extracted from the (wider) concatenation:
///
///   %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
///     -> <2 x s16>
///
///   %4:_(s16) = G_IMPLICIT_DEF
///   %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
///   %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
///   %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
///   %3:_(<3 x s16>) = G_EXTRACT %7, 0
///
/// \p TypeIdx must be 0 (asserted). \p MI is erased. Always returns Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
                                                unsigned TypeIdx,
                                                LLT NarrowTy) {
  assert(TypeIdx == 0 && "not a vector type index");

  const Register Result = MI.getOperand(0).getReg();
  const LLT ResultTy = MRI.getType(Result);
  const LLT EltTy = ResultTy.getElementType();

  const int TotalElts = ResultTy.getNumElements();
  const int EltsPerPiece = NarrowTy.getNumElements();
  // Number of NarrowTy pieces needed to cover every element (rounded up).
  const int PieceCount = (TotalElts + EltsPerPiece - 1) / EltsPerPiece;
  const LLT PaddedTy = LLT::vector(EltsPerPiece * PieceCount, EltTy);
  const bool NeedsPadding = PaddedTy != ResultTy;

  // A single undef element is shared by all padding slots.
  Register Padding;
  if (NeedsPadding)
    Padding = MIRBuilder.buildUndef(EltTy).getReg(0);

  SmallVector<Register, 8> Pieces;
  SmallVector<Register, 8> PieceElts;
  for (int Piece = 0; Piece < PieceCount; ++Piece) {
    PieceElts.clear();

    const int FirstElt = Piece * EltsPerPiece;
    for (int Elt = FirstElt, End = FirstElt + EltsPerPiece; Elt < End; ++Elt) {
      Register EltReg = Padding;
      // Operand 0 is the def, so source element N lives in operand N + 1.
      if (Elt < TotalElts)
        EltReg = MI.getOperand(Elt + 1).getReg();
      PieceElts.push_back(EltReg);
    }

    auto PieceVec = MIRBuilder.buildBuildVector(NarrowTy, PieceElts);
    Pieces.push_back(PieceVec.getReg(0));
  }

  if (NeedsPadding) {
    // Build the wider vector and take the original-sized prefix out of it.
    auto Padded = MIRBuilder.buildConcatVectors(PaddedTy, Pieces);
    MIRBuilder.buildExtract(Result, Padded, 0);
  } else {
    MIRBuilder.buildConcatVectors(Result, Pieces);
  }

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2016-10-15 06:18:18 +08:00
|
|
|
/// Split a G_LOAD or G_STORE of a wide value into a sequence of \p NarrowTy
/// sized memory accesses, followed by accesses of a leftover type when the
/// value does not divide evenly.
///
/// For a load the pieces are loaded into fresh registers and recombined into
/// the original result with insertParts. For a store the value is first split
/// with extractParts and each part is stored separately. Each new access gets
/// a memory operand derived from the original one at the piece's byte offset.
///
/// \returns UnableToLegalize when \p TypeIdx is not 0, when the access is
/// atomic, or when no type breakdown could be determined; Legalized otherwise
/// (after erasing \p MI).
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MemOp = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  const bool IsAtomic =
      MemOp->getOrdering() != AtomicOrdering::NotAtomic ||
      MemOp->getFailureOrdering() != AtomicOrdering::NotAtomic;
  if (IsAtomic)
    return UnableToLegalize;

  const bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  const Register Value = MI.getOperand(0).getReg();
  const Register Pointer = MI.getOperand(1).getReg();
  const LLT ValueTy = MRI.getType(Value);

  // -1 marks "no breakdown found".
  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> MainRegs, LeftoverRegs;
  if (!IsLoad) {
    // A store needs the value split up front; the part counts follow from the
    // registers that were produced.
    if (extractParts(Value, ValueTy, NarrowTy, LeftoverTy, MainRegs,
                     LeftoverRegs)) {
      NumParts = MainRegs.size();
      NumLeftover = LeftoverRegs.size();
    }
  } else {
    // A load only needs the type breakdown; registers are created as the
    // narrow loads are emitted.
    std::tie(NumParts, NumLeftover) =
        getNarrowTypeBreakDown(ValueTy, NarrowTy, LeftoverTy);
  }

  if (NumParts == -1)
    return UnableToLegalize;

  const LLT OffsetTy = LLT::scalar(MRI.getType(Pointer).getScalarSizeInBits());
  const unsigned TotalBits = ValueTy.getSizeInBits();

  // Emit PartTy sized accesses starting at bit offset Offset. For a load the
  // newly created registers are appended to ValRegs; for a store ValRegs
  // supplies the registers to write, each expected to be of type PartTy.
  // Returns the bit offset following the last piece handled.
  auto EmitPieces = [this, MemOp, IsLoad, Pointer, OffsetTy, TotalBits,
                     NumParts](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                               unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    const unsigned PartBits = PartTy.getSizeInBits();
    const unsigned PartBytes = PartBits / 8;
    const unsigned MaxPieces = NumParts;

    unsigned Idx = 0;
    while (Idx != MaxPieces && Offset < TotalBits) {
      const unsigned ByteOffset = Offset / 8;

      Register PieceAddr;
      MIRBuilder.materializePtrAdd(PieceAddr, Pointer, OffsetTy, ByteOffset);

      MachineMemOperand *PieceMemOp =
          MF.getMachineMemOperand(MemOp, ByteOffset, PartBytes);

      if (IsLoad) {
        Register Loaded = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Loaded);
        MIRBuilder.buildLoad(Loaded, PieceAddr, *PieceMemOp);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], PieceAddr, *PieceMemOp);
      }

      Offset += PartBits;
      ++Idx;
    }

    return Offset;
  };

  const unsigned HandledOffset = EmitPieces(NarrowTy, MainRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    EmitPieces(LeftoverTy, LeftoverRegs, HandledOffset);

  // Reassemble the loaded pieces into the original result register.
  if (IsLoad)
    insertParts(Value, ValueTy, NarrowTy, MainRegs, LeftoverTy, LeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}
|
2019-01-25 10:59:34 +08:00
|
|
|
|
2020-01-12 08:05:06 +08:00
|
|
|
/// Split a G_SEXT_INREG into \p NarrowTy sized G_SEXT_INREG operations.
///
/// The source is broken into pieces with extractGCDType and
/// buildLCMMergePieces, a G_SEXT_INREG with the original immediate is applied
/// to each resulting piece, and the pieces are recombined into the original
/// result with buildWidenedRemergeToDst. \p TypeIdx is not consulted.
/// \p MI is erased. Always returns Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
                                              LLT NarrowTy) {
  const Register Result = MI.getOperand(0).getReg();
  const Register Input = MI.getOperand(1).getReg();
  const int64_t ExtBits = MI.getOperand(2).getImm();
  const LLT ResultTy = MRI.getType(Result);

  SmallVector<Register, 8> Pieces;
  const LLT GCDTy = extractGCDType(Pieces, ResultTy, NarrowTy, Input);
  const LLT LCMTy = buildLCMMergePieces(ResultTy, NarrowTy, GCDTy, Pieces);

  // Replace every piece, in place, by its sign-extended-in-register form.
  for (unsigned Idx = 0, End = Pieces.size(); Idx != End; ++Idx) {
    auto Extended = MIRBuilder.buildSExtInReg(NarrowTy, Pieces[Idx], ExtBits);
    Pieces[Idx] = Extended.getReg(0);
  }

  buildWidenedRemergeToDst(Result, LCMTy, Pieces);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-01-28 05:53:09 +08:00
|
|
|
/// Entry point for the "fewer elements" legalization action.
///
/// Positions the builder at \p MI and forwards to the opcode-specific
/// splitting routine. Opcodes without a routine yield UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);

  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);

  // Integer arithmetic and bitwise operations.
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  // Floating-point arithmetic and math operations.
  case G_FADD:
  case G_FSUB:
  case G_FMUL:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FSQRT:
  case G_FSIN:
  case G_FCOS:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_FNEARBYINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);

  // Shifts, bit counting and copysign.
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);

  // Conversions.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);

  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);

  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);

  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);

  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);

  case G_BUILD_VECTOR:
    return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);

  // Memory accesses share the generic width reduction.
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);

  case G_SEXT_INREG:
    return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);

  default:
    return UnableToLegalize;
  }
}
|
2018-08-22 01:30:31 +08:00
|
|
|
|
2019-02-08 03:37:44 +08:00
|
|
|
/// Expand a scalar shift by the known constant \p Amt into operations on the
/// two \p HalfTy halves of the shifted value.
///
/// Operand 1 of \p MI is unmerged into a low and a high half. Replacement
/// halves are then computed depending on the opcode and on how \p Amt compares
/// with the half width and the full width, and merged into operand 0 before
/// \p MI is erased. Any opcode other than G_SHL or G_LSHR is expanded as an
/// arithmetic shift right. Shift-amount constants are created with type
/// \p AmtTy. Always returns Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {
  Register LowIn = MRI.createGenericVirtualRegister(HalfTy);
  Register HighIn = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({LowIn, HighIn}, MI.getOperand(1));

  // Shifting by zero simply reassembles the input.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0), {LowIn, HighIn});
    MI.eraseFromParent();
    return Legalized;
  }

  const unsigned HalfBits = HalfTy.getSizeInBits();
  const unsigned FullBits = 2 * HalfBits;

  // Classification of the shift amount; the branches below test these in the
  // order BeyondFull, BeyondHalf, ExactlyHalf, and otherwise "within a half".
  const bool BeyondFull = Amt.ugt(FullBits);
  const bool BeyondHalf = Amt.ugt(HalfBits);
  const bool ExactlyHalf = Amt == HalfBits;

  SrcOp LowOut(Register(0)), HighOut(Register(0));

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
    if (BeyondFull) {
      // Both halves become the same zero constant.
      HighOut = MIRBuilder.buildConstant(HalfTy, 0);
      LowOut = HighOut;
    } else if (BeyondHalf) {
      // The low half is zero; the high half comes from the low input.
      LowOut = MIRBuilder.buildConstant(HalfTy, 0);
      auto Excess = MIRBuilder.buildConstant(AmtTy, Amt - HalfBits);
      HighOut = MIRBuilder.buildShl(HalfTy, LowIn, Excess);
    } else if (ExactlyHalf) {
      LowOut = MIRBuilder.buildConstant(HalfTy, 0);
      HighOut = LowIn;
    } else {
      auto LowAmt = MIRBuilder.buildConstant(AmtTy, Amt);
      LowOut = MIRBuilder.buildShl(HalfTy, LowIn, LowAmt);

      auto HighAmt = MIRBuilder.buildConstant(AmtTy, Amt);
      auto HighKept = MIRBuilder.buildShl(HalfTy, HighIn, HighAmt);
      // Bits of the low input that cross over into the high half.
      auto Lack = MIRBuilder.buildConstant(AmtTy, -Amt + HalfBits);
      auto Carried = MIRBuilder.buildLShr(HalfTy, LowIn, Lack);
      HighOut = MIRBuilder.buildOr(HalfTy, HighKept, Carried);
    }
    break;

  case TargetOpcode::G_LSHR:
    if (BeyondFull) {
      // Both halves become the same zero constant.
      HighOut = MIRBuilder.buildConstant(HalfTy, 0);
      LowOut = HighOut;
    } else if (BeyondHalf) {
      // The low half comes from the high input; the high half is zero.
      auto Excess = MIRBuilder.buildConstant(AmtTy, Amt - HalfBits);
      LowOut = MIRBuilder.buildLShr(HalfTy, HighIn, Excess);
      HighOut = MIRBuilder.buildConstant(HalfTy, 0);
    } else if (ExactlyHalf) {
      LowOut = HighIn;
      HighOut = MIRBuilder.buildConstant(HalfTy, 0);
    } else {
      auto ShAmt = MIRBuilder.buildConstant(AmtTy, Amt);

      auto LowKept = MIRBuilder.buildLShr(HalfTy, LowIn, ShAmt);
      // Bits of the high input that cross over into the low half.
      auto Lack = MIRBuilder.buildConstant(AmtTy, -Amt + HalfBits);
      auto Carried = MIRBuilder.buildShl(HalfTy, HighIn, Lack);

      LowOut = MIRBuilder.buildOr(HalfTy, LowKept, Carried);
      HighOut = MIRBuilder.buildLShr(HalfTy, HighIn, ShAmt);
    }
    break;

  default:
    // Every remaining opcode is expanded as an arithmetic shift right.
    if (BeyondFull) {
      // Both halves become the same sign-fill of the high input.
      auto SignAmt = MIRBuilder.buildConstant(AmtTy, HalfBits - 1);
      LowOut = MIRBuilder.buildAShr(HalfTy, HighIn, SignAmt);
      HighOut = LowOut;
    } else if (BeyondHalf) {
      auto Excess = MIRBuilder.buildConstant(AmtTy, Amt - HalfBits);
      LowOut = MIRBuilder.buildAShr(HalfTy, HighIn, Excess);
      auto SignAmt = MIRBuilder.buildConstant(AmtTy, HalfBits - 1);
      HighOut = MIRBuilder.buildAShr(HalfTy, HighIn, SignAmt);
    } else if (ExactlyHalf) {
      LowOut = HighIn;
      auto SignAmt = MIRBuilder.buildConstant(AmtTy, HalfBits - 1);
      HighOut = MIRBuilder.buildAShr(HalfTy, HighIn, SignAmt);
    } else {
      auto ShAmt = MIRBuilder.buildConstant(AmtTy, Amt);

      auto LowKept = MIRBuilder.buildLShr(HalfTy, LowIn, ShAmt);
      // Bits of the high input that cross over into the low half.
      auto Lack = MIRBuilder.buildConstant(AmtTy, -Amt + HalfBits);
      auto Carried = MIRBuilder.buildShl(HalfTy, HighIn, Lack);

      LowOut = MIRBuilder.buildOr(HalfTy, LowKept, Carried);
      HighOut = MIRBuilder.buildAShr(HalfTy, HighIn, ShAmt);
    }
    break;
  }

  MIRBuilder.buildMerge(MI.getOperand(0), {LowOut.getReg(), HighOut.getReg()});
  MI.eraseFromParent();

  return Legalized;
}
|
|
|
|
|
|
|
|
// TODO: Optimize if constant shift amount.
|
|
|
|
/// Narrow a scalar G_SHL / G_LSHR / G_ASHR.
///
/// For type index 1 only the shift-amount operand (operand 2) is narrowed to
/// \p RequestedTy. Otherwise \p RequestedTy is ignored and the shift is
/// expanded into operations on the two halves of the value: by a dedicated
/// expansion when the amount is defined by a G_CONSTANT, and by a generic
/// select-based expansion otherwise.
///
/// \returns UnableToLegalize for vector results and for results with an odd
/// bit width; Legalized otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  const Register Result = MI.getOperand(0).getReg();
  const LLT ResultTy = MRI.getType(Result);
  if (ResultTy.isVector())
    return UnableToLegalize;

  const Register ShAmt = MI.getOperand(2).getReg();
  const LLT ShAmtTy = MRI.getType(ShAmt);
  const unsigned FullBits = ResultTy.getScalarSizeInBits();
  if (FullBits % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned HalfBits = FullBits / 2;
  const LLT HalfTy = LLT::scalar(HalfBits);
  const LLT BoolTy = LLT::scalar(1);

  // A constant shift amount gets the cheaper dedicated expansion.
  const MachineInstr *AmtDef =
      getOpcodeDef(TargetOpcode::G_CONSTANT, ShAmt, MRI);
  if (AmtDef)
    return narrowScalarShiftByConstant(
        MI, AmtDef->getOperand(1).getCImm()->getValue(), HalfTy, ShAmtTy);

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto HalfBitsCst = MIRBuilder.buildConstant(ShAmtTy, HalfBits);

  Register LowIn = MRI.createGenericVirtualRegister(HalfTy);
  Register HighIn = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({LowIn, HighIn}, MI.getOperand(1));

  // Amount - HalfBits and HalfBits - Amount, used by the long and short cases.
  auto Excess = MIRBuilder.buildSub(ShAmtTy, ShAmt, HalfBitsCst);
  auto Lack = MIRBuilder.buildSub(ShAmtTy, HalfBitsCst, ShAmt);

  auto ZeroAmt = MIRBuilder.buildConstant(ShAmtTy, 0);
  auto IsShort =
      MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, BoolTy, ShAmt, HalfBitsCst);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShAmt, ZeroAmt);

  Register LowResult, HighResult;
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < HalfBits
    auto ShortLow = MIRBuilder.buildShl(HalfTy, LowIn, ShAmt);

    auto Carried = MIRBuilder.buildLShr(HalfTy, LowIn, Lack);
    auto HighKept = MIRBuilder.buildShl(HalfTy, HighIn, ShAmt);
    auto ShortHigh = MIRBuilder.buildOr(HalfTy, Carried, HighKept);

    // Long: ShAmt >= HalfBits
    auto LongLow = MIRBuilder.buildConstant(HalfTy, 0);       // Lo part is zero.
    auto LongHigh = MIRBuilder.buildShl(HalfTy, LowIn, Excess); // Hi from Lo.

    auto LowSel = MIRBuilder.buildSelect(HalfTy, IsShort, ShortLow, LongLow);
    auto HighNonZero =
        MIRBuilder.buildSelect(HalfTy, IsShort, ShortHigh, LongHigh);
    // A zero amount leaves the high half untouched.
    auto HighSel = MIRBuilder.buildSelect(HalfTy, IsZero, HighIn, HighNonZero);

    LowResult = LowSel.getReg(0);
    HighResult = HighSel.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < HalfBits
    auto ShortHigh = MIRBuilder.buildInstr(Opcode, {HalfTy}, {HighIn, ShAmt});

    auto LowKept = MIRBuilder.buildLShr(HalfTy, LowIn, ShAmt);
    auto Carried = MIRBuilder.buildShl(HalfTy, HighIn, Lack);
    auto ShortLow = MIRBuilder.buildOr(HalfTy, LowKept, Carried);

    // Long: ShAmt >= HalfBits
    MachineInstrBuilder LongHigh;
    if (Opcode == TargetOpcode::G_ASHR) {
      // Sign of Hi part.
      auto SignAmt = MIRBuilder.buildConstant(ShAmtTy, HalfBits - 1);
      LongHigh = MIRBuilder.buildAShr(HalfTy, HighIn, SignAmt);
    } else {
      // Hi part is zero.
      LongHigh = MIRBuilder.buildConstant(HalfTy, 0);
    }
    // Lo from Hi part.
    auto LongLow = MIRBuilder.buildInstr(Opcode, {HalfTy}, {HighIn, Excess});

    auto LowNonZero =
        MIRBuilder.buildSelect(HalfTy, IsShort, ShortLow, LongLow);
    // A zero amount leaves the low half untouched.
    auto LowSel = MIRBuilder.buildSelect(HalfTy, IsZero, LowIn, LowNonZero);

    auto HighSel = MIRBuilder.buildSelect(HalfTy, IsShort, ShortHigh, LongHigh);

    LowResult = LowSel.getReg(0);
    HighResult = HighSel.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(Result, {LowResult, HighResult});
  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-02-28 08:01:05 +08:00
|
|
|
/// Widen a G_PHI to \p MoreTy.
///
/// Each incoming value is widened with the builder positioned before the
/// first terminator of the corresponding predecessor block, then the result
/// is widened with the builder positioned in the PHI's own block. \p MI is
/// modified in place (bracketed by observer notifications). \p TypeIdx must
/// be 0 (asserted). Always returns Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);

  // Operands after the def come in (value, predecessor block) pairs.
  const unsigned NumOps = MI.getNumOperands();
  for (unsigned ValIdx = 1; ValIdx != NumOps; ValIdx += 2) {
    MachineBasicBlock *Pred = MI.getOperand(ValIdx + 1).getMBB();
    MIRBuilder.setInsertPt(*Pred, Pred->getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, ValIdx);
  }

  // NOTE(review): the insertion point is deliberately one position before the
  // first non-PHI instruction; presumably moreElementsVectorDst moves past
  // that position before inserting - confirm against its definition.
  MachineBasicBlock &PhiBlock = *MI.getParent();
  MachineBasicBlock::iterator InsertPt = PhiBlock.getFirstNonPHI();
  --InsertPt;
  MIRBuilder.setInsertPt(PhiBlock, InsertPt);
  moreElementsVectorDst(MI, MoreTy, 0);

  Observer.changedInstr(MI);
  return Legalized;
}
|
|
|
|
|
2019-02-12 06:00:39 +08:00
|
|
|
/// Entry point for the "more elements" legalization action: widen the vector
/// type at \p TypeIdx of \p MI to \p MoreTy.
///
/// Most opcodes are handled by widening the relevant source operands and/or
/// the def of \p MI in place. G_UNMERGE_VALUES is rebuilt with additional
/// result registers, and G_PHI is forwarded to moreElementsVectorPhi.
///
/// \returns UnableToLegalize for unsupported opcodes or type indices;
/// Legalized otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  MIRBuilder.setInstr(MI);

  // Widen the listed source operands (in the order given) and then, if
  // requested, the def in operand 0, bracketed by observer notifications.
  auto WidenInPlace = [&](ArrayRef<unsigned> SrcOpIdxs, bool WidenDef) {
    Observer.changingInstr(MI);
    for (unsigned OpIdx : SrcOpIdxs)
      moreElementsVectorSrc(MI, MoreTy, OpIdx);
    if (WidenDef)
      moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  };

  switch (MI.getOpcode()) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD:
    if (TypeIdx != 0)
      return UnableToLegalize;
    return WidenInPlace(ArrayRef<unsigned>(), /*WidenDef=*/true);

  case TargetOpcode::G_STORE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    return WidenInPlace({0}, /*WidenDef=*/false);

  // Binary operations: both inputs and the result are widened.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
    return WidenInPlace({1, 2}, /*WidenDef=*/true);

  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    return WidenInPlace({1}, /*WidenDef=*/false);

  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    return WidenInPlace({1}, /*WidenDef=*/true);

  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Only selects with a non-vector condition are supported.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;
    return WidenInPlace({2, 3}, /*WidenDef=*/true);

  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    const LLT PieceTy = MRI.getType(MI.getOperand(0).getReg());
    // Every operand but the last is a result; the last one is the source.
    const int OldResultCount = MI.getNumOperands() - 1;
    moreElementsVectorSrc(MI, MoreTy, OldResultCount);

    // Rebuild the unmerge: the original results come first, followed by fresh
    // registers for the pieces added by the wider source.
    auto NewUnmerge = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int Idx = 0; Idx != OldResultCount; ++Idx)
      NewUnmerge.addDef(MI.getOperand(Idx).getReg());

    const int NewResultCount =
        MoreTy.getSizeInBits() / PieceTy.getSizeInBits();
    for (int Idx = OldResultCount; Idx != NewResultCount; ++Idx)
      NewUnmerge.addDef(MRI.createGenericVirtualRegister(PieceTy));

    NewUnmerge.addUse(MI.getOperand(OldResultCount).getReg());
    MI.eraseFromParent();
    return Legalized;
  }

  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);

  default:
    return UnableToLegalize;
  }
}
|
|
|
|
|
2019-06-24 23:50:29 +08:00
|
|
|
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
|
|
|
|
ArrayRef<Register> Src1Regs,
|
|
|
|
ArrayRef<Register> Src2Regs,
|
2019-03-11 18:00:17 +08:00
|
|
|
LLT NarrowTy) {
|
|
|
|
MachineIRBuilder &B = MIRBuilder;
|
|
|
|
unsigned SrcParts = Src1Regs.size();
|
|
|
|
unsigned DstParts = DstRegs.size();
|
|
|
|
|
|
|
|
unsigned DstIdx = 0; // Low bits of the result.
|
2019-06-25 00:16:12 +08:00
|
|
|
Register FactorSum =
|
2019-03-11 18:00:17 +08:00
|
|
|
B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
|
|
|
|
DstRegs[DstIdx] = FactorSum;
|
|
|
|
|
|
|
|
unsigned CarrySumPrevDstIdx;
|
2019-06-24 23:50:29 +08:00
|
|
|
SmallVector<Register, 4> Factors;
|
2019-03-11 18:00:17 +08:00
|
|
|
|
|
|
|
for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
|
|
|
|
// Collect low parts of muls for DstIdx.
|
|
|
|
for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
|
|
|
|
i <= std::min(DstIdx, SrcParts - 1); ++i) {
|
|
|
|
MachineInstrBuilder Mul =
|
|
|
|
B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
|
|
|
|
Factors.push_back(Mul.getReg(0));
|
|
|
|
}
|
|
|
|
// Collect high parts of muls from previous DstIdx.
|
|
|
|
for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
|
|
|
|
i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
|
|
|
|
MachineInstrBuilder Umulh =
|
|
|
|
B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
|
|
|
|
Factors.push_back(Umulh.getReg(0));
|
|
|
|
}
|
2019-10-28 22:28:00 +08:00
|
|
|
// Add CarrySum from additions calculated for previous DstIdx.
|
2019-03-11 18:00:17 +08:00
|
|
|
if (DstIdx != 1) {
|
|
|
|
Factors.push_back(CarrySumPrevDstIdx);
|
|
|
|
}
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register CarrySum;
|
2019-03-11 18:00:17 +08:00
|
|
|
// Add all factors and accumulate all carries into CarrySum.
|
|
|
|
if (DstIdx != DstParts - 1) {
|
|
|
|
MachineInstrBuilder Uaddo =
|
|
|
|
B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
|
|
|
|
FactorSum = Uaddo.getReg(0);
|
|
|
|
CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
|
|
|
|
for (unsigned i = 2; i < Factors.size(); ++i) {
|
|
|
|
MachineInstrBuilder Uaddo =
|
|
|
|
B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
|
|
|
|
FactorSum = Uaddo.getReg(0);
|
|
|
|
MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
|
|
|
|
CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Since value for the next index is not calculated, neither is CarrySum.
|
|
|
|
FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
|
|
|
|
for (unsigned i = 2; i < Factors.size(); ++i)
|
|
|
|
FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
CarrySumPrevDstIdx = CarrySum;
|
|
|
|
DstRegs[DstIdx] = FactorSum;
|
|
|
|
Factors.clear();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-01-27 08:52:51 +08:00
|
|
|
/// Narrow a scalar multiply into \p NarrowTy sized pieces.
///
/// Both sources are split into \p NarrowTy parts and multiplied with
/// multiplyRegisters. For G_UMULH twice as many product parts are computed and
/// only the upper half of them is merged into the result; for any other
/// opcode the product parts are merged directly. \p MI is erased.
///
/// \returns UnableToLegalize for vector results or when the source or result
/// size is not a multiple of the \p NarrowTy size; Legalized otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
  const Register Result = MI.getOperand(0).getReg();
  const Register LHS = MI.getOperand(1).getReg();
  const Register RHS = MI.getOperand(2).getReg();

  const LLT ResultTy = MRI.getType(Result);
  if (ResultTy.isVector())
    return UnableToLegalize;

  const unsigned SrcBits = MRI.getType(LHS).getSizeInBits();
  const unsigned DstBits = ResultTy.getSizeInBits();
  const unsigned NarrowBits = NarrowTy.getSizeInBits();
  const bool SplitsEvenly =
      DstBits % NarrowBits == 0 && SrcBits % NarrowBits == 0;
  if (!SplitsEvenly)
    return UnableToLegalize;

  const unsigned DstPieces = DstBits / NarrowBits;
  const unsigned SrcPieces = SrcBits / NarrowBits;
  const bool WantHighHalf = MI.getOpcode() == TargetOpcode::G_UMULH;
  // A high multiply needs the full double-width product.
  const unsigned ProductPieces = WantHighHalf ? 2 * DstPieces : DstPieces;

  SmallVector<Register, 2> LHSPieces, RHSPieces;
  SmallVector<Register, 2> ProductRegs(ProductPieces);
  extractParts(LHS, NarrowTy, SrcPieces, LHSPieces);
  extractParts(RHS, NarrowTy, SrcPieces, RHSPieces);
  multiplyRegisters(ProductRegs, LHSPieces, RHSPieces, NarrowTy);

  // Take only high half of registers if this is high mul.
  const unsigned FirstKept = WantHighHalf ? ProductPieces / 2 : 0;
  ArrayRef<Register> Kept(&ProductRegs[FirstKept], DstPieces);
  MIRBuilder.buildMerge(Result, Kept);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-02-12 22:54:52 +08:00
|
|
|
/// Narrow the source (type index 1) of an extract: operand 0 is the result,
/// operand 1 the wide source and operand 2 the immediate bit offset.
///
/// The source is split into NarrowTy-sized parts. Every part that overlaps
/// the extracted bit range [OpStart, OpStart + OpSize) contributes one
/// segment, either the part itself or a narrower extract of it, and the
/// segments are then recombined into the result register.
///
/// Returns UnableToLegalize for any other type index, or when the source size
/// is not an exact multiple of NarrowTy.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const uint64_t NarrowSize = NarrowTy.getSizeInBits();
  const uint64_t SizeOp1 =
      MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register DstReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(DstReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(DstReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // ExtractOffset is the bit offset inside this source part at which the
    // extracted range begins; SegSize is how many bits of the extracted range
    // fall inside this part.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      // The range began in an earlier part and continues from bit 0 here.
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      // The range begins somewhere inside this part.
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
|
|
|
/// Narrow the result (type index 0) of an insert: operand 0 is the result,
/// operand 1 the wide source, operand 2 the value being inserted and operand 3
/// the immediate bit offset.
///
/// The wide source is split into NarrowTy-sized parts. Parts untouched by the
/// inserted range [OpStart, OpStart + OpSize) are forwarded unchanged, a part
/// fully replaced by the inserted value is substituted by it, and every other
/// overlapping part receives a narrow insert of the relevant slice of the
/// inserted value. The parts are then recombined into the result register.
///
/// Returns UnableToLegalize for any other type index, or when the result size
/// is not an exact multiple of NarrowTy.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // ExtractOffset is the bit offset inside OpReg of the slice that lands in
    // this part, InsertOffset is the bit offset inside this part where that
    // slice goes, and SegSize is the slice's width in bits.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // The inserted value began in an earlier part; its remainder starts at
      // bit 0 of this part.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // The inserted value begins inside this part. The slice is limited by
      // the room left in the part and by the width of the inserted value
      // itself. Using the end offset (InsertOffset + OpSize) instead of OpSize
      // here would over-count whenever the value fits inside the part at a
      // non-zero offset, producing an extract wider than OpReg.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize = std::min<uint64_t>(NarrowSize - InsertOffset, OpSize);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register PartReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(PartReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(PartReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-04-11 01:07:56 +08:00
|
|
|
/// Narrow a two-source instruction piecewise: both sources are split into
/// NarrowTy parts (plus optional leftover parts of LeftoverTy), the original
/// opcode is re-emitted on each pair of corresponding parts, and the partial
/// results are reassembled into the destination with insertParts().
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  const Register Result = MI.getOperand(0).getReg();
  const LLT ResultTy = MRI.getType(Result);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> LHSParts, LHSLeftovers;
  SmallVector<Register, 4> RHSParts, RHSLeftovers;
  SmallVector<Register, 4> ResultParts, ResultLeftovers;

  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(1).getReg(), ResultTy, NarrowTy, LeftoverTy,
                    LHSParts, LHSLeftovers))
    return UnableToLegalize;

  // The first split succeeded with the same types, so this one must as well.
  LLT IgnoredLeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), ResultTy, NarrowTy,
                    IgnoredLeftoverTy, RHSParts, RHSLeftovers))
    llvm_unreachable("inconsistent extractParts result");

  // Re-emit the operation on each pair of full-width parts.
  const unsigned NumParts = RHSParts.size();
  for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
    auto PartOp = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                        {LHSParts[Idx], RHSParts[Idx]});
    ResultParts.push_back(PartOp.getReg(0));
  }

  // ... and on each pair of leftover parts.
  const unsigned NumLeftovers = RHSLeftovers.size();
  for (unsigned Idx = 0; Idx < NumLeftovers; ++Idx) {
    auto LeftoverOp =
        MIRBuilder.buildInstr(MI.getOpcode(), {LeftoverTy},
                              {LHSLeftovers[Idx], RHSLeftovers[Idx]});
    ResultLeftovers.push_back(LeftoverOp.getReg(0));
  }

  insertParts(Result, ResultTy, NarrowTy, ResultParts, LeftoverTy,
              ResultLeftovers);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2020-01-11 00:02:18 +08:00
|
|
|
/// Narrow the result (type index 0) of a scalar extension by breaking the
/// source into GCD-typed pieces, padding them out to the LCM type using the
/// original opcode, and remerging into the destination. Vector destinations
/// and other type indices are not handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
                                 LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const Register Result = MI.getOperand(0).getReg();
  const Register Source = MI.getOperand(1).getReg();

  const LLT ResultTy = MRI.getType(Result);
  if (ResultTy.isVector())
    return UnableToLegalize;

  SmallVector<Register, 8> Pieces;
  const LLT PieceTy = extractGCDType(Pieces, ResultTy, NarrowTy, Source);
  const LLT MergedTy =
      buildLCMMergePieces(ResultTy, NarrowTy, PieceTy, Pieces, MI.getOpcode());
  buildWidenedRemergeToDst(Result, MergedTy, Pieces);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-02-05 08:13:44 +08:00
|
|
|
/// Narrow the value type (type index 0) of a select with a scalar condition:
/// both value operands are split into NarrowTy parts (plus optional leftover
/// parts), a select on the shared condition is emitted per pair of parts, and
/// the partial results are reassembled with insertParts().
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  // TODO: Handle vselect
  const Register Cond = MI.getOperand(1).getReg();
  if (MRI.getType(Cond).isVector())
    return UnableToLegalize;

  const Register Result = MI.getOperand(0).getReg();
  const LLT ResultTy = MRI.getType(Result);

  SmallVector<Register, 4> TrueParts, TrueLeftovers;
  SmallVector<Register, 4> FalseParts, FalseLeftovers;
  SmallVector<Register, 4> ResultParts, ResultLeftovers;

  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), ResultTy, NarrowTy, LeftoverTy,
                    TrueParts, TrueLeftovers))
    return UnableToLegalize;

  // The first split succeeded with the same types, so this one must as well.
  LLT IgnoredLeftoverTy;
  if (!extractParts(MI.getOperand(3).getReg(), ResultTy, NarrowTy,
                    IgnoredLeftoverTy, FalseParts, FalseLeftovers))
    llvm_unreachable("inconsistent extractParts result");

  // One select per pair of full-width parts.
  const unsigned NumParts = TrueParts.size();
  for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
    auto PartSelect = MIRBuilder.buildSelect(NarrowTy, Cond, TrueParts[Idx],
                                             FalseParts[Idx]);
    ResultParts.push_back(PartSelect.getReg(0));
  }

  // ... and one per pair of leftover parts.
  const unsigned NumLeftovers = TrueLeftovers.size();
  for (unsigned Idx = 0; Idx < NumLeftovers; ++Idx) {
    auto LeftoverSelect = MIRBuilder.buildSelect(
        LeftoverTy, Cond, TrueLeftovers[Idx], FalseLeftovers[Idx]);
    ResultLeftovers.push_back(LeftoverSelect.getReg(0));
  }

  insertParts(Result, ResultTy, NarrowTy, ResultParts, LeftoverTy,
              ResultLeftovers);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2020-01-27 16:43:38 +08:00
|
|
|
/// Narrow the source (type index 1) of a count-leading-zeros when the source
/// is a scalar exactly twice as wide as NarrowTy. The source is split into
/// Lo/Hi halves and the count is computed as
///   Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
/// The NarrowTy result is merged with a zero high half into the destination.
/// All other shapes are rejected.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const LLT SourceTy = MRI.getType(MI.getOperand(1).getReg());
  const unsigned HalfBits = NarrowTy.getSizeInBits();

  // Only the "scalar of exactly two narrow halves" case is implemented.
  if (!SourceTy.isScalar() || SourceTy.getSizeInBits() != 2 * HalfBits)
    return UnableToLegalize;

  auto Halves = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));

  // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
  auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
  auto HighHalfIsZero = MIRBuilder.buildICmp(
      CmpInst::ICMP_EQ, LLT::scalar(1), Halves.getReg(1), Zero);
  auto LowCount = MIRBuilder.buildCTLZ(NarrowTy, Halves.getReg(0));
  auto HalfWidth = MIRBuilder.buildConstant(NarrowTy, HalfBits);
  auto CountViaLow = MIRBuilder.buildAdd(NarrowTy, LowCount, HalfWidth);
  // The high half is known non-zero whenever this value is selected.
  auto CountViaHigh =
      MIRBuilder.buildCTLZ_ZERO_UNDEF(NarrowTy, Halves.getReg(1));
  auto Count = MIRBuilder.buildSelect(NarrowTy, HighHalfIsZero, CountViaLow,
                                      CountViaHigh);

  MIRBuilder.buildMerge(MI.getOperand(0), {Count.getReg(0), Zero.getReg(0)});

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2020-01-27 16:51:06 +08:00
|
|
|
/// Narrow the source (type index 1) of a count-trailing-zeros when the source
/// is a scalar exactly twice as wide as NarrowTy. The source is split into
/// Lo/Hi halves and the count is computed as
///   Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
/// The NarrowTy result is merged with a zero high half into the destination.
/// All other shapes are rejected.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const LLT SourceTy = MRI.getType(MI.getOperand(1).getReg());
  const unsigned HalfBits = NarrowTy.getSizeInBits();

  // Only the "scalar of exactly two narrow halves" case is implemented.
  if (!SourceTy.isScalar() || SourceTy.getSizeInBits() != 2 * HalfBits)
    return UnableToLegalize;

  auto Halves = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));

  // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
  auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
  auto LowHalfIsZero = MIRBuilder.buildICmp(
      CmpInst::ICMP_EQ, LLT::scalar(1), Halves.getReg(0), Zero);
  auto HighCount = MIRBuilder.buildCTTZ(NarrowTy, Halves.getReg(1));
  auto HalfWidth = MIRBuilder.buildConstant(NarrowTy, HalfBits);
  auto CountViaHigh = MIRBuilder.buildAdd(NarrowTy, HighCount, HalfWidth);
  // The low half is known non-zero whenever this value is selected.
  auto CountViaLow =
      MIRBuilder.buildCTTZ_ZERO_UNDEF(NarrowTy, Halves.getReg(0));
  auto Count = MIRBuilder.buildSelect(NarrowTy, LowHalfIsZero, CountViaHigh,
                                      CountViaLow);

  MIRBuilder.buildMerge(MI.getOperand(0), {Count.getReg(0), Zero.getReg(0)});

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2020-01-27 16:59:50 +08:00
|
|
|
/// Narrow the source (type index 1) of a population count when the source is
/// a scalar exactly twice as wide as NarrowTy: count each half separately,
/// add the two counts, and zero-extend the sum into the destination. All
/// other shapes are rejected.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const LLT SourceTy = MRI.getType(MI.getOperand(1).getReg());
  const unsigned HalfBits = NarrowTy.getSizeInBits();

  // Only the "scalar of exactly two narrow halves" case is implemented.
  if (!SourceTy.isScalar() || SourceTy.getSizeInBits() != 2 * HalfBits)
    return UnableToLegalize;

  auto Halves = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));

  auto LowCount = MIRBuilder.buildCTPOP(NarrowTy, Halves.getReg(0));
  auto HighCount = MIRBuilder.buildCTPOP(NarrowTy, Halves.getReg(1));
  auto Total = MIRBuilder.buildAdd(NarrowTy, HighCount, LowCount);
  MIRBuilder.buildZExt(MI.getOperand(0), Total);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2018-08-22 01:30:31 +08:00
|
|
|
LegalizerHelper::LegalizeResult
|
|
|
|
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
|
|
|
|
unsigned Opc = MI.getOpcode();
|
|
|
|
auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
|
2018-11-26 19:07:02 +08:00
|
|
|
auto isSupported = [this](const LegalityQuery &Q) {
|
2018-08-22 01:30:31 +08:00
|
|
|
auto QAction = LI.getAction(Q).Action;
|
2018-11-26 19:07:02 +08:00
|
|
|
return QAction == Legal || QAction == Libcall || QAction == Custom;
|
2018-08-22 01:30:31 +08:00
|
|
|
};
|
|
|
|
switch (Opc) {
|
|
|
|
default:
|
|
|
|
return UnableToLegalize;
|
|
|
|
case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
|
|
|
|
// This trivially expands to CTLZ.
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-08-22 01:30:31 +08:00
|
|
|
MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2018-08-22 01:30:31 +08:00
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
case TargetOpcode::G_CTLZ: {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register SrcReg = MI.getOperand(1).getReg();
|
2018-08-22 01:30:31 +08:00
|
|
|
unsigned Len = Ty.getSizeInBits();
|
2019-01-31 10:09:57 +08:00
|
|
|
if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
|
2018-11-26 19:07:02 +08:00
|
|
|
// If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
|
2020-01-16 20:09:48 +08:00
|
|
|
auto MIBCtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(Ty, SrcReg);
|
2018-08-22 01:30:31 +08:00
|
|
|
auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
|
|
|
|
auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
|
|
|
|
auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
|
|
|
|
SrcReg, MIBZero);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildSelect(MI.getOperand(0), MIBICmp, MIBLen, MIBCtlzZU);
|
2018-08-22 01:30:31 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
// for now, we do this:
|
|
|
|
// NewLen = NextPowerOf2(Len);
|
|
|
|
// x = x | (x >> 1);
|
|
|
|
// x = x | (x >> 2);
|
|
|
|
// ...
|
|
|
|
// x = x | (x >>16);
|
|
|
|
// x = x | (x >>32); // for 64-bit input
|
|
|
|
// Upto NewLen/2
|
|
|
|
// return Len - popcount(x);
|
|
|
|
//
|
|
|
|
// Ref: "Hacker's Delight" by Henry Warren
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Op = SrcReg;
|
2018-08-22 01:30:31 +08:00
|
|
|
unsigned NewLen = PowerOf2Ceil(Len);
|
|
|
|
for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
|
|
|
|
auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
|
2020-01-16 20:09:48 +08:00
|
|
|
auto MIBOp =
|
|
|
|
MIRBuilder.buildOr(Ty, Op, MIRBuilder.buildLShr(Ty, Op, MIBShiftAmt));
|
2020-01-23 19:51:35 +08:00
|
|
|
Op = MIBOp.getReg(0);
|
2018-08-22 01:30:31 +08:00
|
|
|
}
|
2020-01-16 20:09:48 +08:00
|
|
|
auto MIBPop = MIRBuilder.buildCTPOP(Ty, Op);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(Ty, Len),
|
|
|
|
MIBPop);
|
2018-08-22 01:30:31 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
|
|
|
|
// This trivially expands to CTTZ.
|
2018-12-13 07:48:13 +08:00
|
|
|
Observer.changingInstr(MI);
|
2018-08-22 01:30:31 +08:00
|
|
|
MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
|
2018-12-06 04:14:52 +08:00
|
|
|
Observer.changedInstr(MI);
|
2018-08-22 01:30:31 +08:00
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
case TargetOpcode::G_CTTZ: {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register SrcReg = MI.getOperand(1).getReg();
|
2018-08-22 01:30:31 +08:00
|
|
|
unsigned Len = Ty.getSizeInBits();
|
2019-01-31 10:09:57 +08:00
|
|
|
if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
|
2018-08-22 01:30:31 +08:00
|
|
|
// If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
|
|
|
|
// zero.
|
2020-01-16 20:09:48 +08:00
|
|
|
auto MIBCttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(Ty, SrcReg);
|
2018-08-22 01:30:31 +08:00
|
|
|
auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
|
|
|
|
auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
|
|
|
|
auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
|
|
|
|
SrcReg, MIBZero);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildSelect(MI.getOperand(0), MIBICmp, MIBLen, MIBCttzZU);
|
2018-08-22 01:30:31 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
// for now, we use: { return popcount(~x & (x - 1)); }
|
|
|
|
// unless the target has ctlz but not ctpop, in which case we use:
|
|
|
|
// { return 32 - nlz(~x & (x-1)); }
|
|
|
|
// Ref: "Hacker's Delight" by Henry Warren
|
|
|
|
auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
|
2020-01-16 20:09:48 +08:00
|
|
|
auto MIBNot = MIRBuilder.buildXor(Ty, SrcReg, MIBCstNeg1);
|
|
|
|
auto MIBTmp = MIRBuilder.buildAnd(
|
|
|
|
Ty, MIBNot, MIRBuilder.buildAdd(Ty, SrcReg, MIBCstNeg1));
|
2019-01-31 10:09:57 +08:00
|
|
|
if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
|
|
|
|
isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
|
2018-08-22 01:30:31 +08:00
|
|
|
auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
|
2020-01-16 20:37:00 +08:00
|
|
|
MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
|
2020-01-16 20:09:48 +08:00
|
|
|
MIRBuilder.buildCTLZ(Ty, MIBTmp));
|
2018-08-22 01:30:31 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
|
2020-01-23 19:51:35 +08:00
|
|
|
MI.getOperand(1).setReg(MIBTmp.getReg(0));
|
2018-08-22 01:30:31 +08:00
|
|
|
return Legalized;
|
|
|
|
}
|
2020-01-27 16:59:50 +08:00
|
|
|
case TargetOpcode::G_CTPOP: {
|
|
|
|
unsigned Size = Ty.getSizeInBits();
|
|
|
|
MachineIRBuilder &B = MIRBuilder;
|
|
|
|
|
|
|
|
// Count set bits in blocks of 2 bits. Default approach would be
|
|
|
|
// B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
|
|
|
|
// We use following formula instead:
|
|
|
|
// B2Count = val - { (val >> 1) & 0x55555555 }
|
|
|
|
// since it gives same result in blocks of 2 with one instruction less.
|
|
|
|
auto C_1 = B.buildConstant(Ty, 1);
|
|
|
|
auto B2Set1LoTo1Hi = B.buildLShr(Ty, MI.getOperand(1).getReg(), C_1);
|
|
|
|
APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
|
|
|
|
auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
|
|
|
|
auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
|
|
|
|
auto B2Count = B.buildSub(Ty, MI.getOperand(1).getReg(), B2Count1Hi);
|
|
|
|
|
|
|
|
// In order to get count in blocks of 4 add values from adjacent block of 2.
|
|
|
|
// B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
|
|
|
|
auto C_2 = B.buildConstant(Ty, 2);
|
|
|
|
auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
|
|
|
|
APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
|
|
|
|
auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
|
|
|
|
auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
|
|
|
|
auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
|
|
|
|
auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
|
|
|
|
|
|
|
|
// For count in blocks of 8 bits we don't have to mask high 4 bits before
|
|
|
|
// addition since count value sits in range {0,...,8} and 4 bits are enough
|
|
|
|
// to hold such binary values. After addition high 4 bits still hold count
|
|
|
|
// of set bits in high 4 bit block, set them to zero and get 8 bit result.
|
|
|
|
// B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
|
|
|
|
auto C_4 = B.buildConstant(Ty, 4);
|
|
|
|
auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
|
|
|
|
auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
|
|
|
|
APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
|
|
|
|
auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
|
|
|
|
auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
|
|
|
|
|
|
|
|
assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
|
|
|
|
// 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
|
|
|
|
// bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
|
|
|
|
auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
|
|
|
|
auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
|
|
|
|
|
|
|
|
// Shift count result from 8 high bits to low bits.
|
|
|
|
auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
|
|
|
|
B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
|
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return Legalized;
|
|
|
|
}
|
2018-08-22 01:30:31 +08:00
|
|
|
}
|
|
|
|
}
|
2019-05-18 07:05:13 +08:00
|
|
|
|
|
|
|
// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
|
|
|
|
// representation.
|
|
|
|
LegalizerHelper::LegalizeResult
|
|
|
|
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
|
2019-06-25 00:16:12 +08:00
|
|
|
Register Dst = MI.getOperand(0).getReg();
|
|
|
|
Register Src = MI.getOperand(1).getReg();
|
2019-05-18 07:05:13 +08:00
|
|
|
const LLT S64 = LLT::scalar(64);
|
|
|
|
const LLT S32 = LLT::scalar(32);
|
|
|
|
const LLT S1 = LLT::scalar(1);
|
|
|
|
|
|
|
|
assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
|
|
|
|
|
|
|
|
// unsigned cul2f(ulong u) {
|
|
|
|
// uint lz = clz(u);
|
|
|
|
// uint e = (u != 0) ? 127U + 63U - lz : 0;
|
|
|
|
// u = (u << lz) & 0x7fffffffffffffffUL;
|
|
|
|
// ulong t = u & 0xffffffffffUL;
|
|
|
|
// uint v = (e << 23) | (uint)(u >> 40);
|
|
|
|
// uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
|
|
|
|
// return as_float(v + r);
|
|
|
|
// }
|
|
|
|
|
|
|
|
auto Zero32 = MIRBuilder.buildConstant(S32, 0);
|
|
|
|
auto Zero64 = MIRBuilder.buildConstant(S64, 0);
|
|
|
|
|
|
|
|
auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
|
|
|
|
|
|
|
|
auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
|
|
|
|
auto Sub = MIRBuilder.buildSub(S32, K, LZ);
|
|
|
|
|
|
|
|
auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
|
|
|
|
auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
|
|
|
|
|
|
|
|
auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
|
|
|
|
auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
|
|
|
|
|
|
|
|
auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
|
|
|
|
|
|
|
|
auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
|
|
|
|
auto T = MIRBuilder.buildAnd(S64, U, Mask1);
|
|
|
|
|
|
|
|
auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
|
|
|
|
auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
|
|
|
|
auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
|
|
|
|
|
|
|
|
auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
|
|
|
|
auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
|
|
|
|
auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
|
|
|
|
auto One = MIRBuilder.buildConstant(S32, 1);
|
|
|
|
|
|
|
|
auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
|
|
|
|
auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
|
|
|
|
auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
|
|
|
|
MIRBuilder.buildAdd(Dst, V, R);
|
|
|
|
|
|
|
|
return Legalized;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lower an unsigned-integer-to-floating-point conversion. An s1 source
/// becomes a select between the FP constants 1.0 and 0.0; an s64 source with
/// an s32 destination is delegated to lowerU64ToF32BitOps(). Every other
/// type combination is rejected.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  const Register Result = MI.getOperand(0).getReg();
  const Register Source = MI.getOperand(1).getReg();
  const LLT ResultTy = MRI.getType(Result);
  const LLT SourceTy = MRI.getType(Source);

  if (SourceTy == LLT::scalar(1)) {
    // A single bit converts to exactly 1.0 (set) or 0.0 (clear).
    auto OneFP = MIRBuilder.buildFConstant(ResultTy, 1.0);
    auto ZeroFP = MIRBuilder.buildFConstant(ResultTy, 0.0);
    MIRBuilder.buildSelect(Result, Source, OneFP, ZeroFP);
    MI.eraseFromParent();
    return Legalized;
  }

  const bool IsU64ToF32 =
      SourceTy == LLT::scalar(64) && ResultTy == LLT::scalar(32);
  if (!IsU64ToF32)
    return UnableToLegalize;

  // TODO: SelectionDAG has several alternative expansions to port which may
  // be more reasonable depending on the available instructions. If a target
  // has sitofp, does not have CTLZ, or can efficiently use f64 as an
  // intermediate type, this is probably worse.
  return lowerU64ToF32BitOps(MI);
}
|
|
|
|
|
|
|
|
/// Lower a signed-integer-to-floating-point conversion. An s1 source becomes
/// a select between the FP constants -1.0 and 0.0. An s64 source with an s32
/// destination is lowered to an unsigned conversion of the absolute value
/// followed by a conditional negate. Every other type combination is
/// rejected.
///
/// On success the original instruction is erased, so that the destination
/// register is left with a single definition.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  if (SrcTy == S1) {
    // A signed single bit is either 0 or -1.
    auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != S64)
    return UnableToLegalize;

  if (DstTy == S32) {
    // signed cl2f(long l) {
    //   long s = l >> 63;
    //   float r = cul2f((l + s) ^ s);
    //   return s ? -r : r;
    // }
    Register L = Src;
    auto SignBit = MIRBuilder.buildConstant(S64, 63);
    auto S = MIRBuilder.buildAShr(S64, L, SignBit);

    // (l + s) ^ s is the absolute value of l when s is the sign mask.
    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
    auto R = MIRBuilder.buildUITOFP(S32, Xor);

    auto RNeg = MIRBuilder.buildFNeg(S32, R);
    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
                                            MIRBuilder.buildConstant(S64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);

    // Dst is now defined by the select above; drop the original instruction.
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
|
2019-07-02 01:18:03 +08:00
|
|
|
|
2019-08-30 13:44:02 +08:00
|
|
|
/// Lower a floating-point-to-unsigned-integer conversion in terms of the
/// signed conversion. Only s32/s64 sources and s32/s64 destinations are
/// handled. Inputs below the sign-bit threshold of the destination type use
/// the signed conversion directly; larger inputs are converted after
/// subtracting the threshold, and the top bit is then put back with an XOR.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  const Register Result = MI.getOperand(0).getReg();
  const Register Source = MI.getOperand(1).getReg();
  const LLT ResultTy = MRI.getType(Result);
  const LLT SourceTy = MRI.getType(Source);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  const bool SourceOk = SourceTy == S32 || SourceTy == S64;
  if (!SourceOk)
    return UnableToLegalize;
  const bool ResultOk = ResultTy == S32 || ResultTy == S64;
  if (!ResultOk)
    return UnableToLegalize;

  // FPTOSI gives same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.

  // 2^Exp as an integer (the sign-bit mask of the result type) and as a
  // floating-point value of the source type.
  const unsigned SourceBits = SourceTy.getSizeInBits();
  const APInt SignBitMask = APInt::getSignMask(ResultTy.getSizeInBits());
  const auto &SourceSemantics =
      SourceBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEdouble();
  APFloat ThresholdFP(SourceSemantics, APInt::getNullValue(SourceBits));
  ThresholdFP.convertFromAPInt(SignBitMask, false,
                               APFloat::rmNearestTiesToEven);

  auto DirectConv = MIRBuilder.buildFPTOSI(ResultTy, Source);

  auto Threshold = MIRBuilder.buildFConstant(SourceTy, ThresholdFP);
  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
  auto Reduced = MIRBuilder.buildFSub(SourceTy, Source, Threshold);
  auto ReducedConv = MIRBuilder.buildFPTOSI(ResultTy, Reduced);
  auto TopBit = MIRBuilder.buildConstant(ResultTy, SignBitMask);
  auto LargeConv = MIRBuilder.buildXor(ResultTy, ReducedConv, TopBit);

  const LLT S1 = LLT::scalar(1);

  auto BelowThreshold =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Source, Threshold);
  MIRBuilder.buildSelect(Result, BelowThreshold, DirectConv, LargeConv);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2020-01-05 06:09:48 +08:00
|
|
|
/// Expand a floating-point to signed-integer conversion into integer bit
/// manipulation of the source value.
///
/// Only a 32-bit source scalar type with a 64-bit destination scalar type is
/// handled; anything else yields UnableToLegalize. On success the original
/// instruction is erased and Legalized is returned.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  // FIXME: Only f32 to i64 conversions are supported.
  const bool IsF32ToI64 = SrcTy.getScalarType() == LLT::scalar(32) &&
                          DstTy.getScalarType() == LLT::scalar(64);
  if (!IsF32ToI64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion.
  // The algorithm follows compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c

  const unsigned SrcBits = SrcTy.getScalarSizeInBits();

  // Isolate the exponent field and move it down to bit 0.
  auto ExpFieldMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExpFieldShift = MIRBuilder.buildConstant(SrcTy, 23);
  auto MaskedExp = MIRBuilder.buildAnd(SrcTy, SrcReg, ExpFieldMask);
  auto RawExp = MIRBuilder.buildLShr(SrcTy, MaskedExp, ExpFieldShift);

  // Smear the sign bit across the whole word, then widen it to the
  // destination type.
  auto SignBit = MIRBuilder.buildConstant(SrcTy, APInt::getSignMask(SrcBits));
  auto MaskedSign = MIRBuilder.buildAnd(SrcTy, SrcReg, SignBit);
  auto SignShift = MIRBuilder.buildConstant(SrcTy, SrcBits - 1);
  auto SignSrc = MIRBuilder.buildAShr(SrcTy, MaskedSign, SignShift);
  auto SignDst = MIRBuilder.buildSExt(DstTy, SignSrc);

  // Mantissa field OR'ed with the 0x00800000 bit, widened to the destination.
  auto MantFieldMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto MaskedMant = MIRBuilder.buildAnd(SrcTy, SrcReg, MantFieldMask);
  auto ImplicitBit = MIRBuilder.buildConstant(SrcTy, 0x00800000);
  auto MantSrc = MIRBuilder.buildOr(SrcTy, MaskedMant, ImplicitBit);
  auto MantDst = MIRBuilder.buildZExt(DstTy, MantSrc);

  // Subtract the bias (127) from the exponent and derive both candidate
  // shift amounts relative to the exponent field position (23).
  auto ExpBias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exp = MIRBuilder.buildSub(SrcTy, RawExp, ExpBias);
  auto LeftAmt = MIRBuilder.buildSub(SrcTy, Exp, ExpFieldShift);
  auto RightAmt = MIRBuilder.buildSub(SrcTy, ExpFieldShift, Exp);

  auto ShiftedLeft = MIRBuilder.buildShl(DstTy, MantDst, LeftAmt);
  auto ShiftedRight = MIRBuilder.buildLShr(DstTy, MantDst, RightAmt);

  // Pick the left shift when the exponent exceeds 23, the right shift
  // otherwise.
  const LLT BoolTy = LLT::scalar(1);
  auto ExpAboveField =
      MIRBuilder.buildICmp(CmpInst::ICMP_SGT, BoolTy, Exp, ExpFieldShift);
  auto Magnitude =
      MIRBuilder.buildSelect(DstTy, ExpAboveField, ShiftedLeft, ShiftedRight);

  // Apply the sign: (Magnitude ^ Sign) - Sign.
  auto Flipped = MIRBuilder.buildXor(DstTy, Magnitude, SignDst);
  auto Signed = MIRBuilder.buildSub(DstTy, Flipped, SignDst);

  // A negative unbiased exponent selects a zero result.
  auto SrcZero = MIRBuilder.buildConstant(SrcTy, 0);
  auto ExpNegative =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Exp, SrcZero);
  auto DstZero = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(DstReg, ExpNegative, DstZero, Signed);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-07-02 01:18:03 +08:00
|
|
|
/// Map an integer min/max opcode to the compare predicate that selects the
/// first operand: signed/unsigned less-than for the MIN opcodes and
/// signed/unsigned greater-than for the MAX opcodes. Any other opcode is
/// unreachable.
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
  if (Opc == TargetOpcode::G_SMIN)
    return CmpInst::ICMP_SLT;
  if (Opc == TargetOpcode::G_SMAX)
    return CmpInst::ICMP_SGT;
  if (Opc == TargetOpcode::G_UMIN)
    return CmpInst::ICMP_ULT;
  if (Opc == TargetOpcode::G_UMAX)
    return CmpInst::ICMP_UGT;
  llvm_unreachable("not in integer min/max");
}
|
|
|
|
|
|
|
|
/// Lower an integer min/max instruction to a compare followed by a select
/// between its two source operands.
///
/// \p TypeIdx and \p Ty are not used by this lowering. The original
/// instruction is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  Register Result = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();

  // The condition has one bit per element of the result type.
  LLT CondTy = MRI.getType(Result).changeElementSize(1);
  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());

  auto Cond = MIRBuilder.buildICmp(Pred, CondTy, LHS, RHS);
  MIRBuilder.buildSelect(Result, Cond, LHS, RHS);

  MI.eraseFromParent();
  return Legalized;
}
|
2019-07-10 07:34:29 +08:00
|
|
|
|
|
|
|
/// Lower a copysign-style instruction to integer bit operations:
///   Dst = (Src0 & ~SignMask) | (SignOf(Src1) & SignMask)
///
/// Operand 1 (Src0) supplies every bit except the sign bit; operand 2 (Src1)
/// supplies the sign bit. When the two operand types differ, the sign bit of
/// Src1 is first moved to the sign-bit position of Src0's type, either by
/// zero-extending and shifting left or by shifting right and truncating.
///
/// \p TypeIdx and \p Ty are not used by this lowering. The original
/// instruction is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const LLT Src0Ty = MRI.getType(Src0);
  const LLT Src1Ty = MRI.getType(Src1);

  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();

  auto SignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getSignMask(Src0Size));

  auto NotSignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  // Magnitude bits come from Src0.
  auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);

  // Value of type Src0Ty whose top bit is the sign bit of Src1. With
  // identical types this is Src1 itself; the previous code masked Src0 here,
  // which discarded the sign operand entirely.
  Register SignSrc = Src1;
  if (Src0Ty != Src1Ty) {
    if (Src0Size > Src1Size) {
      // Widen Src1 and move its sign bit up to Src0's sign-bit position.
      auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
      auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
      SignSrc = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt).getReg(0);
    } else {
      // Move Src1's sign bit down to Src0's sign-bit position, then narrow.
      auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
      auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
      SignSrc = MIRBuilder.buildTrunc(Src0Ty, Shift).getReg(0);
    }
  }

  auto And1 = MIRBuilder.buildAnd(Src0Ty, SignSrc, SignBitMask);
  auto Or = MIRBuilder.buildOr(Dst, And0, And1);

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything. Only the final OR inherits the original flags.
  if (unsigned Flags = MI.getFlags())
    Or->setFlags(Flags);

  MI.eraseFromParent();
  return Legalized;
}
|
2019-07-11 00:31:19 +08:00
|
|
|
|
|
|
|
/// Rewrite G_FMINNUM as G_FMINNUM_IEEE (any other opcode reaching here is
/// rewritten as G_FMAXNUM_IEEE), canonicalizing the inputs first when they
/// might be signaling NaNs.
///
/// The original instruction is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  unsigned IEEEOpc = TargetOpcode::G_FMAXNUM_IEEE;
  if (MI.getOpcode() == TargetOpcode::G_FMINNUM)
    IEEEOpc = TargetOpcode::G_FMINNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  // Wrap an input in G_FCANONICALIZE unless it is known never to be an sNaN.
  auto QuietIfNeeded = [&](Register In) -> Register {
    if (isKnownNeverSNaN(In, MRI))
      return In;
    return MIRBuilder.buildFCanonicalize(Ty, In, MI.getFlags()).getReg(0);
  };

  // Insert canonicalizes if it's possible we need to quiet to get correct
  // sNaN behavior. This is skipped entirely when the instruction carries the
  // no-NaNs flag.
  //
  // Note this must be done here, and not as an optimization combine, in the
  // absence of a dedicated quiet-snan instruction, because an omni-purpose
  // G_FCANONICALIZE is being used.
  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    LHS = QuietIfNeeded(LHS);
    RHS = QuietIfNeeded(RHS);
  }

  // Emit the *_IEEE form with the original instruction's flags.
  MIRBuilder.buildInstr(IEEEOpc, {Dst}, {LHS, RHS}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}
|
2019-08-02 03:10:05 +08:00
|
|
|
|
2019-09-13 08:44:35 +08:00
|
|
|
/// Expand G_FMAD a, b, c into G_FADD (G_FMUL a, b), c.
///
/// Both new instructions carry the flags of the original, which is then
/// erased. Returns Legalized.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
  const unsigned Flags = MI.getFlags();
  Register Result = MI.getOperand(0).getReg();
  LLT ResultTy = MRI.getType(Result);

  const MachineOperand &MulLHS = MI.getOperand(1);
  const MachineOperand &MulRHS = MI.getOperand(2);
  const MachineOperand &Addend = MI.getOperand(3);

  auto Product = MIRBuilder.buildFMul(ResultTy, MulLHS, MulRHS, Flags);
  MIRBuilder.buildFAdd(Result, Product, Addend, Flags);

  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-12-25 03:49:31 +08:00
|
|
|
/// Lower the instruction to a truncation plus a conditional -1.0 adjustment:
///
///   result = trunc(src);
///   if (src < 0.0 && src != result)
///     result += -1.0;
///
/// NOTE(review): this sequence rounds toward negative infinity (floor-like);
/// confirm that this is the intended semantics for the opcode routed here.
///
/// The original instruction is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
  const unsigned Flags = MI.getFlags();
  Register Result = MI.getOperand(0).getReg();
  Register Input = MI.getOperand(1).getReg();

  LLT ValTy = MRI.getType(Result);
  const LLT BoolTy = ValTy.changeElementSize(1);

  auto FPZero = MIRBuilder.buildFConstant(ValTy, 0.0);
  auto Truncated = MIRBuilder.buildIntrinsicTrunc(ValTy, Input, Flags);

  auto IsNegative = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, BoolTy,
                                         Input, FPZero, Flags);
  auto HasFraction = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, BoolTy,
                                          Input, Truncated, Flags);
  auto NeedsAdjust = MIRBuilder.buildAnd(BoolTy, IsNegative, HasFraction);

  // Signed conversion of the 1-bit condition: a set bit converts to -1.0,
  // a clear bit to 0.0.
  auto Adjust = MIRBuilder.buildSITOFP(ValTy, NeedsAdjust);

  MIRBuilder.buildFAdd(Result, Truncated, Adjust);
  MI.eraseFromParent();
  return Legalized;
}
|
|
|
|
|
2019-08-02 03:10:05 +08:00
|
|
|
/// Lower an unmerge that splits a vector into its scalar elements by
/// bitcasting the vector to one wide integer and extracting each element
/// with a logical shift right followed by a truncate.
///
/// Returns UnableToLegalize unless the destinations are scalars whose type
/// equals the element type of the vector source. On success the original
/// instruction is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  // All operands but the last are destinations; the last is the source.
  const unsigned NumResults = MI.getNumOperands() - 1;
  const Register Source = MI.getOperand(NumResults).getReg();
  LLT SourceTy = MRI.getType(Source);

  Register FirstResult = MI.getOperand(0).getReg();
  LLT ResultTy = MRI.getType(FirstResult);

  // Only the scalarizing form is expanded here.
  if (ResultTy.isVector() || !SourceTy.isVector() ||
      SourceTy.getElementType() != ResultTy)
    return UnableToLegalize;

  // Expand scalarizing unmerge as bitcast to integer and shift.
  LLT WideIntTy = LLT::scalar(SourceTy.getSizeInBits());
  Register WideInt = MIRBuilder.buildBitcast(WideIntTy, Source).getReg(0);

  // Element 0 occupies the low bits, so no shift is needed for it.
  MIRBuilder.buildTrunc(FirstResult, WideInt);

  const unsigned EltBits = ResultTy.getSizeInBits();
  for (unsigned Elt = 1; Elt != NumResults; ++Elt) {
    const unsigned BitOffset = Elt * EltBits;
    auto Amount = MIRBuilder.buildConstant(WideIntTy, BitOffset);
    auto Shifted = MIRBuilder.buildLShr(WideIntTy, WideInt, Amount);
    MIRBuilder.buildTrunc(MI.getOperand(Elt), Shifted);
  }

  MI.eraseFromParent();
  return Legalized;
}
|
2019-08-14 00:09:07 +08:00
|
|
|
|
|
|
|
/// Lower a shuffle to per-element extracts feeding a build-vector, or to a
/// plain copy when the destination is a scalar.
///
/// Negative mask entries produce an undef element. Returns UnableToLegalize
/// for a scalar destination with vector sources; otherwise the original
/// instruction is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register Result = MI.getOperand(0).getReg();
  Register FirstSrc = MI.getOperand(1).getReg();
  Register SecondSrc = MI.getOperand(2).getReg();
  LLT SrcTy = MRI.getType(FirstSrc);
  LLT ResultTy = MRI.getType(Result);
  LLT IndexTy = LLT::scalar(32);

  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();

  if (ResultTy.isScalar()) {
    if (SrcTy.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(ShuffleMask.size() == 1 && "Expected a single mask element");
    Register Chosen;
    switch (ShuffleMask[0]) {
    case 0:
      Chosen = FirstSrc;
      break;
    case 1:
      Chosen = SecondSrc;
      break;
    default:
      // Any index other than 0 or 1 yields undef.
      Chosen = MIRBuilder.buildUndef(ResultTy).getReg(0);
      break;
    }
    MIRBuilder.buildCopy(Result, Chosen);
    MI.eraseFromParent();
    return Legalized;
  }

  // A single undef element is created lazily and shared by all negative mask
  // entries.
  Register SharedUndef;
  SmallVector<Register, 32> Elements;
  LLT ElementTy = ResultTy.getElementType();

  for (int MaskIdx : ShuffleMask) {
    if (MaskIdx < 0) {
      if (!SharedUndef.isValid())
        SharedUndef = MIRBuilder.buildUndef(ElementTy).getReg(0);
      Elements.push_back(SharedUndef);
    } else if (SrcTy.isScalar()) {
      // Scalar sources: index 0 names the first source, anything else the
      // second.
      Elements.push_back(MaskIdx == 0 ? FirstSrc : SecondSrc);
    } else {
      // Vector sources: indices below the element count address the first
      // source, the rest address the second source after rebasing.
      int SrcElts = SrcTy.getNumElements();
      const bool FromFirst = MaskIdx < SrcElts;
      Register Vec = FromFirst ? FirstSrc : SecondSrc;
      int Lane = FromFirst ? MaskIdx : MaskIdx - SrcElts;
      auto LaneCst = MIRBuilder.buildConstant(IndexTy, Lane);
      auto Elt = MIRBuilder.buildExtractVectorElement(ElementTy, Vec, LaneCst);
      Elements.push_back(Elt.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(Result, Elements);
  MI.eraseFromParent();
  return Legalized;
}
|
2019-08-28 03:54:27 +08:00
|
|
|
|
|
|
|
/// Lower a dynamic stack allocation to explicit stack-pointer arithmetic:
/// read the stack pointer, subtract the allocation size, optionally mask the
/// result for alignment, then write the new value back to the stack pointer
/// and to the destination.
///
/// The original instruction is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  Register Result = MI.getOperand(0).getReg();
  Register SizeReg = MI.getOperand(1).getReg();
  unsigned Alignment = MI.getOperand(2).getImm();

  const MachineFunction &Fn = *MI.getMF();
  const TargetLowering &Lowering = *Fn.getSubtarget().getTargetLowering();

  LLT PointerTy = MRI.getType(Result);
  LLT IntTy = LLT::scalar(PointerTy.getSizeInBits());

  Register StackPtr = Lowering.getStackPointerRegisterToSaveRestore();
  auto SPAsPtr = MIRBuilder.buildCopy(PointerTy, StackPtr);
  auto SPAsInt = MIRBuilder.buildCast(IntTy, SPAsPtr);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto NewSP = MIRBuilder.buildSub(IntTy, SPAsInt, SizeReg);
  if (Alignment) {
    // AND with the negated alignment value.
    APInt Mask(IntTy.getSizeInBits(), Alignment, true);
    Mask.negate();
    auto MaskCst = MIRBuilder.buildConstant(IntTy, Mask);
    NewSP = MIRBuilder.buildAnd(IntTy, NewSP, MaskCst);
  }

  auto NewSPAsPtr = MIRBuilder.buildCast(PointerTy, NewSP);
  MIRBuilder.buildCopy(StackPtr, NewSPAsPtr);
  MIRBuilder.buildCopy(Result, NewSPAsPtr);

  MI.eraseFromParent();
  return Legalized;
}
|
2019-10-06 09:37:35 +08:00
|
|
|
|
|
|
|
/// Lower an extract of a scalar at a constant bit offset to a logical shift
/// right followed by a truncate.
///
/// Handled when the destination is a scalar and the source is either a
/// scalar or a vector whose element type equals the destination type (the
/// vector is first bitcast to one wide integer). Any other combination
/// returns UnableToLegalize. On success the original instruction is erased
/// and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  Register Result = MI.getOperand(0).getReg();
  Register Source = MI.getOperand(1).getReg();
  unsigned BitOffset = MI.getOperand(2).getImm();

  LLT ResultTy = MRI.getType(Result);
  LLT SourceTy = MRI.getType(Source);

  const bool CanLower =
      ResultTy.isScalar() &&
      (SourceTy.isScalar() ||
       (SourceTy.isVector() && ResultTy == SourceTy.getElementType()));
  if (!CanLower)
    return UnableToLegalize;

  // Work on an integer view of the source.
  LLT WideTy = SourceTy;
  if (!SourceTy.isScalar()) {
    WideTy = LLT::scalar(SourceTy.getSizeInBits());
    Source = MIRBuilder.buildBitcast(WideTy, Source).getReg(0);
  }

  // A zero offset needs no shift at all.
  Register Positioned = Source;
  if (BitOffset != 0) {
    auto Amount = MIRBuilder.buildConstant(WideTy, BitOffset);
    Positioned = MIRBuilder.buildLShr(WideTy, Source, Amount).getReg(0);
  }
  MIRBuilder.buildTrunc(Result, Positioned);

  MI.eraseFromParent();
  return Legalized;
}
|
2019-10-08 03:13:27 +08:00
|
|
|
|
|
|
|
/// Lower an insert of a scalar at a constant bit offset to mask-and-or
/// integer arithmetic:
///   Dst = (Src & Mask) | (zext(InsertSrc) << Offset)
///
/// Handled when the inserted value is a scalar and the container is either a
/// scalar or a vector whose element type equals the inserted type (the
/// vector is first bitcast to one wide integer). Any other combination
/// returns UnableToLegalize. On success the original instruction is erased
/// and Legalized is returned.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  Register Result = MI.getOperand(0).getReg();
  Register Container = MI.getOperand(1).getReg();
  Register Piece = MI.getOperand(2).getReg();
  uint64_t BitOffset = MI.getOperand(3).getImm();

  LLT ContainerTy = MRI.getType(Container);
  LLT PieceTy = MRI.getType(Piece);

  const bool CanLower =
      PieceTy.isScalar() &&
      (ContainerTy.isScalar() ||
       (ContainerTy.isVector() && ContainerTy.getElementType() == PieceTy));
  if (!CanLower)
    return UnableToLegalize;

  // Work on an integer view of the container.
  LLT WideTy = ContainerTy;
  if (!ContainerTy.isScalar()) {
    WideTy = LLT::scalar(ContainerTy.getSizeInBits());
    Container = MIRBuilder.buildBitcast(WideTy, Container).getReg(0);
  }

  // Widen the inserted value and move it to its final bit position.
  Register Positioned = MIRBuilder.buildZExt(WideTy, Piece).getReg(0);
  if (BitOffset != 0) {
    auto Amount = MIRBuilder.buildConstant(WideTy, BitOffset);
    Positioned = MIRBuilder.buildShl(WideTy, Positioned, Amount).getReg(0);
  }

  // Wrapped bit range running from the end of the inserted field round to
  // its start, used to mask the container before merging.
  APInt KeepBits = APInt::getBitsSetWithWrap(
      ContainerTy.getSizeInBits(), BitOffset + PieceTy.getSizeInBits(),
      BitOffset);

  auto KeepCst = MIRBuilder.buildConstant(WideTy, KeepBits);
  auto Cleared = MIRBuilder.buildAnd(WideTy, Container, KeepCst);
  auto Merged = MIRBuilder.buildOr(WideTy, Cleared, Positioned);

  MIRBuilder.buildBitcast(Result, Merged);
  MI.eraseFromParent();
  return Legalized;
}
|
2019-10-17 04:46:32 +08:00
|
|
|
|
|
|
|
/// Lower a signed add/sub-with-overflow instruction to a plain add or sub
/// plus two compares and an XOR that compute the overflow bit.
///
/// Operand 0 receives the arithmetic result and operand 1 the overflow flag.
/// The original instruction is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  Register Value = MI.getOperand(0).getReg();
  Register Overflow = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();

  LLT ValueTy = MRI.getType(Value);
  LLT FlagTy = MRI.getType(Overflow);

  // G_SADDO is the addition form; everything else is treated as subtraction.
  const bool Adding = MI.getOpcode() == TargetOpcode::G_SADDO;
  const CmpInst::Predicate RHSPred =
      Adding ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT;

  if (Adding)
    MIRBuilder.buildAdd(Value, LHS, RHS);
  else
    MIRBuilder.buildSub(Value, LHS, RHS);

  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.

  auto ZeroCst = MIRBuilder.buildConstant(ValueTy, 0);

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  auto ResultBelowLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, FlagTy, Value, LHS);
  auto RHSCondition = MIRBuilder.buildICmp(RHSPred, FlagTy, RHS, ZeroCst);

  // Overflow occurred exactly when the two conditions disagree.
  MIRBuilder.buildXor(Overflow, RHSCondition, ResultBelowLHS);
  MI.eraseFromParent();
  return Legalized;
}
|
2019-12-30 18:13:22 +08:00
|
|
|
|
|
|
|
/// Lower a byte swap to shifts, masks and ORs.
///
/// The outermost byte pair is exchanged with two full-width shifts; each
/// remaining pair (i-th lowest byte with i-th highest byte) is exchanged with
/// a mask-and-shift in each direction. The destination register is attached
/// to the final OR. The original instruction is erased and Legalized is
/// returned.
///
/// NOTE(review): sizes are taken from the whole type of the source register;
/// this looks like it is written for scalar types only - confirm.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned SizeInBytes = Ty.getSizeInBytes();
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;

  // Swap most and least significant byte, set remaining bytes in Res to zero.
  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);

  // Set i-th high/low byte in Res to i-th low/high byte from Src.
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
    // Build the mask directly as an APInt bit range: the previous
    // `0xFF << (i * 8)` was evaluated in `int`, which produced a
    // sign-extended (wrong) mask for i == 3 and an out-of-range shift for
    // larger i.
    APInt APMask = APInt::getBitsSet(SizeInBytes * 8, i * 8, i * 8 + 8);
    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
    // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
    // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
  }
  // Make the last OR define the original destination register.
  Res.getInstr()->getOperand(0).setReg(Dst);

  MI.eraseFromParent();
  return Legalized;
}
|
2019-12-31 01:06:29 +08:00
|
|
|
|
|
|
|
//{ (Src & Mask) >> N } | { (Src << N) & Mask }
|
|
|
|
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
|
|
|
|
MachineInstrBuilder Src, APInt Mask) {
|
|
|
|
const LLT Ty = Dst.getLLTTy(*B.getMRI());
|
|
|
|
MachineInstrBuilder C_N = B.buildConstant(Ty, N);
|
|
|
|
MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
|
|
|
|
auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
|
|
|
|
auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
|
|
|
|
return B.buildOr(Dst, LHS, RHS);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lower a bit reversal to a G_BSWAP followed by three SwapN steps that
/// exchange 4-bit, 2-bit and 1-bit groups. The last step writes the
/// destination register. The original instruction is erased and Legalized is
/// returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  Register Result = MI.getOperand(0).getReg();
  Register Input = MI.getOperand(1).getReg();
  const LLT ValTy = MRI.getType(Input);
  const unsigned NumBits = ValTy.getSizeInBits();

  MachineInstrBuilder ByteSwapped =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {ValTy}, {Input});

  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
  //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
  const APInt NibbleMask = APInt::getSplat(NumBits, APInt(8, 0xF0));
  MachineInstrBuilder NibblesSwapped =
      SwapN(4, ValTy, MIRBuilder, ByteSwapped, NibbleMask);

  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
  const APInt PairMask = APInt::getSplat(NumBits, APInt(8, 0xCC));
  MachineInstrBuilder PairsSwapped =
      SwapN(2, ValTy, MIRBuilder, NibblesSwapped, PairMask);

  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
  const APInt BitMask = APInt::getSplat(NumBits, APInt(8, 0xAA));
  SwapN(1, Result, MIRBuilder, PairsSwapped, BitMask);

  MI.eraseFromParent();
  return Legalized;
}
|
2019-12-28 08:26:51 +08:00
|
|
|
|
|
|
|
/// Lower a named-register read or write to a COPY from or to the physical
/// register that the target resolves the name to.
///
/// For G_READ_REGISTER the value register is operand 0 and the name metadata
/// is operand 1; for any other opcode reaching here the two are swapped.
/// Returns UnableToLegalize when the target does not return a valid register
/// for the name; otherwise the original instruction is erased and Legalized
/// is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
  MachineFunction &Fn = MIRBuilder.getMF();
  const TargetSubtargetInfo &Subtarget = Fn.getSubtarget();
  const TargetLowering *Lowering = Subtarget.getTargetLowering();

  const bool Reading = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  const int NameIdx = Reading ? 1 : 0;
  const int ValueIdx = Reading ? 0 : 1;

  Register Value = MI.getOperand(ValueIdx).getReg();
  const LLT ValueTy = MRI.getType(Value);

  // The register name is the first operand of the metadata node.
  const MDNode *NameNode =
      cast<MDNode>(MI.getOperand(NameIdx).getMetadata());
  const MDString *Name = cast<MDString>(NameNode->getOperand(0));

  Register Physical =
      Lowering->getRegisterByName(Name->getString().data(), ValueTy, Fn);
  if (!Physical.isValid())
    return UnableToLegalize;

  // Reads copy out of the physical register; writes copy into it.
  const Register CopyDst = Reading ? Value : Physical;
  const Register CopySrc = Reading ? Physical : Value;
  MIRBuilder.buildCopy(CopyDst, CopySrc);

  MI.eraseFromParent();
  return Legalized;
}
|