llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp

476 lines
18 KiB
C++

//===- MVEGatherScatterLowering.cpp - Gather/Scatter lowering -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// This pass custom lowers llvm.gather and llvm.scatter instructions to
/// arm.mve.gather and arm.mve.scatter intrinsics, optimising the code to
/// produce a better final result as we go.
//
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <algorithm>
#include <cassert>
using namespace llvm;
#define DEBUG_TYPE "mve-gather-scatter-lowering"
cl::opt<bool> EnableMaskedGatherScatters(
"enable-arm-maskedgatscat", cl::Hidden, cl::init(false),
cl::desc("Enable the generation of masked gathers and scatters"));
namespace {
class MVEGatherScatterLowering : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
explicit MVEGatherScatterLowering() : FunctionPass(ID) {
initializeMVEGatherScatterLoweringPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
StringRef getPassName() const override {
return "MVE gather/scatter lowering";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<TargetPassConfig>();
FunctionPass::getAnalysisUsage(AU);
}
private:
// Check this is a valid gather with correct alignment
bool isLegalTypeAndAlignment(unsigned NumElements, unsigned ElemSize,
unsigned Alignment);
// Check whether Ptr is hidden behind a bitcast and look through it
void lookThroughBitcast(Value *&Ptr);
// Check for a getelementptr and deduce base and offsets from it, on success
// returning the base directly and the offsets indirectly using the Offsets
// argument
Value *checkGEP(Value *&Offsets, Type *Ty, Value *Ptr, IRBuilder<> &Builder);
// Compute the scale of this gather/scatter instruction
int computeScale(unsigned GEPElemSize, unsigned MemoryElemSize);
bool lowerGather(IntrinsicInst *I);
// Create a gather from a base + vector of offsets
Value *tryCreateMaskedGatherOffset(IntrinsicInst *I, Value *Ptr,
Instruction *&Root, IRBuilder<> &Builder);
// Create a gather from a vector of pointers
Value *tryCreateMaskedGatherBase(IntrinsicInst *I, Value *Ptr,
IRBuilder<> &Builder);
bool lowerScatter(IntrinsicInst *I);
// Create a scatter to a base + vector of offsets
Value *tryCreateMaskedScatterOffset(IntrinsicInst *I, Value *Ptr,
IRBuilder<> &Builder);
// Create a scatter to a vector of pointers
Value *tryCreateMaskedScatterBase(IntrinsicInst *I, Value *Ptr,
IRBuilder<> &Builder);
};
} // end anonymous namespace
char MVEGatherScatterLowering::ID = 0;
INITIALIZE_PASS(MVEGatherScatterLowering, DEBUG_TYPE,
"MVE gather/scattering lowering pass", false, false)
Pass *llvm::createMVEGatherScatterLoweringPass() {
return new MVEGatherScatterLowering();
}
bool MVEGatherScatterLowering::isLegalTypeAndAlignment(unsigned NumElements,
unsigned ElemSize,
unsigned Alignment) {
if (((NumElements == 4 &&
(ElemSize == 32 || ElemSize == 16 || ElemSize == 8)) ||
(NumElements == 8 && (ElemSize == 16 || ElemSize == 8)) ||
(NumElements == 16 && ElemSize == 8)) &&
ElemSize / 8 <= Alignment)
return true;
LLVM_DEBUG(dbgs() << "masked gathers/scatters: instruction does not have "
<< "valid alignment or vector type \n");
return false;
}
Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, Type *Ty, Value *Ptr,
IRBuilder<> &Builder) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP) {
LLVM_DEBUG(
dbgs() << "masked gathers/scatters: no getelementpointer found\n");
return nullptr;
}
LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementpointer found."
<< " Looking at intrinsic for base + vector of offsets\n");
Value *GEPPtr = GEP->getPointerOperand();
if (GEPPtr->getType()->isVectorTy()) {
return nullptr;
}
if (GEP->getNumOperands() != 2) {
LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementptr with too many"
<< " operands. Expanding.\n");
return nullptr;
}
Offsets = GEP->getOperand(1);
// Paranoid check whether the number of parallel lanes is the same
assert(Ty->getVectorNumElements() ==
Offsets->getType()->getVectorNumElements());
// Only <N x i32> offsets can be integrated into an arm gather, any smaller
// type would have to be sign extended by the gep - and arm gathers can only
// zero extend. Additionally, the offsets do have to originate from a zext of
// a vector with element types smaller or equal the type of the gather we're
// looking at
if (Offsets->getType()->getScalarSizeInBits() != 32)
return nullptr;
if (ZExtInst *ZextOffs = dyn_cast<ZExtInst>(Offsets))
Offsets = ZextOffs->getOperand(0);
else if (!(Offsets->getType()->getVectorNumElements() == 4 &&
Offsets->getType()->getScalarSizeInBits() == 32))
return nullptr;
if (Ty != Offsets->getType()) {
if ((Ty->getScalarSizeInBits() <
Offsets->getType()->getScalarSizeInBits())) {
LLVM_DEBUG(dbgs() << "masked gathers/scatters: no correct offset type."
<< " Can't create intrinsic.\n");
return nullptr;
} else {
Offsets = Builder.CreateZExt(
Offsets, VectorType::getInteger(cast<VectorType>(Ty)));
}
}
// If none of the checks failed, return the gep's base pointer
LLVM_DEBUG(dbgs() << "masked gathers/scatters: found correct offsets\n");
return GEPPtr;
}
void MVEGatherScatterLowering::lookThroughBitcast(Value *&Ptr) {
// Look through bitcast instruction if #elements is the same
if (auto *BitCast = dyn_cast<BitCastInst>(Ptr)) {
Type *BCTy = BitCast->getType();
Type *BCSrcTy = BitCast->getOperand(0)->getType();
if (BCTy->getVectorNumElements() == BCSrcTy->getVectorNumElements()) {
LLVM_DEBUG(
dbgs() << "masked gathers/scatters: looking through bitcast\n");
Ptr = BitCast->getOperand(0);
}
}
}
int MVEGatherScatterLowering::computeScale(unsigned GEPElemSize,
unsigned MemoryElemSize) {
// This can be a 32bit load/store scaled by 4, a 16bit load/store scaled by 2,
// or a 8bit, 16bit or 32bit load/store scaled by 1
if (GEPElemSize == 32 && MemoryElemSize == 32)
return 2;
else if (GEPElemSize == 16 && MemoryElemSize == 16)
return 1;
else if (GEPElemSize == 8)
return 0;
LLVM_DEBUG(dbgs() << "masked gathers/scatters: incorrect scale. Can't "
<< "create intrinsic\n");
return -1;
}
bool MVEGatherScatterLowering::lowerGather(IntrinsicInst *I) {
using namespace PatternMatch;
LLVM_DEBUG(dbgs() << "masked gathers: checking transform preconditions\n");
// @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
// Attempt to turn the masked gather in I into a MVE intrinsic
// Potentially optimising the addressing modes as we do so.
Type *Ty = I->getType();
Value *Ptr = I->getArgOperand(0);
unsigned Alignment = cast<ConstantInt>(I->getArgOperand(1))->getZExtValue();
Value *Mask = I->getArgOperand(2);
Value *PassThru = I->getArgOperand(3);
if (!isLegalTypeAndAlignment(Ty->getVectorNumElements(),
Ty->getScalarSizeInBits(), Alignment))
return false;
lookThroughBitcast(Ptr);
assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
IRBuilder<> Builder(I->getContext());
Builder.SetInsertPoint(I);
Builder.SetCurrentDebugLocation(I->getDebugLoc());
Instruction *Root = I;
Value *Load = tryCreateMaskedGatherOffset(I, Ptr, Root, Builder);
if (!Load)
Load = tryCreateMaskedGatherBase(I, Ptr, Builder);
if (!Load)
return false;
if (!isa<UndefValue>(PassThru) && !match(PassThru, m_Zero())) {
LLVM_DEBUG(dbgs() << "masked gathers: found non-trivial passthru - "
<< "creating select\n");
Load = Builder.CreateSelect(Mask, Load, PassThru);
}
Root->replaceAllUsesWith(Load);
Root->eraseFromParent();
if (Root != I)
// If this was an extending gather, we need to get rid of the sext/zext
// sext/zext as well as of the gather itself
I->eraseFromParent();
LLVM_DEBUG(dbgs() << "masked gathers: successfully built masked gather\n");
return true;
}
Value *MVEGatherScatterLowering::tryCreateMaskedGatherBase(
IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder) {
using namespace PatternMatch;
Type *Ty = I->getType();
LLVM_DEBUG(dbgs() << "masked gathers: loading from vector of pointers\n");
if (Ty->getVectorNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
// Can't build an intrinsic for this
return nullptr;
Value *Mask = I->getArgOperand(2);
if (match(Mask, m_One()))
return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base,
{Ty, Ptr->getType()},
{Ptr, Builder.getInt32(0)});
else
return Builder.CreateIntrinsic(
Intrinsic::arm_mve_vldr_gather_base_predicated,
{Ty, Ptr->getType(), Mask->getType()},
{Ptr, Builder.getInt32(0), Mask});
}
Value *MVEGatherScatterLowering::tryCreateMaskedGatherOffset(
IntrinsicInst *I, Value *Ptr, Instruction *&Root, IRBuilder<> &Builder) {
using namespace PatternMatch;
Type *OriginalTy = I->getType();
Type *ResultTy = OriginalTy;
unsigned Unsigned = 1;
// The size of the gather was already checked in isLegalTypeAndAlignment;
// if it was not a full vector width an appropriate extend should follow.
auto *Extend = Root;
if (OriginalTy->getPrimitiveSizeInBits() < 128) {
// Only transform gathers with exactly one use
if (!I->hasOneUse())
return nullptr;
// The correct root to replace is the not the CallInst itself, but the
// instruction which extends it
Extend = cast<Instruction>(*I->users().begin());
if (isa<SExtInst>(Extend)) {
Unsigned = 0;
} else if (!isa<ZExtInst>(Extend)) {
LLVM_DEBUG(dbgs() << "masked gathers: extend needed but not provided. "
<< "Expanding\n");
return nullptr;
}
LLVM_DEBUG(dbgs() << "masked gathers: found an extending gather\n");
ResultTy = Extend->getType();
// The final size of the gather must be a full vector width
if (ResultTy->getPrimitiveSizeInBits() != 128) {
LLVM_DEBUG(dbgs() << "masked gathers: extending from the wrong type. "
<< "Expanding\n");
return nullptr;
}
}
Value *Offsets;
Value *BasePtr = checkGEP(Offsets, ResultTy, Ptr, Builder);
if (!BasePtr)
return nullptr;
int Scale = computeScale(
BasePtr->getType()->getPointerElementType()->getPrimitiveSizeInBits(),
OriginalTy->getScalarSizeInBits());
if (Scale == -1)
return nullptr;
Root = Extend;
Value *Mask = I->getArgOperand(2);
if (!match(Mask, m_One()))
return Builder.CreateIntrinsic(
Intrinsic::arm_mve_vldr_gather_offset_predicated,
{ResultTy, BasePtr->getType(), Offsets->getType(), Mask->getType()},
{BasePtr, Offsets, Builder.getInt32(OriginalTy->getScalarSizeInBits()),
Builder.getInt32(Scale), Builder.getInt32(Unsigned), Mask});
else
return Builder.CreateIntrinsic(
Intrinsic::arm_mve_vldr_gather_offset,
{ResultTy, BasePtr->getType(), Offsets->getType()},
{BasePtr, Offsets, Builder.getInt32(OriginalTy->getScalarSizeInBits()),
Builder.getInt32(Scale), Builder.getInt32(Unsigned)});
}
bool MVEGatherScatterLowering::lowerScatter(IntrinsicInst *I) {
using namespace PatternMatch;
LLVM_DEBUG(dbgs() << "masked scatters: checking transform preconditions\n");
// @llvm.masked.scatter.*(data, ptrs, alignment, mask)
// Attempt to turn the masked scatter in I into a MVE intrinsic
// Potentially optimising the addressing modes as we do so.
Value *Input = I->getArgOperand(0);
Value *Ptr = I->getArgOperand(1);
unsigned Alignment = cast<ConstantInt>(I->getArgOperand(2))->getZExtValue();
Type *Ty = Input->getType();
if (!isLegalTypeAndAlignment(Ty->getVectorNumElements(),
Ty->getScalarSizeInBits(), Alignment))
return false;
lookThroughBitcast(Ptr);
assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
IRBuilder<> Builder(I->getContext());
Builder.SetInsertPoint(I);
Builder.SetCurrentDebugLocation(I->getDebugLoc());
Value *Store = tryCreateMaskedScatterOffset(I, Ptr, Builder);
if (!Store)
Store = tryCreateMaskedScatterBase(I, Ptr, Builder);
if (!Store)
return false;
LLVM_DEBUG(dbgs() << "masked scatters: successfully built masked scatter\n");
I->replaceAllUsesWith(Store);
I->eraseFromParent();
return true;
}
Value *MVEGatherScatterLowering::tryCreateMaskedScatterBase(
IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder) {
using namespace PatternMatch;
Value *Input = I->getArgOperand(0);
Value *Mask = I->getArgOperand(3);
Type *Ty = Input->getType();
// Only QR variants allow truncating
if (!(Ty->getVectorNumElements() == 4 && Ty->getScalarSizeInBits() == 32)) {
// Can't build an intrinsic for this
return nullptr;
}
// int_arm_mve_vstr_scatter_base(_predicated) addr, offset, data(, mask)
LLVM_DEBUG(dbgs() << "masked scatters: storing to a vector of pointers\n");
if (match(Mask, m_One()))
return Builder.CreateIntrinsic(Intrinsic::arm_mve_vstr_scatter_base,
{Ptr->getType(), Input->getType()},
{Ptr, Builder.getInt32(0), Input});
else
return Builder.CreateIntrinsic(
Intrinsic::arm_mve_vstr_scatter_base_predicated,
{Ptr->getType(), Input->getType(), Mask->getType()},
{Ptr, Builder.getInt32(0), Input, Mask});
}
Value *MVEGatherScatterLowering::tryCreateMaskedScatterOffset(
IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder) {
using namespace PatternMatch;
Value *Input = I->getArgOperand(0);
Value *Mask = I->getArgOperand(3);
Type *InputTy = Input->getType();
Type *MemoryTy = InputTy;
LLVM_DEBUG(dbgs() << "masked scatters: getelementpointer found. Storing"
<< " to base + vector of offsets\n");
// If the input has been truncated, try to integrate that trunc into the
// scatter instruction (we don't care about alignment here)
if (TruncInst *Trunc = dyn_cast<TruncInst>(Input)) {
Value *PreTrunc = Trunc->getOperand(0);
Type *PreTruncTy = PreTrunc->getType();
if (PreTruncTy->getPrimitiveSizeInBits() == 128) {
Input = PreTrunc;
InputTy = PreTruncTy;
}
}
if (InputTy->getPrimitiveSizeInBits() != 128) {
LLVM_DEBUG(
dbgs() << "masked scatters: cannot create scatters for non-standard"
<< " input types. Expanding.\n");
return nullptr;
}
Value *Offsets;
Value *BasePtr = checkGEP(Offsets, InputTy, Ptr, Builder);
if (!BasePtr)
return nullptr;
int Scale = computeScale(
BasePtr->getType()->getPointerElementType()->getPrimitiveSizeInBits(),
MemoryTy->getScalarSizeInBits());
if (Scale == -1)
return nullptr;
if (!match(Mask, m_One()))
return Builder.CreateIntrinsic(
Intrinsic::arm_mve_vstr_scatter_offset_predicated,
{BasePtr->getType(), Offsets->getType(), Input->getType(),
Mask->getType()},
{BasePtr, Offsets, Input,
Builder.getInt32(MemoryTy->getScalarSizeInBits()),
Builder.getInt32(Scale), Mask});
else
return Builder.CreateIntrinsic(
Intrinsic::arm_mve_vstr_scatter_offset,
{BasePtr->getType(), Offsets->getType(), Input->getType()},
{BasePtr, Offsets, Input,
Builder.getInt32(MemoryTy->getScalarSizeInBits()),
Builder.getInt32(Scale)});
}
bool MVEGatherScatterLowering::runOnFunction(Function &F) {
if (!EnableMaskedGatherScatters)
return false;
auto &TPC = getAnalysis<TargetPassConfig>();
auto &TM = TPC.getTM<TargetMachine>();
auto *ST = &TM.getSubtarget<ARMSubtarget>(F);
if (!ST->hasMVEIntegerOps())
return false;
SmallVector<IntrinsicInst *, 4> Gathers;
SmallVector<IntrinsicInst *, 4> Scatters;
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
Gathers.push_back(II);
else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter)
Scatters.push_back(II);
}
}
bool Changed = false;
for (IntrinsicInst *I : Gathers)
Changed |= lowerGather(I);
for (IntrinsicInst *I : Scatters)
Changed |= lowerScatter(I);
return Changed;
}