//===- InstCombineCalls.cpp -----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall and visitInvoke functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumSimplified, "Number of library calls simplified");

static cl::opt<unsigned> UnfoldElementAtomicMemcpyMaxElements(
    "unfold-element-atomic-memcpy-max-elements",
    cl::init(16),
    cl::desc("Maximum number of elements in atomic memcpy the optimizer is "
             "allowed to unfold"));

/// Return the specified type promoted as it would be to pass through a va_arg
/// area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

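// Illustrative note (added, not from the original source): with the promotion
// rule above, getPromotedType(i8) and getPromotedType(i16) both return i32,
// while i32 and wider types are returned unchanged.
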
/// Return a constant boolean vector that has true elements in all positions
/// where the input constant data vector has an element with the sign bit set.
static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
  SmallVector<Constant *, 32> BoolVec;
  IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
    Constant *Elt = V->getElementAsConstant(I);
    assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
           "Unexpected constant data vector element type");
    bool Sign = V->getElementType()->isIntegerTy()
                    ? cast<ConstantInt>(Elt)->isNegative()
                    : cast<ConstantFP>(Elt)->isNegative();
    BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
  }
  return ConstantVector::get(BoolVec);
}

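// Illustrative example for getNegativeIsTrueBoolVec above (added note, not
// from the original source): a constant <4 x i32> <i32 -1, i32 3, i32 -7,
// i32 0> maps to <4 x i1> <i1 true, i1 false, i1 true, i1 false>; the same
// rule applies to floating-point elements via their sign bits.
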
Instruction *
InstCombiner::SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI) {
  // Try to unfold this intrinsic into sequence of explicit atomic loads and
  // stores.
  // First check that number of elements is compile time constant.
  auto *NumElementsCI = dyn_cast<ConstantInt>(AMI->getNumElements());
  if (!NumElementsCI)
    return nullptr;

  // Check that there are not too many elements.
  uint64_t NumElements = NumElementsCI->getZExtValue();
  if (NumElements >= UnfoldElementAtomicMemcpyMaxElements)
    return nullptr;

  // Don't unfold into illegal integers.
  // (Note: the value below is a size in bits, as expected by both
  // isLegalInteger and getIntNPtrTy.)
  uint64_t ElementSizeInBytes = AMI->getElementSizeInBytes() * 8;
  if (!getDataLayout().isLegalInteger(ElementSizeInBytes))
    return nullptr;

  // Cast source and destination to the correct type. Intrinsic input arguments
  // are usually represented as i8*.
  // Often operands will be explicitly cast to i8* and we can just strip
  // those casts instead of inserting new ones. However, it's easier to rely on
  // other InstCombine rules which will cover trivial cases anyway.
  Value *Src = AMI->getRawSource();
  Value *Dst = AMI->getRawDest();
  Type *ElementPointerType = Type::getIntNPtrTy(
      AMI->getContext(), ElementSizeInBytes, Src->getType()->getPointerAddressSpace());

  Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType,
                                                "memcpy_unfold.src_casted");
  Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType,
                                                "memcpy_unfold.dst_casted");

  for (uint64_t i = 0; i < NumElements; ++i) {
    // Get current element addresses.
    ConstantInt *ElementIdxCI =
        ConstantInt::get(AMI->getContext(), APInt(64, i));
    Value *SrcElementAddr =
        Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
    Value *DstElementAddr =
        Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");

    // Load from the source. Transfer alignment information and mark load as
    // unordered atomic.
    LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val");
    Load->setOrdering(AtomicOrdering::Unordered);
    // We know the alignment of the first element. It is also guaranteed by the
    // verifier that the element size is less than or equal to the first
    // element's alignment and that both of these values are powers of two.
    // This means that all subsequent accesses are at least element-size
    // aligned.
    // TODO: We can infer better alignment but there is no evidence that this
    // will matter.
    Load->setAlignment(i == 0 ? AMI->getSrcAlignment()
                              : AMI->getElementSizeInBytes());
    Load->setDebugLoc(AMI->getDebugLoc());

    // Store loaded value via unordered atomic store.
    StoreInst *Store = Builder->CreateStore(Load, DstElementAddr);
    Store->setOrdering(AtomicOrdering::Unordered);
    Store->setAlignment(i == 0 ? AMI->getDstAlignment()
                               : AMI->getElementSizeInBytes());
    Store->setDebugLoc(AMI->getDebugLoc());
  }

  // Set the number of elements of the copy to 0, it will be deleted on the
  // next iteration.
  AMI->setNumElements(Constant::getNullValue(NumElementsCI->getType()));
  return AMI;
}

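// Illustrative effect of SimplifyElementAtomicMemCpy above (added note, not
// from the original source): an element-atomic memcpy of four 32-bit elements
// is rewritten into four pairs of unordered atomic i32 loads and stores, after
// which the intrinsic's element count is set to zero so the call itself gets
// erased on the next InstCombine iteration.
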
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT);
  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT);
  unsigned MinAlign = std::min(DstAlign, SrcAlign);
  unsigned CopyAlign = MI->getAlignment();

  if (CopyAlign < MinAlign) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false));
    return MI;
  }

  // If the MemCpyInst length is 1/2/4/8 bytes then replace the memcpy with a
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
  if (!MemOpLength) return nullptr;

  // Source and destination pointer types are always "i8*" for the intrinsic.
  // See if the size is something we can handle with a single primitive
  // load/store. A single load+store correctly handles overlapping memory in
  // the memmove case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already.");

  if (Size > 8 || (Size&(Size-1)))
    return nullptr;  // If not 1/2/4/8 bytes, exit.

  // Use an integer load+store unless we can find something better.
  unsigned SrcAddrSp =
    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);

  // If the memcpy has metadata describing the members, see if we can get the
  // TBAA tag describing our copy.
  MDNode *CopyMD = nullptr;
  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
    if (M->getNumOperands() == 3 && M->getOperand(0) &&
        mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
        mdconst::extract<ConstantInt>(M->getOperand(0))->isNullValue() &&
        M->getOperand(1) &&
        mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
        mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
        Size &&
        M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
      CopyMD = cast<MDNode>(M->getOperand(2));
  }

  // If the memcpy/memmove provides better alignment info than we can
  // infer, use it.
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);

  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
  L->setAlignment(SrcAlign);
  if (CopyMD)
    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  MDNode *LoopMemParallelMD =
    MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  if (LoopMemParallelMD)
    L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);

  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
  S->setAlignment(DstAlign);
  if (CopyMD)
    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  if (LoopMemParallelMD)
    S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
  return MI;
}

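// Illustrative example for SimplifyMemTransfer above (added note, not from
// the original source): a memcpy whose length operand is the constant 8
// becomes a single i64 load from the source followed by a single i64 store to
// the destination, carrying over the known alignments and any TBAA tag
// recovered from !tbaa.struct metadata.
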
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
  if (MI->getAlignment() < Alignment) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      Alignment, false));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return nullptr;
  uint64_t Len = LenC->getLimitedValue();
  Alignment = MI->getAlignment();
  assert(Len && "0-sized memory setting should be removed already.");

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.

    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);

    // Alignment 0 is identity for alignment 1 for memset, but not store.
    if (Alignment == 0) Alignment = 1;

    // Extract the fill value and store.
    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
    StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                        MI->isVolatile());
    S->setAlignment(Alignment);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return nullptr;
}

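// Illustrative example for SimplifyMemSet above (added note, not from the
// original source): a memset of 4 bytes with fill value 0x2A becomes a single
// i32 store of 0x2A2A2A2A (the fill byte replicated by the
// 0x0101010101010101 multiply), after which the memset length is zeroed so
// the intrinsic is removed.
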
static Value *simplifyX86immShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;

  switch (II.getIntrinsicID()) {
  default: llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
    LogicalShift = false; ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
    LogicalShift = true; ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    LogicalShift = true; ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  // Simplify if count is constant.
  auto Arg1 = II.getArgOperand(1);
  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
  auto CInt = dyn_cast<ConstantInt>(Arg1);
  if (!CAZ && !CDV && !CInt)
    return nullptr;

  APInt Count(64, 0);
  if (CDV) {
    // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
    // operand to compute the shift amount.
    auto VT = cast<VectorType>(CDV->getType());
    unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
    assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
    unsigned NumSubElts = 64 / BitWidth;

    // Concatenate the sub-elements to create the 64-bit value.
    for (unsigned i = 0; i != NumSubElts; ++i) {
      unsigned SubEltIdx = (NumSubElts - 1) - i;
      auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
      Count = Count.shl(BitWidth);
      Count |= SubElt->getValue().zextOrTrunc(64);
    }
  }
  else if (CInt)
    Count = CInt->getValue();

  auto Vec = II.getArgOperand(0);
  auto VT = cast<VectorType>(Vec->getType());
  auto SVT = VT->getElementType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();

  // If shift-by-zero then just return the original value.
  if (Count == 0)
    return Vec;

  // Handle cases when Shift >= BitWidth.
  if (Count.uge(BitWidth)) {
    // If LogicalShift - just return zero.
    if (LogicalShift)
      return ConstantAggregateZero::get(VT);

    // If ArithmeticShift - clamp Shift to (BitWidth - 1).
    Count = APInt(64, BitWidth - 1);
  }

  // Get a constant vector of the same type as the first operand.
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}

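// Illustrative example for simplifyX86immShift above (added note, not from
// the original source):
//   %r = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 3)
// becomes
//   %r = ashr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>
// An arithmetic shift amount of 32 or more is clamped to 31, while a logical
// shift by that much folds directly to zero.
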
// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
static Value *simplifyX86varShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;

  switch (II.getIntrinsicID()) {
  default: llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  // Simplify if all shift amounts are constant/undef.
  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
  if (!CShift)
    return nullptr;

  auto Vec = II.getArgOperand(0);
  auto VT = cast<VectorType>(II.getType());
  auto SVT = VT->getVectorElementType();
  int NumElts = VT->getNumElements();
  int BitWidth = SVT->getIntegerBitWidth();

  // Collect each element's shift amount.
  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
  bool AnyOutOfRange = false;
  SmallVector<int, 8> ShiftAmts;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (CElt && isa<UndefValue>(CElt)) {
      ShiftAmts.push_back(-1);
      continue;
    }

    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    if (!COp)
      return nullptr;

    // Handle out of range shifts.
    // If LogicalShift - set to BitWidth (special case).
    // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
    APInt ShiftVal = COp->getValue();
    if (ShiftVal.uge(BitWidth)) {
      AnyOutOfRange = LogicalShift;
      ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
      continue;
    }

    ShiftAmts.push_back((int)ShiftVal.getZExtValue());
  }

  // If all elements out of range or UNDEF, return vector of zeros/undefs.
  // ArithmeticShift should only hit this if they are all UNDEF.
  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
  if (all_of(ShiftAmts, OutOfRange)) {
    SmallVector<Constant *, 8> ConstantVec;
    for (int Idx : ShiftAmts) {
      if (Idx < 0) {
        ConstantVec.push_back(UndefValue::get(SVT));
      } else {
        assert(LogicalShift && "Logical shift expected");
        ConstantVec.push_back(ConstantInt::getNullValue(SVT));
      }
    }
    return ConstantVector::get(ConstantVec);
  }

  // We can't handle only some out of range values with generic logical shifts.
  if (AnyOutOfRange)
    return nullptr;

  // Build the shift amount constant vector.
  SmallVector<Constant *, 8> ShiftVecAmts;
  for (int Idx : ShiftAmts) {
    if (Idx < 0)
      ShiftVecAmts.push_back(UndefValue::get(SVT));
    else
      ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
  }
  auto ShiftVec = ConstantVector::get(ShiftVecAmts);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}

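// Illustrative example for simplifyX86varShift above (added note, not from
// the original source): an @llvm.x86.avx2.psrlv.d call whose shift operand is
// the constant <4 x i32> <i32 1, i32 2, i32 3, i32 4> becomes
//   %r = lshr <4 x i32> %v, <i32 1, i32 2, i32 3, i32 4>
// whereas a shift operand mixing in-range and out-of-range lanes is left
// alone, since a generic lshr cannot model the per-lane zeroing.
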
static Value *simplifyX86muldq(const IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  Type *ResTy = II.getType();
  assert(Arg0->getType()->getScalarSizeInBits() == 32 &&
         Arg1->getType()->getScalarSizeInBits() == 32 &&
         ResTy->getScalarSizeInBits() == 64 && "Unexpected muldq/muludq types");

  // muldq/muludq(undef, x) -> zero (matches generic mul behavior)
  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
    return ConstantAggregateZero::get(ResTy);

  // Constant folding.
  // PMULDQ  = (mul(vXi64 sext(shuffle<0,2,..>(Arg0)),
  //                vXi64 sext(shuffle<0,2,..>(Arg1))))
  // PMULUDQ = (mul(vXi64 zext(shuffle<0,2,..>(Arg0)),
  //                vXi64 zext(shuffle<0,2,..>(Arg1))))
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
    return nullptr;

  unsigned NumElts = ResTy->getVectorNumElements();
  assert(Arg0->getType()->getVectorNumElements() == (2 * NumElts) &&
         Arg1->getType()->getVectorNumElements() == (2 * NumElts) &&
         "Unexpected muldq/muludq types");

  unsigned IntrinsicID = II.getIntrinsicID();
  bool IsSigned = (Intrinsic::x86_sse41_pmuldq == IntrinsicID ||
                   Intrinsic::x86_avx2_pmul_dq == IntrinsicID ||
                   Intrinsic::x86_avx512_pmul_dq_512 == IntrinsicID);

  SmallVector<unsigned, 16> ShuffleMask;
  for (unsigned i = 0; i != NumElts; ++i)
    ShuffleMask.push_back(i * 2);

  auto *LHS = Builder.CreateShuffleVector(Arg0, Arg0, ShuffleMask);
  auto *RHS = Builder.CreateShuffleVector(Arg1, Arg1, ShuffleMask);

  if (IsSigned) {
    LHS = Builder.CreateSExt(LHS, ResTy);
    RHS = Builder.CreateSExt(RHS, ResTy);
  } else {
    LHS = Builder.CreateZExt(LHS, ResTy);
    RHS = Builder.CreateZExt(RHS, ResTy);
  }

  return Builder.CreateMul(LHS, RHS);
}

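// Illustrative example for simplifyX86muldq above (added note, not from the
// original source): with constant <4 x i32> operands, a pmuldq call is
// expanded into a shuffle keeping the even lanes (<0, 2>), a sign extension of
// each side to <2 x i64>, and a 64-bit multiply, all of which then constant
// fold; pmuludq uses zero extension instead.
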
static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC,
                              InstCombiner::BuilderTy &Builder, bool IsSigned) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  Type *ResTy = II.getType();

  // Fast all undef handling.
  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
    return UndefValue::get(ResTy);

  Type *ArgTy = Arg0->getType();
  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
  unsigned NumDstElts = ResTy->getVectorNumElements();
  unsigned NumSrcElts = ArgTy->getVectorNumElements();
  assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");

  unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
  assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
         "Unexpected packing types");

  // Constant folding.
  auto *Cst0 = dyn_cast<Constant>(Arg0);
  auto *Cst1 = dyn_cast<Constant>(Arg1);
  if (!Cst0 || !Cst1)
    return nullptr;

  SmallVector<Constant *, 32> Vals;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
      unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
      auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
      auto *COp = Cst->getAggregateElement(SrcIdx);
      if (COp && isa<UndefValue>(COp)) {
        Vals.push_back(UndefValue::get(ResTy->getScalarType()));
        continue;
      }

      auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
      if (!CInt)
        return nullptr;

      APInt Val = CInt->getValue();
      assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
             "Unexpected constant bitwidth");

      if (IsSigned) {
        // PACKSS: Truncate signed value with signed saturation.
        // Source values less than dst minint are saturated to minint.
        // Source values greater than dst maxint are saturated to maxint.
        if (Val.isSignedIntN(DstScalarSizeInBits))
          Val = Val.trunc(DstScalarSizeInBits);
        else if (Val.isNegative())
          Val = APInt::getSignedMinValue(DstScalarSizeInBits);
        else
          Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
      } else {
        // PACKUS: Truncate signed value with unsigned saturation.
        // Source values less than zero are saturated to zero.
        // Source values greater than dst maxuint are saturated to maxuint.
        if (Val.isIntN(DstScalarSizeInBits))
          Val = Val.trunc(DstScalarSizeInBits);
        else if (Val.isNegative())
          Val = APInt::getNullValue(DstScalarSizeInBits);
        else
          Val = APInt::getAllOnesValue(DstScalarSizeInBits);
      }

      Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
    }
  }

  return ConstantVector::get(Vals);
}

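// Illustrative saturation arithmetic for simplifyX86pack above (added note,
// not from the original source): packing the i16 values 300, -200, 5 and -5
// into i8 with PACKSS yields 127, -128, 5 and -5, since 300 exceeds the i8
// maximum and -200 is below the i8 minimum; PACKUS instead yields 255, 0, 5
// and 0, clamping negatives to zero.
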
static Value *simplifyX86movmsk(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  Value *Arg = II.getArgOperand(0);
  Type *ResTy = II.getType();
  Type *ArgTy = Arg->getType();

  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
  if (isa<UndefValue>(Arg))
    return Constant::getNullValue(ResTy);

  // We can't easily peek through x86_mmx types.
  if (!ArgTy->isVectorTy())
    return nullptr;

  auto *C = dyn_cast<Constant>(Arg);
  if (!C)
    return nullptr;

  // Extract signbits of the vector input and pack into integer result.
  APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
  for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
    auto *COp = C->getAggregateElement(I);
    if (!COp)
      return nullptr;
    if (isa<UndefValue>(COp))
      continue;

    auto *CInt = dyn_cast<ConstantInt>(COp);
    auto *CFp = dyn_cast<ConstantFP>(COp);
    if (!CInt && !CFp)
      return nullptr;

    if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
      Result.setBit(I);
  }

  return Constant::getIntegerValue(ResTy, Result);
}

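// Illustrative example for simplifyX86movmsk above (added note, not from the
// original source): movmskps of the constant
// <4 x float> <float -1.0, float 2.0, float -3.0, float 4.0> has sign bits set
// in lanes 0 and 2, so the call folds to the i32 constant 5 (0b0101).
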
static Value *simplifyX86insertps(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
  if (!CInt)
    return nullptr;

  VectorType *VecTy = cast<VectorType>(II.getType());
  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

  // The immediate permute control byte looks like this:
  //    [3:0] - zero mask for each 32-bit lane
  //    [5:4] - select one 32-bit destination lane
  //    [7:6] - select one 32-bit source lane

  uint8_t Imm = CInt->getZExtValue();
  uint8_t ZMask = Imm & 0xf;
  uint8_t DestLane = (Imm >> 4) & 0x3;
  uint8_t SourceLane = (Imm >> 6) & 0x3;

  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);

  // If all zero mask bits are set, this was just a weird way to
  // generate a zero vector.
  if (ZMask == 0xf)
    return ZeroVector;

  // Initialize by passing all of the first source bits through.
  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };

  // We may replace the second operand with the zero vector.
  Value *V1 = II.getArgOperand(1);

  if (ZMask) {
    // If the zero mask is being used with a single input or the zero mask
    // overrides the destination lane, this is a shuffle with the zero vector.
    if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
        (ZMask & (1 << DestLane))) {
      V1 = ZeroVector;
      // We may still move 32-bits of the first source vector from one lane
      // to another.
      ShuffleMask[DestLane] = SourceLane;
      // The zero mask may override the previous insert operation.
      for (unsigned i = 0; i < 4; ++i)
        if ((ZMask >> i) & 0x1)
          ShuffleMask[i] = i + 4;
    } else {
      // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
      return nullptr;
    }
  } else {
    // Replace the selected destination lane with the selected source lane.
    ShuffleMask[DestLane] = SourceLane + 4;
  }

  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
}

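// Illustrative immediate decoding for simplifyX86insertps above (added note,
// not from the original source): an immediate of 0x30 selects source lane 0
// and destination lane 3 with no zero mask, so the call becomes
//   shufflevector <4 x float> %op0, <4 x float> %op1,
//                 <4 x i32> <i32 0, i32 1, i32 2, i32 4>
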
/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
/// or conversion to a shuffle vector.
static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
                               ConstantInt *CILength, ConstantInt *CIIndex,
                               InstCombiner::BuilderTy &Builder) {
  auto LowConstantHighUndef = [&](uint64_t Val) {
    Type *IntTy64 = Type::getInt64Ty(II.getContext());
    Constant *Args[] = {ConstantInt::get(IntTy64, Val),
                        UndefValue::get(IntTy64)};
    return ConstantVector::get(Args);
  };

  // See if we're dealing with constant values.
  Constant *C0 = dyn_cast<Constant>(Op0);
  ConstantInt *CI0 =
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
         : nullptr;

  // Attempt to constant fold.
  if (CILength && CIIndex) {
    // From AMD documentation: "The bit index and field length are each six
    // bits in length other bits of the field are ignored."
    APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
    APInt APLength = CILength->getValue().zextOrTrunc(6);

    unsigned Index = APIndex.getZExtValue();

    // From AMD documentation: "a value of zero in the field length is
    // defined as length of 64".
    unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

    // From AMD documentation: "If the sum of the bit index + length field
    // is greater than 64, the results are undefined".
    unsigned End = Index + Length;

    // Note that both field index and field length are 8-bit quantities.
    // Since variables 'Index' and 'Length' are unsigned values
    // obtained from zero-extending field index and field length
    // respectively, their sum should never wrap around.
    if (End > 64)
      return UndefValue::get(II.getType());

    // If we are extracting whole bytes, we can convert this to a shuffle.
    // Lowering can recognize EXTRQI shuffle masks.
    if ((Length % 8) == 0 && (Index % 8) == 0) {
      // Convert bit indices to byte indices.
      Length /= 8;
      Index /= 8;

      Type *IntTy8 = Type::getInt8Ty(II.getContext());
      Type *IntTy32 = Type::getInt32Ty(II.getContext());
      VectorType *ShufTy = VectorType::get(IntTy8, 16);

      SmallVector<Constant *, 16> ShuffleMask;
      for (int i = 0; i != (int)Length; ++i)
        ShuffleMask.push_back(
            Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
      for (int i = Length; i != 8; ++i)
        ShuffleMask.push_back(
            Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
      for (int i = 8; i != 16; ++i)
        ShuffleMask.push_back(UndefValue::get(IntTy32));

      Value *SV = Builder.CreateShuffleVector(
          Builder.CreateBitCast(Op0, ShufTy),
          ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
      return Builder.CreateBitCast(SV, II.getType());
    }

    // Constant Fold - shift Index'th bit to lowest position and mask off
    // Length bits.
    if (CI0) {
      APInt Elt = CI0->getValue();
      Elt = Elt.lshr(Index).zextOrTrunc(Length);
      return LowConstantHighUndef(Elt.getZExtValue());
    }

    // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
    if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
      Value *Args[] = {Op0, CILength, CIIndex};
      Module *M = II.getModule();
      Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
      return Builder.CreateCall(F, Args);
    }
  }

  // Constant Fold - extraction from zero is always {zero, undef}.
  if (CI0 && CI0->equalsInt(0))
    return LowConstantHighUndef(0);

  return nullptr;
}

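// Illustrative example for simplifyX86extrq above (added note, not from the
// original source): EXTRQI with a field length of 16 bits and a bit index of
// 32 extracts two whole bytes, so it is rewritten as a <16 x i8>
// shufflevector that moves bytes 4 and 5 of the source into bytes 0 and 1,
// fills bytes 2-7 with zeros from the second (all-zero) operand, and leaves
// the upper eight bytes undef.
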
/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
/// folding or conversion to a shuffle vector.
static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
                                 APInt APLength, APInt APIndex,
                                 InstCombiner::BuilderTy &Builder) {
  // From AMD documentation: "The bit index and field length are each six bits
  // in length other bits of the field are ignored."
  APIndex = APIndex.zextOrTrunc(6);
  APLength = APLength.zextOrTrunc(6);

  // Attempt to constant fold.
  unsigned Index = APIndex.getZExtValue();

  // From AMD documentation: "a value of zero in the field length is
  // defined as length of 64".
  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

  // From AMD documentation: "If the sum of the bit index + length field
  // is greater than 64, the results are undefined".
  unsigned End = Index + Length;

  // Note that both field index and field length are 8-bit quantities.
  // Since variables 'Index' and 'Length' are unsigned values
  // obtained from zero-extending field index and field length
  // respectively, their sum should never wrap around.
  if (End > 64)
    return UndefValue::get(II.getType());

  // If we are inserting whole bytes, we can convert this to a shuffle.
  // Lowering can recognize INSERTQI shuffle masks.
  if ((Length % 8) == 0 && (Index % 8) == 0) {
    // Convert bit indices to byte indices.
    Length /= 8;
    Index /= 8;

    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    Type *IntTy32 = Type::getInt32Ty(II.getContext());
    VectorType *ShufTy = VectorType::get(IntTy8, 16);

    SmallVector<Constant *, 16> ShuffleMask;
    for (int i = 0; i != (int)Index; ++i)
      ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
    for (int i = 0; i != (int)Length; ++i)
      ShuffleMask.push_back(
          Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
    for (int i = Index + Length; i != 8; ++i)
      ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
    for (int i = 8; i != 16; ++i)
      ShuffleMask.push_back(UndefValue::get(IntTy32));

    Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
                                            Builder.CreateBitCast(Op1, ShufTy),
                                            ConstantVector::get(ShuffleMask));
    return Builder.CreateBitCast(SV, II.getType());
  }

  // See if we're dealing with constant values.
  Constant *C0 = dyn_cast<Constant>(Op0);
  Constant *C1 = dyn_cast<Constant>(Op1);
  ConstantInt *CI00 =
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
         : nullptr;
  ConstantInt *CI10 =
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
         : nullptr;

  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
  if (CI00 && CI10) {
    APInt V00 = CI00->getValue();
    APInt V10 = CI10->getValue();
    APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
    V00 = V00 & ~Mask;
    V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
    APInt Val = V00 | V10;
    Type *IntTy64 = Type::getInt64Ty(II.getContext());
    Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
                        UndefValue::get(IntTy64)};
    return ConstantVector::get(Args);
  }

  // If we were an INSERTQ call, we'll save demanded elements if we convert to
  // INSERTQI.
  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    Constant *CILength = ConstantInt::get(IntTy8, Length, false);
    Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);

    Value *Args[] = {Op0, Op1, CILength, CIIndex};
    Module *M = II.getModule();
    Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
    return Builder.CreateCall(F, Args);
  }

  return nullptr;
}

/// Attempt to convert pshufb* to shufflevector if the mask is constant.
static Value *simplifyX86pshufb(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<VectorType>(II.getType());
  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
  unsigned NumElts = VecTy->getNumElements();
  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
         "Unexpected number of elements in shuffle mask!");

  // Construct a shuffle mask from constant integers or UNDEFs.
  Constant *Indexes[64] = {nullptr};

  // Each byte in the shuffle control mask forms an index to permute the
  // corresponding byte in the destination operand.
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = UndefValue::get(MaskEltTy);
      continue;
    }

    int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();

    // If the most significant bit (bit[7]) of each byte of the shuffle
    // control mask is set, then zero is written in the result byte.
    // The zero vector is in the right-hand side of the resulting
    // shufflevector.

    // The value of each index for the high 128-bit lane is the least
    // significant 4 bits of the respective shuffle control byte.
    Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
    Indexes[I] = ConstantInt::get(MaskEltTy, Index);
  }

  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
  auto V1 = II.getArgOperand(0);
  auto V2 = Constant::getNullValue(VecTy);
  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
}

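// Illustrative example for simplifyX86pshufb above (added note, not from the
// original source): a 128-bit pshufb whose control mask is all zero bytes
// broadcasts byte 0 of the source operand to every result byte, while any
// control byte with its sign bit set (for example i8 -128) selects the
// corresponding lane of the all-zero second shuffle operand, producing a zero
// byte in the result.
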
/// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
                                    InstCombiner::BuilderTy &Builder) {
  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<VectorType>(II.getType());
  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
  unsigned NumElts = VecTy->getVectorNumElements();
  bool IsPD = VecTy->getScalarType()->isDoubleTy();
  unsigned NumLaneElts = IsPD ? 2 : 4;
  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);

  // Construct a shuffle mask from constant integers or UNDEFs.
  Constant *Indexes[16] = {nullptr};

  // The intrinsics only read one or two bits, clear the rest.
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = UndefValue::get(MaskEltTy);
      continue;
    }

    APInt Index = cast<ConstantInt>(COp)->getValue();
    Index = Index.zextOrTrunc(32).getLoBits(2);

    // The PD variants use bit 1 to select the per-lane element index, so
    // shift down to convert to a generic shuffle mask index.
    if (IsPD)
      Index = Index.lshr(1);

    // The _256 variants are a bit trickier since the mask bits always index
    // into the corresponding 128-bit half. In order to convert to a generic
    // shuffle, we have to make that explicit.
    Index += APInt(32, (I / NumLaneElts) * NumLaneElts);

    Indexes[I] = ConstantInt::get(MaskEltTy, Index);
  }

  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
  auto V1 = II.getArgOperand(0);
  auto V2 = UndefValue::get(V1->getType());
  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
}

/// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
static Value *simplifyX86vpermv(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<VectorType>(II.getType());
  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
  unsigned Size = VecTy->getNumElements();
  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
         "Unexpected shuffle mask size");

  // Construct a shuffle mask from constant integers or UNDEFs.
  Constant *Indexes[64] = {nullptr};

  for (unsigned I = 0; I < Size; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = UndefValue::get(MaskEltTy);
      continue;
    }

    uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
    Index &= Size - 1;
    Indexes[I] = ConstantInt::get(MaskEltTy, Index);
  }

  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
  auto V1 = II.getArgOperand(0);
  auto V2 = UndefValue::get(VecTy);
  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
}

/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
/// source vectors, unless a zero bit is set. If a zero bit is set,
/// then ignore that half of the mask and clear that half of the vector.
static Value *simplifyX86vperm2(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
  if (!CInt)
    return nullptr;

  VectorType *VecTy = cast<VectorType>(II.getType());
  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);

  // The immediate permute control byte looks like this:
  // [1:0] - select 128 bits from sources for low half of destination
  // [2]   - ignore
  // [3]   - zero low half of destination
  // [5:4] - select 128 bits from sources for high half of destination
  // [6]   - ignore
  // [7]   - zero high half of destination

  uint8_t Imm = CInt->getZExtValue();

  bool LowHalfZero = Imm & 0x08;
  bool HighHalfZero = Imm & 0x80;

  // If both zero mask bits are set, this was just a weird way to
  // generate a zero vector.
  if (LowHalfZero && HighHalfZero)
    return ZeroVector;

  // If 0 or 1 zero mask bits are set, this is a simple shuffle.
  unsigned NumElts = VecTy->getNumElements();
  unsigned HalfSize = NumElts / 2;
  SmallVector<uint32_t, 8> ShuffleMask(NumElts);

  // The high bit of the selection field chooses the 1st or 2nd operand.
  bool LowInputSelect = Imm & 0x02;
  bool HighInputSelect = Imm & 0x20;

  // The low bit of the selection field chooses the low or high half
  // of the selected operand.
  bool LowHalfSelect = Imm & 0x01;
  bool HighHalfSelect = Imm & 0x10;

  // Determine which operand(s) are actually in use for this instruction.
  Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
  Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);

  // If needed, replace operands based on zero mask.
  V0 = LowHalfZero ? ZeroVector : V0;
  V1 = HighHalfZero ? ZeroVector : V1;

  // Permute low half of result.
  unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
  for (unsigned i = 0; i < HalfSize; ++i)
    ShuffleMask[i] = StartIndex + i;

  // Permute high half of result.
  StartIndex = HighHalfSelect ? HalfSize : 0;
  StartIndex += NumElts;
  for (unsigned i = 0; i < HalfSize; ++i)
    ShuffleMask[i + HalfSize] = StartIndex + i;

  return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
}

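// Worked example (illustrative only): for vperm2f128 on <8 x float> with
// Imm = 0x31, bit 0 selects the high half of operand 0 for the low half of
// the result, and bits [5:4] select the high half of operand 1 for the high
// half; no zero bits are set. The code above therefore builds the shuffle
// mask <4,5,6,7,12,13,14,15> over the two source vectors.
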
/// Decode XOP integer vector comparison intrinsics.
static Value *simplifyX86vpcom(const IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder,
                               bool IsSigned) {
  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
    uint64_t Imm = CInt->getZExtValue() & 0x7;
    VectorType *VecTy = cast<VectorType>(II.getType());
    CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;

    switch (Imm) {
    case 0x0:
      Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
      break;
    case 0x1:
      Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
      break;
    case 0x2:
      Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
      break;
    case 0x3:
      Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
      break;
    case 0x4:
      Pred = ICmpInst::ICMP_EQ; break;
    case 0x5:
      Pred = ICmpInst::ICMP_NE; break;
    case 0x6:
      return ConstantInt::getSigned(VecTy, 0); // FALSE
    case 0x7:
      return ConstantInt::getSigned(VecTy, -1); // TRUE
    }

    if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
                                        II.getArgOperand(1)))
      return Builder.CreateSExtOrTrunc(Cmp, VecTy);
  }
  return nullptr;
}

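// Illustrative sketch (intrinsic spelling shown only as an assumed example):
// a signed XOP byte compare with immediate 0, roughly
//   %r = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a, <16 x i8> %b, i8 0)
// becomes an icmp slt on the two vector operands followed by a sign
// extension of the <16 x i1> result back to <16 x i8>.
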
// Emit a select instruction and appropriate bitcasts to help simplify
// masked intrinsics.
static Value *emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1,
                                InstCombiner::BuilderTy &Builder) {
  unsigned VWidth = Op0->getType()->getVectorNumElements();

  // If the mask is all ones we don't need the select. But we need to check
  // only the bits that will be used in case VWidth is less than 8.
  if (auto *C = dyn_cast<ConstantInt>(Mask))
    if (C->getValue().zextOrTrunc(VWidth).isAllOnesValue())
      return Op0;

  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
                       cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (VWidth < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != VWidth; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, VWidth),
                                       "extract");
  }

  return Builder.CreateSelect(Mask, Op0, Op1);
}

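// Minimal sketch of the effect, assuming a 4-element AVX-512 masked op: an
// i8 mask %k is bitcast to <8 x i1>, the low four lanes are extracted with a
// shufflevector, and the result selects between %op0 and %op1 per lane:
//   %b = bitcast i8 %k to <8 x i1>
//   %m = shufflevector <8 x i1> %b, <8 x i1> %b,
//                      <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %r = select <4 x i1> %m, <4 x float> %op0, <4 x float> %op1
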
static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);

  // fmin(x, x) -> x
  if (Arg0 == Arg1)
    return Arg0;

  const auto *C1 = dyn_cast<ConstantFP>(Arg1);

  // fmin(x, nan) -> x
  if (C1 && C1->isNaN())
    return Arg0;

  // Returning the other value is correct because if the undef operand were
  // NaN, fmin/fmax would return the other value anyway, and the result can
  // only be NaN when both operands are NaN.
  //
  // fmin(undef, x) -> x
  if (isa<UndefValue>(Arg0))
    return Arg1;

  // fmin(x, undef) -> x
  if (isa<UndefValue>(Arg1))
    return Arg0;

  Value *X = nullptr;
  Value *Y = nullptr;
  if (II.getIntrinsicID() == Intrinsic::minnum) {
    // fmin(x, fmin(x, y)) -> fmin(x, y)
    // fmin(y, fmin(x, y)) -> fmin(x, y)
    if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
      if (Arg0 == X || Arg0 == Y)
        return Arg1;
    }

    // fmin(fmin(x, y), x) -> fmin(x, y)
    // fmin(fmin(x, y), y) -> fmin(x, y)
    if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
      if (Arg1 == X || Arg1 == Y)
        return Arg0;
    }

    // TODO: fmin(nnan x, inf) -> x
    // TODO: fmin(nnan ninf x, flt_max) -> x
    if (C1 && C1->isInfinity()) {
      // fmin(x, -inf) -> -inf
      if (C1->isNegative())
        return Arg1;
    }
  } else {
    assert(II.getIntrinsicID() == Intrinsic::maxnum);
    // fmax(x, fmax(x, y)) -> fmax(x, y)
    // fmax(y, fmax(x, y)) -> fmax(x, y)
    if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
      if (Arg0 == X || Arg0 == Y)
        return Arg1;
    }

    // fmax(fmax(x, y), x) -> fmax(x, y)
    // fmax(fmax(x, y), y) -> fmax(x, y)
    if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
      if (Arg1 == X || Arg1 == Y)
        return Arg0;
    }

    // TODO: fmax(nnan x, -inf) -> x
    // TODO: fmax(nnan ninf x, -flt_max) -> x
    if (C1 && C1->isInfinity()) {
      // fmax(x, inf) -> inf
      if (!C1->isNegative())
        return Arg1;
    }
  }
  return nullptr;
}

static bool maskIsAllOneOrUndef(Value *Mask) {
  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (!ConstMask)
    return false;
  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
    return true;
  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
       ++I) {
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
        continue;
    return false;
  }
  return true;
}

static Value *simplifyMaskedLoad(const IntrinsicInst &II,
                                 InstCombiner::BuilderTy &Builder) {
  // If the mask is all ones or undefs, this is a plain vector load of the 1st
  // argument.
  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
    Value *LoadPtr = II.getArgOperand(0);
    unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
    return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
  }

  return nullptr;
}

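// Illustrative example (not from the source): a masked load whose mask is
// all ones (or undef), such as
//   %v = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %p, i32 4,
//            <4 x i1> <i1 1, i1 1, i1 1, i1 1>, <4 x i32> undef)
// is replaced above by a plain aligned load of %p with alignment 4.
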
static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, this instruction does nothing.
  if (ConstMask->isNullValue())
    return IC.eraseInstFromFunction(II);

  // If the mask is all ones, this is a plain vector store of the 1st argument.
  if (ConstMask->isAllOnesValue()) {
    Value *StorePtr = II.getArgOperand(1);
    unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
    return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
  }

  return nullptr;
}

static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
  // If the mask is all zeros, return the "passthru" argument of the gather.
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
  if (ConstMask && ConstMask->isNullValue())
    return IC.replaceInstUsesWith(II, II.getArgOperand(3));

  return nullptr;
}

static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
  // If the mask is all zeros, a scatter does nothing.
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (ConstMask && ConstMask->isNullValue())
    return IC.eraseInstFromFunction(II);

  return nullptr;
}

static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
  assert((II.getIntrinsicID() == Intrinsic::cttz ||
          II.getIntrinsicID() == Intrinsic::ctlz) &&
         "Expected cttz or ctlz intrinsic");
  Value *Op0 = II.getArgOperand(0);
  // FIXME: Try to simplify vectors of integers.
  auto *IT = dyn_cast<IntegerType>(Op0->getType());
  if (!IT)
    return nullptr;

  unsigned BitWidth = IT->getBitWidth();
  APInt KnownZero(BitWidth, 0);
  APInt KnownOne(BitWidth, 0);
  IC.computeKnownBits(Op0, KnownZero, KnownOne, 0, &II);

  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
  unsigned NumMaskBits = IsTZ ? KnownOne.countTrailingZeros()
                              : KnownOne.countLeadingZeros();
  APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits)
                    : APInt::getHighBitsSet(BitWidth, NumMaskBits);

  // If all bits above (ctlz) or below (cttz) the first known one are known
  // zero, this value is constant.
  // FIXME: This should be in InstSimplify because we're replacing an
  // instruction with a constant.
  if ((Mask & KnownZero) == Mask) {
    auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits));
    return IC.replaceInstUsesWith(II, C);
  }

  // If the input to cttz/ctlz is known to be non-zero,
  // then change the 'ZeroIsUndef' parameter to 'true'
  // because we know the zero behavior can't affect the result.
  if (KnownOne != 0 || isKnownNonZero(Op0, IC.getDataLayout())) {
    if (!match(II.getArgOperand(1), m_One())) {
      II.setOperand(1, IC.Builder->getTrue());
      return &II;
    }
  }

  return nullptr;
}

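// Worked example (illustrative): if computeKnownBits proves Op0 has the form
// xxxx1000 (bit 3 known one, bits 0-2 known zero), then for cttz the mask of
// the low 3 bits is entirely known zero, so the call folds to the constant 3.
// If instead only non-zero-ness is provable, just the is_zero_undef argument
// is flipped to true.
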
// TODO: If the x86 backend knew how to convert a bool vector mask back to an
// XMM register mask efficiently, we could transform all x86 masked intrinsics
// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Constant *ZeroVec = Constant::getNullValue(II.getType());

  // Special case a zero mask since that's not a ConstantDataVector.
  // This masked load instruction creates a zero vector.
  if (isa<ConstantAggregateZero>(Mask))
    return IC.replaceInstUsesWith(II, ZeroVec);

  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
  if (!ConstMask)
    return nullptr;

  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
  // to allow target-independent optimizations.

  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
  // the LLVM intrinsic definition for the pointer argument.
  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
  Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec");

  // Second, convert the x86 XMM integer vector mask to a vector of bools based
  // on each element's most significant bit (the sign bit).
  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);

  // The pass-through vector for an x86 masked load is a zero vector.
  CallInst *NewMaskedLoad =
      IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
  return IC.replaceInstUsesWith(II, NewMaskedLoad);
}

// TODO: If the x86 backend knew how to convert a bool vector mask back to an
// XMM register mask efficiently, we could transform all x86 masked intrinsics
// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *Vec = II.getOperand(2);

  // Special case a zero mask since that's not a ConstantDataVector:
  // this masked store instruction does nothing.
  if (isa<ConstantAggregateZero>(Mask)) {
    IC.eraseInstFromFunction(II);
    return true;
  }

  // The SSE2 version is too weird (e.g., unaligned but non-temporal) to do
  // anything else at this level.
  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
    return false;

  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
  if (!ConstMask)
    return false;

  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
  // to allow target-independent optimizations.

  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
  // the LLVM intrinsic definition for the pointer argument.
  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
  Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec");

  // Second, convert the x86 XMM integer vector mask to a vector of bools based
  // on each element's most significant bit (the sign bit).
  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);

  IC.Builder->CreateMaskedStore(Vec, PtrCast, 1, BoolMask);

  // 'Replace uses' doesn't work for stores. Erase the original masked store.
  IC.eraseInstFromFunction(II);
  return true;
}

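// Illustrative sketch (assumed IR, not from the source): an AVX masked store
// whose mask is a compile-time constant with the sign bit set in every
// element, roughly
//   call void @llvm.x86.avx.maskstore.ps(i8* %p,
//                                        <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>,
//                                        <4 x float> %v)
// is rewritten into the target-independent @llvm.masked.store with an
// all-true <4 x i1> mask, which later folds to an ordinary store.
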
// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
//
// A single NaN input is folded to minnum, so we rely on that folding for
// handling NaNs.
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
                           const APFloat &Src2) {
  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);

  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp0 == APFloat::cmpEqual)
    return maxnum(Src1, Src2);

  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp1 == APFloat::cmpEqual)
    return maxnum(Src0, Src2);

  return maxnum(Src0, Src1);
}

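// Worked example: fmed3AMDGCN(1.0, 5.0, 3.0) computes Max3 = 5.0, which
// compares unequal to Src0 but equal to Src1, so the function returns
// maxnum(Src0, Src2) = 3.0, the median of the three inputs.
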
// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
                             unsigned NumOperands) {
  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
  for (unsigned i = 0; i < NumOperands; i++)
    if (I.getArgOperand(i) != E.getArgOperand(i))
      return false;
  return true;
}

// Remove trivially empty start/end intrinsic ranges, i.e. a start
// immediately followed by an end (ignoring debuginfo or other
// start/end intrinsics in between). As this handles only the most trivial
// cases, tracking the nesting level is not needed:
//
//   call @llvm.foo.start(i1 0) ; &I
//   call @llvm.foo.start(i1 0)
//   call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
//   call @llvm.foo.end(i1 0)
static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
                                      unsigned EndID, InstCombiner &IC) {
  assert(I.getIntrinsicID() == StartID &&
         "Start intrinsic does not have expected ID");
  BasicBlock::iterator BI(I), BE(I.getParent()->end());
  for (++BI; BI != BE; ++BI) {
    if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
      if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
        continue;
      if (E->getIntrinsicID() == EndID &&
          haveSameOperands(I, *E, E->getNumArgOperands())) {
        IC.eraseInstFromFunction(*E);
        IC.eraseInstFromFunction(I);
        return true;
      }
    }
    break;
  }

  return false;
}

// Convert NVVM intrinsics to target-generic LLVM code where possible.
static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
  // Each NVVM intrinsic we can simplify can be replaced with one of:
  //
  //  * an LLVM intrinsic,
  //  * an LLVM cast operation,
  //  * an LLVM binary operation, or
  //  * ad-hoc LLVM IR for the particular operation.

  // Some transformations are only valid when the module's
  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
  // transformations are valid regardless of the module's ftz setting.
  enum FtzRequirementTy {
    FTZ_Any,       // Any ftz setting is ok.
    FTZ_MustBeOn,  // Transformation is valid only if ftz is on.
    FTZ_MustBeOff, // Transformation is valid only if ftz is off.
  };
  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
  // target-generic intrinsic, cast op, or binary op but that we can nonetheless
  // simplify.
  enum SpecialCase {
    SPC_Reciprocal,
  };

  // SimplifyAction is a poor-man's variant (plus an additional flag) that
  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
  struct SimplifyAction {
    // Invariant: At most one of these Optionals has a value.
    Optional<Intrinsic::ID> IID;
    Optional<Instruction::CastOps> CastOp;
    Optional<Instruction::BinaryOps> BinaryOp;
    Optional<SpecialCase> Special;

    FtzRequirementTy FtzRequirement = FTZ_Any;

    SimplifyAction() = default;

    SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
        : IID(IID), FtzRequirement(FtzReq) {}

    // Cast operations don't have anything to do with FTZ, so we skip that
    // argument.
    SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}

    SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
        : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}

    SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
        : Special(Special), FtzRequirement(FtzReq) {}
  };

  // Try to generate a SimplifyAction describing how to replace our
  // IntrinsicInstr with target-generic LLVM IR.
  const SimplifyAction Action = [II]() -> SimplifyAction {
    switch (II->getIntrinsicID()) {

    // NVVM intrinsics that map directly to LLVM intrinsics.
    case Intrinsic::nvvm_ceil_d:
      return {Intrinsic::ceil, FTZ_Any};
    case Intrinsic::nvvm_ceil_f:
      return {Intrinsic::ceil, FTZ_MustBeOff};
    case Intrinsic::nvvm_ceil_ftz_f:
      return {Intrinsic::ceil, FTZ_MustBeOn};
    case Intrinsic::nvvm_fabs_d:
      return {Intrinsic::fabs, FTZ_Any};
    case Intrinsic::nvvm_fabs_f:
      return {Intrinsic::fabs, FTZ_MustBeOff};
    case Intrinsic::nvvm_fabs_ftz_f:
      return {Intrinsic::fabs, FTZ_MustBeOn};
    case Intrinsic::nvvm_floor_d:
      return {Intrinsic::floor, FTZ_Any};
    case Intrinsic::nvvm_floor_f:
      return {Intrinsic::floor, FTZ_MustBeOff};
    case Intrinsic::nvvm_floor_ftz_f:
      return {Intrinsic::floor, FTZ_MustBeOn};
    case Intrinsic::nvvm_fma_rn_d:
      return {Intrinsic::fma, FTZ_Any};
    case Intrinsic::nvvm_fma_rn_f:
      return {Intrinsic::fma, FTZ_MustBeOff};
    case Intrinsic::nvvm_fma_rn_ftz_f:
      return {Intrinsic::fma, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmax_d:
      return {Intrinsic::maxnum, FTZ_Any};
    case Intrinsic::nvvm_fmax_f:
      return {Intrinsic::maxnum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmax_ftz_f:
      return {Intrinsic::maxnum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmin_d:
      return {Intrinsic::minnum, FTZ_Any};
    case Intrinsic::nvvm_fmin_f:
      return {Intrinsic::minnum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmin_ftz_f:
      return {Intrinsic::minnum, FTZ_MustBeOn};
    case Intrinsic::nvvm_round_d:
      return {Intrinsic::round, FTZ_Any};
    case Intrinsic::nvvm_round_f:
      return {Intrinsic::round, FTZ_MustBeOff};
    case Intrinsic::nvvm_round_ftz_f:
      return {Intrinsic::round, FTZ_MustBeOn};
    case Intrinsic::nvvm_sqrt_rn_d:
      return {Intrinsic::sqrt, FTZ_Any};
    case Intrinsic::nvvm_sqrt_f:
      // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
      // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
      // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
      // the versions with explicit ftz-ness.
      return {Intrinsic::sqrt, FTZ_Any};
    case Intrinsic::nvvm_sqrt_rn_f:
      return {Intrinsic::sqrt, FTZ_MustBeOff};
    case Intrinsic::nvvm_sqrt_rn_ftz_f:
      return {Intrinsic::sqrt, FTZ_MustBeOn};
    case Intrinsic::nvvm_trunc_d:
      return {Intrinsic::trunc, FTZ_Any};
    case Intrinsic::nvvm_trunc_f:
      return {Intrinsic::trunc, FTZ_MustBeOff};
    case Intrinsic::nvvm_trunc_ftz_f:
      return {Intrinsic::trunc, FTZ_MustBeOn};

    // NVVM intrinsics that map to LLVM cast operations.
    //
    // Note that llvm's target-generic conversion operators correspond to the rz
    // (round to zero) versions of the nvvm conversion intrinsics, even though
    // most everything else here uses the rn (round to nearest even) nvvm ops.
    case Intrinsic::nvvm_d2i_rz:
    case Intrinsic::nvvm_f2i_rz:
    case Intrinsic::nvvm_d2ll_rz:
    case Intrinsic::nvvm_f2ll_rz:
      return {Instruction::FPToSI};
    case Intrinsic::nvvm_d2ui_rz:
    case Intrinsic::nvvm_f2ui_rz:
    case Intrinsic::nvvm_d2ull_rz:
    case Intrinsic::nvvm_f2ull_rz:
      return {Instruction::FPToUI};
    case Intrinsic::nvvm_i2d_rz:
    case Intrinsic::nvvm_i2f_rz:
    case Intrinsic::nvvm_ll2d_rz:
    case Intrinsic::nvvm_ll2f_rz:
      return {Instruction::SIToFP};
    case Intrinsic::nvvm_ui2d_rz:
    case Intrinsic::nvvm_ui2f_rz:
    case Intrinsic::nvvm_ull2d_rz:
    case Intrinsic::nvvm_ull2f_rz:
      return {Instruction::UIToFP};

    // NVVM intrinsics that map to LLVM binary ops.
    case Intrinsic::nvvm_add_rn_d:
      return {Instruction::FAdd, FTZ_Any};
    case Intrinsic::nvvm_add_rn_f:
      return {Instruction::FAdd, FTZ_MustBeOff};
    case Intrinsic::nvvm_add_rn_ftz_f:
      return {Instruction::FAdd, FTZ_MustBeOn};
    case Intrinsic::nvvm_mul_rn_d:
      return {Instruction::FMul, FTZ_Any};
    case Intrinsic::nvvm_mul_rn_f:
      return {Instruction::FMul, FTZ_MustBeOff};
    case Intrinsic::nvvm_mul_rn_ftz_f:
      return {Instruction::FMul, FTZ_MustBeOn};
    case Intrinsic::nvvm_div_rn_d:
      return {Instruction::FDiv, FTZ_Any};
    case Intrinsic::nvvm_div_rn_f:
      return {Instruction::FDiv, FTZ_MustBeOff};
    case Intrinsic::nvvm_div_rn_ftz_f:
      return {Instruction::FDiv, FTZ_MustBeOn};

    // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
    // need special handling.
    //
    // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
    // as well.
    case Intrinsic::nvvm_rcp_rn_d:
      return {SPC_Reciprocal, FTZ_Any};
    case Intrinsic::nvvm_rcp_rn_f:
      return {SPC_Reciprocal, FTZ_MustBeOff};
    case Intrinsic::nvvm_rcp_rn_ftz_f:
      return {SPC_Reciprocal, FTZ_MustBeOn};

    // We do not currently simplify intrinsics that give an approximate answer.
    // These include:
    //
    //   - nvvm_cos_approx_{f,ftz_f}
    //   - nvvm_ex2_approx_{d,f,ftz_f}
    //   - nvvm_lg2_approx_{d,f,ftz_f}
    //   - nvvm_sin_approx_{f,ftz_f}
    //   - nvvm_sqrt_approx_{f,ftz_f}
    //   - nvvm_rsqrt_approx_{d,f,ftz_f}
    //   - nvvm_div_approx_{ftz_d,ftz_f,f}
    //   - nvvm_rcp_approx_ftz_d
    //
    // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
    // means that fastmath is enabled in the intrinsic. Unfortunately only
    // binary operators (currently) have a fastmath bit in SelectionDAG, so this
    // information gets lost and we can't select on it.
    //
    // TODO: div and rcp are lowered to a binary op, so these we could in theory
    // lower them to "fast fdiv".

    default:
      return {};
    }
  }();

  // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
  // can bail out now. (Notice that in the case that IID is not an NVVM
  // intrinsic, we don't have to look up any module metadata, as
  // FtzRequirementTy will be FTZ_Any.)
  if (Action.FtzRequirement != FTZ_Any) {
    bool FtzEnabled =
        II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
        "true";

    if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
      return nullptr;
  }

  // Simplify to target-generic intrinsic.
  if (Action.IID) {
    SmallVector<Value *, 4> Args(II->arg_operands());
    // All the target-generic intrinsics currently of interest to us have one
    // type argument, equal to that of the nvvm intrinsic's argument.
    Type *Tys[] = {II->getArgOperand(0)->getType()};
    return CallInst::Create(
        Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
  }

  // Simplify to target-generic binary op.
  if (Action.BinaryOp)
    return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
                                  II->getArgOperand(1), II->getName());

  // Simplify to target-generic cast op.
  if (Action.CastOp)
    return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
                            II->getName());

  // All that's left are the special cases.
  if (!Action.Special)
    return nullptr;

  switch (*Action.Special) {
  case SPC_Reciprocal:
    // Simplify reciprocal.
    return BinaryOperator::Create(
        Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
        II->getArgOperand(0), II->getName());
  }
  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
}

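// Illustrative sketch (not part of the upstream source): in a function whose
// "nvptx-f32ftz" attribute is not "true", a call to @llvm.nvvm.ceil.f on a
// float argument is rewritten above into @llvm.ceil.f32 on the same operand,
// letting generic constant folding and other target-independent passes see it.
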
Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
  return nullptr;
}

Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
  return nullptr;
}

/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallSite to do the heavy
/// lifting.
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  auto Args = CI.arg_operands();
  if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
                              &TLI, &DT, &AC))
    return replaceInstUsesWith(CI, V);

  if (isFreeCall(&CI, &TLI))
    return visitFree(CI);

  // If the caller function is nounwind, mark the call as nounwind, even if the
  // callee isn't.
  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallSite(&CI);

  // Intrinsics cannot occur in an invoke, so handle them here instead of in
  // visitCallSite.
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
    bool Changed = false;

    // memmove/cpy/set of zero bytes is a noop.
    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
      if (NumBytes->isNullValue())
        return eraseInstFromFunction(CI);

      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
        if (CI->getZExtValue() == 1) {
          // Replace the instruction with just byte operations. We would
          // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
        }
    }

    // No other transformations apply to volatile transfers.
    if (MI->isVolatile())
      return nullptr;

    // If we have a memmove and the source operand is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
        if (GVSrc->isConstant()) {
          Module *M = CI.getModule();
          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                           CI.getArgOperand(1)->getType(),
                           CI.getArgOperand(2)->getType() };
          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
          Changed = true;
        }
    }

    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
      // memmove(x,x,size) -> noop.
      if (MTI->getSource() == MTI->getDest())
        return eraseInstFromFunction(CI);
    }

    // If we can determine a pointer alignment that is bigger than currently
    // set, update the alignment.
    if (isa<MemTransferInst>(MI)) {
      if (Instruction *I = SimplifyMemTransfer(MI))
        return I;
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
      if (Instruction *I = SimplifyMemSet(MSI))
        return I;
    }

    if (Changed) return II;
  }

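  // Illustrative example (assumed IR, not from the source): a memmove whose
  // source points into a constant global, roughly
  //   call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst,
  //            i8* getelementptr(...constant @str...), i64 32, i32 1, i1 false)
  // cannot alias its destination, so the callee above is simply retargeted to
  // the matching @llvm.memcpy declaration with the same operands.
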
  if (auto *AMI = dyn_cast<ElementAtomicMemCpyInst>(II)) {
    if (Constant *C = dyn_cast<Constant>(AMI->getNumElements()))
      if (C->isNullValue())
        return eraseInstFromFunction(*AMI);

    if (Instruction *I = SimplifyElementAtomicMemCpy(AMI))
      return I;
  }

  if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
    return I;

  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
                                              unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
    APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
    return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
  };

  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::objectsize:
    if (ConstantInt *N =
            lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
      return replaceInstUsesWith(CI, N);
    return nullptr;

  case Intrinsic::bswap: {
    Value *IIOperand = II->getArgOperand(0);
    Value *X = nullptr;

    // bswap(bswap(x)) -> x
    if (match(IIOperand, m_BSwap(m_Value(X))))
      return replaceInstUsesWith(CI, X);

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
    if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
      unsigned C = X->getType()->getPrimitiveSizeInBits() -
        IIOperand->getType()->getPrimitiveSizeInBits();
      Value *CV = ConstantInt::get(X->getType(), C);
      Value *V = Builder->CreateLShr(X, CV);
      return new TruncInst(V, IIOperand->getType());
    }
    break;
  }

  case Intrinsic::bitreverse: {
    Value *IIOperand = II->getArgOperand(0);
    Value *X = nullptr;

    // bitreverse(bitreverse(x)) -> x
    if (match(IIOperand, m_Intrinsic<Intrinsic::bitreverse>(m_Value(X))))
      return replaceInstUsesWith(CI, X);
    break;
  }

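  // Worked example (illustrative): for an i64 %x truncated to i32, the fold
  // above computes C = 64 - 32 = 32, so bswap(trunc(bswap(%x))) becomes
  // trunc(lshr(%x, 32)), i.e. the original high word of %x.
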
  case Intrinsic::masked_load:
    if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, *Builder))
      return replaceInstUsesWith(CI, SimplifiedMaskedOp);
    break;
  case Intrinsic::masked_store:
    return simplifyMaskedStore(*II, *this);
  case Intrinsic::masked_gather:
    return simplifyMaskedGather(*II, *this);
  case Intrinsic::masked_scatter:
    return simplifyMaskedScatter(*II, *this);

  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // powi(x, 0) -> 1.0
      if (Power->isZero())
        return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
      // powi(x, 1) -> x
      if (Power->isOne())
        return replaceInstUsesWith(CI, II->getArgOperand(0));
      // powi(x, -1) -> 1/x
      if (Power->isAllOnesValue())
        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
                                          II->getArgOperand(0));
    }
    break;

  case Intrinsic::cttz:
  case Intrinsic::ctlz:
    if (auto *I = foldCttzCtlz(*II, *this))
      return I;
    break;

  case Intrinsic::uadd_with_overflow:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow:
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      // Canonicalize constants into the RHS.
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }
    LLVM_FALLTHROUGH;

  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow: {
    OverflowCheckFlavor OCF =
        IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
    assert(OCF != OCF_INVALID && "unexpected!");

    Value *OperationResult = nullptr;
    Constant *OverflowResult = nullptr;
    if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
                              *II, OperationResult, OverflowResult))
      return CreateOverflowTuple(II, OperationResult, OverflowResult);

    break;
  }

  case Intrinsic::minnum:
  case Intrinsic::maxnum: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);
    // Canonicalize constants to the RHS.
    if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
      II->setArgOperand(0, Arg1);
      II->setArgOperand(1, Arg0);
      return II;
    }
    if (Value *V = simplifyMinnumMaxnum(*II))
      return replaceInstUsesWith(*II, V);
    break;
  }
  case Intrinsic::fmuladd: {
    // Canonicalize fast fmuladd to the separate fmul + fadd.
    if (II->hasUnsafeAlgebra()) {
      BuilderTy::FastMathFlagGuard Guard(*Builder);
      Builder->setFastMathFlags(II->getFastMathFlags());
      Value *Mul = Builder->CreateFMul(II->getArgOperand(0),
                                       II->getArgOperand(1));
      Value *Add = Builder->CreateFAdd(Mul, II->getArgOperand(2));
      Add->takeName(II);
      return replaceInstUsesWith(*II, Add);
    }

    LLVM_FALLTHROUGH;
  }
  case Intrinsic::fma: {
    Value *Src0 = II->getArgOperand(0);
    Value *Src1 = II->getArgOperand(1);

    // Canonicalize constants into the RHS.
    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      II->setArgOperand(0, Src1);
      II->setArgOperand(1, Src0);
      std::swap(Src0, Src1);
    }

    Value *LHS = nullptr;
    Value *RHS = nullptr;

    // fma fneg(x), fneg(y), z -> fma x, y, z
    if (match(Src0, m_FNeg(m_Value(LHS))) &&
        match(Src1, m_FNeg(m_Value(RHS)))) {
      II->setArgOperand(0, LHS);
      II->setArgOperand(1, RHS);
      return II;
    }

    // fma fabs(x), fabs(x), z -> fma x, x, z
    if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
        match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
      II->setArgOperand(0, LHS);
      II->setArgOperand(1, RHS);
      return II;
    }

    // fma x, 1, z -> fadd x, z
    if (match(Src1, m_FPOne())) {
      Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
      RI->copyFastMathFlags(II);
      return RI;
    }

    break;
  }
  case Intrinsic::fabs: {
    Value *Cond;
    Constant *LHS, *RHS;
    if (match(II->getArgOperand(0),
              m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
      CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS});
      CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS});
      return SelectInst::Create(Cond, Call0, Call1);
    }

    LLVM_FALLTHROUGH;
  }
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::round:
  case Intrinsic::nearbyint:
  case Intrinsic::rint:
  case Intrinsic::trunc: {
    Value *ExtSrc;
    if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) &&
        II->getArgOperand(0)->hasOneUse()) {
      // fabs (fpext x) -> fpext (fabs x)
      Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(),
                                           { ExtSrc->getType() });
      CallInst *NewFabs = Builder->CreateCall(F, ExtSrc);
      NewFabs->copyFastMathFlags(II);
      NewFabs->takeName(II);
      return new FPExtInst(NewFabs, II->getType());
    }

    break;
  }
  case Intrinsic::cos:
  case Intrinsic::amdgcn_cos: {
    Value *SrcSrc;
    Value *Src = II->getArgOperand(0);
    if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
        match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
      // cos(-x) -> cos(x)
      // cos(fabs(x)) -> cos(x)
      II->setArgOperand(0, SrcSrc);
      return II;
    }

    break;
  }
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
                                   &DT) >= 16) {
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x: {
    // Turn PPC VSX loads into normal loads.
    Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                        PointerType::getUnqual(II->getType()));
    return new LoadInst(Ptr, Twine(""), false, 1);
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
                                   &DT) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x: {
    // Turn PPC VSX stores into normal stores.
    Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
    Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
    return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
  }
  case Intrinsic::ppc_qpx_qvlfs:
    // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
                                   &DT) >= 16) {
      Type *VTy = VectorType::get(Builder->getFloatTy(),
                                  II->getType()->getVectorNumElements());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(VTy));
      Value *Load = Builder->CreateLoad(Ptr);
      return new FPExtInst(Load, II->getType());
    }
    break;
  case Intrinsic::ppc_qpx_qvlfd:
    // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
                                   &DT) >= 32) {
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_qpx_qvstfs:
    // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
                                   &DT) >= 16) {
      Type *VTy = VectorType::get(Builder->getFloatTy(),
          II->getArgOperand(0)->getType()->getVectorNumElements());
      Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
      Type *OpPtrTy = PointerType::getUnqual(VTy);
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(TOp, Ptr);
    }
    break;
  case Intrinsic::ppc_qpx_qvstfd:
    // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
                                   &DT) >= 32) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;

2015-09-12 21:39:53 +08:00
|
|
|
|
|
|
|
case Intrinsic::x86_vcvtph2ps_128:
|
|
|
|
case Intrinsic::x86_vcvtph2ps_256: {
|
|
|
|
auto Arg = II->getArgOperand(0);
|
|
|
|
auto ArgType = cast<VectorType>(Arg->getType());
|
|
|
|
auto RetType = cast<VectorType>(II->getType());
|
|
|
|
unsigned ArgWidth = ArgType->getNumElements();
|
|
|
|
unsigned RetWidth = RetType->getNumElements();
|
|
|
|
assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
|
|
|
|
assert(ArgType->isIntOrIntVectorTy() &&
|
|
|
|
ArgType->getScalarSizeInBits() == 16 &&
|
|
|
|
"CVTPH2PS input type should be 16-bit integer vector");
|
|
|
|
assert(RetType->getScalarType()->isFloatTy() &&
|
|
|
|
"CVTPH2PS output type should be 32-bit float vector");
|
|
|
|
|
|
|
|
// Constant folding: Convert to a generic half-to-single conversion.
|
2015-09-12 22:00:17 +08:00
|
|
|
if (isa<ConstantAggregateZero>(Arg))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
|
2015-09-12 21:39:53 +08:00
|
|
|
|
2015-09-12 22:00:17 +08:00
|
|
|
if (isa<ConstantDataVector>(Arg)) {
|
2015-09-12 21:39:53 +08:00
|
|
|
auto VectorHalfAsShorts = Arg;
|
|
|
|
if (RetWidth < ArgWidth) {
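// The source vector has more 16-bit lanes than there are results, so keep
// only the low RetWidth lanes before reinterpreting them as half values.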
|
2016-06-12 08:41:19 +08:00
|
|
|
SmallVector<uint32_t, 8> SubVecMask;
|
2015-09-12 21:39:53 +08:00
|
|
|
for (unsigned i = 0; i != RetWidth; ++i)
|
|
|
|
SubVecMask.push_back((int)i);
|
|
|
|
VectorHalfAsShorts = Builder->CreateShuffleVector(
|
|
|
|
Arg, UndefValue::get(ArgType), SubVecMask);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto VectorHalfType =
|
|
|
|
VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
|
|
|
|
auto VectorHalfs =
|
|
|
|
Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType);
|
|
|
|
auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType);
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, VectorFloats);
|
2015-09-12 21:39:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// We only use the lowest lanes of the argument.
|
2015-09-19 19:41:53 +08:00
|
|
|
if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
|
2015-09-12 21:39:53 +08:00
|
|
|
II->setArgOperand(0, V);
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2011-01-10 15:19:37 +08:00
|
|
|
case Intrinsic::x86_sse_cvtss2si:
|
|
|
|
case Intrinsic::x86_sse_cvtss2si64:
|
|
|
|
case Intrinsic::x86_sse_cvttss2si:
|
|
|
|
case Intrinsic::x86_sse_cvttss2si64:
|
|
|
|
case Intrinsic::x86_sse2_cvtsd2si:
|
|
|
|
case Intrinsic::x86_sse2_cvtsd2si64:
|
|
|
|
case Intrinsic::x86_sse2_cvttsd2si:
|
2016-12-14 15:46:12 +08:00
|
|
|
case Intrinsic::x86_sse2_cvttsd2si64:
|
|
|
|
case Intrinsic::x86_avx512_vcvtss2si32:
|
|
|
|
case Intrinsic::x86_avx512_vcvtss2si64:
|
|
|
|
case Intrinsic::x86_avx512_vcvtss2usi32:
|
|
|
|
case Intrinsic::x86_avx512_vcvtss2usi64:
|
|
|
|
case Intrinsic::x86_avx512_vcvtsd2si32:
|
|
|
|
case Intrinsic::x86_avx512_vcvtsd2si64:
|
|
|
|
case Intrinsic::x86_avx512_vcvtsd2usi32:
|
|
|
|
case Intrinsic::x86_avx512_vcvtsd2usi64:
|
|
|
|
case Intrinsic::x86_avx512_cvttss2si:
|
|
|
|
case Intrinsic::x86_avx512_cvttss2si64:
|
|
|
|
case Intrinsic::x86_avx512_cvttss2usi:
|
|
|
|
case Intrinsic::x86_avx512_cvttss2usi64:
|
|
|
|
case Intrinsic::x86_avx512_cvttsd2si:
|
|
|
|
case Intrinsic::x86_avx512_cvttsd2si64:
|
|
|
|
case Intrinsic::x86_avx512_cvttsd2usi:
|
|
|
|
case Intrinsic::x86_avx512_cvttsd2usi64: {
|
2011-01-10 15:19:37 +08:00
|
|
|
// These intrinsics only demand the 0th element of their input vectors. If
|
2010-01-05 15:32:13 +08:00
|
|
|
// we can simplify the input based on that, do so now.
|
2015-09-19 19:41:53 +08:00
|
|
|
Value *Arg = II->getArgOperand(0);
|
|
|
|
unsigned VWidth = Arg->getType()->getVectorNumElements();
|
|
|
|
if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
|
2010-06-29 00:50:57 +08:00
|
|
|
II->setArgOperand(0, V);
|
2010-01-05 15:32:13 +08:00
|
|
|
return II;
|
|
|
|
}
|
2015-08-05 16:18:00 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2016-06-07 16:18:35 +08:00
|
|
|
case Intrinsic::x86_mmx_pmovmskb:
|
|
|
|
case Intrinsic::x86_sse_movmsk_ps:
|
|
|
|
case Intrinsic::x86_sse2_movmsk_pd:
|
|
|
|
case Intrinsic::x86_sse2_pmovmskb_128:
|
|
|
|
case Intrinsic::x86_avx_movmsk_pd_256:
|
|
|
|
case Intrinsic::x86_avx_movmsk_ps_256:
|
|
|
|
case Intrinsic::x86_avx2_pmovmskb: {
|
|
|
|
if (Value *V = simplifyX86movmsk(*II, *Builder))
|
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2016-02-21 07:17:35 +08:00
|
|
|
case Intrinsic::x86_sse_comieq_ss:
|
|
|
|
case Intrinsic::x86_sse_comige_ss:
|
|
|
|
case Intrinsic::x86_sse_comigt_ss:
|
|
|
|
case Intrinsic::x86_sse_comile_ss:
|
|
|
|
case Intrinsic::x86_sse_comilt_ss:
|
|
|
|
case Intrinsic::x86_sse_comineq_ss:
|
|
|
|
case Intrinsic::x86_sse_ucomieq_ss:
|
|
|
|
case Intrinsic::x86_sse_ucomige_ss:
|
|
|
|
case Intrinsic::x86_sse_ucomigt_ss:
|
|
|
|
case Intrinsic::x86_sse_ucomile_ss:
|
|
|
|
case Intrinsic::x86_sse_ucomilt_ss:
|
|
|
|
case Intrinsic::x86_sse_ucomineq_ss:
|
|
|
|
case Intrinsic::x86_sse2_comieq_sd:
|
|
|
|
case Intrinsic::x86_sse2_comige_sd:
|
|
|
|
case Intrinsic::x86_sse2_comigt_sd:
|
|
|
|
case Intrinsic::x86_sse2_comile_sd:
|
|
|
|
case Intrinsic::x86_sse2_comilt_sd:
|
|
|
|
case Intrinsic::x86_sse2_comineq_sd:
|
|
|
|
case Intrinsic::x86_sse2_ucomieq_sd:
|
|
|
|
case Intrinsic::x86_sse2_ucomige_sd:
|
|
|
|
case Intrinsic::x86_sse2_ucomigt_sd:
|
|
|
|
case Intrinsic::x86_sse2_ucomile_sd:
|
|
|
|
case Intrinsic::x86_sse2_ucomilt_sd:
|
2016-12-11 15:42:04 +08:00
|
|
|
case Intrinsic::x86_sse2_ucomineq_sd:
|
2016-12-31 08:45:06 +08:00
|
|
|
case Intrinsic::x86_avx512_vcomi_ss:
|
|
|
|
case Intrinsic::x86_avx512_vcomi_sd:
|
2016-12-11 15:42:04 +08:00
|
|
|
case Intrinsic::x86_avx512_mask_cmp_ss:
|
|
|
|
case Intrinsic::x86_avx512_mask_cmp_sd: {
|
2016-02-21 07:17:35 +08:00
|
|
|
// These intrinsics only demand the 0th element of their input vectors. If
|
|
|
|
// we can simplify the input based on that, do so now.
|
2016-04-25 01:57:27 +08:00
|
|
|
bool MadeChange = false;
|
2016-02-21 07:17:35 +08:00
|
|
|
Value *Arg0 = II->getArgOperand(0);
|
|
|
|
Value *Arg1 = II->getArgOperand(1);
|
|
|
|
unsigned VWidth = Arg0->getType()->getVectorNumElements();
|
|
|
|
if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
|
|
|
|
II->setArgOperand(0, V);
|
2016-04-25 01:57:27 +08:00
|
|
|
MadeChange = true;
|
2016-02-21 07:17:35 +08:00
|
|
|
}
|
|
|
|
if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
|
|
|
|
II->setArgOperand(1, V);
|
2016-04-25 01:57:27 +08:00
|
|
|
MadeChange = true;
|
2016-02-21 07:17:35 +08:00
|
|
|
}
|
2016-04-25 01:57:27 +08:00
|
|
|
if (MadeChange)
|
|
|
|
return II;
|
2016-02-21 07:17:35 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2016-12-27 08:23:16 +08:00
|
|
|
case Intrinsic::x86_avx512_mask_add_ps_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_div_ps_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_mul_ps_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_sub_ps_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_add_pd_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_div_pd_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_mul_pd_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_sub_pd_512:
|
|
|
|
// If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
|
|
|
|
// IR operations.
|
|
|
|
if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
|
|
|
|
if (R->getValue() == 4) {
|
|
|
|
Value *Arg0 = II->getArgOperand(0);
|
|
|
|
Value *Arg1 = II->getArgOperand(1);
|
|
|
|
|
|
|
|
Value *V;
|
|
|
|
switch (II->getIntrinsicID()) {
|
|
|
|
default: llvm_unreachable("Case stmts out of sync!");
|
|
|
|
case Intrinsic::x86_avx512_mask_add_ps_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_add_pd_512:
|
|
|
|
V = Builder->CreateFAdd(Arg0, Arg1);
|
|
|
|
break;
|
|
|
|
case Intrinsic::x86_avx512_mask_sub_ps_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_sub_pd_512:
|
|
|
|
V = Builder->CreateFSub(Arg0, Arg1);
|
|
|
|
break;
|
|
|
|
case Intrinsic::x86_avx512_mask_mul_ps_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_mul_pd_512:
|
|
|
|
V = Builder->CreateFMul(Arg0, Arg1);
|
|
|
|
break;
|
|
|
|
case Intrinsic::x86_avx512_mask_div_ps_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_div_pd_512:
|
|
|
|
V = Builder->CreateFDiv(Arg0, Arg1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create a select for the masking.
|
|
|
|
V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
|
|
|
|
*Builder);
|
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2016-12-11 15:42:01 +08:00
|
|
|
case Intrinsic::x86_avx512_mask_add_ss_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_div_ss_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_mul_ss_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_sub_ss_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_add_sd_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_div_sd_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_mul_sd_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_sub_sd_round:
|
2016-12-26 14:33:19 +08:00
|
|
|
// If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
|
|
|
|
// IR operations.
|
|
|
|
if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
|
|
|
|
if (R->getValue() == 4) {
|
2016-12-27 09:56:30 +08:00
|
|
|
// Extract the lowest elements as scalars.
|
|
|
|
Value *Arg0 = II->getArgOperand(0);
|
|
|
|
Value *Arg1 = II->getArgOperand(1);
|
|
|
|
Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0);
|
|
|
|
Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0);
|
|
|
|
|
|
|
|
Value *V;
|
|
|
|
switch (II->getIntrinsicID()) {
|
|
|
|
default: llvm_unreachable("Case stmts out of sync!");
|
|
|
|
case Intrinsic::x86_avx512_mask_add_ss_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_add_sd_round:
|
|
|
|
V = Builder->CreateFAdd(LHS, RHS);
|
|
|
|
break;
|
|
|
|
case Intrinsic::x86_avx512_mask_sub_ss_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_sub_sd_round:
|
|
|
|
V = Builder->CreateFSub(LHS, RHS);
|
|
|
|
break;
|
|
|
|
case Intrinsic::x86_avx512_mask_mul_ss_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_mul_sd_round:
|
|
|
|
V = Builder->CreateFMul(LHS, RHS);
|
|
|
|
break;
|
|
|
|
case Intrinsic::x86_avx512_mask_div_ss_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_div_sd_round:
|
|
|
|
V = Builder->CreateFDiv(LHS, RHS);
|
|
|
|
break;
|
2016-12-26 14:33:19 +08:00
|
|
|
}
|
2016-12-27 09:56:30 +08:00
|
|
|
|
|
|
|
// Handle the masking aspect of the intrinsic.
|
|
|
|
Value *Mask = II->getArgOperand(3);
|
2016-12-31 07:06:28 +08:00
|
|
|
auto *C = dyn_cast<ConstantInt>(Mask);
|
|
|
|
// We don't need a select if we know the mask bit is a 1.
|
|
|
|
if (!C || !C->getValue()[0]) {
|
|
|
|
// Cast the mask to an i1 vector and then extract the lowest element.
|
|
|
|
auto *MaskTy = VectorType::get(Builder->getInt1Ty(),
|
2016-12-27 09:56:30 +08:00
|
|
|
cast<IntegerType>(Mask->getType())->getBitWidth());
|
2016-12-31 07:06:28 +08:00
|
|
|
Mask = Builder->CreateBitCast(Mask, MaskTy);
|
|
|
|
Mask = Builder->CreateExtractElement(Mask, (uint64_t)0);
|
|
|
|
// Extract the lowest element from the passthru operand.
|
|
|
|
Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2),
|
|
|
|
(uint64_t)0);
|
|
|
|
V = Builder->CreateSelect(Mask, V, Passthru);
|
|
|
|
}
|
2016-12-27 09:56:30 +08:00
|
|
|
|
|
|
|
// Insert the result back into the original argument 0.
|
|
|
|
V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0);
|
|
|
|
|
|
|
|
return replaceInstUsesWith(*II, V);
|
2016-12-26 14:33:19 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
LLVM_FALLTHROUGH;
|
|
|
|
|
|
|
|
// X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
|
|
|
|
case Intrinsic::x86_avx512_mask_max_ss_round:
|
|
|
|
case Intrinsic::x86_avx512_mask_min_ss_round:
|
2016-12-11 15:42:01 +08:00
|
|
|
case Intrinsic::x86_avx512_mask_max_sd_round:
|
2016-12-14 14:06:58 +08:00
|
|
|
case Intrinsic::x86_avx512_mask_min_sd_round:
|
2016-12-15 11:49:45 +08:00
|
|
|
case Intrinsic::x86_avx512_mask_vfmadd_ss:
|
|
|
|
case Intrinsic::x86_avx512_mask_vfmadd_sd:
|
|
|
|
case Intrinsic::x86_avx512_maskz_vfmadd_ss:
|
|
|
|
case Intrinsic::x86_avx512_maskz_vfmadd_sd:
|
|
|
|
case Intrinsic::x86_avx512_mask3_vfmadd_ss:
|
|
|
|
case Intrinsic::x86_avx512_mask3_vfmadd_sd:
|
|
|
|
case Intrinsic::x86_avx512_mask3_vfmsub_ss:
|
|
|
|
case Intrinsic::x86_avx512_mask3_vfmsub_sd:
|
|
|
|
case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
|
|
|
|
case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
|
2016-12-14 13:43:05 +08:00
|
|
|
case Intrinsic::x86_fma_vfmadd_ss:
|
|
|
|
case Intrinsic::x86_fma_vfmsub_ss:
|
|
|
|
case Intrinsic::x86_fma_vfnmadd_ss:
|
|
|
|
case Intrinsic::x86_fma_vfnmsub_ss:
|
|
|
|
case Intrinsic::x86_fma_vfmadd_sd:
|
|
|
|
case Intrinsic::x86_fma_vfmsub_sd:
|
|
|
|
case Intrinsic::x86_fma_vfnmadd_sd:
|
|
|
|
case Intrinsic::x86_fma_vfnmsub_sd:
|
2016-12-14 11:17:27 +08:00
|
|
|
case Intrinsic::x86_sse_cmp_ss:
|
|
|
|
case Intrinsic::x86_sse_min_ss:
|
|
|
|
case Intrinsic::x86_sse_max_ss:
|
|
|
|
case Intrinsic::x86_sse2_cmp_sd:
|
|
|
|
case Intrinsic::x86_sse2_min_sd:
|
|
|
|
case Intrinsic::x86_sse2_max_sd:
|
2016-12-14 11:17:30 +08:00
|
|
|
case Intrinsic::x86_sse41_round_ss:
|
|
|
|
case Intrinsic::x86_sse41_round_sd:
|
2016-12-13 15:45:45 +08:00
|
|
|
case Intrinsic::x86_xop_vfrcz_ss:
|
|
|
|
case Intrinsic::x86_xop_vfrcz_sd: {
|
|
|
|
unsigned VWidth = II->getType()->getVectorNumElements();
|
|
|
|
APInt UndefElts(VWidth, 0);
|
|
|
|
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
|
|
|
|
if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
|
|
|
|
if (V != II)
|
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2015-08-11 04:21:15 +08:00
|
|
|
// Constant fold ashr( <A x Bi>, Ci ).
|
2015-08-13 15:39:03 +08:00
|
|
|
// Constant fold lshr( <A x Bi>, Ci ).
|
|
|
|
// Constant fold shl( <A x Bi>, Ci ).
|
2015-08-11 04:21:15 +08:00
|
|
|
case Intrinsic::x86_sse2_psrai_d:
|
|
|
|
case Intrinsic::x86_sse2_psrai_w:
|
|
|
|
case Intrinsic::x86_avx2_psrai_d:
|
|
|
|
case Intrinsic::x86_avx2_psrai_w:
|
2016-11-13 09:51:55 +08:00
|
|
|
case Intrinsic::x86_avx512_psrai_q_128:
|
|
|
|
case Intrinsic::x86_avx512_psrai_q_256:
|
|
|
|
case Intrinsic::x86_avx512_psrai_d_512:
|
|
|
|
case Intrinsic::x86_avx512_psrai_q_512:
|
|
|
|
case Intrinsic::x86_avx512_psrai_w_512:
|
2015-08-05 16:18:00 +08:00
|
|
|
case Intrinsic::x86_sse2_psrli_d:
|
|
|
|
case Intrinsic::x86_sse2_psrli_q:
|
|
|
|
case Intrinsic::x86_sse2_psrli_w:
|
|
|
|
case Intrinsic::x86_avx2_psrli_d:
|
|
|
|
case Intrinsic::x86_avx2_psrli_q:
|
|
|
|
case Intrinsic::x86_avx2_psrli_w:
|
2016-11-13 09:51:55 +08:00
|
|
|
case Intrinsic::x86_avx512_psrli_d_512:
|
|
|
|
case Intrinsic::x86_avx512_psrli_q_512:
|
|
|
|
case Intrinsic::x86_avx512_psrli_w_512:
|
2015-08-13 15:39:03 +08:00
|
|
|
case Intrinsic::x86_sse2_pslli_d:
|
|
|
|
case Intrinsic::x86_sse2_pslli_q:
|
|
|
|
case Intrinsic::x86_sse2_pslli_w:
|
|
|
|
case Intrinsic::x86_avx2_pslli_d:
|
|
|
|
case Intrinsic::x86_avx2_pslli_q:
|
|
|
|
case Intrinsic::x86_avx2_pslli_w:
|
2016-11-13 09:51:55 +08:00
|
|
|
case Intrinsic::x86_avx512_pslli_d_512:
|
|
|
|
case Intrinsic::x86_avx512_pslli_q_512:
|
|
|
|
case Intrinsic::x86_avx512_pslli_w_512:
|
2016-01-30 07:27:03 +08:00
|
|
|
if (Value *V = simplifyX86immShift(*II, *Builder))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, V);
|
2015-08-05 16:18:00 +08:00
|
|
|
break;
|
|
|
|
|
2015-08-13 15:39:03 +08:00
|
|
|
case Intrinsic::x86_sse2_psra_d:
|
|
|
|
case Intrinsic::x86_sse2_psra_w:
|
|
|
|
case Intrinsic::x86_avx2_psra_d:
|
|
|
|
case Intrinsic::x86_avx2_psra_w:
|
2016-11-13 09:51:55 +08:00
|
|
|
case Intrinsic::x86_avx512_psra_q_128:
|
|
|
|
case Intrinsic::x86_avx512_psra_q_256:
|
|
|
|
case Intrinsic::x86_avx512_psra_d_512:
|
|
|
|
case Intrinsic::x86_avx512_psra_q_512:
|
|
|
|
case Intrinsic::x86_avx512_psra_w_512:
|
2015-08-13 15:39:03 +08:00
|
|
|
case Intrinsic::x86_sse2_psrl_d:
|
|
|
|
case Intrinsic::x86_sse2_psrl_q:
|
|
|
|
case Intrinsic::x86_sse2_psrl_w:
|
|
|
|
case Intrinsic::x86_avx2_psrl_d:
|
|
|
|
case Intrinsic::x86_avx2_psrl_q:
|
|
|
|
case Intrinsic::x86_avx2_psrl_w:
|
2016-11-13 09:51:55 +08:00
|
|
|
case Intrinsic::x86_avx512_psrl_d_512:
|
|
|
|
case Intrinsic::x86_avx512_psrl_q_512:
|
|
|
|
case Intrinsic::x86_avx512_psrl_w_512:
|
2015-08-05 16:18:00 +08:00
|
|
|
case Intrinsic::x86_sse2_psll_d:
|
|
|
|
case Intrinsic::x86_sse2_psll_q:
|
|
|
|
case Intrinsic::x86_sse2_psll_w:
|
2014-04-24 08:58:18 +08:00
|
|
|
case Intrinsic::x86_avx2_psll_d:
|
|
|
|
case Intrinsic::x86_avx2_psll_q:
|
2016-11-13 09:51:55 +08:00
|
|
|
case Intrinsic::x86_avx2_psll_w:
|
|
|
|
case Intrinsic::x86_avx512_psll_d_512:
|
|
|
|
case Intrinsic::x86_avx512_psll_q_512:
|
|
|
|
case Intrinsic::x86_avx512_psll_w_512: {
|
2016-01-30 07:27:03 +08:00
|
|
|
if (Value *V = simplifyX86immShift(*II, *Builder))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, V);
|
2015-08-13 15:39:03 +08:00
|
|
|
|
|
|
|
// SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
|
|
|
|
// operand to compute the shift amount.
|
2015-09-19 19:41:53 +08:00
|
|
|
Value *Arg1 = II->getArgOperand(1);
|
|
|
|
assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
|
2015-08-13 15:39:03 +08:00
|
|
|
"Unexpected packed shift size");
|
2015-09-19 19:41:53 +08:00
|
|
|
unsigned VWidth = Arg1->getType()->getVectorNumElements();
|
2015-08-13 15:39:03 +08:00
|
|
|
|
2015-09-19 19:41:53 +08:00
|
|
|
if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
|
2015-08-13 15:39:03 +08:00
|
|
|
II->setArgOperand(1, V);
|
|
|
|
return II;
|
|
|
|
}
|
2015-08-05 16:18:00 +08:00
|
|
|
break;
|
2015-08-13 15:39:03 +08:00
|
|
|
}
|
2015-08-05 16:18:00 +08:00
|
|
|
|
2016-06-07 18:27:15 +08:00
|
|
|
case Intrinsic::x86_avx2_psllv_d:
|
|
|
|
case Intrinsic::x86_avx2_psllv_d_256:
|
|
|
|
case Intrinsic::x86_avx2_psllv_q:
|
|
|
|
case Intrinsic::x86_avx2_psllv_q_256:
|
2016-11-13 15:26:19 +08:00
|
|
|
case Intrinsic::x86_avx512_psllv_d_512:
|
|
|
|
case Intrinsic::x86_avx512_psllv_q_512:
|
2016-11-18 14:04:33 +08:00
|
|
|
case Intrinsic::x86_avx512_psllv_w_128:
|
|
|
|
case Intrinsic::x86_avx512_psllv_w_256:
|
|
|
|
case Intrinsic::x86_avx512_psllv_w_512:
|
2016-06-07 18:27:15 +08:00
|
|
|
case Intrinsic::x86_avx2_psrav_d:
|
|
|
|
case Intrinsic::x86_avx2_psrav_d_256:
|
2016-11-13 15:26:19 +08:00
|
|
|
case Intrinsic::x86_avx512_psrav_q_128:
|
|
|
|
case Intrinsic::x86_avx512_psrav_q_256:
|
|
|
|
case Intrinsic::x86_avx512_psrav_d_512:
|
|
|
|
case Intrinsic::x86_avx512_psrav_q_512:
|
2016-11-18 14:04:33 +08:00
|
|
|
case Intrinsic::x86_avx512_psrav_w_128:
|
|
|
|
case Intrinsic::x86_avx512_psrav_w_256:
|
|
|
|
case Intrinsic::x86_avx512_psrav_w_512:
|
2016-06-07 18:27:15 +08:00
|
|
|
case Intrinsic::x86_avx2_psrlv_d:
|
|
|
|
case Intrinsic::x86_avx2_psrlv_d_256:
|
|
|
|
case Intrinsic::x86_avx2_psrlv_q:
|
|
|
|
case Intrinsic::x86_avx2_psrlv_q_256:
|
2016-11-13 15:26:19 +08:00
|
|
|
case Intrinsic::x86_avx512_psrlv_d_512:
|
|
|
|
case Intrinsic::x86_avx512_psrlv_q_512:
|
2016-11-18 14:04:33 +08:00
|
|
|
case Intrinsic::x86_avx512_psrlv_w_128:
|
|
|
|
case Intrinsic::x86_avx512_psrlv_w_256:
|
|
|
|
case Intrinsic::x86_avx512_psrlv_w_512:
|
2016-06-07 18:27:15 +08:00
|
|
|
if (Value *V = simplifyX86varShift(*II, *Builder))
|
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
break;
|
|
|
|
|
2016-12-27 07:28:17 +08:00
|
|
|
case Intrinsic::x86_sse2_pmulu_dq:
|
|
|
|
case Intrinsic::x86_sse41_pmuldq:
|
|
|
|
case Intrinsic::x86_avx2_pmul_dq:
|
2016-12-27 13:30:09 +08:00
|
|
|
case Intrinsic::x86_avx2_pmulu_dq:
|
|
|
|
case Intrinsic::x86_avx512_pmul_dq_512:
|
|
|
|
case Intrinsic::x86_avx512_pmulu_dq_512: {
|
2017-01-23 23:22:59 +08:00
|
|
|
if (Value *V = simplifyX86muldq(*II, *Builder))
|
2017-01-21 02:20:30 +08:00
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
|
2016-12-27 07:28:17 +08:00
|
|
|
unsigned VWidth = II->getType()->getVectorNumElements();
|
|
|
|
APInt UndefElts(VWidth, 0);
|
|
|
|
APInt DemandedElts = APInt::getAllOnesValue(VWidth);
|
|
|
|
if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) {
|
|
|
|
if (V != II)
|
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2017-01-25 22:37:24 +08:00
|
|
|
case Intrinsic::x86_sse2_packssdw_128:
|
|
|
|
case Intrinsic::x86_sse2_packsswb_128:
|
|
|
|
case Intrinsic::x86_avx2_packssdw:
|
|
|
|
case Intrinsic::x86_avx2_packsswb:
|
2017-02-16 15:35:23 +08:00
|
|
|
case Intrinsic::x86_avx512_packssdw_512:
|
|
|
|
case Intrinsic::x86_avx512_packsswb_512:
|
2017-01-25 22:37:24 +08:00
|
|
|
if (Value *V = simplifyX86pack(*II, *this, *Builder, true))
|
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case Intrinsic::x86_sse2_packuswb_128:
|
|
|
|
case Intrinsic::x86_sse41_packusdw:
|
|
|
|
case Intrinsic::x86_avx2_packusdw:
|
|
|
|
case Intrinsic::x86_avx2_packuswb:
|
2017-02-16 15:35:23 +08:00
|
|
|
case Intrinsic::x86_avx512_packusdw_512:
|
|
|
|
case Intrinsic::x86_avx512_packuswb_512:
|
2017-01-25 22:37:24 +08:00
|
|
|
if (Value *V = simplifyX86pack(*II, *this, *Builder, false))
|
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
break;
|
|
|
|
|
2017-01-26 13:17:13 +08:00
|
|
|
case Intrinsic::x86_pclmulqdq: {
|
|
|
|
if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
|
|
|
|
unsigned Imm = C->getZExtValue();
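// PCLMULQDQ multiplies one 64-bit half of each source: bit 0 of the
// immediate selects the half of the first operand and bit 4 selects the
// half of the second, so the unused halves can be simplified away.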
|
|
|
|
|
|
|
|
bool MadeChange = false;
|
|
|
|
Value *Arg0 = II->getArgOperand(0);
|
|
|
|
Value *Arg1 = II->getArgOperand(1);
|
|
|
|
unsigned VWidth = Arg0->getType()->getVectorNumElements();
|
|
|
|
APInt DemandedElts(VWidth, 0);
|
|
|
|
|
|
|
|
APInt UndefElts1(VWidth, 0);
|
|
|
|
DemandedElts = (Imm & 0x01) ? 2 : 1;
|
|
|
|
if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts,
|
|
|
|
UndefElts1)) {
|
|
|
|
II->setArgOperand(0, V);
|
|
|
|
MadeChange = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
APInt UndefElts2(VWidth, 0);
|
|
|
|
DemandedElts = (Imm & 0x10) ? 2 : 1;
|
|
|
|
if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts,
|
|
|
|
UndefElts2)) {
|
|
|
|
II->setArgOperand(1, V);
|
|
|
|
MadeChange = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If both input elements are undef, the result is undef.
|
|
|
|
if (UndefElts1[(Imm & 0x01) ? 1 : 0] ||
|
|
|
|
UndefElts2[(Imm & 0x10) ? 1 : 0])
|
|
|
|
return replaceInstUsesWith(*II,
|
|
|
|
ConstantAggregateZero::get(II->getType()));
|
|
|
|
|
|
|
|
if (MadeChange)
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2015-04-17 01:52:13 +08:00
|
|
|
case Intrinsic::x86_sse41_insertps:
|
2016-01-30 07:27:03 +08:00
|
|
|
if (Value *V = simplifyX86insertps(*II, *Builder))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, V);
|
2015-04-17 01:52:13 +08:00
|
|
|
break;
|
2015-07-26 04:41:00 +08:00
|
|
|
|
2015-09-18 04:32:45 +08:00
|
|
|
case Intrinsic::x86_sse4a_extrq: {
|
|
|
|
Value *Op0 = II->getArgOperand(0);
|
|
|
|
Value *Op1 = II->getArgOperand(1);
|
|
|
|
unsigned VWidth0 = Op0->getType()->getVectorNumElements();
|
|
|
|
unsigned VWidth1 = Op1->getType()->getVectorNumElements();
|
2015-10-17 19:40:05 +08:00
|
|
|
assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
|
|
|
|
Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
|
|
|
|
VWidth1 == 16 && "Unexpected operand sizes");
|
|
|
|
|
|
|
|
// See if we're dealing with constant values.
|
|
|
|
Constant *C1 = dyn_cast<Constant>(Op1);
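// For EXTRQ the second operand is a v16i8: byte 0 supplies the field
// length and byte 1 supplies the bit index.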
|
|
|
|
ConstantInt *CILength =
|
2016-09-07 20:03:03 +08:00
|
|
|
C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
|
2015-10-17 19:40:05 +08:00
|
|
|
: nullptr;
|
|
|
|
ConstantInt *CIIndex =
|
2016-09-07 20:03:03 +08:00
|
|
|
C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
|
2015-10-17 19:40:05 +08:00
|
|
|
: nullptr;
|
|
|
|
|
|
|
|
// Attempt to simplify to a constant, shuffle vector or EXTRQI call.
|
2016-01-30 07:27:03 +08:00
|
|
|
if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, V);
|
2015-09-18 04:32:45 +08:00
|
|
|
|
2015-10-17 19:40:05 +08:00
|
|
|
// EXTRQ only uses the lowest 64-bits of the first 128-bit vector
|
|
|
|
// operand and the lowest 16-bits of the second.
|
2016-04-25 01:57:27 +08:00
|
|
|
bool MadeChange = false;
|
2015-09-18 04:32:45 +08:00
|
|
|
if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
|
|
|
|
II->setArgOperand(0, V);
|
2016-04-25 01:57:27 +08:00
|
|
|
MadeChange = true;
|
2015-09-18 04:32:45 +08:00
|
|
|
}
|
|
|
|
if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
|
|
|
|
II->setArgOperand(1, V);
|
2016-04-25 01:57:27 +08:00
|
|
|
MadeChange = true;
|
2015-09-18 04:32:45 +08:00
|
|
|
}
|
2016-04-25 01:57:27 +08:00
|
|
|
if (MadeChange)
|
|
|
|
return II;
|
2015-09-18 04:32:45 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case Intrinsic::x86_sse4a_extrqi: {
|
2015-10-17 19:40:05 +08:00
|
|
|
// EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
|
|
|
|
// bits of the lower 64-bits. The upper 64-bits are undefined.
|
|
|
|
Value *Op0 = II->getArgOperand(0);
|
|
|
|
unsigned VWidth = Op0->getType()->getVectorNumElements();
|
|
|
|
assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
|
|
|
|
"Unexpected operand size");
|
|
|
|
|
|
|
|
// See if we're dealing with constant values.
|
|
|
|
ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
|
|
|
|
ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
|
2015-09-18 04:32:45 +08:00
|
|
|
|
2015-10-17 19:40:05 +08:00
|
|
|
// Attempt to simplify to a constant or shuffle vector.
|
2016-01-30 07:27:03 +08:00
|
|
|
if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, V);
|
2015-10-17 19:40:05 +08:00
|
|
|
|
|
|
|
// EXTRQI only uses the lowest 64-bits of the first 128-bit vector
|
|
|
|
// operand.
|
|
|
|
if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
|
2015-09-18 04:32:45 +08:00
|
|
|
II->setArgOperand(0, V);
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case Intrinsic::x86_sse4a_insertq: {
|
2015-10-17 19:40:05 +08:00
|
|
|
Value *Op0 = II->getArgOperand(0);
|
|
|
|
Value *Op1 = II->getArgOperand(1);
|
|
|
|
unsigned VWidth = Op0->getType()->getVectorNumElements();
|
|
|
|
assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
|
|
|
|
Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
|
|
|
|
Op1->getType()->getVectorNumElements() == 2 &&
|
|
|
|
"Unexpected operand size");
|
|
|
|
|
|
|
|
// See if we're dealing with constant values.
|
|
|
|
Constant *C1 = dyn_cast<Constant>(Op1);
|
|
|
|
ConstantInt *CI11 =
|
2016-09-07 20:47:53 +08:00
|
|
|
C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
|
2015-10-17 19:40:05 +08:00
|
|
|
: nullptr;
|
|
|
|
|
|
|
|
// Attempt to simplify to a constant, shuffle vector or INSERTQI call.
|
|
|
|
if (CI11) {
|
2016-06-08 18:01:20 +08:00
|
|
|
const APInt &V11 = CI11->getValue();
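// INSERTQ encodes the field length in bits [5:0] and the bit index in
// bits [13:8] of the upper 64-bit element of the second operand.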
|
2015-10-17 19:40:05 +08:00
|
|
|
APInt Len = V11.zextOrTrunc(6);
|
|
|
|
APInt Idx = V11.lshr(8).zextOrTrunc(6);
|
2016-01-30 07:27:03 +08:00
|
|
|
if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, V);
|
2015-10-17 19:40:05 +08:00
|
|
|
}
|
2015-09-18 04:32:45 +08:00
|
|
|
|
2015-10-17 19:40:05 +08:00
|
|
|
// INSERTQ only uses the lowest 64-bits of the first 128-bit vector
|
|
|
|
// operand.
|
|
|
|
if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
|
2015-09-18 04:32:45 +08:00
|
|
|
II->setArgOperand(0, V);
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2014-04-24 08:38:14 +08:00
|
|
|
case Intrinsic::x86_sse4a_insertqi: {
|
2015-10-17 19:40:05 +08:00
|
|
|
// INSERTQI: Extract lowest Length bits from lower half of second source and
|
|
|
|
// insert over first source starting at Index bit. The upper 64-bits are
|
|
|
|
// undefined.
|
2015-09-18 04:32:45 +08:00
|
|
|
Value *Op0 = II->getArgOperand(0);
|
|
|
|
Value *Op1 = II->getArgOperand(1);
|
|
|
|
unsigned VWidth0 = Op0->getType()->getVectorNumElements();
|
|
|
|
unsigned VWidth1 = Op1->getType()->getVectorNumElements();
|
2015-10-17 19:40:05 +08:00
|
|
|
assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
|
|
|
|
Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
|
|
|
|
VWidth1 == 2 && "Unexpected operand sizes");
|
|
|
|
|
|
|
|
// See if we're dealing with constant values.
|
|
|
|
ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
|
|
|
|
ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
|
|
|
|
|
|
|
|
// Attempt to simplify to a constant or shuffle vector.
|
|
|
|
if (CILength && CIIndex) {
|
|
|
|
APInt Len = CILength->getValue().zextOrTrunc(6);
|
|
|
|
APInt Idx = CIIndex->getValue().zextOrTrunc(6);
|
2016-01-30 07:27:03 +08:00
|
|
|
if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, V);
|
2015-10-17 19:40:05 +08:00
|
|
|
}
|
2015-09-18 04:32:45 +08:00
|
|
|
|
2015-10-17 19:40:05 +08:00
|
|
|
// INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
|
|
|
|
// operands.
|
2016-04-25 01:57:27 +08:00
|
|
|
bool MadeChange = false;
|
2015-09-18 04:32:45 +08:00
|
|
|
if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
|
|
|
|
II->setArgOperand(0, V);
|
2016-04-25 01:57:27 +08:00
|
|
|
MadeChange = true;
|
2015-09-18 04:32:45 +08:00
|
|
|
}
|
|
|
|
if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
|
|
|
|
II->setArgOperand(1, V);
|
2016-04-25 01:57:27 +08:00
|
|
|
MadeChange = true;
|
2015-09-18 04:32:45 +08:00
|
|
|
}
|
2016-04-25 01:57:27 +08:00
|
|
|
if (MadeChange)
|
|
|
|
return II;
|
2014-04-24 08:38:14 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2014-05-27 11:42:20 +08:00
|
|
|
case Intrinsic::x86_sse41_pblendvb:
|
|
|
|
case Intrinsic::x86_sse41_blendvps:
|
|
|
|
case Intrinsic::x86_sse41_blendvpd:
|
|
|
|
case Intrinsic::x86_avx_blendv_ps_256:
|
|
|
|
case Intrinsic::x86_avx_blendv_pd_256:
|
|
|
|
case Intrinsic::x86_avx2_pblendvb: {
|
|
|
|
// Convert blendv* to vector selects if the mask is constant.
|
|
|
|
// This optimization is convoluted because the intrinsic is defined as
|
|
|
|
// getting a vector of floats or doubles for the ps and pd versions.
|
|
|
|
// FIXME: That should be changed.
|
2015-08-12 16:08:56 +08:00
|
|
|
|
|
|
|
Value *Op0 = II->getArgOperand(0);
|
|
|
|
Value *Op1 = II->getArgOperand(1);
|
2014-05-27 11:42:20 +08:00
|
|
|
Value *Mask = II->getArgOperand(2);
|
2015-08-12 16:08:56 +08:00
|
|
|
|
|
|
|
// fold (blend A, A, Mask) -> A
|
|
|
|
if (Op0 == Op1)
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(CI, Op0);
|
2015-08-12 16:08:56 +08:00
|
|
|
|
|
|
|
// Zero Mask - select 1st argument.
|
2015-08-12 16:23:36 +08:00
|
|
|
if (isa<ConstantAggregateZero>(Mask))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(CI, Op0);
|
2015-08-12 16:08:56 +08:00
|
|
|
|
|
|
|
// Constant Mask - select 1st/2nd argument lane based on top bit of mask.
|
2016-02-22 01:29:33 +08:00
|
|
|
if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
|
|
|
|
Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
|
2015-08-12 16:08:56 +08:00
|
|
|
return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
|
2014-05-27 11:42:20 +08:00
|
|
|
}
|
2015-08-12 16:08:56 +08:00
|
|
|
break;
|
2014-05-27 11:42:20 +08:00
|
|
|
}
|
|
|
|
|
2015-10-01 00:44:39 +08:00
|
|
|
case Intrinsic::x86_ssse3_pshuf_b_128:
|
2016-04-25 01:00:34 +08:00
|
|
|
case Intrinsic::x86_avx2_pshuf_b:
|
2017-01-18 21:44:04 +08:00
|
|
|
case Intrinsic::x86_avx512_pshuf_b_512:
|
2016-04-25 01:00:34 +08:00
|
|
|
if (Value *V = simplifyX86pshufb(*II, *Builder))
|
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
break;
|
2015-10-01 00:44:39 +08:00
|
|
|
|
2014-04-22 06:06:04 +08:00
|
|
|
case Intrinsic::x86_avx_vpermilvar_ps:
|
|
|
|
case Intrinsic::x86_avx_vpermilvar_ps_256:
|
2016-12-11 09:59:36 +08:00
|
|
|
case Intrinsic::x86_avx512_vpermilvar_ps_512:
|
2014-04-22 06:06:04 +08:00
|
|
|
case Intrinsic::x86_avx_vpermilvar_pd:
|
2016-04-25 01:23:46 +08:00
|
|
|
case Intrinsic::x86_avx_vpermilvar_pd_256:
|
2017-01-18 21:44:04 +08:00
|
|
|
case Intrinsic::x86_avx512_vpermilvar_pd_512:
|
2016-04-25 01:23:46 +08:00
|
|
|
if (Value *V = simplifyX86vpermilvar(*II, *Builder))
|
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
break;
|
2014-04-22 06:06:04 +08:00
|
|
|
|
2016-05-02 00:41:22 +08:00
|
|
|
case Intrinsic::x86_avx2_permd:
|
|
|
|
case Intrinsic::x86_avx2_permps:
|
|
|
|
if (Value *V = simplifyX86vpermv(*II, *Builder))
|
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
break;
|
|
|
|
|
2016-12-26 07:58:57 +08:00
|
|
|
case Intrinsic::x86_avx512_mask_permvar_df_256:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_df_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_di_256:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_di_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_hi_128:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_hi_256:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_hi_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_qi_128:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_qi_256:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_qi_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_sf_256:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_sf_512:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_si_256:
|
|
|
|
case Intrinsic::x86_avx512_mask_permvar_si_512:
|
|
|
|
if (Value *V = simplifyX86vpermv(*II, *Builder)) {
|
|
|
|
// We simplified the permuting, now create a select for the masking.
|
|
|
|
V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
|
|
|
|
*Builder);
|
|
|
|
return replaceInstUsesWith(*II, V);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2015-03-21 05:47:56 +08:00
|
|
|
case Intrinsic::x86_avx_vperm2f128_pd_256:
|
|
|
|
case Intrinsic::x86_avx_vperm2f128_ps_256:
|
|
|
|
case Intrinsic::x86_avx_vperm2f128_si_256:
|
2015-03-25 06:39:29 +08:00
|
|
|
case Intrinsic::x86_avx2_vperm2i128:
|
2016-01-30 07:27:03 +08:00
|
|
|
if (Value *V = simplifyX86vperm2(*II, *Builder))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, V);
|
2015-03-21 05:47:56 +08:00
|
|
|
break;
|
|
|
|
|
2016-03-01 07:16:48 +08:00
|
|
|
case Intrinsic::x86_avx_maskload_ps:
|
2016-03-01 07:59:00 +08:00
|
|
|
case Intrinsic::x86_avx_maskload_pd:
|
|
|
|
case Intrinsic::x86_avx_maskload_ps_256:
|
|
|
|
case Intrinsic::x86_avx_maskload_pd_256:
|
|
|
|
case Intrinsic::x86_avx2_maskload_d:
|
|
|
|
case Intrinsic::x86_avx2_maskload_q:
|
|
|
|
case Intrinsic::x86_avx2_maskload_d_256:
|
|
|
|
case Intrinsic::x86_avx2_maskload_q_256:
|
2016-03-01 07:16:48 +08:00
|
|
|
if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
|
|
|
|
return I;
|
|
|
|
break;
|
|
|
|
|
2016-03-12 23:16:59 +08:00
|
|
|
case Intrinsic::x86_sse2_maskmov_dqu:
|
[x86, InstCombine] transform x86 AVX masked stores to LLVM intrinsics
The intended effect of this patch in conjunction with:
http://reviews.llvm.org/rL259392
http://reviews.llvm.org/rL260145
is that customers using the AVX intrinsics in C will benefit from combines when
the store mask is constant:
void mstore_zero_mask(float *f, __m128 v) {
  _mm_maskstore_ps(f, _mm_set1_epi32(0), v);
}
void mstore_fake_ones_mask(float *f, __m128 v) {
  _mm_maskstore_ps(f, _mm_set1_epi32(1), v);
}
void mstore_ones_mask(float *f, __m128 v) {
  _mm_maskstore_ps(f, _mm_set1_epi32(0x80000000), v);
}
void mstore_one_set_elt_mask(float *f, __m128 v) {
  _mm_maskstore_ps(f, _mm_set_epi32(0x80000000, 0, 0, 0), v);
}
...so none of the above will actually generate a masked store for optimized code.
Differential Revision: http://reviews.llvm.org/D17485
llvm-svn: 262064
2016-02-27 05:04:14 +08:00
|
|
|
case Intrinsic::x86_avx_maskstore_ps:
|
|
|
|
case Intrinsic::x86_avx_maskstore_pd:
|
|
|
|
case Intrinsic::x86_avx_maskstore_ps_256:
|
|
|
|
case Intrinsic::x86_avx_maskstore_pd_256:
|
2016-02-27 05:51:44 +08:00
|
|
|
case Intrinsic::x86_avx2_maskstore_d:
|
|
|
|
case Intrinsic::x86_avx2_maskstore_q:
|
|
|
|
case Intrinsic::x86_avx2_maskstore_d_256:
|
|
|
|
case Intrinsic::x86_avx2_maskstore_q_256:
|
2016-02-27 05:04:14 +08:00
|
|
|
if (simplifyX86MaskedStore(*II, *this))
|
|
|
|
return nullptr;
|
|
|
|
break;
|
|
|
|
|
2015-10-11 22:38:34 +08:00
|
|
|
case Intrinsic::x86_xop_vpcomb:
|
|
|
|
case Intrinsic::x86_xop_vpcomd:
|
|
|
|
case Intrinsic::x86_xop_vpcomq:
|
|
|
|
case Intrinsic::x86_xop_vpcomw:
|
2016-01-30 07:27:03 +08:00
|
|
|
if (Value *V = simplifyX86vpcom(*II, *Builder, true))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, V);
|
2015-10-11 22:38:34 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
case Intrinsic::x86_xop_vpcomub:
|
|
|
|
case Intrinsic::x86_xop_vpcomud:
|
|
|
|
case Intrinsic::x86_xop_vpcomuq:
|
|
|
|
case Intrinsic::x86_xop_vpcomuw:
|
2016-01-30 07:27:03 +08:00
|
|
|
if (Value *V = simplifyX86vpcom(*II, *Builder, false))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(*II, V);
|
2015-10-11 22:38:34 +08:00
|
|
|
break;
|
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
case Intrinsic::ppc_altivec_vperm:
|
|
|
|
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
|
2014-06-06 03:46:04 +08:00
|
|
|
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
|
|
|
|
// a vectorshuffle for little endian, we must undo the transformation
|
|
|
|
// performed on vec_perm in altivec.h. That is, we must complement
|
|
|
|
// the permutation mask with respect to 31 and reverse the order of
|
|
|
|
// V1 and V2.
|
2012-01-27 11:08:05 +08:00
|
|
|
if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
|
|
|
|
assert(Mask->getType()->getVectorNumElements() == 16 &&
|
|
|
|
"Bad type for intrinsic!");
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
// Check that all of the elements are integer constants or undefs.
|
|
|
|
bool AllEltsOk = true;
|
|
|
|
for (unsigned i = 0; i != 16; ++i) {
|
2012-01-27 11:08:05 +08:00
|
|
|
Constant *Elt = Mask->getAggregateElement(i);
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
|
2010-01-05 15:32:13 +08:00
|
|
|
AllEltsOk = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
if (AllEltsOk) {
|
|
|
|
// Cast the input vectors to byte vectors.
|
2010-07-22 18:37:47 +08:00
|
|
|
Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
|
|
|
|
Mask->getType());
|
|
|
|
Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
|
|
|
|
Mask->getType());
|
2010-01-05 15:32:13 +08:00
|
|
|
Value *Result = UndefValue::get(Op0->getType());
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
// Only extract each element once.
|
|
|
|
Value *ExtractedElts[32];
|
|
|
|
memset(ExtractedElts, 0, sizeof(ExtractedElts));
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
for (unsigned i = 0; i != 16; ++i) {
|
2012-01-27 11:08:05 +08:00
|
|
|
if (isa<UndefValue>(Mask->getAggregateElement(i)))
|
2010-01-05 15:32:13 +08:00
|
|
|
continue;
|
2012-02-03 08:07:04 +08:00
|
|
|
unsigned Idx =
|
2012-01-27 11:08:05 +08:00
|
|
|
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
|
2010-01-05 15:32:13 +08:00
|
|
|
Idx &= 31; // Match the hardware behavior.
|
2015-03-10 10:37:25 +08:00
|
|
|
if (DL.isLittleEndian())
|
2014-06-06 03:46:04 +08:00
|
|
|
Idx = 31 - Idx;
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!ExtractedElts[Idx]) {
|
2015-03-10 10:37:25 +08:00
|
|
|
Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
|
|
|
|
Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
|
2012-02-03 08:07:04 +08:00
|
|
|
ExtractedElts[Idx] =
|
2014-06-06 03:46:04 +08:00
|
|
|
Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
|
2011-09-28 04:39:19 +08:00
|
|
|
Builder->getInt32(Idx&15));
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
// Insert this value into the result vector.
|
|
|
|
Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
|
2011-09-28 04:39:19 +08:00
|
|
|
Builder->getInt32(i));
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
|
|
|
return CastInst::Create(Instruction::BitCast, Result, CI.getType());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2010-10-23 05:41:48 +08:00
|
|
|
case Intrinsic::arm_neon_vld1:
|
|
|
|
case Intrinsic::arm_neon_vld2:
|
|
|
|
case Intrinsic::arm_neon_vld3:
|
|
|
|
case Intrinsic::arm_neon_vld4:
|
|
|
|
case Intrinsic::arm_neon_vld2lane:
|
|
|
|
case Intrinsic::arm_neon_vld3lane:
|
|
|
|
case Intrinsic::arm_neon_vld4lane:
|
|
|
|
case Intrinsic::arm_neon_vst1:
|
|
|
|
case Intrinsic::arm_neon_vst2:
|
|
|
|
case Intrinsic::arm_neon_vst3:
|
|
|
|
case Intrinsic::arm_neon_vst4:
|
|
|
|
case Intrinsic::arm_neon_vst2lane:
|
|
|
|
case Intrinsic::arm_neon_vst3lane:
|
|
|
|
case Intrinsic::arm_neon_vst4lane: {
|
2016-08-05 09:06:44 +08:00
|
|
|
unsigned MemAlign =
|
2016-12-19 16:22:17 +08:00
|
|
|
getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
|
2010-10-23 05:41:48 +08:00
|
|
|
unsigned AlignArg = II->getNumArgOperands() - 1;
|
|
|
|
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
|
|
|
|
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
|
|
|
|
II->setArgOperand(AlignArg,
|
|
|
|
ConstantInt::get(Type::getInt32Ty(II->getContext()),
|
|
|
|
MemAlign, false));
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2012-05-01 08:20:38 +08:00
|
|
|
case Intrinsic::arm_neon_vmulls:
|
2014-03-29 18:18:08 +08:00
|
|
|
case Intrinsic::arm_neon_vmullu:
|
2014-05-24 20:50:23 +08:00
|
|
|
case Intrinsic::aarch64_neon_smull:
|
|
|
|
case Intrinsic::aarch64_neon_umull: {
|
2012-05-01 08:20:38 +08:00
|
|
|
Value *Arg0 = II->getArgOperand(0);
|
|
|
|
Value *Arg1 = II->getArgOperand(1);
|
|
|
|
|
|
|
|
// Handle mul by zero first:
|
|
|
|
if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
|
2012-05-01 08:20:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check for constant LHS & RHS - in this case we just simplify.
|
2014-03-29 18:18:08 +08:00
|
|
|
bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
|
2014-05-24 20:50:23 +08:00
|
|
|
II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
|
2012-05-01 08:20:38 +08:00
|
|
|
VectorType *NewVT = cast<VectorType>(II->getType());
|
2014-02-14 02:23:24 +08:00
|
|
|
if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
|
|
|
|
if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
|
|
|
|
CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
|
|
|
|
CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
|
|
|
|
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
|
2012-05-01 08:20:38 +08:00
|
|
|
}
|
|
|
|
|
2014-01-25 01:20:08 +08:00
|
|
|
// Couldn't simplify - canonicalize constant to the RHS.
|
2012-05-01 08:20:38 +08:00
|
|
|
std::swap(Arg0, Arg1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle mul by one:
|
2014-02-14 02:23:24 +08:00
|
|
|
if (Constant *CV1 = dyn_cast<Constant>(Arg1))
|
2012-05-01 08:20:38 +08:00
|
|
|
if (ConstantInt *Splat =
|
2014-02-14 02:23:24 +08:00
|
|
|
dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
|
|
|
|
if (Splat->isOne())
|
|
|
|
return CastInst::CreateIntegerCast(Arg0, II->getType(),
|
|
|
|
/*isSigned=*/!Zext);
|
2012-05-01 08:20:38 +08:00
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
2016-01-23 05:30:34 +08:00
|
|
|
case Intrinsic::amdgcn_rcp: {
|
2017-03-25 03:04:57 +08:00
|
|
|
Value *Src = II->getArgOperand(0);
|
|
|
|
|
|
|
|
// TODO: Move to ConstantFolding/InstSimplify?
|
|
|
|
if (isa<UndefValue>(Src))
|
|
|
|
return replaceInstUsesWith(CI, Src);
|
|
|
|
|
|
|
|
if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
|
2014-06-19 09:19:19 +08:00
|
|
|
const APFloat &ArgVal = C->getValueAPF();
|
|
|
|
APFloat Val(ArgVal.getSemantics(), 1.0);
|
|
|
|
APFloat::opStatus Status = Val.divide(ArgVal,
|
|
|
|
APFloat::rmNearestTiesToEven);
|
|
|
|
// Only do this if it was exact and therefore not dependent on the
|
|
|
|
// rounding mode.
|
|
|
|
if (Status == APFloat::opOK)
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
|
2014-06-19 09:19:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
2017-03-25 03:04:57 +08:00
|
|
|
case Intrinsic::amdgcn_rsq: {
|
|
|
|
Value *Src = II->getArgOperand(0);
|
|
|
|
|
|
|
|
// TODO: Move to ConstantFolding/InstSimplify?
|
|
|
|
if (isa<UndefValue>(Src))
|
|
|
|
return replaceInstUsesWith(CI, Src);
|
|
|
|
break;
|
|
|
|
}
|
2016-03-31 06:28:52 +08:00
|
|
|
case Intrinsic::amdgcn_frexp_mant:
|
|
|
|
case Intrinsic::amdgcn_frexp_exp: {
|
2016-03-31 06:28:26 +08:00
|
|
|
Value *Src = II->getArgOperand(0);
|
|
|
|
if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
|
|
|
|
int Exp;
|
|
|
|
APFloat Significand = frexp(C->getValueAPF(), Exp,
|
|
|
|
APFloat::rmNearestTiesToEven);
|
|
|
|
|
2016-03-31 06:28:52 +08:00
|
|
|
if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
|
|
|
|
return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
|
|
|
|
Significand));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Match instruction special case behavior.
|
|
|
|
if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
|
|
|
|
Exp = 0;
|
|
|
|
|
|
|
|
return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isa<UndefValue>(Src))
|
|
|
|
return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
|
2016-03-31 06:28:26 +08:00
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
2016-09-03 15:06:58 +08:00
|
|
|
case Intrinsic::amdgcn_class: {
|
|
|
|
enum {
|
|
|
|
S_NAN = 1 << 0, // Signaling NaN
|
|
|
|
Q_NAN = 1 << 1, // Quiet NaN
|
|
|
|
N_INFINITY = 1 << 2, // Negative infinity
|
|
|
|
N_NORMAL = 1 << 3, // Negative normal
|
|
|
|
N_SUBNORMAL = 1 << 4, // Negative subnormal
|
|
|
|
N_ZERO = 1 << 5, // Negative zero
|
|
|
|
P_ZERO = 1 << 6, // Positive zero
|
|
|
|
P_SUBNORMAL = 1 << 7, // Positive subnormal
|
|
|
|
P_NORMAL = 1 << 8, // Positive normal
|
|
|
|
P_INFINITY = 1 << 9 // Positive infinity
|
|
|
|
};
|
|
|
|
|
|
|
|
const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
|
|
|
|
N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
|
|
|
|
|
|
|
|
Value *Src0 = II->getArgOperand(0);
|
|
|
|
Value *Src1 = II->getArgOperand(1);
|
|
|
|
const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
|
|
|
|
if (!CMask) {
|
|
|
|
if (isa<UndefValue>(Src0))
|
|
|
|
return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
|
|
|
|
|
|
|
|
if (isa<UndefValue>(Src1))
|
|
|
|
return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t Mask = CMask->getZExtValue();
|
|
|
|
|
|
|
|
// If all tests are made, it doesn't matter what the value is.
|
|
|
|
if ((Mask & FullMask) == FullMask)
|
|
|
|
return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
|
|
|
|
|
|
|
|
if ((Mask & FullMask) == 0)
|
|
|
|
return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
|
|
|
|
|
|
|
|
if (Mask == (S_NAN | Q_NAN)) {
|
|
|
|
// Equivalent of isnan. Replace with standard fcmp.
|
|
|
|
Value *FCmp = Builder->CreateFCmpUNO(Src0, Src0);
|
|
|
|
FCmp->takeName(II);
|
|
|
|
return replaceInstUsesWith(*II, FCmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
|
|
|
|
if (!CVal) {
|
|
|
|
if (isa<UndefValue>(Src0))
|
|
|
|
return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
|
|
|
|
|
|
|
|
// Clamp mask to used bits
|
|
|
|
if ((Mask & FullMask) != Mask) {
|
|
|
|
CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
|
|
|
|
{ Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
|
|
|
|
);
|
|
|
|
|
|
|
|
NewCall->takeName(II);
|
|
|
|
return replaceInstUsesWith(*II, NewCall);
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
const APFloat &Val = CVal->getValueAPF();
|
|
|
|
|
|
|
|
bool Result =
|
|
|
|
((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
|
|
|
|
((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
|
|
|
|
((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
|
|
|
|
((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
|
|
|
|
((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
|
|
|
|
((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
|
|
|
|
((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
|
|
|
|
((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
|
|
|
|
((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
|
|
|
|
((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
|
|
|
|
|
|
|
|
return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
|
|
|
|
}
|
2017-02-22 08:27:34 +08:00
|
|
|
case Intrinsic::amdgcn_cvt_pkrtz: {
|
|
|
|
Value *Src0 = II->getArgOperand(0);
|
|
|
|
Value *Src1 = II->getArgOperand(1);
|
|
|
|
if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
|
|
|
|
if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
|
|
|
|
const fltSemantics &HalfSem
|
|
|
|
= II->getType()->getScalarType()->getFltSemantics();
|
|
|
|
bool LosesInfo;
|
|
|
|
APFloat Val0 = C0->getValueAPF();
|
|
|
|
APFloat Val1 = C1->getValueAPF();
|
|
|
|
Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
|
|
|
|
Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
|
|
|
|
|
|
|
|
Constant *Folded = ConstantVector::get({
|
|
|
|
ConstantFP::get(II->getContext(), Val0),
|
|
|
|
ConstantFP::get(II->getContext(), Val1) });
|
|
|
|
return replaceInstUsesWith(*II, Folded);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
|
|
|
|
return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
2017-02-23 07:04:58 +08:00
|
|
|
case Intrinsic::amdgcn_ubfe:
|
|
|
|
case Intrinsic::amdgcn_sbfe: {
|
|
|
|
// Decompose simple cases into standard shifts.
|
|
|
|
Value *Src = II->getArgOperand(0);
|
|
|
|
if (isa<UndefValue>(Src))
|
|
|
|
return replaceInstUsesWith(*II, Src);
|
|
|
|
|
|
|
|
unsigned Width;
|
|
|
|
Type *Ty = II->getType();
|
|
|
|
unsigned IntSize = Ty->getIntegerBitWidth();
|
|
|
|
|
|
|
|
ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
|
|
|
|
if (CWidth) {
|
|
|
|
Width = CWidth->getZExtValue();
|
|
|
|
if ((Width & (IntSize - 1)) == 0)
|
|
|
|
return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
|
|
|
|
|
|
|
|
if (Width >= IntSize) {
|
|
|
|
// Hardware ignores high bits, so remove those.
|
|
|
|
II->setArgOperand(2, ConstantInt::get(CWidth->getType(),
|
|
|
|
Width & (IntSize - 1)));
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned Offset;
|
|
|
|
ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
|
|
|
|
if (COffset) {
|
|
|
|
Offset = COffset->getZExtValue();
|
|
|
|
if (Offset >= IntSize) {
|
|
|
|
II->setArgOperand(1, ConstantInt::get(COffset->getType(),
|
|
|
|
Offset & (IntSize - 1)));
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
|
|
|
|
|
|
|
|
// TODO: Also emit sub if only width is constant.
|
|
|
|
if (!CWidth && COffset && Offset == 0) {
|
|
|
|
Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
|
|
|
|
Value *ShiftVal = Builder->CreateSub(KSize, II->getArgOperand(2));
|
|
|
|
ShiftVal = Builder->CreateZExt(ShiftVal, II->getType());
|
|
|
|
|
|
|
|
Value *Shl = Builder->CreateShl(Src, ShiftVal);
|
|
|
|
Value *RightShift = Signed ?
|
|
|
|
Builder->CreateAShr(Shl, ShiftVal) :
|
|
|
|
Builder->CreateLShr(Shl, ShiftVal);
|
|
|
|
RightShift->takeName(II);
|
|
|
|
return replaceInstUsesWith(*II, RightShift);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!CWidth || !COffset)
|
|
|
|
break;
|
|
|
|
|
|
|
|
// TODO: This allows folding to undef when the hardware has specific
|
|
|
|
// behavior?
|
|
|
|
if (Offset + Width < IntSize) {
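// Both the offset and width are constant and in range: extract bits
// [Offset + Width - 1 : Offset] by shifting the field to the top and then
// shifting back down with zero or sign fill.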
|
|
|
|
Value *Shl = Builder->CreateShl(Src, IntSize - Offset - Width);
|
|
|
|
Value *RightShift = Signed ?
|
|
|
|
Builder->CreateAShr(Shl, IntSize - Width) :
|
|
|
|
Builder->CreateLShr(Shl, IntSize - Width);
|
|
|
|
RightShift->takeName(II);
|
|
|
|
return replaceInstUsesWith(*II, RightShift);
|
|
|
|
}
|
|
|
|
|
|
|
|
Value *RightShift = Signed ?
|
|
|
|
Builder->CreateAShr(Src, Offset) :
|
|
|
|
Builder->CreateLShr(Src, Offset);
|
|
|
|
|
|
|
|
RightShift->takeName(II);
|
|
|
|
return replaceInstUsesWith(*II, RightShift);
|
|
|
|
}
|
2017-02-23 08:44:03 +08:00
|
|
|
case Intrinsic::amdgcn_exp:
|
|
|
|
case Intrinsic::amdgcn_exp_compr: {
|
|
|
|
ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
|
|
|
|
if (!En) // Illegal.
|
|
|
|
break;
|
|
|
|
|
|
|
|
unsigned EnBits = En->getZExtValue();
|
|
|
|
if (EnBits == 0xf)
|
|
|
|
break; // All inputs enabled.
|
|
|
|
|
|
|
|
bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
|
|
|
|
bool Changed = false;
|
|
|
|
for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
|
|
|
|
if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
|
|
|
|
(IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
|
|
|
|
Value *Src = II->getArgOperand(I + 2);
|
|
|
|
if (!isa<UndefValue>(Src)) {
|
|
|
|
II->setArgOperand(I + 2, UndefValue::get(Src->getType()));
|
|
|
|
Changed = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Changed)
|
|
|
|
return II;
|
|
|
|
|
|
|
|
break;
|
2017-02-28 07:08:49 +08:00
|
|
|
|
|
|
|
}
|
|
|
|
case Intrinsic::amdgcn_fmed3: {
|
|
|
|
// Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
|
|
|
|
// for the shader.
|
|
|
|
|
|
|
|
Value *Src0 = II->getArgOperand(0);
|
|
|
|
Value *Src1 = II->getArgOperand(1);
|
|
|
|
Value *Src2 = II->getArgOperand(2);
|
|
|
|
|
|
|
|
bool Swap = false;
|
|
|
|
// Canonicalize constants to RHS operands.
|
|
|
|
//
|
|
|
|
// fmed3(c0, x, c1) -> fmed3(x, c0, c1)
|
|
|
|
if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
|
|
|
|
std::swap(Src0, Src1);
|
|
|
|
Swap = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
|
|
|
|
std::swap(Src1, Src2);
|
|
|
|
Swap = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
|
|
|
|
std::swap(Src0, Src1);
|
|
|
|
Swap = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Swap) {
|
|
|
|
II->setArgOperand(0, Src0);
|
|
|
|
II->setArgOperand(1, Src1);
|
|
|
|
II->setArgOperand(2, Src2);
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
|
|
|
|
CallInst *NewCall = Builder->CreateMinNum(Src0, Src1);
|
|
|
|
NewCall->copyFastMathFlags(II);
|
|
|
|
NewCall->takeName(II);
|
|
|
|
return replaceInstUsesWith(*II, NewCall);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
|
|
|
|
if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
|
|
|
|
if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
|
|
|
|
APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
|
|
|
|
C2->getValueAPF());
|
|
|
|
return replaceInstUsesWith(*II,
|
|
|
|
ConstantFP::get(Builder->getContext(), Result));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
2017-02-23 08:44:03 +08:00
|
|
|
}
|
2017-03-14 02:14:02 +08:00
|
|
|
case Intrinsic::amdgcn_icmp:
|
|
|
|
case Intrinsic::amdgcn_fcmp: {
|
|
|
|
const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
|
|
|
|
if (!CC)
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Guard against invalid arguments.
|
|
|
|
int64_t CCVal = CC->getZExtValue();
|
|
|
|
bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
|
|
|
|
if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
|
|
|
|
CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
|
|
|
|
(!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
|
|
|
|
CCVal > CmpInst::LAST_FCMP_PREDICATE)))
|
|
|
|
break;
|
|
|
|
|
|
|
|
Value *Src0 = II->getArgOperand(0);
|
|
|
|
Value *Src1 = II->getArgOperand(1);
|
|
|
|
|
|
|
|
if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
|
|
|
|
if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
|
|
|
|
Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
|
|
|
|
return replaceInstUsesWith(*II,
|
|
|
|
ConstantExpr::getSExt(CCmp, II->getType()));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Canonicalize constants to RHS.
|
|
|
|
CmpInst::Predicate SwapPred
|
|
|
|
= CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
|
|
|
|
II->setArgOperand(0, Src1);
|
|
|
|
II->setArgOperand(1, Src0);
|
|
|
|
II->setArgOperand(2, ConstantInt::get(CC->getType(),
|
|
|
|
static_cast<int>(SwapPred)));
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Canonicalize compare eq with true value to compare != 0
|
|
|
|
// llvm.amdgcn.icmp(zext (i1 x), 1, eq)
|
|
|
|
// -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
|
|
|
|
// llvm.amdgcn.icmp(sext (i1 x), -1, eq)
|
|
|
|
// -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
|
|
|
|
Value *ExtSrc;
|
|
|
|
if (CCVal == CmpInst::ICMP_EQ &&
|
|
|
|
((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
|
|
|
|
(match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
|
|
|
|
ExtSrc->getType()->isIntegerTy(1)) {
|
|
|
|
II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType()));
|
|
|
|
II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
|
|
|
|
return II;
|
|
|
|
}
|
|
|
|
|
|
|
|
CmpInst::Predicate SrcPred;
|
|
|
|
Value *SrcLHS;
|
|
|
|
Value *SrcRHS;
|
|
|
|
|
|
|
|
// Fold compare eq/ne with 0 from a compare result as the predicate to the
|
|
|
|
// intrinsic. The typical use is a wave vote function in the library, which
|
|
|
|
// will be fed from a user code condition compared with 0. Fold in the
|
|
|
|
// redundant compare.
|
|
|
|
|
|
|
|
// llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
|
|
|
|
// -> llvm.amdgcn.[if]cmp(a, b, pred)
|
|
|
|
//
|
|
|
|
// llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
|
|
|
|
// -> llvm.amdgcn.[if]cmp(a, b, inv pred)
|
|
|
|
if (match(Src1, m_Zero()) &&
|
|
|
|
match(Src0,
|
|
|
|
m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
|
|
|
|
if (CCVal == CmpInst::ICMP_EQ)
|
|
|
|
SrcPred = CmpInst::getInversePredicate(SrcPred);
|
|
|
|
|
|
|
|
Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
|
|
|
|
Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
|
|
|
|
|
|
|
|
Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
|
|
|
|
SrcLHS->getType());
|
|
|
|
Value *Args[] = { SrcLHS, SrcRHS,
|
|
|
|
ConstantInt::get(CC->getType(), SrcPred) };
|
|
|
|
CallInst *NewCall = Builder->CreateCall(NewF, Args);
|
|
|
|
NewCall->takeName(II);
|
|
|
|
return replaceInstUsesWith(*II, NewCall);
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
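  // Illustrative example (IR sketch, not from a test; predicate values taken
  // from CmpInst::Predicate): a wave vote written as
  //   %c = icmp slt i32 %a, %b
  //   %e = sext i1 %c to i32
  //   %r = call i64 @llvm.amdgcn.icmp.i32(i32 %e, i32 0, i32 33)  ; 33 = ne
  // folds above into a single intrinsic on the original operands:
  //   %r = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 40) ; 40 = slt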
|
2010-01-05 15:32:13 +08:00
|
|
|
case Intrinsic::stackrestore: {
|
|
|
|
// If the save is right next to the restore, remove the restore. This can
|
|
|
|
// happen when variable allocas are DCE'd.
|
2010-06-24 20:58:35 +08:00
|
|
|
if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
|
2010-01-05 15:32:13 +08:00
|
|
|
if (SS->getIntrinsicID() == Intrinsic::stacksave) {
|
2015-10-14 00:59:33 +08:00
|
|
|
if (&*++SS->getIterator() == II)
|
2016-02-02 06:23:39 +08:00
|
|
|
return eraseInstFromFunction(CI);
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
|
|
|
}
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
// Scan down this block to see if there is another stack restore in the
|
|
|
|
// same block without an intervening call/alloca.
|
2015-10-14 00:59:33 +08:00
|
|
|
BasicBlock::iterator BI(II);
|
2010-01-05 15:32:13 +08:00
|
|
|
TerminatorInst *TI = II->getParent()->getTerminator();
|
|
|
|
bool CannotRemove = false;
|
|
|
|
for (++BI; &*BI != TI; ++BI) {
|
2012-06-21 23:45:28 +08:00
|
|
|
if (isa<AllocaInst>(BI)) {
|
2010-01-05 15:32:13 +08:00
|
|
|
CannotRemove = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
|
|
|
|
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
|
|
|
|
// If there is a stackrestore below this one, remove this one.
|
|
|
|
if (II->getIntrinsicID() == Intrinsic::stackrestore)
|
2016-02-02 06:23:39 +08:00
|
|
|
return eraseInstFromFunction(CI);
|
2016-02-27 08:53:54 +08:00
|
|
|
|
|
|
|
// Bail if we cross over an intrinsic with side effects, such as
|
|
|
|
// llvm.stacksave, llvm.read_register, or llvm.setjmp.
|
|
|
|
if (II->mayHaveSideEffects()) {
|
|
|
|
CannotRemove = true;
|
|
|
|
break;
|
|
|
|
}
|
2010-01-05 15:32:13 +08:00
|
|
|
} else {
|
|
|
|
// If we found a non-intrinsic call, we can't remove the stack
|
|
|
|
// restore.
|
|
|
|
CannotRemove = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2011-07-31 14:30:59 +08:00
|
|
|
// If the stack restore is in a return, resume, or unwind block and if there
|
|
|
|
// are no allocas or calls between the restore and the return, nuke the
|
|
|
|
// restore.
|
2012-02-07 05:16:41 +08:00
|
|
|
if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
|
2016-02-02 06:23:39 +08:00
|
|
|
return eraseInstFromFunction(CI);
|
2010-01-05 15:32:13 +08:00
|
|
|
break;
|
|
|
|
}
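  // Illustrative example (IR sketch): a save feeding an immediately following
  // restore,
  //   %sp = call i8* @llvm.stacksave()
  //   call void @llvm.stackrestore(i8* %sp)
  // is deleted above, as is a restore that is only followed by another
  // stackrestore or by the function's return, with no intervening alloca or
  // call.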
|
2016-07-29 06:50:48 +08:00
  case Intrinsic::lifetime_start:
2016-07-29 06:59:03 +08:00
    // Asan needs to poison memory to detect invalid access which is possible
    // even for empty lifetime range.
    if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
      break;

2016-05-10 17:24:49 +08:00
    if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
                                  Intrinsic::lifetime_end, *this))
      return nullptr;
|
[InstCombine] Remove trivially empty lifetime start/end ranges.
Summary:
Some passes may open up opportunities for optimizations, leaving empty
lifetime start/end ranges. For example, with the following code:
void foo(char *, char *);
void bar(int Size, bool flag) {
for (int i = 0; i < Size; ++i) {
char text[1];
char buff[1];
if (flag)
foo(text, buff); // BBFoo
}
}
the loop unswitch pass will create 2 versions of the loop, one with
flag==true, and the other one with flag==false, but always leaving
the BBFoo basic block, with lifetime ranges covering the scope of the for
loop. Simplify CFG will then remove BBFoo in the case where flag==false,
but will leave the lifetime markers.
This patch teaches InstCombine to remove trivially empty lifetime marker
ranges, that is ranges ending right after they were started (ignoring
debug info or other lifetime markers in the range).
This fixes PR24598: excessive compile time after r234581.
Reviewers: reames, chandlerc
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D13305
llvm-svn: 249018
2015-10-01 22:54:31 +08:00
|
|
|
break;
|
2014-07-26 05:45:17 +08:00
|
|
|
case Intrinsic::assume: {
|
2016-04-09 00:37:12 +08:00
|
|
|
Value *IIOperand = II->getArgOperand(0);
|
|
|
|
// Remove an assume if it is immediately followed by an identical assume.
|
|
|
|
if (match(II->getNextNode(),
|
|
|
|
m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
|
|
|
|
return eraseInstFromFunction(CI);
|
|
|
|
|
2014-07-26 05:45:17 +08:00
|
|
|
// Canonicalize assume(a && b) -> assume(a); assume(b);
|
2014-09-07 20:44:26 +08:00
|
|
|
// Note: New assumption intrinsics created here are registered by
|
|
|
|
// the InstCombineIRInserter object.
|
2016-04-09 00:37:12 +08:00
|
|
|
Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
|
2014-07-26 05:45:17 +08:00
|
|
|
if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
|
|
|
|
Builder->CreateCall(AssumeIntrinsic, A, II->getName());
|
|
|
|
Builder->CreateCall(AssumeIntrinsic, B, II->getName());
|
2016-02-02 06:23:39 +08:00
|
|
|
return eraseInstFromFunction(*II);
|
2014-07-26 05:45:17 +08:00
|
|
|
}
|
|
|
|
// assume(!(a || b)) -> assume(!a); assume(!b);
|
|
|
|
if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
|
2014-09-07 20:44:26 +08:00
|
|
|
Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A),
|
|
|
|
II->getName());
|
|
|
|
Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B),
|
|
|
|
II->getName());
|
2016-02-02 06:23:39 +08:00
|
|
|
return eraseInstFromFunction(*II);
|
2014-07-26 05:45:17 +08:00
|
|
|
}
|
2014-10-05 05:27:06 +08:00
|
|
|
|
2014-11-12 07:33:19 +08:00
|
|
|
// assume( (load addr) != null ) -> add 'nonnull' metadata to load
|
|
|
|
// (if assume is valid at the load)
|
2017-01-04 06:25:31 +08:00
|
|
|
CmpInst::Predicate Pred;
|
|
|
|
Instruction *LHS;
|
|
|
|
if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
|
|
|
|
Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
|
|
|
|
LHS->getType()->isPointerTy() &&
|
|
|
|
isValidAssumeForContext(II, LHS, &DT)) {
|
|
|
|
MDNode *MD = MDNode::get(II->getContext(), None);
|
|
|
|
LHS->setMetadata(LLVMContext::MD_nonnull, MD);
|
|
|
|
return eraseInstFromFunction(*II);
|
|
|
|
|
2015-02-10 16:07:32 +08:00
|
|
|
// TODO: apply nonnull return attributes to calls and invokes
|
2014-11-12 07:33:19 +08:00
|
|
|
// TODO: apply range metadata for range check patterns?
|
|
|
|
}
|
2017-01-04 06:25:31 +08:00
|
|
|
|
2014-10-05 05:27:06 +08:00
|
|
|
// If there is a dominating assume with the same condition as this one,
|
|
|
|
// then this one is redundant, and should be removed.
|
2014-10-05 08:53:02 +08:00
|
|
|
APInt KnownZero(1, 0), KnownOne(1, 0);
|
|
|
|
computeKnownBits(IIOperand, KnownZero, KnownOne, 0, II);
|
|
|
|
if (KnownOne.isAllOnesValue())
|
2016-02-02 06:23:39 +08:00
|
|
|
return eraseInstFromFunction(*II);
|
2014-10-05 05:27:06 +08:00
|
|
|
|
2017-01-11 21:24:24 +08:00
|
|
|
// Update the cache of affected values for this assumption (we might be
|
|
|
|
// here because we just simplified the condition).
|
|
|
|
AC.updateAffectedValues(II);
|
2014-07-26 05:45:17 +08:00
|
|
|
break;
|
|
|
|
}
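  // Illustrative example (IR sketch): a conjunctive assumption
  //   %and = and i1 %a, %b
  //   call void @llvm.assume(i1 %and)
  // is split above into
  //   call void @llvm.assume(i1 %a)
  //   call void @llvm.assume(i1 %b)
  // so that each condition can be consumed independently by ValueTracking.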
|
2014-12-30 07:27:30 +08:00
|
|
|
case Intrinsic::experimental_gc_relocate: {
|
|
|
|
// Translate facts known about a pointer before relocating into
|
|
|
|
// facts about the relocate value, while being careful to
|
|
|
|
// preserve relocation semantics.
|
2016-01-05 12:03:00 +08:00
|
|
|
Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
|
2014-12-30 07:27:30 +08:00
|
|
|
|
|
|
|
// Remove the relocation if unused, note that this check is required
|
|
|
|
// to prevent the cases below from looping forever.
|
|
|
|
if (II->use_empty())
|
2016-02-02 06:23:39 +08:00
|
|
|
return eraseInstFromFunction(*II);
|
2014-12-30 07:27:30 +08:00
|
|
|
|
|
|
|
// Undef is undef, even after relocation.
|
|
|
|
// TODO: provide a hook for this in GCStrategy. This is clearly legal for
|
|
|
|
// most practical collectors, but there was discussion in the review thread
|
|
|
|
// about whether it was legal for all possible collectors.
|
2016-02-10 05:09:22 +08:00
|
|
|
if (isa<UndefValue>(DerivedPtr))
|
|
|
|
// Use undef of gc_relocate's type to replace it.
|
|
|
|
return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
|
|
|
|
|
|
|
|
if (auto *PT = dyn_cast<PointerType>(II->getType())) {
|
|
|
|
// The relocation of null will be null for most any collector.
|
|
|
|
// TODO: provide a hook for this in GCStrategy. There might be some
|
|
|
|
// weird collector this property does not hold for.
|
|
|
|
if (isa<ConstantPointerNull>(DerivedPtr))
|
|
|
|
// Use null-pointer of gc_relocate's type to replace it.
|
|
|
|
return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
|
2016-04-25 01:00:34 +08:00
|
|
|
|
2016-02-10 05:09:22 +08:00
|
|
|
// isKnownNonNull -> nonnull attribute
|
2016-08-05 09:06:44 +08:00
|
|
|
if (isKnownNonNullAt(DerivedPtr, II, &DT))
|
Rename AttributeSet to AttributeList
Summary:
This class is a list of AttributeSetNodes corresponding the function
prototype of a call or function declaration. This class used to be
called ParamAttrListPtr, then AttrListPtr, then AttributeSet. It is
typically accessed by parameter and return value index, so
"AttributeList" seems like a more intuitive name.
Rename AttributeSetImpl to AttributeListImpl to follow suit.
It's useful to rename this class so that we can rename AttributeSetNode
to AttributeSet later. AttributeSet is the set of attributes that apply
to a single function, argument, or return value.
Reviewers: sanjoy, javed.absar, chandlerc, pete
Reviewed By: pete
Subscribers: pete, jholewinski, arsenm, dschuff, mehdi_amini, jfb, nhaehnle, sbc100, void, llvm-commits
Differential Revision: https://reviews.llvm.org/D31102
llvm-svn: 298393
2017-03-22 00:57:19 +08:00
|
|
|
II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
|
2015-02-15 03:37:54 +08:00
|
|
|
}
|
2014-12-30 07:27:30 +08:00
|
|
|
|
|
|
|
// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
|
|
|
|
// Canonicalize on the type from the uses to the defs
|
2015-02-15 03:37:54 +08:00
|
|
|
|
2014-12-30 07:27:30 +08:00
|
|
|
// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
|
2016-02-10 05:09:22 +08:00
|
|
|
break;
|
2014-12-30 07:27:30 +08:00
|
|
|
}
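  // Illustrative example (sketch; the exact mangled relocate name depends on
  // the pointer type): an unused
  //   %r = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
  // is erased above, a relocate of undef folds to undef, and a relocate of a
  // null derived pointer folds to null of the relocate's type.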
|
2017-01-25 22:12:12 +08:00
  case Intrinsic::experimental_guard: {
2017-02-02 00:34:55 +08:00
    // Is this guard followed by another guard?
    Instruction *NextInst = II->getNextNode();
    Value *NextCond = nullptr;
    if (match(NextInst,
              m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
      Value *CurrCond = II->getArgOperand(0);

2017-01-25 22:12:12 +08:00
2017-03-30 20:59:53 +08:00
      // Remove a guard that is immediately preceded by an identical guard.
2017-02-02 00:34:55 +08:00
      if (CurrCond == NextCond)
        return eraseInstFromFunction(*NextInst);

      // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
      II->setArgOperand(0, Builder->CreateAnd(CurrCond, NextCond));
      return eraseInstFromFunction(*NextInst);
    }
2017-01-25 22:12:12 +08:00
    break;
  }
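  // Illustrative example (IR sketch): two back-to-back guards
  //   call void (i1, ...) @llvm.experimental.guard(i1 %a) [ "deopt"() ]
  //   call void (i1, ...) @llvm.experimental.guard(i1 %b) [ "deopt"() ]
  // are merged above into a single guard on %a & %b, and the second guard is
  // erased outright when the two conditions are identical.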
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
|
|
|
return visitCallSite(II);
|
|
|
|
}
|
|
|
|
|
2017-02-01 02:09:05 +08:00
// Fence instruction simplification
Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
  // Remove identical consecutive fences.
  if (auto *NFI = dyn_cast<FenceInst>(FI.getNextNode()))
    if (FI.isIdenticalTo(NFI))
      return eraseInstFromFunction(FI);
  return nullptr;
}
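// Illustrative example (IR sketch): of two identical adjacent fences
//   fence seq_cst
//   fence seq_cst
// the first is erased above; any remaining duplicates are expected to be
// picked up on later InstCombine iterations.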
2010-01-05 15:32:13 +08:00
// InvokeInst simplification
//
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
  return visitCallSite(&II);
}
|
|
|
|
|
2016-01-21 06:24:38 +08:00
|
|
|
/// If this cast does not affect the value passed through the varargs area, we
|
|
|
|
/// can eliminate the use of the cast.
|
2010-01-05 15:32:13 +08:00
|
|
|
static bool isSafeToEliminateVarargsCast(const CallSite CS,
|
2015-03-10 10:37:25 +08:00
|
|
|
const DataLayout &DL,
|
|
|
|
const CastInst *const CI,
|
2010-01-05 15:32:13 +08:00
|
|
|
const int ix) {
|
|
|
|
if (!CI->isLosslessCast())
|
|
|
|
return false;
|
|
|
|
|
2014-12-03 02:50:36 +08:00
|
|
|
// If this is a GC intrinsic, avoid munging types. We need types for
|
|
|
|
// statepoint reconstruction in SelectionDAG.
|
|
|
|
// TODO: This is probably something which should be expanded to all
|
|
|
|
// intrinsics since the entire point of intrinsics is that
|
|
|
|
// they are understandable by the optimizer.
|
|
|
|
if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
|
|
|
|
return false;
|
|
|
|
|
2014-01-28 10:38:36 +08:00
|
|
|
// The size of ByVal or InAlloca arguments is derived from the type, so we
|
2010-01-05 15:32:13 +08:00
|
|
|
// can't change to a type with a different size. If the size were
|
|
|
|
// passed explicitly we could avoid this check.
|
2014-01-28 10:38:36 +08:00
|
|
|
if (!CS.isByValOrInAllocaArgument(ix))
|
2010-01-05 15:32:13 +08:00
|
|
|
return true;
|
|
|
|
|
2012-02-03 08:07:04 +08:00
|
|
|
Type* SrcTy =
|
2010-01-05 15:32:13 +08:00
|
|
|
cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
|
2011-07-18 12:54:35 +08:00
|
|
|
Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
|
2010-01-05 15:32:13 +08:00
|
|
|
if (!SrcTy->isSized() || !DstTy->isSized())
|
|
|
|
return false;
|
2015-03-10 10:37:25 +08:00
|
|
|
if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
|
2010-01-05 15:32:13 +08:00
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
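// Illustrative example (IR sketch, hypothetical types): a lossless cast such
// as
//   %p = bitcast i32* %q to i8*
// passed through the varargs area can be stripped so the call receives %q
// directly; byval/inalloca arguments additionally require both pointee types
// to be sized with equal allocation size, and GC statepoint-related calls are
// skipped entirely.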
|
|
|
|
|
2015-03-10 10:37:25 +08:00
|
|
|
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!CI->getCalledFunction()) return nullptr;
|
2010-03-06 18:50:38 +08:00
|
|
|
|
2015-01-21 19:23:40 +08:00
|
|
|
auto InstCombineRAUW = [this](Instruction *From, Value *With) {
|
2016-02-02 06:23:39 +08:00
|
|
|
replaceInstUsesWith(*From, With);
|
2015-01-21 19:23:40 +08:00
|
|
|
};
|
2016-08-05 09:06:44 +08:00
|
|
|
LibCallSimplifier Simplifier(DL, &TLI, InstCombineRAUW);
|
2015-01-21 19:23:40 +08:00
|
|
|
if (Value *With = Simplifier.optimizeCall(CI)) {
|
2012-11-30 12:05:06 +08:00
|
|
|
++NumSimplified;
|
2016-02-02 06:23:39 +08:00
|
|
|
return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
|
2012-11-30 12:05:06 +08:00
|
|
|
}
|
2012-10-14 00:45:24 +08:00
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-03-06 18:50:38 +08:00
|
|
|
}
|
|
|
|
|
2016-01-30 07:27:03 +08:00
|
|
|
static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
|
2011-09-06 21:37:06 +08:00
|
|
|
// Strip off at most one level of pointer casts, looking for an alloca. This
|
|
|
|
// is good enough in practice and simpler than handling any number of casts.
|
|
|
|
Value *Underlying = TrampMem->stripPointerCasts();
|
|
|
|
if (Underlying != TrampMem &&
|
2014-03-09 11:16:01 +08:00
|
|
|
(!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2011-09-06 21:37:06 +08:00
|
|
|
if (!isa<AllocaInst>(Underlying))
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2011-09-06 21:37:06 +08:00
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
IntrinsicInst *InitTrampoline = nullptr;
|
2014-03-09 11:16:01 +08:00
|
|
|
for (User *U : TrampMem->users()) {
|
|
|
|
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
|
2011-09-06 21:37:06 +08:00
|
|
|
if (!II)
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2011-09-06 21:37:06 +08:00
|
|
|
if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
|
|
|
|
if (InitTrampoline)
|
|
|
|
// More than one init_trampoline writes to this value. Give up.
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2011-09-06 21:37:06 +08:00
|
|
|
InitTrampoline = II;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
|
|
|
|
// Allow any number of calls to adjust.trampoline.
|
|
|
|
continue;
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2011-09-06 21:37:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// No call to init.trampoline found.
|
|
|
|
if (!InitTrampoline)
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2011-09-06 21:37:06 +08:00
|
|
|
|
|
|
|
// Check that the alloca is being used in the expected way.
|
|
|
|
if (InitTrampoline->getOperand(0) != TrampMem)
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2011-09-06 21:37:06 +08:00
|
|
|
|
|
|
|
return InitTrampoline;
|
|
|
|
}
|
|
|
|
|
2016-01-30 07:27:03 +08:00
static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
2011-09-06 21:37:06 +08:00
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find a
  // init.trampoline which has a direct path to the adjust.trampoline.
2015-10-14 00:59:33 +08:00
  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
                            E = AdjustTramp->getParent()->begin();
       I != E;) {
    Instruction *Inst = &*--I;
2011-09-06 21:37:06 +08:00
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
2014-04-25 13:29:35 +08:00
      return nullptr;
2011-09-06 21:37:06 +08:00
  }
2014-04-25 13:29:35 +08:00
  return nullptr;
2011-09-06 21:37:06 +08:00
}
|
|
|
|
|
|
|
|
// Given a call to llvm.adjust.trampoline, find and return the corresponding
|
|
|
|
// call to llvm.init.trampoline if the call to the trampoline can be optimized
|
|
|
|
// to a direct call to a function. Otherwise return NULL.
|
|
|
|
//
|
2016-01-30 07:27:03 +08:00
|
|
|
static IntrinsicInst *findInitTrampoline(Value *Callee) {
|
2011-09-06 21:37:06 +08:00
|
|
|
Callee = Callee->stripPointerCasts();
|
|
|
|
IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
|
|
|
|
if (!AdjustTramp ||
|
|
|
|
AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2011-09-06 21:37:06 +08:00
|
|
|
|
|
|
|
Value *TrampMem = AdjustTramp->getOperand(0);
|
|
|
|
|
2016-01-30 07:27:03 +08:00
|
|
|
if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
|
2011-09-06 21:37:06 +08:00
|
|
|
return IT;
|
2016-01-30 07:27:03 +08:00
|
|
|
if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
|
2011-09-06 21:37:06 +08:00
|
|
|
return IT;
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2011-09-06 21:37:06 +08:00
|
|
|
}
|
|
|
|
|
2016-01-21 06:24:38 +08:00
|
|
|
/// Improvements for call and invoke instructions.
|
2010-01-05 15:32:13 +08:00
|
|
|
Instruction *InstCombiner::visitCallSite(CallSite CS) {
|
2016-08-05 09:06:44 +08:00
|
|
|
if (isAllocLikeFn(CS.getInstruction(), &TLI))
|
2012-07-10 02:38:20 +08:00
|
|
|
return visitAllocSite(*CS.getInstruction());
|
2012-06-22 05:25:05 +08:00
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
bool Changed = false;
|
|
|
|
|
2015-06-17 04:24:25 +08:00
|
|
|
// Mark any parameters that are known to be non-null with the nonnull
|
|
|
|
// attribute. This is helpful for inlining calls to functions with null
|
|
|
|
// checks on their arguments.
|
2015-12-02 14:58:49 +08:00
|
|
|
SmallVector<unsigned, 4> Indices;
|
2015-06-17 04:24:25 +08:00
|
|
|
unsigned ArgNo = 0;
|
2015-12-02 14:58:49 +08:00
|
|
|
|
2015-06-17 04:24:25 +08:00
|
|
|
for (Value *V : CS.args()) {
|
2016-01-30 07:14:58 +08:00
|
|
|
if (V->getType()->isPointerTy() &&
|
|
|
|
!CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
|
2016-08-05 09:06:44 +08:00
|
|
|
isKnownNonNullAt(V, CS.getInstruction(), &DT))
|
2015-12-02 14:58:49 +08:00
|
|
|
Indices.push_back(ArgNo + 1);
|
2015-06-17 04:24:25 +08:00
|
|
|
ArgNo++;
|
|
|
|
}
|
2015-12-02 14:58:49 +08:00
|
|
|
|
2015-06-17 04:24:25 +08:00
|
|
|
assert(ArgNo == CS.arg_size() && "sanity check");
|
|
|
|
|
2015-12-02 14:58:49 +08:00
|
|
|
if (!Indices.empty()) {
|
2017-03-22 00:57:19 +08:00
|
|
|
AttributeList AS = CS.getAttributes();
|
2015-12-02 14:58:49 +08:00
|
|
|
LLVMContext &Ctx = CS.getInstruction()->getContext();
|
|
|
|
AS = AS.addAttribute(Ctx, Indices,
|
|
|
|
Attribute::get(Ctx, Attribute::NonNull));
|
|
|
|
CS.setAttributes(AS);
|
|
|
|
Changed = true;
|
|
|
|
}
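  // Illustrative example (IR sketch, hypothetical callee @use): when %p is
  // known non-null at the call site,
  //   call void @use(i8* %p)
  // is annotated above as
  //   call void @use(i8* nonnull %p)
  // which helps inlining and callee-side null-check elimination later on.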
|
|
|
|
|
2010-12-20 16:25:06 +08:00
|
|
|
// If the callee is a pointer to a function, attempt to move any casts to the
|
|
|
|
// arguments of the call/invoke.
|
2010-01-05 15:32:13 +08:00
|
|
|
Value *Callee = CS.getCalledValue();
|
2010-12-20 16:25:06 +08:00
|
|
|
if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-01-05 15:32:13 +08:00
|
|
|
|
2016-03-15 04:18:54 +08:00
|
|
|
if (Function *CalleeF = dyn_cast<Function>(Callee)) {
|
|
|
|
// Remove the convergent attr on calls when the callee is not convergent.
|
2016-06-21 03:04:44 +08:00
|
|
|
if (CS.isConvergent() && !CalleeF->isConvergent() &&
|
|
|
|
!CalleeF->isIntrinsic()) {
|
2016-03-15 04:18:54 +08:00
|
|
|
DEBUG(dbgs() << "Removing convergent attr from instr "
|
|
|
|
<< CS.getInstruction() << "\n");
|
|
|
|
CS.setNotConvergent();
|
|
|
|
return CS.getInstruction();
|
|
|
|
}
|
|
|
|
|
2010-02-02 02:11:34 +08:00
|
|
|
// If the call and callee calling conventions don't match, this call must
|
|
|
|
// be unreachable, as the call is undefined.
|
|
|
|
if (CalleeF->getCallingConv() != CS.getCallingConv() &&
|
|
|
|
// Only do this for calls to a function with a body. A prototype may
|
|
|
|
// not actually end up matching the implementation's calling conv for a
|
|
|
|
// variety of reasons (e.g. it may be written in assembly).
|
|
|
|
!CalleeF->isDeclaration()) {
|
2010-01-05 15:32:13 +08:00
|
|
|
Instruction *OldCall = CS.getInstruction();
|
|
|
|
new StoreInst(ConstantInt::getTrue(Callee->getContext()),
|
2012-02-03 08:07:04 +08:00
|
|
|
UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
|
2010-01-05 15:32:13 +08:00
|
|
|
OldCall);
|
2012-12-13 08:18:46 +08:00
|
|
|
// If OldCall does not return void then replaceAllUsesWith undef.
|
2010-01-05 15:32:13 +08:00
|
|
|
// This allows ValueHandlers and custom metadata to adjust itself.
|
|
|
|
if (!OldCall->getType()->isVoidTy())
|
2016-02-02 06:23:39 +08:00
|
|
|
replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
|
2010-02-02 02:04:58 +08:00
|
|
|
if (isa<CallInst>(OldCall))
|
2016-02-02 06:23:39 +08:00
|
|
|
return eraseInstFromFunction(*OldCall);
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2010-02-02 02:04:58 +08:00
|
|
|
// We cannot remove an invoke, because it would change the CFG, just
|
|
|
|
// change the callee to a null pointer.
|
2010-03-21 05:00:25 +08:00
|
|
|
cast<InvokeInst>(OldCall)->setCalledFunction(
|
2010-02-02 02:04:58 +08:00
|
|
|
Constant::getNullValue(CalleeF->getType()));
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
2016-03-15 04:18:54 +08:00
|
|
|
}
|
2010-01-05 15:32:13 +08:00
|
|
|
|
|
|
|
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
|
2010-06-24 20:58:35 +08:00
|
|
|
// If CS does not return void then replaceAllUsesWith undef.
|
2010-01-05 15:32:13 +08:00
|
|
|
// This allows ValueHandlers and custom metadata to adjust itself.
|
|
|
|
if (!CS.getInstruction()->getType()->isVoidTy())
|
2016-02-02 06:23:39 +08:00
|
|
|
replaceInstUsesWith(*CS.getInstruction(),
|
2011-05-18 08:32:01 +08:00
|
|
|
UndefValue::get(CS.getInstruction()->getType()));
|
2010-01-05 15:32:13 +08:00
|
|
|
|
2012-06-22 07:52:14 +08:00
|
|
|
if (isa<InvokeInst>(CS.getInstruction())) {
|
|
|
|
// Can't remove an invoke because we cannot change the CFG.
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
2012-06-22 07:52:14 +08:00
|
|
|
|
|
|
|
// This instruction is not reachable, just remove it. We insert a store to
|
|
|
|
// undef so that we know that this code is not reachable, despite the fact
|
|
|
|
// that we can't modify the CFG here.
|
|
|
|
new StoreInst(ConstantInt::getTrue(Callee->getContext()),
|
|
|
|
UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
|
|
|
|
CS.getInstruction());
|
|
|
|
|
2016-02-02 06:23:39 +08:00
|
|
|
return eraseInstFromFunction(*CS.getInstruction());
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
|
|
|
|
2016-01-30 07:27:03 +08:00
|
|
|
if (IntrinsicInst *II = findInitTrampoline(Callee))
|
2011-09-06 21:37:06 +08:00
|
|
|
return transformCallThroughTrampoline(CS, II);
|
2010-01-05 15:32:13 +08:00
|
|
|
|
2011-07-18 12:54:35 +08:00
|
|
|
PointerType *PTy = cast<PointerType>(Callee->getType());
|
|
|
|
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
|
2010-01-05 15:32:13 +08:00
|
|
|
if (FTy->isVarArg()) {
|
2011-11-29 09:18:23 +08:00
|
|
|
int ix = FTy->getNumParams();
|
2010-01-05 15:32:13 +08:00
|
|
|
// See if we can optimize any arguments passed through the varargs area of
|
|
|
|
// the call.
|
2013-06-28 08:25:40 +08:00
|
|
|
for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
|
2010-01-05 15:32:13 +08:00
|
|
|
E = CS.arg_end(); I != E; ++I, ++ix) {
|
|
|
|
CastInst *CI = dyn_cast<CastInst>(*I);
|
2015-03-10 10:37:25 +08:00
|
|
|
if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
|
2010-01-05 15:32:13 +08:00
|
|
|
*I = CI->getOperand(0);
|
|
|
|
Changed = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
|
|
|
|
// Inline asm calls cannot throw - mark them 'nounwind'.
|
|
|
|
CS.setDoesNotThrow();
|
|
|
|
Changed = true;
|
|
|
|
}
|
|
|
|
|
2012-10-09 00:38:25 +08:00
|
|
|
// Try to optimize the call if possible, we require DataLayout for most of
|
2010-03-06 18:50:38 +08:00
|
|
|
// this. None of these calls are seen as possibly dead so go ahead and
|
|
|
|
// delete the instruction now.
|
|
|
|
if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
|
2015-03-10 10:37:25 +08:00
|
|
|
Instruction *I = tryOptimizeCall(CI);
|
2010-03-06 18:59:25 +08:00
|
|
|
// If we changed something return the result, etc. Otherwise let
|
|
|
|
// the fallthrough check.
|
2016-02-02 06:23:39 +08:00
|
|
|
if (I) return eraseInstFromFunction(*I);
|
2010-03-06 18:50:38 +08:00
|
|
|
}
|
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
return Changed ? CS.getInstruction() : nullptr;
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
|
|
|
|
2016-01-21 06:24:38 +08:00
|
|
|
/// If the callee is a constexpr cast of a function, attempt to move the cast to
|
|
|
|
/// the arguments of the call/invoke.
|
2010-01-05 15:32:13 +08:00
|
|
|
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
|
2016-08-11 23:21:21 +08:00
|
|
|
auto *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!Callee)
|
2010-01-05 15:32:13 +08:00
|
|
|
return false;
|
2016-08-11 23:23:56 +08:00
|
|
|
|
|
|
|
// The prototype of a thunk is a lie. Don't directly call such a function.
|
2015-01-22 06:32:04 +08:00
|
|
|
if (Callee->hasFnAttribute("thunk"))
|
|
|
|
return false;
|
2016-08-11 23:23:56 +08:00
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
Instruction *Caller = CS.getInstruction();
|
2017-03-22 00:57:19 +08:00
|
|
|
const AttributeList &CallerPAL = CS.getAttributes();
|
2010-01-05 15:32:13 +08:00
|
|
|
|
|
|
|
// Okay, this is a cast from a function to a different type. Unless doing so
|
|
|
|
// would cause a type conversion of one of our arguments, change this call to
|
|
|
|
// be a direct call with arguments casted to the appropriate types.
|
|
|
|
//
|
2011-07-18 12:54:35 +08:00
|
|
|
FunctionType *FT = Callee->getFunctionType();
|
|
|
|
Type *OldRetTy = Caller->getType();
|
|
|
|
Type *NewRetTy = FT->getReturnType();
|
2010-01-05 15:32:13 +08:00
|
|
|
|
|
|
|
// Check to see if we are changing the return type...
|
|
|
|
if (OldRetTy != NewRetTy) {
|
2014-01-19 06:47:12 +08:00
|
|
|
|
|
|
|
if (NewRetTy->isStructTy())
|
|
|
|
return false; // TODO: Handle multiple return values.
|
|
|
|
|
2015-01-06 16:41:31 +08:00
|
|
|
if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
|
2013-09-18 05:10:14 +08:00
|
|
|
if (Callee->isDeclaration())
|
|
|
|
return false; // Cannot transform this return value.
|
2010-01-05 15:32:13 +08:00
|
|
|
|
2013-09-18 05:10:14 +08:00
|
|
|
if (!Caller->use_empty() &&
|
|
|
|
// void -> non-void is handled specially
|
|
|
|
!NewRetTy->isVoidTy())
|
2014-10-23 12:08:42 +08:00
|
|
|
return false; // Cannot transform this return value.
|
2013-09-18 05:10:14 +08:00
|
|
|
}
|
2010-01-05 15:32:13 +08:00
|
|
|
|
|
|
|
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
|
2017-03-22 00:57:19 +08:00
|
|
|
AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
|
2015-05-07 07:19:56 +08:00
|
|
|
if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
|
2010-01-05 15:32:13 +08:00
|
|
|
return false; // Attribute not compatible with transformed value.
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the callsite is an invoke instruction, and the return value is used by
|
|
|
|
// a PHI node in a successor, we cannot change the return type of the call
|
|
|
|
// because there is no place to put the cast instruction (without breaking
|
|
|
|
// the critical edge). Bail out in this case.
|
|
|
|
if (!Caller->use_empty())
|
|
|
|
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
|
2014-03-09 11:16:01 +08:00
|
|
|
for (User *U : II->users())
|
|
|
|
if (PHINode *PN = dyn_cast<PHINode>(U))
|
2010-01-05 15:32:13 +08:00
|
|
|
if (PN->getParent() == II->getNormalDest() ||
|
|
|
|
PN->getParent() == II->getUnwindDest())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-06-28 08:25:40 +08:00
|
|
|
unsigned NumActualArgs = CS.arg_size();
|
2010-01-05 15:32:13 +08:00
|
|
|
unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
|
|
|
|
|
2015-01-06 16:41:31 +08:00
|
|
|
// Prevent us turning:
|
|
|
|
// declare void @takes_i32_inalloca(i32* inalloca)
|
|
|
|
// call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
|
|
|
|
//
|
|
|
|
// into:
|
|
|
|
// call void @takes_i32_inalloca(i32* null)
|
2015-03-12 02:03:05 +08:00
|
|
|
//
|
|
|
|
// Similarly, avoid folding away bitcasts of byval calls.
|
|
|
|
if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
|
|
|
|
Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
|
2015-01-06 16:41:31 +08:00
|
|
|
return false;
|
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
CallSite::arg_iterator AI = CS.arg_begin();
|
|
|
|
for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
|
2011-07-18 12:54:35 +08:00
|
|
|
Type *ParamTy = FT->getParamType(i);
|
|
|
|
Type *ActTy = (*AI)->getType();
|
2010-01-05 15:32:13 +08:00
|
|
|
|
2015-01-06 16:41:31 +08:00
|
|
|
if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
|
2010-01-05 15:32:13 +08:00
|
|
|
return false; // Cannot transform this parameter value.
|
|
|
|
|
2017-04-11 04:34:19 +08:00
|
|
|
if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
|
2015-05-07 07:19:56 +08:00
|
|
|
overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
|
2010-01-05 15:32:13 +08:00
|
|
|
return false; // Attribute not compatible with transformed value.
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2014-01-28 10:38:36 +08:00
|
|
|
if (CS.isInAllocaArgument(i))
|
|
|
|
return false; // Cannot transform to and from inalloca.
|
|
|
|
|
2010-12-20 16:36:38 +08:00
|
|
|
// If the parameter is passed as a byval argument, then we have to have a
|
|
|
|
// sized type and the sized type has to have the same size as the old type.
|
2017-04-11 04:34:19 +08:00
|
|
|
if (ParamTy != ActTy &&
|
|
|
|
CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
|
|
|
|
Attribute::ByVal)) {
|
2011-07-18 12:54:35 +08:00
|
|
|
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
|
2015-03-10 10:37:25 +08:00
|
|
|
if (!ParamPTy || !ParamPTy->getElementType()->isSized())
|
2010-12-20 16:36:38 +08:00
|
|
|
return false;
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2013-09-28 06:18:51 +08:00
|
|
|
Type *CurElTy = ActTy->getPointerElementType();
|
2015-03-10 10:37:25 +08:00
|
|
|
if (DL.getTypeAllocSize(CurElTy) !=
|
|
|
|
DL.getTypeAllocSize(ParamPTy->getElementType()))
|
2010-12-20 16:36:38 +08:00
|
|
|
return false;
|
|
|
|
}
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
|
|
|
|
2011-02-24 13:10:56 +08:00
|
|
|
if (Callee->isDeclaration()) {
|
|
|
|
// Do not delete arguments unless we have a function body.
|
|
|
|
if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// If the callee is just a declaration, don't change the varargsness of the
|
|
|
|
// call. We don't want to introduce a varargs call where one doesn't
|
|
|
|
// already exist.
|
2011-07-18 12:54:35 +08:00
|
|
|
PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
|
2011-02-24 13:10:56 +08:00
|
|
|
if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
|
|
|
|
return false;
|
2012-02-03 08:00:55 +08:00
|
|
|
|
|
|
|
// If both the callee and the cast type are varargs, we still have to make
|
|
|
|
// sure the number of fixed parameters are the same or we have the same
|
|
|
|
// ABI issues as if we introduce a varargs call.
|
2012-02-03 08:26:07 +08:00
|
|
|
if (FT->isVarArg() &&
|
|
|
|
cast<FunctionType>(APTy->getElementType())->isVarArg() &&
|
|
|
|
FT->getNumParams() !=
|
2012-02-03 08:00:55 +08:00
|
|
|
cast<FunctionType>(APTy->getElementType())->getNumParams())
|
|
|
|
return false;
|
2011-02-24 13:10:56 +08:00
|
|
|
}
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2012-02-03 08:00:50 +08:00
|
|
|
if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
|
|
|
|
!CallerPAL.isEmpty())
|
|
|
|
// In this case we have more arguments than the new function type, but we
|
|
|
|
// won't be dropping them. Check that these extra arguments have attributes
|
|
|
|
// that are compatible with being a vararg call argument.
|
|
|
|
for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
|
2013-01-26 07:09:36 +08:00
|
|
|
unsigned Index = CallerPAL.getSlotIndex(i - 1);
|
|
|
|
if (Index <= FT->getNumParams())
|
2012-02-03 08:00:50 +08:00
|
|
|
break;
|
2013-01-26 07:09:36 +08:00
|
|
|
|
2012-12-19 16:57:40 +08:00
|
|
|
// Check if it has an attribute that's incompatible with varargs.
|
2017-03-22 00:57:19 +08:00
|
|
|
AttributeList PAttrs = CallerPAL.getSlotAttributes(i - 1);
|
2013-01-26 07:09:36 +08:00
|
|
|
if (PAttrs.hasAttribute(Index, Attribute::StructRet))
|
2012-02-03 08:00:50 +08:00
|
|
|
return false;
|
|
|
|
}
|
2010-01-05 15:32:13 +08:00
|
|
|
|
2012-02-03 08:07:04 +08:00
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
// Okay, we decided that this is a safe thing to do: go ahead and start
|
2011-02-24 13:10:56 +08:00
|
|
|
// inserting cast instructions as necessary.
|
2010-01-05 15:32:13 +08:00
|
|
|
std::vector<Value*> Args;
|
|
|
|
Args.reserve(NumActualArgs);
|
2017-03-22 00:57:19 +08:00
|
|
|
SmallVector<AttributeList, 8> attrVec;
|
2010-01-05 15:32:13 +08:00
|
|
|
attrVec.reserve(NumCommonArgs);
|
|
|
|
|
|
|
|
// Get any return attributes.
|
2017-03-22 00:57:19 +08:00
|
|
|
AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
|
2010-01-05 15:32:13 +08:00
|
|
|
|
|
|
|
// If the return value is not being used, the type may not be compatible
|
|
|
|
// with the existing attributes. Wipe out any problematic attributes.
|
2015-05-07 07:19:56 +08:00
|
|
|
RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
|
2010-01-05 15:32:13 +08:00
|
|
|
|
|
|
|
// Add the new return attributes.
|
2012-10-09 08:01:21 +08:00
|
|
|
if (RAttrs.hasAttributes())
|
2017-03-22 00:57:19 +08:00
|
|
|
attrVec.push_back(AttributeList::get(Caller->getContext(),
|
|
|
|
AttributeList::ReturnIndex, RAttrs));
|
2010-01-05 15:32:13 +08:00
|
|
|
|
|
|
|
AI = CS.arg_begin();
|
|
|
|
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
|
2011-07-18 12:54:35 +08:00
|
|
|
Type *ParamTy = FT->getParamType(i);
|
2013-07-31 04:45:05 +08:00
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
if ((*AI)->getType() == ParamTy) {
|
|
|
|
Args.push_back(*AI);
|
|
|
|
} else {
|
2015-01-06 16:41:31 +08:00
|
|
|
Args.push_back(Builder->CreateBitOrPointerCast(*AI, ParamTy));
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Add any parameter attributes.
|
2017-04-11 04:34:19 +08:00
|
|
|
AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
|
2012-10-14 16:54:26 +08:00
|
|
|
if (PAttrs.hasAttributes())
|
2017-03-22 00:57:19 +08:00
|
|
|
attrVec.push_back(
|
|
|
|
AttributeList::get(Caller->getContext(), i + 1, PAttrs));
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// If the function takes more arguments than the call was taking, add them
|
|
|
|
// now.
|
|
|
|
for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
|
|
|
|
Args.push_back(Constant::getNullValue(FT->getParamType(i)));
|
|
|
|
|
|
|
|
// If we are removing arguments to the function, emit an obnoxious warning.
|
|
|
|
if (FT->getNumParams() < NumActualArgs) {
|
2012-12-27 06:00:35 +08:00
|
|
|
// TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
|
|
|
|
if (FT->isVarArg()) {
|
2010-01-05 15:32:13 +08:00
|
|
|
// Add all of the arguments in their promoted form to the arg list.
|
|
|
|
for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
|
2011-07-18 12:54:35 +08:00
|
|
|
Type *PTy = getPromotedType((*AI)->getType());
|
2010-01-05 15:32:13 +08:00
|
|
|
if (PTy != (*AI)->getType()) {
|
|
|
|
// Must promote to pass through va_arg area!
|
|
|
|
Instruction::CastOps opcode =
|
|
|
|
CastInst::getCastOpcode(*AI, false, PTy, false);
|
2011-09-28 04:39:19 +08:00
|
|
|
Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
|
2010-01-05 15:32:13 +08:00
|
|
|
} else {
|
|
|
|
Args.push_back(*AI);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add any parameter attributes.
|
2017-04-11 04:34:19 +08:00
|
|
|
AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
|
2012-10-14 16:54:26 +08:00
|
|
|
if (PAttrs.hasAttributes())
|
2017-03-22 00:57:19 +08:00
|
|
|
attrVec.push_back(
|
|
|
|
AttributeList::get(FT->getContext(), i + 1, PAttrs));
|
2010-01-05 15:32:13 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-11 04:34:19 +08:00
|
|
|
AttributeList FnAttrs = CallerPAL.getFnAttributes();
|
2017-03-22 00:57:19 +08:00
|
|
|
if (CallerPAL.hasAttributes(AttributeList::FunctionIndex))
|
2017-04-11 04:34:19 +08:00
|
|
|
attrVec.push_back(AttributeList::get(Callee->getContext(), FnAttrs));
|
2010-01-05 15:32:13 +08:00
|
|
|
|
|
|
|
if (NewRetTy->isVoidTy())
|
|
|
|
Caller->setName(""); // Void type should not have a name.
|
|
|
|
|
2017-03-22 00:57:19 +08:00
|
|
|
const AttributeList &NewCallerPAL =
|
|
|
|
AttributeList::get(Callee->getContext(), attrVec);
|
2010-01-05 15:32:13 +08:00
|
|
|
|
2015-11-25 08:42:19 +08:00
|
|
|
SmallVector<OperandBundleDef, 1> OpBundles;
|
2015-11-25 08:42:24 +08:00
|
|
|
CS.getOperandBundlesAsDefs(OpBundles);
|
2015-11-25 08:42:19 +08:00
|
|
|
|
2010-01-05 15:32:13 +08:00
|
|
|
Instruction *NC;
|
|
|
|
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
|
2015-11-25 08:42:19 +08:00
|
|
|
NC = Builder->CreateInvoke(Callee, II->getNormalDest(), II->getUnwindDest(),
|
|
|
|
Args, OpBundles);
|
2011-05-18 09:28:27 +08:00
|
|
|
NC->takeName(II);
|
2010-01-05 15:32:13 +08:00
|
|
|
cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
|
|
|
|
cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
|
|
|
|
} else {
|
|
|
|
CallInst *CI = cast<CallInst>(Caller);
|
2015-11-25 08:42:19 +08:00
|
|
|
NC = Builder->CreateCall(Callee, Args, OpBundles);
|
2011-05-18 09:28:27 +08:00
|
|
|
NC->takeName(CI);
|
2017-03-31 23:59:52 +08:00
|
|
|
// Preserve the weight metadata for the new call instruction. The metadata
|
|
|
|
// is used by SamplePGO to check callsite's hotness.
|
|
|
|
uint64_t W;
|
|
|
|
if (CI->extractProfTotalWeight(W))
|
|
|
|
NC->setProfWeight(W);
|
|
|
|
|
2016-11-26 06:35:09 +08:00
|
|
|
cast<CallInst>(NC)->setTailCallKind(CI->getTailCallKind());
|
2010-01-05 15:32:13 +08:00
|
|
|
cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
|
|
|
|
cast<CallInst>(NC)->setAttributes(NewCallerPAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert a cast of the return type as necessary.
|
|
|
|
Value *NV = NC;
|
|
|
|
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
|
|
|
|
if (!NV->getType()->isVoidTy()) {
|
2015-01-06 16:41:31 +08:00
|
|
|
NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
|
2011-05-27 08:19:40 +08:00
|
|
|
NC->setDebugLoc(Caller->getDebugLoc());
|
2010-01-05 15:32:13 +08:00
|
|
|
|
|
|
|
// If this is an invoke instruction, we should insert it after the first
|
|
|
|
// non-phi, instruction in the normal successor block.
|
|
|
|
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
|
2011-08-25 09:08:34 +08:00
|
|
|
BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
|
2010-01-05 15:32:13 +08:00
|
|
|
InsertNewInstBefore(NC, *I);
|
|
|
|
} else {
|
2010-12-20 16:25:06 +08:00
|
|
|
// Otherwise, it's a call, just insert cast right after the call.
|
2010-01-05 15:32:13 +08:00
|
|
|
InsertNewInstBefore(NC, *Caller);
|
|
|
|
}
|
|
|
|
Worklist.AddUsersToWorkList(*Caller);
|
|
|
|
} else {
|
|
|
|
NV = UndefValue::get(Caller->getType());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Caller->use_empty())
|
2016-02-02 06:23:39 +08:00
|
|
|
replaceInstUsesWith(*Caller, NV);
|
2014-10-23 12:08:42 +08:00
|
|
|
else if (Caller->hasValueHandle()) {
|
|
|
|
if (OldRetTy == NV->getType())
|
|
|
|
ValueHandleBase::ValueIsRAUWd(Caller, NV);
|
|
|
|
else
|
|
|
|
// We cannot call ValueIsRAUWd with a different type, and the
|
|
|
|
// actual tracked value will disappear.
|
|
|
|
ValueHandleBase::ValueIsDeleted(Caller);
|
|
|
|
}
|
2011-05-18 08:32:01 +08:00
|
|
|
|
2016-02-02 06:23:39 +08:00
|
|
|
eraseInstFromFunction(*Caller);
|
2010-01-05 15:32:13 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-01-21 06:24:38 +08:00
|
|
|
/// Turn a call to a function created by init_trampoline / adjust_trampoline
|
|
|
|
/// intrinsic pair into a direct call to the underlying function.
Instruction *
InstCombiner::transformCallThroughTrampoline(CallSite CS,
                                             IntrinsicInst *Tramp) {
  Value *Callee = CS.getCalledValue();
  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttributeList &Attrs = CS.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return nullptr;

  assert(Tramp &&
         "transformCallThroughTrampoline called with incorrect CallSite.");

  Function *NestF =
      cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
  FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());

  const AttributeList &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
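    // Attribute slots here follow the legacy indexing scheme: index 0 is the
    // return value, parameters are numbered from 1, and ~0U denotes the
    // function itself, so the first parameter lives at index 1.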
    unsigned NestIdx = 1;
    Type *NestTy = nullptr;
    AttributeList NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(CS.arg_size() + 1);

      SmallVector<AttributeList, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.

      // Add any result attributes.
      if (Attrs.hasAttributes(AttributeList::ReturnIndex))
        NewAttrs.push_back(
            AttributeList::get(Caller->getContext(), Attrs.getRetAttributes()));

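      // The do/while below deliberately runs one step past the last original
      // argument so that a chain parameter whose position is one past the end
      // (i.e. appended) is still emitted.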
      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(
                AttributeList::get(Caller->getContext(), NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
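          // Re-attach the original parameter attributes.  Arguments at or
          // after the spliced-in chain shift up by one slot, hence the
          // Idx + (Idx >= NestIdx) index adjustment below.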
          AttributeList Attr = Attrs.getParamAttributes(Idx);
          if (Attr.hasAttributes(Idx)) {
            AttrBuilder B(Attr, Idx);
            NewAttrs.push_back(AttributeList::get(Caller->getContext(),
                                                  Idx + (Idx >= NestIdx), B));
          }

          ++Idx;
          ++I;
        } while (true);
      }

      // Add any function attributes.
      if (Attrs.hasAttributes(AttributeList::FunctionIndex))
        NewAttrs.push_back(
            AttributeList::get(FTy->getContext(), Attrs.getFnAttributes()));

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.
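      // For example (illustrative only): if FTy is i32 (i32) and the chain
      // parameter is an i8* at position 1, the synthesized type is
      // i32 (i8*, i32).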

      std::vector<Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();

        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx;
          ++I;
        } while (true);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttributeList &NewPAL =
          AttributeList::get(FTy->getContext(), NewAttrs);

      SmallVector<OperandBundleDef, 1> OpBundles;
      CS.getOperandBundlesAsDefs(OpBundles);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs, OpBundles);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
        cast<CallInst>(NewCaller)->setTailCallKind(
            cast<CallInst>(Caller)->getTailCallKind());
        cast<CallInst>(NewCaller)->setCallingConv(
            cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
    NestF->getType() == PTy ? NestF :
                              ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}