2010-01-05 13:36:20 +08:00
|
|
|
//===- InstCombineVectorOps.cpp -------------------------------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2010-01-05 13:36:20 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file implements instcombine for ExtractElement, InsertElement and
|
|
|
|
// ShuffleVector.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2015-01-22 13:25:13 +08:00
|
|
|
#include "InstCombineInternal.h"
|
2017-10-25 05:24:53 +08:00
|
|
|
#include "llvm/ADT/APInt.h"
|
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
2015-02-26 06:30:51 +08:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
2017-10-25 05:24:53 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2020-04-14 02:36:31 +08:00
|
|
|
#include "llvm/ADT/SmallBitVector.h"
|
2017-10-25 05:24:53 +08:00
|
|
|
#include "llvm/ADT/SmallVector.h"
|
2015-07-13 09:15:53 +08:00
|
|
|
#include "llvm/Analysis/InstructionSimplify.h"
|
|
|
|
#include "llvm/Analysis/VectorUtils.h"
|
2017-10-25 05:24:53 +08:00
|
|
|
#include "llvm/IR/BasicBlock.h"
|
|
|
|
#include "llvm/IR/Constant.h"
|
|
|
|
#include "llvm/IR/Constants.h"
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
|
|
#include "llvm/IR/InstrTypes.h"
|
|
|
|
#include "llvm/IR/Instruction.h"
|
|
|
|
#include "llvm/IR/Instructions.h"
|
|
|
|
#include "llvm/IR/Operator.h"
|
2014-03-04 19:08:18 +08:00
|
|
|
#include "llvm/IR/PatternMatch.h"
|
2017-10-25 05:24:53 +08:00
|
|
|
#include "llvm/IR/Type.h"
|
|
|
|
#include "llvm/IR/User.h"
|
|
|
|
#include "llvm/IR/Value.h"
|
|
|
|
#include "llvm/Support/Casting.h"
|
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <iterator>
|
|
|
|
#include <utility>
|
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
using namespace llvm;
|
2013-01-16 07:43:14 +08:00
|
|
|
using namespace PatternMatch;
|
2010-01-05 13:36:20 +08:00
|
|
|
|
2014-04-22 10:55:47 +08:00
|
|
|
#define DEBUG_TYPE "instcombine"
|
|
|
|
|
2015-09-09 23:24:36 +08:00
|
|
|
/// Return true if the value is cheaper to scalarize than it is to leave as a
|
2018-12-19 03:07:38 +08:00
|
|
|
/// vector operation. IsConstantExtractIndex indicates whether we are extracting
|
|
|
|
/// one known element from a vector constant.
|
|
|
|
///
|
|
|
|
/// FIXME: It's possible to create more instructions than previously existed.
|
|
|
|
static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
|
|
|
|
// If we can pick a scalar constant value out of a vector, that is free.
|
|
|
|
if (auto *C = dyn_cast<Constant>(V))
|
|
|
|
return IsConstantExtractIndex || C->getSplatValue();
|
2012-01-26 08:42:34 +08:00
|
|
|
|
2018-12-19 03:07:38 +08:00
|
|
|
// An insertelement to the same constant index as our extract will simplify
|
|
|
|
// to the scalar inserted element. An insertelement to a different constant
|
|
|
|
// index is irrelevant to our extract.
|
2020-05-23 22:13:50 +08:00
|
|
|
if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
|
2018-12-19 03:07:38 +08:00
|
|
|
return IsConstantExtractIndex;
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2018-12-19 03:07:38 +08:00
|
|
|
if (match(V, m_OneUse(m_Load(m_Value()))))
|
2010-01-05 13:36:20 +08:00
|
|
|
return true;
|
2018-12-19 03:07:38 +08:00
|
|
|
|
2020-03-10 23:05:31 +08:00
|
|
|
if (match(V, m_OneUse(m_UnOp())))
|
|
|
|
return true;
|
|
|
|
|
2018-12-19 03:07:38 +08:00
|
|
|
Value *V0, *V1;
|
|
|
|
if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
|
|
|
|
if (cheapToScalarize(V0, IsConstantExtractIndex) ||
|
|
|
|
cheapToScalarize(V1, IsConstantExtractIndex))
|
2010-01-05 13:36:20 +08:00
|
|
|
return true;
|
2018-12-19 03:07:38 +08:00
|
|
|
|
|
|
|
CmpInst::Predicate UnusedPred;
|
|
|
|
if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
|
|
|
|
if (cheapToScalarize(V0, IsConstantExtractIndex) ||
|
|
|
|
cheapToScalarize(V1, IsConstantExtractIndex))
|
2010-01-05 13:36:20 +08:00
|
|
|
return true;
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-06-07 07:38:33 +08:00
|
|
|
// If we have a PHI node with a vector type that is only used to feed
|
2013-08-29 06:17:26 +08:00
|
|
|
// itself and be an operand of extractelement at a constant location,
|
|
|
|
// try to replace the PHI of the vector type with a PHI of a scalar type.
|
2013-04-19 03:35:39 +08:00
|
|
|
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
|
2016-06-07 07:38:33 +08:00
|
|
|
SmallVector<Instruction *, 2> Extracts;
|
|
|
|
// The users we want the PHI to have are:
|
|
|
|
// 1) The EI ExtractElement (we already know this)
|
|
|
|
// 2) Possibly more ExtractElements with the same index.
|
|
|
|
// 3) Another operand, which will feed back into the PHI.
|
|
|
|
Instruction *PHIUser = nullptr;
|
|
|
|
for (auto U : PN->users()) {
|
|
|
|
if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
|
|
|
|
if (EI.getIndexOperand() == EU->getIndexOperand())
|
|
|
|
Extracts.push_back(EU);
|
|
|
|
else
|
|
|
|
return nullptr;
|
|
|
|
} else if (!PHIUser) {
|
|
|
|
PHIUser = cast<Instruction>(U);
|
|
|
|
} else {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
}
|
2013-04-19 03:35:39 +08:00
|
|
|
|
2016-06-07 07:38:33 +08:00
|
|
|
if (!PHIUser)
|
|
|
|
return nullptr;
|
2013-04-19 03:35:39 +08:00
|
|
|
|
|
|
|
// Verify that this PHI user has one use, which is the PHI itself,
|
|
|
|
// and that it is a binary operation which is cheap to scalarize.
|
2017-10-25 05:24:53 +08:00
|
|
|
// otherwise return nullptr.
|
2014-03-09 11:16:01 +08:00
|
|
|
if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
|
2015-11-18 01:24:08 +08:00
|
|
|
!(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2013-04-19 03:35:39 +08:00
|
|
|
|
|
|
|
// Create a scalar PHI node that will replace the vector PHI node
|
|
|
|
// just before the current PHI node.
|
2013-05-24 20:33:28 +08:00
|
|
|
PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
|
|
|
|
PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
|
2013-04-19 03:35:39 +08:00
|
|
|
// Scalarize each PHI operand.
|
2013-05-24 20:33:28 +08:00
|
|
|
for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
|
2013-04-19 03:35:39 +08:00
|
|
|
Value *PHIInVal = PN->getIncomingValue(i);
|
|
|
|
BasicBlock *inBB = PN->getIncomingBlock(i);
|
|
|
|
Value *Elt = EI.getIndexOperand();
|
|
|
|
// If the operand is the PHI induction variable:
|
|
|
|
if (PHIInVal == PHIUser) {
|
|
|
|
// Scalarize the binary operation. Its first operand is the
|
2014-07-08 06:13:58 +08:00
|
|
|
// scalar PHI, and the second operand is extracted from the other
|
2013-04-19 03:35:39 +08:00
|
|
|
// vector operand.
|
|
|
|
BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
|
2013-05-24 20:33:28 +08:00
|
|
|
unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
|
2013-05-24 20:29:54 +08:00
|
|
|
Value *Op = InsertNewInstWith(
|
|
|
|
ExtractElementInst::Create(B0->getOperand(opId), Elt,
|
|
|
|
B0->getOperand(opId)->getName() + ".Elt"),
|
|
|
|
*B0);
|
2013-04-19 03:35:39 +08:00
|
|
|
Value *newPHIUser = InsertNewInstWith(
|
2016-03-02 03:35:52 +08:00
|
|
|
BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
|
|
|
|
scalarPHI, Op, B0), *B0);
|
2013-04-19 03:35:39 +08:00
|
|
|
scalarPHI->addIncoming(newPHIUser, inBB);
|
|
|
|
} else {
|
|
|
|
// Scalarize PHI input:
|
2013-05-24 20:33:28 +08:00
|
|
|
Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
|
2013-04-19 03:35:39 +08:00
|
|
|
// Insert the new instruction into the predecessor basic block.
|
|
|
|
Instruction *pos = dyn_cast<Instruction>(PHIInVal);
|
|
|
|
BasicBlock::iterator InsertPos;
|
|
|
|
if (pos && !isa<PHINode>(pos)) {
|
2015-10-14 00:59:33 +08:00
|
|
|
InsertPos = ++pos->getIterator();
|
2013-04-19 03:35:39 +08:00
|
|
|
} else {
|
|
|
|
InsertPos = inBB->getFirstInsertionPt();
|
|
|
|
}
|
|
|
|
|
|
|
|
InsertNewInstWith(newEI, *InsertPos);
|
|
|
|
|
|
|
|
scalarPHI->addIncoming(newEI, inBB);
|
|
|
|
}
|
|
|
|
}
|
2016-06-07 07:38:33 +08:00
|
|
|
|
|
|
|
for (auto E : Extracts)
|
|
|
|
replaceInstUsesWith(*E, scalarPHI);
|
|
|
|
|
|
|
|
return &EI;
|
2013-04-19 03:35:39 +08:00
|
|
|
}
|
|
|
|
|
2018-09-25 04:41:22 +08:00
|
|
|
/// Try to fold an extractelement whose vector operand is a bitcast of another
/// vector and whose index is a constant. Returns a new (not yet inserted)
/// instruction to replace \p Ext, or nullptr when no fold applies.
/// \p IsBigEndian selects which chunk of a wider source scalar corresponds to
/// the extracted narrow element.
static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
                                      InstCombiner::BuilderTy &Builder,
                                      bool IsBigEndian) {
  Value *BitcastVec = Ext.getVectorOperand();
  Value *X;
  if (!match(BitcastVec, m_BitCast(m_Value(X))))
    return nullptr;
  if (!X->getType()->isVectorTy())
    return nullptr;
  uint64_t ExtIndexC;
  if (!match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  auto *SrcTy = cast<VectorType>(X->getType());
  Type *DestTy = Ext.getType();
  const unsigned NumSrcElts = SrcTy->getNumElements();
  const unsigned NumElts = Ext.getVectorOperandType()->getNumElements();

  // Same element count on both sides of the bitcast: if the source element
  // can be found, bitcast just that scalar:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  if (NumSrcElts == NumElts)
    if (Value *SrcElt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(SrcElt, DestTy);

  // The remaining fold only handles source elements that are wider than the
  // destination elements: shift and truncate part of an inserted scalar.
  if (NumSrcElts >= NumElts)
    return nullptr;

  Value *Scalar;
  uint64_t InsIndexC;
  if (!match(X,
             m_InsertElt(m_Value(), m_Value(Scalar), m_ConstantInt(InsIndexC))))
    return nullptr;

  // The extract must be from the subset of vector elements that we inserted
  // into. Example: if we inserted element 1 of a <2 x i64> and we are
  // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
  // of elements 4-7 of the bitcasted vector.
  const unsigned NarrowingRatio = NumElts / NumSrcElts;
  if (ExtIndexC / NarrowingRatio != InsIndexC)
    return nullptr;

  // We are extracting part of the original scalar. How that scalar is
  // inserted into the vector depends on the endian-ness. Example:
  //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
  //                                       +--+--+--+--+--+--+--+--+
  // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
  // extelt <4 x i16> V', 3:               |                 |S2|S3|
  //                                       +--+--+--+--+--+--+--+--+
  // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
  // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
  // In this example, we must right-shift little-endian. Big-endian is just a
  // truncate.
  unsigned Chunk = ExtIndexC % NarrowingRatio;
  if (IsBigEndian)
    Chunk = NarrowingRatio - 1 - Chunk;

  // Bail out if this is an FP vector to FP vector sequence. That would take
  // more instructions than we started with unless there is no shift, and it
  // may not be handled as well in the backend.
  const bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
  const bool NeedDestBitcast = DestTy->isFloatingPointTy();
  if (NeedSrcBitcast && NeedDestBitcast)
    return nullptr;

  const unsigned SrcWidth = SrcTy->getScalarSizeInBits();
  const unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  const unsigned ShAmt = Chunk * DestWidth;

  // TODO: This limitation is more strict than necessary. We could sum the
  // number of new instructions and subtract the number eliminated to know if
  // we can proceed.
  const bool BothSingleUse = X->hasOneUse() && BitcastVec->hasOneUse();
  if (!BothSingleUse && (NeedSrcBitcast || NeedDestBitcast))
    return nullptr;

  // Work on an integer view of an FP source scalar.
  if (NeedSrcBitcast) {
    Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
    Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
  }

  if (ShAmt != 0) {
    // Bail out if we could end with more instructions than we started with.
    if (!BitcastVec->hasOneUse())
      return nullptr;
    Scalar = Builder.CreateLShr(Scalar, ShAmt);
  }

  // Integer destination: a plain truncate finishes the job.
  if (!NeedDestBitcast)
    return new TruncInst(Scalar, DestTy);

  // FP destination: truncate to a same-width integer, then bitcast.
  Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
  return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
}
|
|
|
|
|
2019-10-21 16:12:47 +08:00
|
|
|
/// Find elements of V demanded by UserInstr.
|
|
|
|
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
|
2019-10-21 16:12:47 +08:00
|
|
|
|
|
|
|
// Conservatively assume that all elements are needed.
|
|
|
|
APInt UsedElts(APInt::getAllOnesValue(VWidth));
|
|
|
|
|
|
|
|
switch (UserInstr->getOpcode()) {
|
|
|
|
case Instruction::ExtractElement: {
|
|
|
|
ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
|
|
|
|
assert(EEI->getVectorOperand() == V);
|
|
|
|
ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
|
|
|
|
if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
|
|
|
|
UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Instruction::ShuffleVector: {
|
|
|
|
ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned MaskNumElts =
|
|
|
|
cast<VectorType>(UserInstr->getType())->getNumElements();
|
2019-10-21 16:12:47 +08:00
|
|
|
|
|
|
|
UsedElts = APInt(VWidth, 0);
|
|
|
|
for (unsigned i = 0; i < MaskNumElts; i++) {
|
|
|
|
unsigned MaskVal = Shuffle->getMaskValue(i);
|
|
|
|
if (MaskVal == -1u || MaskVal >= 2 * VWidth)
|
|
|
|
continue;
|
|
|
|
if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
|
|
|
|
UsedElts.setBit(MaskVal);
|
|
|
|
if (Shuffle->getOperand(1) == V &&
|
|
|
|
((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
|
|
|
|
UsedElts.setBit(MaskVal - VWidth);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return UsedElts;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Find union of elements of V demanded by all its users.
|
|
|
|
/// If it is known by querying findDemandedEltsBySingleUser that
|
|
|
|
/// no user demands an element of V, then the corresponding bit
|
|
|
|
/// remains unset in the returned value.
|
|
|
|
static APInt findDemandedEltsByAllUsers(Value *V) {
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
|
2019-10-21 16:12:47 +08:00
|
|
|
|
|
|
|
APInt UnionUsedElts(VWidth, 0);
|
|
|
|
for (const Use &U : V->uses()) {
|
|
|
|
if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
|
|
|
|
UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
|
|
|
|
} else {
|
|
|
|
UnionUsedElts = APInt::getAllOnesValue(VWidth);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (UnionUsedElts.isAllOnesValue())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return UnionUsedElts;
|
|
|
|
}
|
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
|
2018-12-06 05:57:51 +08:00
|
|
|
Value *SrcVec = EI.getVectorOperand();
|
|
|
|
Value *Index = EI.getIndexOperand();
|
|
|
|
if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
|
2017-06-09 11:21:29 +08:00
|
|
|
SQ.getWithInstruction(&EI)))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(EI, V);
|
2015-07-13 09:15:53 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
// If extracting a specified index from the vector, see if we can recursively
|
|
|
|
// find a previously computed scalar that was inserted into the vector.
|
2018-12-06 05:57:51 +08:00
|
|
|
auto *IndexC = dyn_cast<ConstantInt>(Index);
|
|
|
|
if (IndexC) {
|
2020-05-08 04:03:26 +08:00
|
|
|
ElementCount EC = EI.getVectorOperandType()->getElementCount();
|
|
|
|
unsigned NumElts = EC.Min;
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2017-12-27 20:00:18 +08:00
|
|
|
// InstSimplify should handle cases where the index is invalid.
|
2020-05-08 04:03:26 +08:00
|
|
|
// For fixed-length vector, it's invalid to extract out-of-range element.
|
|
|
|
if (!EC.Scalable && IndexC->getValue().uge(NumElts))
|
2017-12-27 20:00:18 +08:00
|
|
|
return nullptr;
|
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
// This instruction only demands the single element from the input vector.
|
2020-05-08 04:03:26 +08:00
|
|
|
// Skip for scalable type, the number of elements is unknown at
|
|
|
|
// compile-time.
|
|
|
|
if (!EC.Scalable && NumElts != 1) {
|
2019-10-21 16:12:47 +08:00
|
|
|
// If the input vector has a single use, simplify it based on this use
|
|
|
|
// property.
|
|
|
|
if (SrcVec->hasOneUse()) {
|
|
|
|
APInt UndefElts(NumElts, 0);
|
|
|
|
APInt DemandedElts(NumElts, 0);
|
|
|
|
DemandedElts.setBit(IndexC->getZExtValue());
|
|
|
|
if (Value *V =
|
2020-03-30 02:07:46 +08:00
|
|
|
SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
|
|
|
|
return replaceOperand(EI, 0, V);
|
2019-10-21 16:12:47 +08:00
|
|
|
} else {
|
|
|
|
// If the input vector has multiple uses, simplify it based on a union
|
|
|
|
// of all elements used.
|
|
|
|
APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
|
|
|
|
if (!DemandedElts.isAllOnesValue()) {
|
|
|
|
APInt UndefElts(NumElts, 0);
|
|
|
|
if (Value *V = SimplifyDemandedVectorElts(
|
|
|
|
SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
|
|
|
|
true /* AllowMultipleUsers */)) {
|
|
|
|
if (V != SrcVec) {
|
|
|
|
SrcVec->replaceAllUsesWith(V);
|
|
|
|
return &EI;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
|
|
|
}
|
2018-10-01 22:40:00 +08:00
|
|
|
if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
|
2018-09-25 04:41:22 +08:00
|
|
|
return I;
|
2013-04-19 03:35:39 +08:00
|
|
|
|
|
|
|
// If there's a vector PHI feeding a scalar use through this extractelement
|
|
|
|
// instruction, try to scalarize the PHI.
|
2018-12-06 05:57:51 +08:00
|
|
|
if (auto *Phi = dyn_cast<PHINode>(SrcVec))
|
|
|
|
if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
|
|
|
|
return ScalarPHI;
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2020-03-10 23:05:31 +08:00
|
|
|
// TODO come up with a n-ary matcher that subsumes both unary and
|
|
|
|
// binary matchers.
|
|
|
|
UnaryOperator *UO;
|
|
|
|
if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, IndexC)) {
|
|
|
|
// extelt (unop X), Index --> unop (extelt X, Index)
|
|
|
|
Value *X = UO->getOperand(0);
|
|
|
|
Value *E = Builder.CreateExtractElement(X, Index);
|
|
|
|
return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
|
|
|
|
}
|
|
|
|
|
2018-12-06 05:57:51 +08:00
|
|
|
BinaryOperator *BO;
|
|
|
|
if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
|
|
|
|
// extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
|
|
|
|
Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
|
|
|
|
Value *E0 = Builder.CreateExtractElement(X, Index);
|
|
|
|
Value *E1 = Builder.CreateExtractElement(Y, Index);
|
|
|
|
return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
|
|
|
|
}
|
|
|
|
|
2018-12-11 05:50:54 +08:00
|
|
|
Value *X, *Y;
|
|
|
|
CmpInst::Predicate Pred;
|
|
|
|
if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
|
|
|
|
cheapToScalarize(SrcVec, IndexC)) {
|
|
|
|
// extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
|
|
|
|
Value *E0 = Builder.CreateExtractElement(X, Index);
|
|
|
|
Value *E1 = Builder.CreateExtractElement(Y, Index);
|
|
|
|
return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
|
|
|
|
}
|
|
|
|
|
2018-12-06 05:57:51 +08:00
|
|
|
if (auto *I = dyn_cast<Instruction>(SrcVec)) {
|
|
|
|
if (auto *IE = dyn_cast<InsertElementInst>(I)) {
|
2010-01-05 13:36:20 +08:00
|
|
|
// Extracting the inserted element?
|
2018-12-06 05:57:51 +08:00
|
|
|
if (IE->getOperand(2) == Index)
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(EI, IE->getOperand(1));
|
2010-01-05 13:36:20 +08:00
|
|
|
// If the inserted and extracted elements are constants, they must not
|
|
|
|
// be the same value, extract from the pre-inserted value instead.
|
2020-02-01 05:23:33 +08:00
|
|
|
if (isa<Constant>(IE->getOperand(2)) && IndexC)
|
|
|
|
return replaceOperand(EI, 0, IE->getOperand(0));
|
2018-12-06 05:57:51 +08:00
|
|
|
} else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
|
2010-01-05 13:36:20 +08:00
|
|
|
// If this is extracting an element from a shufflevector, figure out where
|
|
|
|
// it came from and extract from the appropriate input element instead.
|
2020-05-08 04:03:26 +08:00
|
|
|
// Restrict the following transformation to fixed-length vector.
|
|
|
|
if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
|
|
|
|
int SrcIdx =
|
|
|
|
SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
|
2010-01-05 13:36:20 +08:00
|
|
|
Value *Src;
|
2020-05-08 04:03:26 +08:00
|
|
|
unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
|
|
|
|
->getNumElements();
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-10-30 06:03:05 +08:00
|
|
|
if (SrcIdx < 0)
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
|
2010-10-30 06:03:05 +08:00
|
|
|
if (SrcIdx < (int)LHSWidth)
|
2010-01-05 13:36:20 +08:00
|
|
|
Src = SVI->getOperand(0);
|
2010-10-30 06:03:05 +08:00
|
|
|
else {
|
2010-01-05 13:36:20 +08:00
|
|
|
SrcIdx -= LHSWidth;
|
|
|
|
Src = SVI->getOperand(1);
|
|
|
|
}
|
2011-07-18 12:54:35 +08:00
|
|
|
Type *Int32Ty = Type::getInt32Ty(EI.getContext());
|
2020-05-08 04:03:26 +08:00
|
|
|
return ExtractElementInst::Create(
|
|
|
|
Src, ConstantInt::get(Int32Ty, SrcIdx, false));
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
2018-12-06 05:57:51 +08:00
|
|
|
} else if (auto *CI = dyn_cast<CastInst>(I)) {
|
2015-11-30 06:09:34 +08:00
|
|
|
// Canonicalize extractelement(cast) -> cast(extractelement).
|
|
|
|
// Bitcasts can change the number of vector elements, and they cost
|
|
|
|
// nothing.
|
2013-04-19 03:56:44 +08:00
|
|
|
if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
|
2018-12-06 05:57:51 +08:00
|
|
|
Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
|
2011-04-01 06:57:29 +08:00
|
|
|
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
|
|
|
|
}
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
|
|
|
}
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
|
|
|
|
2015-09-09 23:24:36 +08:00
|
|
|
/// If V is a shuffle of values that ONLY returns elements from either LHS or
|
|
|
|
/// RHS, return the shuffle mask and true. Otherwise, return false.
|
2015-11-18 01:24:08 +08:00
|
|
|
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
|
2020-04-15 20:29:09 +08:00
|
|
|
SmallVectorImpl<int> &Mask) {
|
2014-03-07 18:24:44 +08:00
|
|
|
assert(LHS->getType() == RHS->getType() &&
|
2010-01-05 13:36:20 +08:00
|
|
|
"Invalid CollectSingleShuffleElements");
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
if (isa<UndefValue>(V)) {
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask.assign(NumElts, -1);
|
2010-01-05 13:36:20 +08:00
|
|
|
return true;
|
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
if (V == LHS) {
|
|
|
|
for (unsigned i = 0; i != NumElts; ++i)
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask.push_back(i);
|
2010-01-05 13:36:20 +08:00
|
|
|
return true;
|
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
if (V == RHS) {
|
|
|
|
for (unsigned i = 0; i != NumElts; ++i)
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask.push_back(i + NumElts);
|
2010-01-05 13:36:20 +08:00
|
|
|
return true;
|
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
|
|
|
|
// If this is an insert of an extract from some other vector, include it.
|
|
|
|
Value *VecOp = IEI->getOperand(0);
|
|
|
|
Value *ScalarOp = IEI->getOperand(1);
|
|
|
|
Value *IdxOp = IEI->getOperand(2);
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
if (!isa<ConstantInt>(IdxOp))
|
|
|
|
return false;
|
|
|
|
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
|
2014-07-08 06:13:58 +08:00
|
|
|
// We can handle this if the vector we are inserting into is
|
2010-01-05 13:36:20 +08:00
|
|
|
// transitively ok.
|
2015-11-18 01:24:08 +08:00
|
|
|
if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
|
2010-01-05 13:36:20 +08:00
|
|
|
// If so, update the mask to reflect the inserted undef.
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask[InsertedIdx] = -1;
|
2010-01-05 13:36:20 +08:00
|
|
|
return true;
|
2010-10-30 06:20:43 +08:00
|
|
|
}
|
2010-01-05 13:36:20 +08:00
|
|
|
} else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
|
2014-03-07 18:24:44 +08:00
|
|
|
if (isa<ConstantInt>(EI->getOperand(1))) {
|
2010-01-05 13:36:20 +08:00
|
|
|
unsigned ExtractedIdx =
|
|
|
|
cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned NumLHSElts =
|
|
|
|
cast<VectorType>(LHS->getType())->getNumElements();
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
// This must be extracting from either LHS or RHS.
|
|
|
|
if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
|
2014-07-08 06:13:58 +08:00
|
|
|
// We can handle this if the vector we are inserting into is
|
2010-01-05 13:36:20 +08:00
|
|
|
// transitively ok.
|
2015-11-18 01:24:08 +08:00
|
|
|
if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
|
2010-01-05 13:36:20 +08:00
|
|
|
// If so, update the mask to reflect the inserted value.
|
|
|
|
if (EI->getOperand(0) == LHS) {
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask[InsertedIdx % NumElts] = ExtractedIdx;
|
2010-01-05 13:36:20 +08:00
|
|
|
} else {
|
|
|
|
assert(EI->getOperand(0) == RHS);
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
[InstCombine] transform more extract/insert pairs into shuffles (PR2109)
This is an extension of the shuffle combining from r203229:
http://reviews.llvm.org/rL203229
The idea is to widen a short input vector with undef elements so the
existing shuffle transform for extract/insert can kick in.
The motivation is to finally solve PR2109:
https://llvm.org/bugs/show_bug.cgi?id=2109
For that example, the IR becomes:
%1 = bitcast <2 x i32>* %P to <2 x float>*
%ld1 = load <2 x float>, <2 x float>* %1, align 8
%2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %i2
And x86 SSE output improves from:
movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
movdqa %xmm1, %xmm2
shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
retq
To the almost optimal:
movhpd (%rdi), %xmm0
Note: There's a tension in the existing transform related to generating
arbitrary shufflevector masks. We avoid that in other places in InstCombine
because we're scared that codegen can't handle strange masks, but it looks
like we're ok with producing those here. I purposely chose weird insert/extract
indexes for the regression tests to see the effect in these cases.
For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
better for these examples.
Differential Revision: http://reviews.llvm.org/D15096
llvm-svn: 256394
2015-12-25 05:17:56 +08:00
|
|
|
/// If we have insertion into a vector that is wider than the vector that we
|
|
|
|
/// are extracting from, try to widen the source vector to allow a single
|
|
|
|
/// shufflevector to replace one or more insert/extract pairs.
|
|
|
|
static void replaceExtractElements(InsertElementInst *InsElt,
|
|
|
|
ExtractElementInst *ExtElt,
|
|
|
|
InstCombiner &IC) {
|
|
|
|
VectorType *InsVecType = InsElt->getType();
|
|
|
|
VectorType *ExtVecType = ExtElt->getVectorOperandType();
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned NumInsElts = InsVecType->getNumElements();
|
|
|
|
unsigned NumExtElts = ExtVecType->getNumElements();
|
[InstCombine] transform more extract/insert pairs into shuffles (PR2109)
This is an extension of the shuffle combining from r203229:
http://reviews.llvm.org/rL203229
The idea is to widen a short input vector with undef elements so the
existing shuffle transform for extract/insert can kick in.
The motivation is to finally solve PR2109:
https://llvm.org/bugs/show_bug.cgi?id=2109
For that example, the IR becomes:
%1 = bitcast <2 x i32>* %P to <2 x float>*
%ld1 = load <2 x float>, <2 x float>* %1, align 8
%2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %i2
And x86 SSE output improves from:
movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
movdqa %xmm1, %xmm2
shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
retq
To the almost optimal:
movhpd (%rdi), %xmm0
Note: There's a tension in the existing transform related to generating
arbitrary shufflevector masks. We avoid that in other places in InstCombine
because we're scared that codegen can't handle strange masks, but it looks
like we're ok with producing those here. I purposely chose weird insert/extract
indexes for the regression tests to see the effect in these cases.
For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
better for these examples.
Differential Revision: http://reviews.llvm.org/D15096
llvm-svn: 256394
2015-12-25 05:17:56 +08:00
|
|
|
|
|
|
|
// The inserted-to vector must be wider than the extracted-from vector.
|
|
|
|
if (InsVecType->getElementType() != ExtVecType->getElementType() ||
|
|
|
|
NumExtElts >= NumInsElts)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Create a shuffle mask to widen the extended-from vector using undefined
|
|
|
|
// values. The mask selects all of the values of the original vector followed
|
|
|
|
// by as many undefined values as needed to create a vector of the same length
|
|
|
|
// as the inserted-to vector.
|
2020-04-15 20:29:09 +08:00
|
|
|
SmallVector<int, 16> ExtendMask;
|
[InstCombine] transform more extract/insert pairs into shuffles (PR2109)
This is an extension of the shuffle combining from r203229:
http://reviews.llvm.org/rL203229
The idea is to widen a short input vector with undef elements so the
existing shuffle transform for extract/insert can kick in.
The motivation is to finally solve PR2109:
https://llvm.org/bugs/show_bug.cgi?id=2109
For that example, the IR becomes:
%1 = bitcast <2 x i32>* %P to <2 x float>*
%ld1 = load <2 x float>, <2 x float>* %1, align 8
%2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %i2
And x86 SSE output improves from:
movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
movdqa %xmm1, %xmm2
shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
retq
To the almost optimal:
movhpd (%rdi), %xmm0
Note: There's a tension in the existing transform related to generating
arbitrary shufflevector masks. We avoid that in other places in InstCombine
because we're scared that codegen can't handle strange masks, but it looks
like we're ok with producing those here. I purposely chose weird insert/extract
indexes for the regression tests to see the effect in these cases.
For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
better for these examples.
Differential Revision: http://reviews.llvm.org/D15096
llvm-svn: 256394
2015-12-25 05:17:56 +08:00
|
|
|
for (unsigned i = 0; i < NumExtElts; ++i)
|
2020-04-15 20:29:09 +08:00
|
|
|
ExtendMask.push_back(i);
|
[InstCombine] transform more extract/insert pairs into shuffles (PR2109)
This is an extension of the shuffle combining from r203229:
http://reviews.llvm.org/rL203229
The idea is to widen a short input vector with undef elements so the
existing shuffle transform for extract/insert can kick in.
The motivation is to finally solve PR2109:
https://llvm.org/bugs/show_bug.cgi?id=2109
For that example, the IR becomes:
%1 = bitcast <2 x i32>* %P to <2 x float>*
%ld1 = load <2 x float>, <2 x float>* %1, align 8
%2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %i2
And x86 SSE output improves from:
movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
movdqa %xmm1, %xmm2
shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
retq
To the almost optimal:
movhpd (%rdi), %xmm0
Note: There's a tension in the existing transform related to generating
arbitrary shufflevector masks. We avoid that in other places in InstCombine
because we're scared that codegen can't handle strange masks, but it looks
like we're ok with producing those here. I purposely chose weird insert/extract
indexes for the regression tests to see the effect in these cases.
For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
better for these examples.
Differential Revision: http://reviews.llvm.org/D15096
llvm-svn: 256394
2015-12-25 05:17:56 +08:00
|
|
|
for (unsigned i = NumExtElts; i < NumInsElts; ++i)
|
2020-04-15 20:29:09 +08:00
|
|
|
ExtendMask.push_back(-1);
|
[InstCombine] transform more extract/insert pairs into shuffles (PR2109)
This is an extension of the shuffle combining from r203229:
http://reviews.llvm.org/rL203229
The idea is to widen a short input vector with undef elements so the
existing shuffle transform for extract/insert can kick in.
The motivation is to finally solve PR2109:
https://llvm.org/bugs/show_bug.cgi?id=2109
For that example, the IR becomes:
%1 = bitcast <2 x i32>* %P to <2 x float>*
%ld1 = load <2 x float>, <2 x float>* %1, align 8
%2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %i2
And x86 SSE output improves from:
movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
movdqa %xmm1, %xmm2
shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
retq
To the almost optimal:
movhpd (%rdi), %xmm0
Note: There's a tension in the existing transform related to generating
arbitrary shufflevector masks. We avoid that in other places in InstCombine
because we're scared that codegen can't handle strange masks, but it looks
like we're ok with producing those here. I purposely chose weird insert/extract
indexes for the regression tests to see the effect in these cases.
For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
better for these examples.
Differential Revision: http://reviews.llvm.org/D15096
llvm-svn: 256394
2015-12-25 05:17:56 +08:00
|
|
|
|
|
|
|
Value *ExtVecOp = ExtElt->getVectorOperand();
|
2016-01-30 04:21:02 +08:00
|
|
|
auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
|
|
|
|
BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
|
|
|
|
? ExtVecOpInst->getParent()
|
|
|
|
: ExtElt->getParent();
|
|
|
|
|
|
|
|
// TODO: This restriction matches the basic block check below when creating
|
|
|
|
// new extractelement instructions. If that limitation is removed, this one
|
|
|
|
// could also be removed. But for now, we just bail out to ensure that we
|
|
|
|
// will replace the extractelement instruction that is feeding our
|
|
|
|
// insertelement instruction. This allows the insertelement to then be
|
|
|
|
// replaced by a shufflevector. If the insertelement is not replaced, we can
|
|
|
|
// induce infinite looping because there's an optimization for extractelement
|
|
|
|
// that will delete our widening shuffle. This would trigger another attempt
|
|
|
|
// here to create that shuffle, and we spin forever.
|
|
|
|
if (InsertionBlock != InsElt->getParent())
|
|
|
|
return;
|
|
|
|
|
2016-11-10 08:15:14 +08:00
|
|
|
// TODO: This restriction matches the check in visitInsertElementInst() and
|
|
|
|
// prevents an infinite loop caused by not turning the extract/insert pair
|
|
|
|
// into a shuffle. We really should not need either check, but we're lacking
|
|
|
|
// folds for shufflevectors because we're afraid to generate shuffle masks
|
|
|
|
// that the backend can't handle.
|
|
|
|
if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
|
|
|
|
return;
|
|
|
|
|
2020-04-15 20:29:09 +08:00
|
|
|
auto *WideVec =
|
|
|
|
new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), ExtendMask);
|
[InstCombine] transform more extract/insert pairs into shuffles (PR2109)
This is an extension of the shuffle combining from r203229:
http://reviews.llvm.org/rL203229
The idea is to widen a short input vector with undef elements so the
existing shuffle transform for extract/insert can kick in.
The motivation is to finally solve PR2109:
https://llvm.org/bugs/show_bug.cgi?id=2109
For that example, the IR becomes:
%1 = bitcast <2 x i32>* %P to <2 x float>*
%ld1 = load <2 x float>, <2 x float>* %1, align 8
%2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %i2
And x86 SSE output improves from:
movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
movdqa %xmm1, %xmm2
shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
retq
To the almost optimal:
movhpd (%rdi), %xmm0
Note: There's a tension in the existing transform related to generating
arbitrary shufflevector masks. We avoid that in other places in InstCombine
because we're scared that codegen can't handle strange masks, but it looks
like we're ok with producing those here. I purposely chose weird insert/extract
indexes for the regression tests to see the effect in these cases.
For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
better for these examples.
Differential Revision: http://reviews.llvm.org/D15096
llvm-svn: 256394
2015-12-25 05:17:56 +08:00
|
|
|
|
2016-01-06 03:09:47 +08:00
|
|
|
// Insert the new shuffle after the vector operand of the extract is defined
|
2016-01-08 09:39:16 +08:00
|
|
|
// (as long as it's not a PHI) or at the start of the basic block of the
|
|
|
|
// extract, so any subsequent extracts in the same basic block can use it.
|
|
|
|
// TODO: Insert before the earliest ExtractElementInst that is replaced.
|
|
|
|
if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
|
2016-01-06 03:09:47 +08:00
|
|
|
WideVec->insertAfter(ExtVecOpInst);
|
2016-01-08 09:39:16 +08:00
|
|
|
else
|
2016-01-06 03:09:47 +08:00
|
|
|
IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
|
|
|
|
|
|
|
|
// Replace extracts from the original narrow vector with extracts from the new
|
|
|
|
// wide vector.
|
[InstCombine] transform more extract/insert pairs into shuffles (PR2109)
This is an extension of the shuffle combining from r203229:
http://reviews.llvm.org/rL203229
The idea is to widen a short input vector with undef elements so the
existing shuffle transform for extract/insert can kick in.
The motivation is to finally solve PR2109:
https://llvm.org/bugs/show_bug.cgi?id=2109
For that example, the IR becomes:
%1 = bitcast <2 x i32>* %P to <2 x float>*
%ld1 = load <2 x float>, <2 x float>* %1, align 8
%2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %i2
And x86 SSE output improves from:
movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
movdqa %xmm1, %xmm2
shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
retq
To the almost optimal:
movhpd (%rdi), %xmm0
Note: There's a tension in the existing transform related to generating
arbitrary shufflevector masks. We avoid that in other places in InstCombine
because we're scared that codegen can't handle strange masks, but it looks
like we're ok with producing those here. I purposely chose weird insert/extract
indexes for the regression tests to see the effect in these cases.
For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
better for these examples.
Differential Revision: http://reviews.llvm.org/D15096
llvm-svn: 256394
2015-12-25 05:17:56 +08:00
|
|
|
for (User *U : ExtVecOp->users()) {
|
2016-01-06 03:09:47 +08:00
|
|
|
ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
|
2016-01-08 09:39:16 +08:00
|
|
|
if (!OldExt || OldExt->getParent() != WideVec->getParent())
|
2016-01-06 03:09:47 +08:00
|
|
|
continue;
|
|
|
|
auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
|
2017-06-05 17:18:10 +08:00
|
|
|
NewExt->insertAfter(OldExt);
|
2016-02-02 06:23:39 +08:00
|
|
|
IC.replaceInstUsesWith(*OldExt, NewExt);
|
[InstCombine] transform more extract/insert pairs into shuffles (PR2109)
This is an extension of the shuffle combining from r203229:
http://reviews.llvm.org/rL203229
The idea is to widen a short input vector with undef elements so the
existing shuffle transform for extract/insert can kick in.
The motivation is to finally solve PR2109:
https://llvm.org/bugs/show_bug.cgi?id=2109
For that example, the IR becomes:
%1 = bitcast <2 x i32>* %P to <2 x float>*
%ld1 = load <2 x float>, <2 x float>* %1, align 8
%2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %i2
And x86 SSE output improves from:
movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
movdqa %xmm1, %xmm2
shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
retq
To the almost optimal:
movhpd (%rdi), %xmm0
Note: There's a tension in the existing transform related to generating
arbitrary shufflevector masks. We avoid that in other places in InstCombine
because we're scared that codegen can't handle strange masks, but it looks
like we're ok with producing those here. I purposely chose weird insert/extract
indexes for the regression tests to see the effect in these cases.
For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
better for these examples.
Differential Revision: http://reviews.llvm.org/D15096
llvm-svn: 256394
2015-12-25 05:17:56 +08:00
|
|
|
}
|
|
|
|
}
|
2014-03-07 18:24:44 +08:00
|
|
|
|
|
|
|
/// We are building a shuffle to create V, which is a sequence of insertelement,
|
|
|
|
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
|
2014-07-08 06:13:58 +08:00
|
|
|
/// not rely on the second vector source. Return a std::pair containing the
|
2014-03-07 18:24:44 +08:00
|
|
|
/// left and right vectors of the proposed shuffle (or 0), and set the Mask
|
|
|
|
/// parameter as required.
|
|
|
|
///
|
|
|
|
/// Note: we intentionally don't try to fold earlier shuffles since they have
|
|
|
|
/// often been chosen carefully to be efficiently implementable on the target.
|
2017-10-25 05:24:53 +08:00
|
|
|
using ShuffleOps = std::pair<Value *, Value *>;
|
2014-03-07 18:24:44 +08:00
|
|
|
|
2020-04-15 20:29:09 +08:00
|
|
|
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
|
[InstCombine] transform more extract/insert pairs into shuffles (PR2109)
This is an extension of the shuffle combining from r203229:
http://reviews.llvm.org/rL203229
The idea is to widen a short input vector with undef elements so the
existing shuffle transform for extract/insert can kick in.
The motivation is to finally solve PR2109:
https://llvm.org/bugs/show_bug.cgi?id=2109
For that example, the IR becomes:
%1 = bitcast <2 x i32>* %P to <2 x float>*
%ld1 = load <2 x float>, <2 x float>* %1, align 8
%2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %i2
And x86 SSE output improves from:
movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
movdqa %xmm1, %xmm2
shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
retq
To the almost optimal:
movhpd (%rdi), %xmm0
Note: There's a tension in the existing transform related to generating
arbitrary shufflevector masks. We avoid that in other places in InstCombine
because we're scared that codegen can't handle strange masks, but it looks
like we're ok with producing those here. I purposely chose weird insert/extract
indexes for the regression tests to see the effect in these cases.
For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
better for these examples.
Differential Revision: http://reviews.llvm.org/D15096
llvm-svn: 256394
2015-12-25 05:17:56 +08:00
|
|
|
Value *PermittedRHS,
|
|
|
|
InstCombiner &IC) {
|
2014-03-07 18:24:44 +08:00
|
|
|
assert(V->getType()->isVectorTy() && "Invalid shuffle!");
|
[InstCombine][SVE] Fix visitInsertElementInst for scalable type.
Summary:
This patch fixes the following issues in visitInsertElementInst:
1. Bail out for scalable type when analysis requires fixed size number of vector elements.
2. Use cast<FixedVectorType> to get vector number of elements. This ensure assertion
on scalable vector type.
3. For scalable type, avoid folding a chain of insertelement into splat:
insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ...
->
shufflevector(insertelt(X, %k, 0), undef, zero)
The length of scalable vector is unknown at compile-time, therefore we don't know if
given insertelement sequence is valid for splat.
Reviewers: sdesmalen, efriedma, spatel, nikic
Reviewed By: sdesmalen, efriedma
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78895
2020-05-08 03:43:14 +08:00
|
|
|
unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
if (isa<UndefValue>(V)) {
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask.assign(NumElts, -1);
|
2014-03-07 18:24:44 +08:00
|
|
|
return std::make_pair(
|
|
|
|
PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
|
2012-01-24 22:31:22 +08:00
|
|
|
}
|
2013-01-18 13:09:16 +08:00
|
|
|
|
2012-01-24 22:31:22 +08:00
|
|
|
if (isa<ConstantAggregateZero>(V)) {
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask.assign(NumElts, 0);
|
2014-03-07 18:24:44 +08:00
|
|
|
return std::make_pair(V, nullptr);
|
2012-01-24 22:31:22 +08:00
|
|
|
}
|
2013-01-18 13:09:16 +08:00
|
|
|
|
2012-01-24 22:31:22 +08:00
|
|
|
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
|
2010-01-05 13:36:20 +08:00
|
|
|
// If this is an insert of an extract from some other vector, include it.
|
|
|
|
Value *VecOp = IEI->getOperand(0);
|
|
|
|
Value *ScalarOp = IEI->getOperand(1);
|
|
|
|
Value *IdxOp = IEI->getOperand(2);
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
|
2014-03-07 18:24:44 +08:00
|
|
|
if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
|
2010-01-05 13:36:20 +08:00
|
|
|
unsigned ExtractedIdx =
|
2010-10-30 06:20:45 +08:00
|
|
|
cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
|
2010-01-05 13:36:20 +08:00
|
|
|
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
// Either the extracted from or inserted into vector must be RHSVec,
|
|
|
|
// otherwise we'd end up with a shuffle of three inputs.
|
2014-04-25 13:29:35 +08:00
|
|
|
if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
|
2014-03-07 18:24:44 +08:00
|
|
|
Value *RHS = EI->getOperand(0);
|
[InstCombine] transform more extract/insert pairs into shuffles (PR2109)
This is an extension of the shuffle combining from r203229:
http://reviews.llvm.org/rL203229
The idea is to widen a short input vector with undef elements so the
existing shuffle transform for extract/insert can kick in.
The motivation is to finally solve PR2109:
https://llvm.org/bugs/show_bug.cgi?id=2109
For that example, the IR becomes:
%1 = bitcast <2 x i32>* %P to <2 x float>*
%ld1 = load <2 x float>, <2 x float>* %1, align 8
%2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %i2
And x86 SSE output improves from:
movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
movdqa %xmm1, %xmm2
shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
retq
To the almost optimal:
movhpd (%rdi), %xmm0
Note: There's a tension in the existing transform related to generating
arbitrary shufflevector masks. We avoid that in other places in InstCombine
because we're scared that codegen can't handle strange masks, but it looks
like we're ok with producing those here. I purposely chose weird insert/extract
indexes for the regression tests to see the effect in these cases.
For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
better for these examples.
Differential Revision: http://reviews.llvm.org/D15096
llvm-svn: 256394
2015-12-25 05:17:56 +08:00
|
|
|
ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
|
2014-04-28 12:05:08 +08:00
|
|
|
assert(LR.second == nullptr || LR.second == RHS);
|
2014-03-07 18:24:44 +08:00
|
|
|
|
|
|
|
if (LR.first->getType() != RHS->getType()) {
|
[InstCombine] transform more extract/insert pairs into shuffles (PR2109)
This is an extension of the shuffle combining from r203229:
http://reviews.llvm.org/rL203229
The idea is to widen a short input vector with undef elements so the
existing shuffle transform for extract/insert can kick in.
The motivation is to finally solve PR2109:
https://llvm.org/bugs/show_bug.cgi?id=2109
For that example, the IR becomes:
%1 = bitcast <2 x i32>* %P to <2 x float>*
%ld1 = load <2 x float>, <2 x float>* %1, align 8
%2 = shufflevector <2 x float> %ld1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i2 = shufflevector <4 x float> %A, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %i2
And x86 SSE output improves from:
movq (%rdi), %xmm1 ## xmm1 = mem[0],zero
movdqa %xmm1, %xmm2
shufps $229, %xmm2, %xmm2 ## xmm2 = xmm2[1,1,2,3]
shufps $48, %xmm0, %xmm1 ## xmm1 = xmm1[0,0],xmm0[3,0]
shufps $132, %xmm1, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0,2]
shufps $32, %xmm0, %xmm2 ## xmm2 = xmm2[0,0],xmm0[2,0]
shufps $36, %xmm2, %xmm0 ## xmm0 = xmm0[0,1],xmm2[2,0]
retq
To the almost optimal:
movhpd (%rdi), %xmm0
Note: There's a tension in the existing transform related to generating
arbitrary shufflevector masks. We avoid that in other places in InstCombine
because we're scared that codegen can't handle strange masks, but it looks
like we're ok with producing those here. I purposely chose weird insert/extract
indexes for the regression tests to see the effect in these cases.
For PowerPC+Altivec, AArch64, and X86+SSE/AVX, I think the codegen is equal or
better for these examples.
Differential Revision: http://reviews.llvm.org/D15096
llvm-svn: 256394
2015-12-25 05:17:56 +08:00
|
|
|
// Although we are giving up for now, see if we can create extracts
|
|
|
|
// that match the inserts for another round of combining.
|
|
|
|
replaceExtractElements(IEI, EI, IC);
|
|
|
|
|
2014-03-07 18:24:44 +08:00
|
|
|
// We tried our best, but we can't find anything compatible with RHS
|
|
|
|
// further up the chain. Return a trivial shuffle.
|
|
|
|
for (unsigned i = 0; i < NumElts; ++i)
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask[i] = i;
|
2014-03-07 18:24:44 +08:00
|
|
|
return std::make_pair(V, nullptr);
|
|
|
|
}
|
|
|
|
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned NumLHSElts =
|
|
|
|
cast<VectorType>(RHS->getType())->getNumElements();
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
|
2014-03-07 18:24:44 +08:00
|
|
|
return std::make_pair(LR.first, RHS);
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2014-03-07 18:24:44 +08:00
|
|
|
if (VecOp == PermittedRHS) {
|
|
|
|
// We've gone as far as we can: anything on the other side of the
|
|
|
|
// extractelement will already have been converted into a shuffle.
|
|
|
|
unsigned NumLHSElts =
|
2020-04-09 01:42:22 +08:00
|
|
|
cast<VectorType>(EI->getOperand(0)->getType())->getNumElements();
|
2014-03-07 18:24:44 +08:00
|
|
|
for (unsigned i = 0; i != NumElts; ++i)
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
|
2014-03-07 18:24:44 +08:00
|
|
|
return std::make_pair(EI->getOperand(0), PermittedRHS);
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
// If this insertelement is a chain that comes from exactly these two
|
|
|
|
// vectors, return the vector and the effective shuffle.
|
2014-03-07 18:24:44 +08:00
|
|
|
if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
|
2015-11-18 01:24:08 +08:00
|
|
|
collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
|
2014-03-07 18:24:44 +08:00
|
|
|
Mask))
|
|
|
|
return std::make_pair(EI->getOperand(0), PermittedRHS);
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2015-11-30 06:09:34 +08:00
|
|
|
// Otherwise, we can't do anything fancy. Return an identity vector.
|
2010-01-05 13:36:20 +08:00
|
|
|
for (unsigned i = 0; i != NumElts; ++i)
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask.push_back(i);
|
2014-03-07 18:24:44 +08:00
|
|
|
return std::make_pair(V, nullptr);
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
|
|
|
|
2014-05-07 22:30:18 +08:00
|
|
|
/// Try to find redundant insertvalue instructions, like the following ones:
|
|
|
|
/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
|
|
|
|
/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
|
|
|
|
/// Here the second instruction inserts values at the same indices, as the
|
|
|
|
/// first one, making the first one redundant.
|
|
|
|
/// It should be transformed to:
|
|
|
|
/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
|
|
|
|
Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
|
|
|
|
bool IsRedundant = false;
|
|
|
|
ArrayRef<unsigned int> FirstIndices = I.getIndices();
|
|
|
|
|
|
|
|
// If there is a chain of insertvalue instructions (each of them except the
|
|
|
|
// last one has only one use and it's another insertvalue insn from this
|
|
|
|
// chain), check if any of the 'children' uses the same indices as the first
|
|
|
|
// instruction. In this case, the first one is redundant.
|
|
|
|
Value *V = &I;
|
2014-05-09 03:50:24 +08:00
|
|
|
unsigned Depth = 0;
|
2014-05-07 22:30:18 +08:00
|
|
|
while (V->hasOneUse() && Depth < 10) {
|
|
|
|
User *U = V->user_back();
|
2014-05-09 03:50:24 +08:00
|
|
|
auto UserInsInst = dyn_cast<InsertValueInst>(U);
|
|
|
|
if (!UserInsInst || U->getOperand(0) != V)
|
2014-05-07 22:30:18 +08:00
|
|
|
break;
|
|
|
|
if (UserInsInst->getIndices() == FirstIndices) {
|
|
|
|
IsRedundant = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
V = UserInsInst;
|
|
|
|
Depth++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (IsRedundant)
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(I, I.getOperand(0));
|
2014-05-07 22:30:18 +08:00
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2016-09-03 01:05:43 +08:00
|
|
|
/// Return true if every element of the mask of \p Shuf is either undefined or
/// picks the element at the same position from one of the two source
/// operands, and the mask has as many elements as the source vectors.
/// Always returns false for scalable vector operands.
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // The number of elements of a scalable vector is not a compile-time
  // constant, so it cannot be analyzed here.
  auto *SrcTy = Shuf.getOperand(0)->getType();
  if (isa<ScalableVectorType>(SrcTy))
    return false;

  const int NumMaskElts = Shuf.getShuffleMask().size();
  const int NumSrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();

  // A vector select does not change the size of the operands.
  if (NumMaskElts != NumSrcElts)
    return false;

  // Reject the shuffle as soon as one mask element crosses vector lanes.
  for (int Lane = 0; Lane != NumMaskElts; ++Lane) {
    const int Sel = Shuf.getMaskValue(Lane);
    const bool IsUndef = Sel == -1;
    const bool SameLaneOfOp0 = Sel == Lane;
    const bool SameLaneOfOp1 = Sel == Lane + NumSrcElts;
    if (!IsUndef && !SameLaneOfOp0 && !SameLaneOfOp1)
      return false;
  }

  return true;
}
|
|
|
|
|
2019-06-26 23:52:59 +08:00
|
|
|
/// Turn a chain of inserts that splats a value into an insert + shuffle:
|
|
|
|
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
|
|
|
|
/// shufflevector(insertelt(X, %k, 0), undef, zero)
|
|
|
|
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
|
|
|
|
// We are interested in the last insert in a chain. So if this insert has a
|
|
|
|
// single user and that user is an insert, bail.
|
2016-12-28 08:18:08 +08:00
|
|
|
if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
|
|
|
|
return nullptr;
|
|
|
|
|
[InstCombine][SVE] Fix visitInsertElementInst for scalable type.
Summary:
This patch fixes the following issues in visitInsertElementInst:
1. Bail out for scalable type when analysis requires fixed size number of vector elements.
2. Use cast<FixedVectorType> to get vector number of elements. This ensure assertion
on scalable vector type.
3. For scalable type, avoid folding a chain of insertelement into splat:
insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ...
->
shufflevector(insertelt(X, %k, 0), undef, zero)
The length of scalable vector is unknown at compile-time, therefore we don't know if
given insertelement sequence is valid for splat.
Reviewers: sdesmalen, efriedma, spatel, nikic
Reviewed By: sdesmalen, efriedma
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78895
2020-05-08 03:43:14 +08:00
|
|
|
VectorType *VecTy = InsElt.getType();
|
|
|
|
// Can not handle scalable type, the number of elements is not a compile-time
|
|
|
|
// constant.
|
|
|
|
if (isa<ScalableVectorType>(VecTy))
|
|
|
|
return nullptr;
|
|
|
|
unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
|
2016-12-28 08:18:08 +08:00
|
|
|
|
|
|
|
// Do not try to do this for a one-element vector, since that's a nop,
|
|
|
|
// and will cause an inf-loop.
|
|
|
|
if (NumElements == 1)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Value *SplatVal = InsElt.getOperand(1);
|
2018-07-31 03:41:25 +08:00
|
|
|
InsertElementInst *CurrIE = &InsElt;
|
2020-04-14 02:36:31 +08:00
|
|
|
SmallBitVector ElementPresent(NumElements, false);
|
2017-08-30 18:54:21 +08:00
|
|
|
InsertElementInst *FirstIE = nullptr;
|
2016-12-28 08:18:08 +08:00
|
|
|
|
|
|
|
// Walk the chain backwards, keeping track of which indices we inserted into,
|
|
|
|
// until we hit something that isn't an insert of the splatted value.
|
|
|
|
while (CurrIE) {
|
2017-11-28 02:19:32 +08:00
|
|
|
auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
|
2016-12-28 08:18:08 +08:00
|
|
|
if (!Idx || CurrIE->getOperand(1) != SplatVal)
|
|
|
|
return nullptr;
|
|
|
|
|
2017-11-28 02:19:32 +08:00
|
|
|
auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
|
2017-08-30 18:54:21 +08:00
|
|
|
// Check none of the intermediate steps have any additional uses, except
|
|
|
|
// for the root insertelement instruction, which can be re-used, if it
|
|
|
|
// inserts at position 0.
|
|
|
|
if (CurrIE != &InsElt &&
|
|
|
|
(!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
|
2016-12-28 08:18:08 +08:00
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
ElementPresent[Idx->getZExtValue()] = true;
|
2017-08-30 18:54:21 +08:00
|
|
|
FirstIE = CurrIE;
|
|
|
|
CurrIE = NextIE;
|
2016-12-28 08:18:08 +08:00
|
|
|
}
|
|
|
|
|
2019-07-05 00:45:34 +08:00
|
|
|
// If this is just a single insertelement (not a sequence), we are done.
|
|
|
|
if (FirstIE == &InsElt)
|
2016-12-28 08:18:08 +08:00
|
|
|
return nullptr;
|
|
|
|
|
2019-07-05 00:45:34 +08:00
|
|
|
// If we are not inserting into an undef vector, make sure we've seen an
|
|
|
|
// insert into every element.
|
|
|
|
// TODO: If the base vector is not undef, it might be better to create a splat
|
|
|
|
// and then a select-shuffle (blend) with the base vector.
|
|
|
|
if (!isa<UndefValue>(FirstIE->getOperand(0)))
|
2020-04-14 02:36:31 +08:00
|
|
|
if (!ElementPresent.all())
|
2019-07-05 00:45:34 +08:00
|
|
|
return nullptr;
|
|
|
|
|
2019-06-26 23:52:59 +08:00
|
|
|
// Create the insert + shuffle.
|
|
|
|
Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
|
|
|
|
UndefValue *UndefVec = UndefValue::get(VecTy);
|
|
|
|
Constant *Zero = ConstantInt::get(Int32Ty, 0);
|
|
|
|
if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
|
|
|
|
FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
|
2016-12-28 08:18:08 +08:00
|
|
|
|
2019-07-05 00:45:34 +08:00
|
|
|
// Splat from element 0, but replace absent elements with undef in the mask.
|
2020-04-15 20:29:09 +08:00
|
|
|
SmallVector<int, 16> Mask(NumElements, 0);
|
2019-07-05 00:45:34 +08:00
|
|
|
for (unsigned i = 0; i != NumElements; ++i)
|
|
|
|
if (!ElementPresent[i])
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask[i] = -1;
|
2019-07-05 00:45:34 +08:00
|
|
|
|
2020-04-15 20:29:09 +08:00
|
|
|
return new ShuffleVectorInst(FirstIE, UndefVec, Mask);
|
2016-12-28 08:18:08 +08:00
|
|
|
}
|
|
|
|
|
2019-07-09 03:48:52 +08:00
|
|
|
/// Try to fold an insert element into an existing splat shuffle by changing
|
|
|
|
/// the shuffle's mask to include the index of this insert element.
|
|
|
|
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
|
|
|
|
// Check if the vector operand of this insert is a canonical splat shuffle.
|
|
|
|
auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
|
|
|
|
if (!Shuf || !Shuf->isZeroEltSplat())
|
|
|
|
return nullptr;
|
|
|
|
|
[InstCombine][SVE] Fix visitInsertElementInst for scalable type.
Summary:
This patch fixes the following issues in visitInsertElementInst:
1. Bail out for scalable type when analysis requires fixed size number of vector elements.
2. Use cast<FixedVectorType> to get vector number of elements. This ensure assertion
on scalable vector type.
3. For scalable type, avoid folding a chain of insertelement into splat:
insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ...
->
shufflevector(insertelt(X, %k, 0), undef, zero)
The length of scalable vector is unknown at compile-time, therefore we don't know if
given insertelement sequence is valid for splat.
Reviewers: sdesmalen, efriedma, spatel, nikic
Reviewed By: sdesmalen, efriedma
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78895
2020-05-08 03:43:14 +08:00
|
|
|
// Bail out early if shuffle is scalable type. The number of elements in
|
|
|
|
// shuffle mask is unknown at compile-time.
|
|
|
|
if (isa<ScalableVectorType>(Shuf->getType()))
|
|
|
|
return nullptr;
|
|
|
|
|
2019-07-09 03:48:52 +08:00
|
|
|
// Check for a constant insertion index.
|
|
|
|
uint64_t IdxC;
|
|
|
|
if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// Check if the splat shuffle's input is the same as this insert's scalar op.
|
|
|
|
Value *X = InsElt.getOperand(1);
|
|
|
|
Value *Op0 = Shuf->getOperand(0);
|
2020-05-23 22:13:50 +08:00
|
|
|
if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
|
2019-07-09 03:48:52 +08:00
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// Replace the shuffle mask element at the index of this insert with a zero.
|
|
|
|
// For example:
|
|
|
|
// inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
|
|
|
|
// --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned NumMaskElts = Shuf->getType()->getNumElements();
|
2020-04-01 04:08:59 +08:00
|
|
|
SmallVector<int, 16> NewMask(NumMaskElts);
|
2019-07-09 03:48:52 +08:00
|
|
|
for (unsigned i = 0; i != NumMaskElts; ++i)
|
2020-04-01 04:08:59 +08:00
|
|
|
NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
|
2019-07-09 03:48:52 +08:00
|
|
|
|
|
|
|
return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
|
|
|
|
}
|
|
|
|
|
2019-09-09 03:03:01 +08:00
|
|
|
/// Try to fold an extract+insert element into an existing identity shuffle by
|
|
|
|
/// changing the shuffle's mask to include the index of this insert element.
|
|
|
|
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
|
|
|
|
// Check if the vector operand of this insert is an identity shuffle.
|
|
|
|
auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
|
|
|
|
if (!Shuf || !isa<UndefValue>(Shuf->getOperand(1)) ||
|
|
|
|
!(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
|
|
|
|
return nullptr;
|
|
|
|
|
[InstCombine][SVE] Fix visitInsertElementInst for scalable type.
Summary:
This patch fixes the following issues in visitInsertElementInst:
1. Bail out for scalable type when analysis requires fixed size number of vector elements.
2. Use cast<FixedVectorType> to get vector number of elements. This ensure assertion
on scalable vector type.
3. For scalable type, avoid folding a chain of insertelement into splat:
insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ...
->
shufflevector(insertelt(X, %k, 0), undef, zero)
The length of scalable vector is unknown at compile-time, therefore we don't know if
given insertelement sequence is valid for splat.
Reviewers: sdesmalen, efriedma, spatel, nikic
Reviewed By: sdesmalen, efriedma
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78895
2020-05-08 03:43:14 +08:00
|
|
|
// Bail out early if shuffle is scalable type. The number of elements in
|
|
|
|
// shuffle mask is unknown at compile-time.
|
|
|
|
if (isa<ScalableVectorType>(Shuf->getType()))
|
|
|
|
return nullptr;
|
|
|
|
|
2019-09-09 03:03:01 +08:00
|
|
|
// Check for a constant insertion index.
|
|
|
|
uint64_t IdxC;
|
|
|
|
if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// Check if this insert's scalar op is extracted from the identity shuffle's
|
|
|
|
// input vector.
|
|
|
|
Value *Scalar = InsElt.getOperand(1);
|
|
|
|
Value *X = Shuf->getOperand(0);
|
2020-05-23 22:13:50 +08:00
|
|
|
if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
|
2019-09-09 03:03:01 +08:00
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// Replace the shuffle mask element at the index of this extract+insert with
|
|
|
|
// that same index value.
|
|
|
|
// For example:
|
|
|
|
// inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned NumMaskElts = Shuf->getType()->getNumElements();
|
2020-04-01 04:08:59 +08:00
|
|
|
SmallVector<int, 16> NewMask(NumMaskElts);
|
|
|
|
ArrayRef<int> OldMask = Shuf->getShuffleMask();
|
2019-09-09 03:03:01 +08:00
|
|
|
for (unsigned i = 0; i != NumMaskElts; ++i) {
|
|
|
|
if (i != IdxC) {
|
|
|
|
// All mask elements besides the inserted element remain the same.
|
2020-04-01 04:08:59 +08:00
|
|
|
NewMask[i] = OldMask[i];
|
|
|
|
} else if (OldMask[i] == (int)IdxC) {
|
2019-09-09 03:03:01 +08:00
|
|
|
// If the mask element was already set, there's nothing to do
|
|
|
|
// (demanded elements analysis may unset it later).
|
|
|
|
return nullptr;
|
|
|
|
} else {
|
2020-04-01 04:08:59 +08:00
|
|
|
assert(OldMask[i] == UndefMaskElem &&
|
2019-09-09 03:03:01 +08:00
|
|
|
"Unexpected shuffle mask element for identity shuffle");
|
2020-04-01 04:08:59 +08:00
|
|
|
NewMask[i] = IdxC;
|
2019-09-09 03:03:01 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
|
|
|
|
}
|
|
|
|
|
[InstCombine] canonicalize insertelement of scalar constant ahead of insertelement of variable
insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
As noted in the code comment and seen in the test changes, the motivation is that by pulling
constant insertion up, we may be able to constant fold some insertelement instructions.
Differential Revision: https://reviews.llvm.org/D31196
llvm-svn: 298520
2017-03-23 01:10:44 +08:00
|
|
|
/// If we have an insertelement instruction feeding into another insertelement
|
|
|
|
/// and the 2nd is inserting a constant into the vector, canonicalize that
|
|
|
|
/// constant insertion before the insertion of a variable:
|
|
|
|
///
|
|
|
|
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
|
|
|
|
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
|
|
|
|
///
|
|
|
|
/// This has the potential of eliminating the 2nd insertelement instruction
|
|
|
|
/// via constant folding of the scalar constant into a vector constant.
|
|
|
|
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
|
|
|
|
InstCombiner::BuilderTy &Builder) {
|
|
|
|
auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
|
|
|
|
if (!InsElt1 || !InsElt1->hasOneUse())
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Value *X, *Y;
|
|
|
|
Constant *ScalarC;
|
|
|
|
ConstantInt *IdxC1, *IdxC2;
|
|
|
|
if (match(InsElt1->getOperand(0), m_Value(X)) &&
|
|
|
|
match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
|
|
|
|
match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
|
|
|
|
match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
|
|
|
|
match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
|
|
|
|
Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
|
|
|
|
return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
|
|
|
|
}
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
|
|
|
|
/// --> shufflevector X, CVec', Mask'
|
2016-09-03 01:05:43 +08:00
|
|
|
static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
|
|
|
|
// Bail out if the parent has more than one use. In that case, we'd be
|
2016-09-03 01:05:43 +08:00
|
|
|
// replacing the insertelt with a shuffle, and that's not a clear win.
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
if (!Inst || !Inst->hasOneUse())
|
2016-09-03 01:05:43 +08:00
|
|
|
return nullptr;
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
|
|
|
|
// The shuffle must have a constant vector operand. The insertelt must have
|
|
|
|
// a constant scalar being inserted at a constant position in the vector.
|
|
|
|
Constant *ShufConstVec, *InsEltScalar;
|
|
|
|
uint64_t InsEltIndex;
|
|
|
|
if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
|
|
|
|
!match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
|
|
|
|
!match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
|
|
|
|
return nullptr;
|
2016-09-03 01:05:43 +08:00
|
|
|
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
// Adding an element to an arbitrary shuffle could be expensive, but a
|
|
|
|
// shuffle that selects elements from vectors without crossing lanes is
|
|
|
|
// assumed cheap.
|
|
|
|
// If we're just adding a constant into that shuffle, it will still be
|
|
|
|
// cheap.
|
|
|
|
if (!isShuffleEquivalentToSelect(*Shuf))
|
|
|
|
return nullptr;
|
2016-09-03 01:05:43 +08:00
|
|
|
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
// From the above 'select' check, we know that the mask has the same number
|
|
|
|
// of elements as the vector input operands. We also know that each constant
|
|
|
|
// input element is used in its lane and can not be used more than once by
|
|
|
|
// the shuffle. Therefore, replace the constant in the shuffle's constant
|
|
|
|
// vector with the insertelt constant. Replace the constant in the shuffle's
|
|
|
|
// mask vector with the insertelt index plus the length of the vector
|
|
|
|
// (because the constant vector operand of a shuffle is always the 2nd
|
|
|
|
// operand).
|
2020-04-01 04:08:59 +08:00
|
|
|
ArrayRef<int> Mask = Shuf->getShuffleMask();
|
|
|
|
unsigned NumElts = Mask.size();
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
SmallVector<Constant *, 16> NewShufElts(NumElts);
|
2020-04-01 04:08:59 +08:00
|
|
|
SmallVector<int, 16> NewMaskElts(NumElts);
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
for (unsigned I = 0; I != NumElts; ++I) {
|
|
|
|
if (I == InsEltIndex) {
|
|
|
|
NewShufElts[I] = InsEltScalar;
|
2020-04-01 04:08:59 +08:00
|
|
|
NewMaskElts[I] = InsEltIndex + NumElts;
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
} else {
|
|
|
|
// Copy over the existing values.
|
|
|
|
NewShufElts[I] = ShufConstVec->getAggregateElement(I);
|
2020-04-01 04:08:59 +08:00
|
|
|
NewMaskElts[I] = Mask[I];
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
}
|
|
|
|
}
|
2016-09-03 01:05:43 +08:00
|
|
|
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
// Create new operands for a shuffle that includes the constant of the
|
|
|
|
// original insertelt. The old shuffle will be dead now.
|
|
|
|
return new ShuffleVectorInst(Shuf->getOperand(0),
|
2020-04-01 04:08:59 +08:00
|
|
|
ConstantVector::get(NewShufElts), NewMaskElts);
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
} else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
|
|
|
|
// Transform sequences of insertelements ops with constant data/indexes into
|
|
|
|
// a single shuffle op.
|
[InstCombine][SVE] Fix visitInsertElementInst for scalable type.
Summary:
This patch fixes the following issues in visitInsertElementInst:
1. Bail out for scalable type when analysis requires fixed size number of vector elements.
2. Use cast<FixedVectorType> to get vector number of elements. This ensure assertion
on scalable vector type.
3. For scalable type, avoid folding a chain of insertelement into splat:
insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ...
->
shufflevector(insertelt(X, %k, 0), undef, zero)
The length of scalable vector is unknown at compile-time, therefore we don't know if
given insertelement sequence is valid for splat.
Reviewers: sdesmalen, efriedma, spatel, nikic
Reviewed By: sdesmalen, efriedma
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78895
2020-05-08 03:43:14 +08:00
|
|
|
// Can not handle scalable type, the number of elements needed to create
|
|
|
|
// shuffle mask is not a compile-time constant.
|
|
|
|
if (isa<ScalableVectorType>(InsElt.getType()))
|
|
|
|
return nullptr;
|
|
|
|
unsigned NumElts =
|
|
|
|
cast<FixedVectorType>(InsElt.getType())->getNumElements();
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
|
|
|
|
uint64_t InsertIdx[2];
|
|
|
|
Constant *Val[2];
|
|
|
|
if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
|
|
|
|
!match(InsElt.getOperand(1), m_Constant(Val[0])) ||
|
|
|
|
!match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
|
|
|
|
!match(IEI->getOperand(1), m_Constant(Val[1])))
|
|
|
|
return nullptr;
|
|
|
|
SmallVector<Constant *, 16> Values(NumElts);
|
2020-04-15 20:29:09 +08:00
|
|
|
SmallVector<int, 16> Mask(NumElts);
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
auto ValI = std::begin(Val);
|
|
|
|
// Generate new constant vector and mask.
|
|
|
|
// We have 2 values/masks from the insertelements instructions. Insert them
|
|
|
|
// into new value/mask vectors.
|
|
|
|
for (uint64_t I : InsertIdx) {
|
|
|
|
if (!Values[I]) {
|
|
|
|
Values[I] = *ValI;
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask[I] = NumElts + I;
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
}
|
|
|
|
++ValI;
|
2016-09-03 01:05:43 +08:00
|
|
|
}
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
// Remaining values are filled with 'undef' values.
|
|
|
|
for (unsigned I = 0; I < NumElts; ++I) {
|
|
|
|
if (!Values[I]) {
|
|
|
|
Values[I] = UndefValue::get(InsElt.getType()->getElementType());
|
2020-04-15 20:29:09 +08:00
|
|
|
Mask[I] = I;
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// Create new operands for a shuffle that includes the constant of the
|
|
|
|
// original insertelt.
|
|
|
|
return new ShuffleVectorInst(IEI->getOperand(0),
|
2020-04-15 20:29:09 +08:00
|
|
|
ConstantVector::get(Values), Mask);
|
2016-09-03 01:05:43 +08:00
|
|
|
}
|
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector:
define <4 x float> @foo(<4 x float> %x) {
%ins1 = insertelement <4 x float> %x, float 1.0, i32 1
%ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
InstCombine could reduce that to a shufflevector:
define <4 x float> @goo(<4 x float> %x) {
%shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3>
ret <4 x float> %shuf
}
Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e.
shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float
undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ->
insertelement <4 x float> %v, float 1.0, 1
Differential Revision: https://reviews.llvm.org/D24182
llvm-svn: 282237
2016-09-23 17:14:08 +08:00
|
|
|
return nullptr;
|
2016-09-03 01:05:43 +08:00
|
|
|
}
|
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
|
|
|
|
Value *VecOp = IE.getOperand(0);
|
|
|
|
Value *ScalarOp = IE.getOperand(1);
|
|
|
|
Value *IdxOp = IE.getOperand(2);
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2017-12-13 19:21:18 +08:00
|
|
|
if (auto *V = SimplifyInsertElementInst(
|
|
|
|
VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
|
|
|
|
return replaceInstUsesWith(IE, V);
|
|
|
|
|
2020-05-10 23:37:47 +08:00
|
|
|
// If the scalar is bitcast and inserted into undef, do the insert in the
|
|
|
|
// source type followed by bitcast.
|
|
|
|
// TODO: Generalize for insert into any constant, not just undef?
|
|
|
|
Value *ScalarSrc;
|
|
|
|
if (match(VecOp, m_Undef()) &&
|
|
|
|
match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
|
|
|
|
(ScalarSrc->getType()->isIntegerTy() ||
|
|
|
|
ScalarSrc->getType()->isFloatingPointTy())) {
|
|
|
|
// inselt undef, (bitcast ScalarSrc), IdxOp -->
|
|
|
|
// bitcast (inselt undef, ScalarSrc, IdxOp)
|
|
|
|
Type *ScalarTy = ScalarSrc->getType();
|
|
|
|
Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
|
|
|
|
UndefValue *NewUndef = UndefValue::get(VecTy);
|
|
|
|
Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
|
|
|
|
return new BitCastInst(NewInsElt, IE.getType());
|
|
|
|
}
|
|
|
|
|
2019-05-18 02:06:12 +08:00
|
|
|
// If the vector and scalar are both bitcast from the same element type, do
|
|
|
|
// the insert in that source type followed by bitcast.
|
2020-05-10 23:37:47 +08:00
|
|
|
Value *VecSrc;
|
2019-05-18 02:06:12 +08:00
|
|
|
if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
|
|
|
|
match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
|
|
|
|
(VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
|
|
|
|
VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
|
2020-04-09 01:42:22 +08:00
|
|
|
cast<VectorType>(VecSrc->getType())->getElementType() ==
|
|
|
|
ScalarSrc->getType()) {
|
2019-05-18 02:06:12 +08:00
|
|
|
// inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
|
|
|
|
// bitcast (inselt VecSrc, ScalarSrc, IdxOp)
|
|
|
|
Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
|
|
|
|
return new BitCastInst(NewInsElt, IE.getType());
|
|
|
|
}
|
|
|
|
|
[InstCombine][SVE] Fix visitInsertElementInst for scalable type.
Summary:
This patch fixes the following issues in visitInsertElementInst:
1. Bail out for scalable type when analysis requires fixed size number of vector elements.
2. Use cast<FixedVectorType> to get vector number of elements. This ensure assertion
on scalable vector type.
3. For scalable type, avoid folding a chain of insertelement into splat:
insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ...
->
shufflevector(insertelt(X, %k, 0), undef, zero)
The length of scalable vector is unknown at compile-time, therefore we don't know if
given insertelement sequence is valid for splat.
Reviewers: sdesmalen, efriedma, spatel, nikic
Reviewed By: sdesmalen, efriedma
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78895
2020-05-08 03:43:14 +08:00
|
|
|
// If the inserted element was extracted from some other fixed-length vector
|
|
|
|
// and both indexes are valid constants, try to turn this into a shuffle.
|
|
|
|
// Can not handle scalable vector type, the number of elements needed to
|
|
|
|
// create shuffle mask is not a compile-time constant.
|
2018-10-21 01:15:57 +08:00
|
|
|
uint64_t InsertedIdx, ExtractedIdx;
|
|
|
|
Value *ExtVecOp;
|
[InstCombine][SVE] Fix visitInsertElementInst for scalable type.
Summary:
This patch fixes the following issues in visitInsertElementInst:
1. Bail out for scalable type when analysis requires fixed size number of vector elements.
2. Use cast<FixedVectorType> to get vector number of elements. This ensure assertion
on scalable vector type.
3. For scalable type, avoid folding a chain of insertelement into splat:
insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ...
->
shufflevector(insertelt(X, %k, 0), undef, zero)
The length of scalable vector is unknown at compile-time, therefore we don't know if
given insertelement sequence is valid for splat.
Reviewers: sdesmalen, efriedma, spatel, nikic
Reviewed By: sdesmalen, efriedma
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78895
2020-05-08 03:43:14 +08:00
|
|
|
if (isa<FixedVectorType>(IE.getType()) &&
|
|
|
|
match(IdxOp, m_ConstantInt(InsertedIdx)) &&
|
2020-04-09 01:42:22 +08:00
|
|
|
match(ScalarOp,
|
2020-05-23 22:13:50 +08:00
|
|
|
m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
|
[InstCombine][SVE] Fix visitInsertElementInst for scalable type.
Summary:
This patch fixes the following issues in visitInsertElementInst:
1. Bail out for scalable type when analysis requires fixed size number of vector elements.
2. Use cast<FixedVectorType> to get vector number of elements. This ensure assertion
on scalable vector type.
3. For scalable type, avoid folding a chain of insertelement into splat:
insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ...
->
shufflevector(insertelt(X, %k, 0), undef, zero)
The length of scalable vector is unknown at compile-time, therefore we don't know if
given insertelement sequence is valid for splat.
Reviewers: sdesmalen, efriedma, spatel, nikic
Reviewed By: sdesmalen, efriedma
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78895
2020-05-08 03:43:14 +08:00
|
|
|
isa<FixedVectorType>(ExtVecOp->getType()) &&
|
|
|
|
ExtractedIdx <
|
|
|
|
cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
|
2018-10-21 01:15:57 +08:00
|
|
|
// TODO: Looking at the user(s) to determine if this insert is a
|
|
|
|
// fold-to-shuffle opportunity does not match the usual instcombine
|
|
|
|
// constraints. We should decide if the transform is worthy based only
|
|
|
|
// on this instruction and its operands, but that may not work currently.
|
|
|
|
//
|
|
|
|
// Here, we are trying to avoid creating shuffles before reaching
|
|
|
|
// the end of a chain of extract-insert pairs. This is complicated because
|
|
|
|
// we do not generally form arbitrary shuffle masks in instcombine
|
|
|
|
// (because those may codegen poorly), but collectShuffleElements() does
|
|
|
|
// exactly that.
|
|
|
|
//
|
|
|
|
// The rules for determining what is an acceptable target-independent
|
|
|
|
// shuffle mask are fuzzy because they evolve based on the backend's
|
|
|
|
// capabilities and real-world impact.
|
|
|
|
auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
|
|
|
|
if (!Insert.hasOneUse())
|
|
|
|
return true;
|
|
|
|
auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
|
|
|
|
if (!InsertUser)
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Try to form a shuffle from a chain of extract-insert ops.
|
|
|
|
if (isShuffleRootCandidate(IE)) {
|
2020-04-15 20:29:09 +08:00
|
|
|
SmallVector<int, 16> Mask;
|
2018-10-21 01:15:57 +08:00
|
|
|
ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
|
|
|
|
|
|
|
|
// The proposed shuffle may be trivial, in which case we shouldn't
|
|
|
|
// perform the combine.
|
|
|
|
if (LR.first != &IE && LR.second != &IE) {
|
|
|
|
// We now have a shuffle of LHS, RHS, Mask.
|
|
|
|
if (LR.second == nullptr)
|
|
|
|
LR.second = UndefValue::get(LR.first->getType());
|
2020-04-15 20:29:09 +08:00
|
|
|
return new ShuffleVectorInst(LR.first, LR.second, Mask);
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
[InstCombine][SVE] Fix visitInsertElementInst for scalable type.
Summary:
This patch fixes the following issues in visitInsertElementInst:
1. Bail out for scalable type when analysis requires fixed size number of vector elements.
2. Use cast<FixedVectorType> to get vector number of elements. This ensure assertion
on scalable vector type.
3. For scalable type, avoid folding a chain of insertelement into splat:
insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ...
->
shufflevector(insertelt(X, %k, 0), undef, zero)
The length of scalable vector is unknown at compile-time, therefore we don't know if
given insertelement sequence is valid for splat.
Reviewers: sdesmalen, efriedma, spatel, nikic
Reviewed By: sdesmalen, efriedma
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78895
2020-05-08 03:43:14 +08:00
|
|
|
if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
|
|
|
|
unsigned VWidth = VecTy->getNumElements();
|
|
|
|
APInt UndefElts(VWidth, 0);
|
|
|
|
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
|
|
|
|
if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
|
|
|
|
if (V != &IE)
|
|
|
|
return replaceInstUsesWith(IE, V);
|
|
|
|
return &IE;
|
|
|
|
}
|
2011-02-20 06:42:40 +08:00
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2016-09-03 01:05:43 +08:00
|
|
|
if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
|
|
|
|
return Shuf;
|
|
|
|
|
2017-07-08 07:16:26 +08:00
|
|
|
if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
|
[InstCombine] canonicalize insertelement of scalar constant ahead of insertelement of variable
insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
As noted in the code comment and seen in the test changes, the motivation is that by pulling
constant insertion up, we may be able to constant fold some insertelement instructions.
Differential Revision: https://reviews.llvm.org/D31196
llvm-svn: 298520
2017-03-23 01:10:44 +08:00
|
|
|
return NewInsElt;
|
|
|
|
|
2019-06-26 23:52:59 +08:00
|
|
|
if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
|
2016-12-28 08:18:08 +08:00
|
|
|
return Broadcast;
|
|
|
|
|
2019-07-09 03:48:52 +08:00
|
|
|
if (Instruction *Splat = foldInsEltIntoSplat(IE))
|
|
|
|
return Splat;
|
|
|
|
|
2019-09-09 03:03:01 +08:00
|
|
|
if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
|
|
|
|
return IdentityShuf;
|
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
|
|
|
|
2013-05-31 08:59:42 +08:00
|
|
|
/// Return true if we can evaluate the specified expression tree if the vector
|
|
|
|
/// elements were shuffled in a different order.
|
2018-09-29 23:05:24 +08:00
|
|
|
static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
|
2013-06-02 04:51:31 +08:00
|
|
|
unsigned Depth = 5) {
|
2013-05-31 08:59:42 +08:00
|
|
|
// We can always reorder the elements of a constant.
|
|
|
|
if (isa<Constant>(V))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// We won't reorder vector arguments. No IPO here.
|
|
|
|
Instruction *I = dyn_cast<Instruction>(V);
|
|
|
|
if (!I) return false;
|
|
|
|
|
|
|
|
// Two users may expect different orders of the elements. Don't try it.
|
|
|
|
if (!I->hasOneUse())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (Depth == 0) return false;
|
|
|
|
|
|
|
|
switch (I->getOpcode()) {
|
[InstCombine] Fix miscompile bug in canEvaluateShuffled
Summary:
Add restrictions in canEvaluateShuffled to prevent that we for example
transform
%0 = insertelement <2 x i16> undef, i16 %a, i32 0
%1 = srem <2 x i16> %0, <i16 2, i16 1>
%2 = shufflevector <2 x i16> %1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
into
%1 = insertelement <2 x i16> undef, i16 %a, i32 1
%2 = srem <2 x i16> %1, <i16 undef, i16 2>
as having an undef denominator makes the srem undefined (for all
vector elements).
Fixes: https://bugs.llvm.org/show_bug.cgi?id=43689
Reviewers: spatel, lebedev.ri
Reviewed By: spatel, lebedev.ri
Subscribers: lebedev.ri, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69038
llvm-svn: 375208
2019-10-18 15:42:02 +08:00
|
|
|
case Instruction::UDiv:
|
|
|
|
case Instruction::SDiv:
|
|
|
|
case Instruction::URem:
|
|
|
|
case Instruction::SRem:
|
|
|
|
// Propagating an undefined shuffle mask element to integer div/rem is not
|
|
|
|
// allowed because those opcodes can create immediate undefined behavior
|
|
|
|
// from an undefined element in an operand.
|
|
|
|
if (llvm::any_of(Mask, [](int M){ return M == -1; }))
|
|
|
|
return false;
|
|
|
|
LLVM_FALLTHROUGH;
|
2013-05-31 08:59:42 +08:00
|
|
|
case Instruction::Add:
|
|
|
|
case Instruction::FAdd:
|
|
|
|
case Instruction::Sub:
|
|
|
|
case Instruction::FSub:
|
|
|
|
case Instruction::Mul:
|
|
|
|
case Instruction::FMul:
|
|
|
|
case Instruction::FDiv:
|
|
|
|
case Instruction::FRem:
|
|
|
|
case Instruction::Shl:
|
|
|
|
case Instruction::LShr:
|
|
|
|
case Instruction::AShr:
|
|
|
|
case Instruction::And:
|
|
|
|
case Instruction::Or:
|
|
|
|
case Instruction::Xor:
|
|
|
|
case Instruction::ICmp:
|
|
|
|
case Instruction::FCmp:
|
|
|
|
case Instruction::Trunc:
|
|
|
|
case Instruction::ZExt:
|
|
|
|
case Instruction::SExt:
|
|
|
|
case Instruction::FPToUI:
|
|
|
|
case Instruction::FPToSI:
|
|
|
|
case Instruction::UIToFP:
|
|
|
|
case Instruction::SIToFP:
|
|
|
|
case Instruction::FPTrunc:
|
|
|
|
case Instruction::FPExt:
|
|
|
|
case Instruction::GetElementPtr: {
|
2018-09-30 21:50:42 +08:00
|
|
|
// Bail out if we would create longer vector ops. We could allow creating
|
[InstCombine] Fix miscompile bug in canEvaluateShuffled
Summary:
Add restrictions in canEvaluateShuffled to prevent that we for example
transform
%0 = insertelement <2 x i16> undef, i16 %a, i32 0
%1 = srem <2 x i16> %0, <i16 2, i16 1>
%2 = shufflevector <2 x i16> %1, <2 x i16> undef, <2 x i32> <i32 undef, i32 0>
into
%1 = insertelement <2 x i16> undef, i16 %a, i32 1
%2 = srem <2 x i16> %1, <i16 undef, i16 2>
as having an undef denominator makes the srem undefined (for all
vector elements).
Fixes: https://bugs.llvm.org/show_bug.cgi?id=43689
Reviewers: spatel, lebedev.ri
Reviewed By: spatel, lebedev.ri
Subscribers: lebedev.ri, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69038
llvm-svn: 375208
2019-10-18 15:42:02 +08:00
|
|
|
// longer vector ops, but that may result in more expensive codegen.
|
2018-09-30 21:50:42 +08:00
|
|
|
Type *ITy = I->getType();
|
2020-04-09 01:42:22 +08:00
|
|
|
if (ITy->isVectorTy() &&
|
|
|
|
Mask.size() > cast<VectorType>(ITy)->getNumElements())
|
2018-09-30 21:50:42 +08:00
|
|
|
return false;
|
2015-11-17 06:16:52 +08:00
|
|
|
for (Value *Operand : I->operands()) {
|
2018-09-29 23:05:24 +08:00
|
|
|
if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
|
2013-05-31 08:59:42 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
case Instruction::InsertElement: {
|
|
|
|
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
|
|
|
|
if (!CI) return false;
|
|
|
|
int ElementNumber = CI->getLimitedValue();
|
|
|
|
|
|
|
|
// Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
|
|
|
|
// can't put an element into multiple indices.
|
|
|
|
bool SeenOnce = false;
|
|
|
|
for (int i = 0, e = Mask.size(); i != e; ++i) {
|
|
|
|
if (Mask[i] == ElementNumber) {
|
|
|
|
if (SeenOnce)
|
|
|
|
return false;
|
|
|
|
SeenOnce = true;
|
|
|
|
}
|
|
|
|
}
|
2018-09-29 23:05:24 +08:00
|
|
|
return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
|
2013-05-31 08:59:42 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Rebuild a new instruction just like 'I' but with the new operands given.
|
|
|
|
/// In the event of type mismatch, the type of the operands is correct.
|
2015-11-18 01:24:08 +08:00
|
|
|
static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
|
2013-05-31 08:59:42 +08:00
|
|
|
// We don't want to use the IRBuilder here because we want the replacement
|
|
|
|
// instructions to appear next to 'I', not the builder's insertion point.
|
|
|
|
switch (I->getOpcode()) {
|
|
|
|
case Instruction::Add:
|
|
|
|
case Instruction::FAdd:
|
|
|
|
case Instruction::Sub:
|
|
|
|
case Instruction::FSub:
|
|
|
|
case Instruction::Mul:
|
|
|
|
case Instruction::FMul:
|
|
|
|
case Instruction::UDiv:
|
|
|
|
case Instruction::SDiv:
|
|
|
|
case Instruction::FDiv:
|
|
|
|
case Instruction::URem:
|
|
|
|
case Instruction::SRem:
|
|
|
|
case Instruction::FRem:
|
|
|
|
case Instruction::Shl:
|
|
|
|
case Instruction::LShr:
|
|
|
|
case Instruction::AShr:
|
|
|
|
case Instruction::And:
|
|
|
|
case Instruction::Or:
|
|
|
|
case Instruction::Xor: {
|
|
|
|
BinaryOperator *BO = cast<BinaryOperator>(I);
|
|
|
|
assert(NewOps.size() == 2 && "binary operator with #ops != 2");
|
|
|
|
BinaryOperator *New =
|
|
|
|
BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
|
|
|
|
NewOps[0], NewOps[1], "", BO);
|
|
|
|
if (isa<OverflowingBinaryOperator>(BO)) {
|
|
|
|
New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
|
|
|
|
New->setHasNoSignedWrap(BO->hasNoSignedWrap());
|
|
|
|
}
|
|
|
|
if (isa<PossiblyExactOperator>(BO)) {
|
|
|
|
New->setIsExact(BO->isExact());
|
|
|
|
}
|
2014-01-18 08:48:14 +08:00
|
|
|
if (isa<FPMathOperator>(BO))
|
|
|
|
New->copyFastMathFlags(I);
|
2013-05-31 08:59:42 +08:00
|
|
|
return New;
|
|
|
|
}
|
|
|
|
case Instruction::ICmp:
|
|
|
|
assert(NewOps.size() == 2 && "icmp with #ops != 2");
|
|
|
|
return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
|
|
|
|
NewOps[0], NewOps[1]);
|
|
|
|
case Instruction::FCmp:
|
|
|
|
assert(NewOps.size() == 2 && "fcmp with #ops != 2");
|
|
|
|
return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
|
|
|
|
NewOps[0], NewOps[1]);
|
|
|
|
case Instruction::Trunc:
|
|
|
|
case Instruction::ZExt:
|
|
|
|
case Instruction::SExt:
|
|
|
|
case Instruction::FPToUI:
|
|
|
|
case Instruction::FPToSI:
|
|
|
|
case Instruction::UIToFP:
|
|
|
|
case Instruction::SIToFP:
|
|
|
|
case Instruction::FPTrunc:
|
|
|
|
case Instruction::FPExt: {
|
|
|
|
// It's possible that the mask has a different number of elements from
|
|
|
|
// the original cast. We recompute the destination type to match the mask.
|
2020-04-09 01:42:22 +08:00
|
|
|
Type *DestTy = VectorType::get(
|
|
|
|
I->getType()->getScalarType(),
|
|
|
|
cast<VectorType>(NewOps[0]->getType())->getElementCount());
|
2013-05-31 08:59:42 +08:00
|
|
|
assert(NewOps.size() == 1 && "cast with #ops != 1");
|
|
|
|
return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
|
|
|
|
"", I);
|
|
|
|
}
|
|
|
|
case Instruction::GetElementPtr: {
|
|
|
|
Value *Ptr = NewOps[0];
|
|
|
|
ArrayRef<Value*> Idx = NewOps.slice(1);
|
2015-03-15 03:24:04 +08:00
|
|
|
GetElementPtrInst *GEP = GetElementPtrInst::Create(
|
|
|
|
cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
|
2013-05-31 08:59:42 +08:00
|
|
|
GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
|
|
|
|
return GEP;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
llvm_unreachable("failed to rebuild vector instructions");
|
|
|
|
}
|
|
|
|
|
2018-09-29 23:05:24 +08:00
|
|
|
/// Re-materialize the vector value \p V with its elements permuted according
/// to \p Mask, rebuilding instructions where necessary.
///
/// Undef and zeroinitializer inputs yield an undef / zero vector of
/// Mask.size() elements; other constants are folded through a constant
/// shufflevector. Instructions are rebuilt recursively.
///
/// \returns the reordered value (possibly \p V itself when nothing changed).
static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
  // Mask.size() does not need to be equal to the number of vector elements.

  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
  Type *ScalarTy = V->getType()->getScalarType();

  if (isa<UndefValue>(V))
    return UndefValue::get(FixedVectorType::get(ScalarTy, Mask.size()));

  if (isa<ConstantAggregateZero>(V))
    return ConstantAggregateZero::get(
        FixedVectorType::get(ScalarTy, Mask.size()));

  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
                                          Mask);

  Instruction *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::Select:
  case Instruction::GetElementPtr: {
    SmallVector<Value*, 8> NewOps;
    // A change in element count always forces a rebuild, even if every
    // operand comes back unchanged.
    bool NeedsRebuild =
        (Mask.size() != cast<VectorType>(I->getType())->getNumElements());
    for (Value *OldOp : I->operands()) {
      // Recursively call evaluateInDifferentElementOrder on vector arguments
      // as well. E.g. GetElementPtr may have scalar operands even if the
      // return value is a vector, so we need to examine the operand type.
      Value *NewOp = OldOp->getType()->isVectorTy()
                         ? evaluateInDifferentElementOrder(OldOp, Mask)
                         : OldOp;
      NewOps.push_back(NewOp);
      NeedsRebuild |= (NewOp != OldOp);
    }
    if (!NeedsRebuild)
      return I;
    return buildNew(I, NewOps);
  }
  case Instruction::InsertElement: {
    int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();

    // The insertelement was inserting at Element. Figure out which element
    // that becomes after shuffling. The answer is guaranteed to be unique
    // by canEvaluateShuffled.
    auto Pos = llvm::find(Mask, Element);

    // The base vector is reordered in either case.
    Value *NewVec = evaluateInDifferentElementOrder(I->getOperand(0), Mask);

    // If element is not in Mask, no need to handle the operand 1 (element to
    // be inserted). Just evaluate values in operand 0 according to Mask.
    if (Pos == Mask.end())
      return NewVec;

    int Index = static_cast<int>(Pos - Mask.begin());
    Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
    return InsertElementInst::Create(NewVec, I->getOperand(1),
                                     ConstantInt::get(I32Ty, Index), "", I);
  }
  }
  llvm_unreachable("failed to reorder elements of vector instruction!");
}
|
2010-01-05 13:36:20 +08:00
|
|
|
|
2015-02-26 06:30:51 +08:00
|
|
|
// Returns true if the shuffle is extracting a contiguous range of values from
|
|
|
|
// LHS, for example:
|
|
|
|
// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
|
|
// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
|
|
|
|
// Shuffles to: |EE|FF|GG|HH|
|
|
|
|
// +--+--+--+--+
|
|
|
|
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
|
2020-04-01 04:08:59 +08:00
|
|
|
ArrayRef<int> Mask) {
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned LHSElems =
|
|
|
|
cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
|
2015-02-26 06:30:51 +08:00
|
|
|
unsigned MaskElems = Mask.size();
|
|
|
|
unsigned BegIdx = Mask.front();
|
|
|
|
unsigned EndIdx = Mask.back();
|
|
|
|
if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
|
|
|
|
return false;
|
|
|
|
for (unsigned I = 0; I != MaskElems; ++I)
|
|
|
|
if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-07-03 01:42:29 +08:00
|
|
|
/// These are the ingredients in an alternate form binary operator as described
|
|
|
|
/// below.
|
|
|
|
struct BinopElts {
|
|
|
|
BinaryOperator::BinaryOps Opcode;
|
|
|
|
Value *Op0;
|
|
|
|
Value *Op1;
|
|
|
|
BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
|
|
|
|
Value *V0 = nullptr, Value *V1 = nullptr) :
|
|
|
|
Opcode(Opc), Op0(V0), Op1(V1) {}
|
|
|
|
operator bool() const { return Opcode != 0; }
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Binops may be transformed into binops with different opcodes and operands.
|
|
|
|
/// Reverse the usual canonicalization to enable folds with the non-canonical
|
|
|
|
/// form of the binop. If a transform is possible, return the elements of the
|
|
|
|
/// new binop. If not, return invalid elements.
|
|
|
|
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
|
|
|
|
Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
|
|
|
|
Type *Ty = BO->getType();
|
|
|
|
switch (BO->getOpcode()) {
|
|
|
|
case Instruction::Shl: {
|
|
|
|
// shl X, C --> mul X, (1 << C)
|
|
|
|
Constant *C;
|
|
|
|
if (match(BO1, m_Constant(C))) {
|
|
|
|
Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
|
|
|
|
return { Instruction::Mul, BO0, ShlOne };
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Instruction::Or: {
|
|
|
|
// or X, C --> add X, C (when X and C have no common bits set)
|
|
|
|
const APInt *C;
|
|
|
|
if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
|
|
|
|
return { Instruction::Add, BO0, BO1 };
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
2018-07-03 21:44:22 +08:00
|
|
|
/// Fold a select-shuffle that blends a value with a binop of that same value
/// and a constant into a single binop with a blended constant:
///   shuf (bop X, C), X, M --> bop X, C'
///   shuf X, (bop X, C), M --> bop X, C'
///
/// \param Shuf a shuffle for which isSelect() is true (asserted).
/// \returns the newly created binop, or nullptr if the pattern does not match
///          or the opcode has no identity constant.
static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");

  // One shuffle operand must be a binop-with-constant of the other operand.
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
  Constant *C;
  const bool Op0IsBinop =
      match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C)));
  if (!Op0IsBinop && !match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
    return nullptr;

  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
  Value *X = Op0IsBinop ? Op1 : Op0;
  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();

  // The identity constant for a binop leaves the variable operand unchanged
  // (for a vector: a splat of something like 0, -1, or 1). Without one, the
  // lanes that pass X through cannot be expressed by the binop.
  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
  if (!IdC)
    return nullptr;

  // Shuffle identity constants into the lanes that return the original value.
  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
  // The existing binop constant vector remains in the same operand position.
  ArrayRef<int> Mask = Shuf.getShuffleMask();
  Constant *NewC;
  if (Op0IsBinop)
    NewC = ConstantExpr::getShuffleVector(C, IdC, Mask);
  else
    NewC = ConstantExpr::getShuffleVector(IdC, C, Mask);

  // An undef mask element can become an undef constant element; for
  // div/rem/shift that could introduce poison or UB, so ask for a safe
  // constant instead.
  const bool MaskHasUndef = is_contained(Mask, UndefMaskElem);
  const bool IsDivRemOrShift =
      Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode);
  const bool MightCreatePoisonOrUB = MaskHasUndef && IsDivRemOrShift;
  if (MightCreatePoisonOrUB)
    NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);

  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
  NewBO->copyIRFlags(BO);

  // An undef shuffle mask element may propagate as an undef constant element
  // in the new binop. That would produce poison where the original code might
  // not. If we already made a safe constant, then there's no danger.
  if (MaskHasUndef && !MightCreatePoisonOrUB)
    NewBO->dropPoisonGeneratingFlags();

  return NewBO;
}
|
|
|
|
|
2019-07-09 00:26:48 +08:00
|
|
|
/// If we have an insert of a scalar to a non-zero element of an undefined
|
|
|
|
/// vector and then shuffle that value, that's the same as inserting to the zero
|
|
|
|
/// element and shuffling. Splatting from the zero element is recognized as the
|
|
|
|
/// canonical form of splat.
|
|
|
|
static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
|
|
|
|
InstCombiner::BuilderTy &Builder) {
|
|
|
|
Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
|
2020-04-01 04:08:59 +08:00
|
|
|
ArrayRef<int> Mask = Shuf.getShuffleMask();
|
2019-07-09 00:26:48 +08:00
|
|
|
Value *X;
|
|
|
|
uint64_t IndexC;
|
|
|
|
|
|
|
|
// Match a shuffle that is a splat to a non-zero element.
|
2020-05-23 22:13:50 +08:00
|
|
|
if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
|
|
|
|
m_ConstantInt(IndexC)))) ||
|
2020-04-01 04:08:59 +08:00
|
|
|
!match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
|
2019-07-09 00:26:48 +08:00
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// Insert into element 0 of an undef vector.
|
|
|
|
UndefValue *UndefVec = UndefValue::get(Shuf.getType());
|
|
|
|
Constant *Zero = Builder.getInt32(0);
|
|
|
|
Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
|
|
|
|
|
|
|
|
// Splat from element 0. Any mask element that is undefined remains undefined.
|
|
|
|
// For example:
|
|
|
|
// shuf (inselt undef, X, 2), undef, <2,2,undef>
|
|
|
|
// --> shuf (inselt undef, X, 0), undef, <0,0,undef>
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned NumMaskElts = Shuf.getType()->getNumElements();
|
2020-04-01 04:08:59 +08:00
|
|
|
SmallVector<int, 16> NewMask(NumMaskElts, 0);
|
2019-07-09 00:26:48 +08:00
|
|
|
for (unsigned i = 0; i != NumMaskElts; ++i)
|
2020-04-01 04:08:59 +08:00
|
|
|
if (Mask[i] == UndefMaskElem)
|
|
|
|
NewMask[i] = Mask[i];
|
2019-07-09 00:26:48 +08:00
|
|
|
|
2020-04-01 04:08:59 +08:00
|
|
|
return new ShuffleVectorInst(NewIns, UndefVec, NewMask);
|
2019-07-09 00:26:48 +08:00
|
|
|
}
|
|
|
|
|
2018-07-03 01:42:29 +08:00
|
|
|
/// Try to fold shuffles that are the equivalent of a vector select.
|
2018-06-29 21:44:06 +08:00
|
|
|
static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
|
2018-07-03 01:42:29 +08:00
|
|
|
InstCombiner::BuilderTy &Builder,
|
|
|
|
const DataLayout &DL) {
|
2018-06-22 04:15:09 +08:00
|
|
|
if (!Shuf.isSelect())
|
|
|
|
return nullptr;
|
|
|
|
|
2019-11-26 00:55:57 +08:00
|
|
|
// Canonicalize to choose from operand 0 first unless operand 1 is undefined.
|
|
|
|
// Commuting undef to operand 0 conflicts with another canonicalization.
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned NumElts = Shuf.getType()->getNumElements();
|
2019-11-26 00:55:57 +08:00
|
|
|
if (!isa<UndefValue>(Shuf.getOperand(1)) &&
|
|
|
|
Shuf.getMaskValue(0) >= (int)NumElts) {
|
2019-04-08 21:28:29 +08:00
|
|
|
// TODO: Can we assert that both operands of a shuffle-select are not undef
|
|
|
|
// (otherwise, it would have been folded by instsimplify?
|
2019-03-31 23:01:30 +08:00
|
|
|
Shuf.commute();
|
|
|
|
return &Shuf;
|
|
|
|
}
|
|
|
|
|
2018-07-03 21:44:22 +08:00
|
|
|
if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
|
|
|
|
return I;
|
|
|
|
|
2018-06-22 04:15:09 +08:00
|
|
|
BinaryOperator *B0, *B1;
|
|
|
|
if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
|
|
|
|
!match(Shuf.getOperand(1), m_BinOp(B1)))
|
|
|
|
return nullptr;
|
|
|
|
|
2018-06-29 21:44:06 +08:00
|
|
|
Value *X, *Y;
|
2018-06-22 04:15:09 +08:00
|
|
|
Constant *C0, *C1;
|
2018-06-22 20:46:16 +08:00
|
|
|
bool ConstantsAreOp1;
|
|
|
|
if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
|
2018-06-29 21:44:06 +08:00
|
|
|
match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
|
2018-06-22 20:46:16 +08:00
|
|
|
ConstantsAreOp1 = true;
|
|
|
|
else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
|
2018-06-29 21:44:06 +08:00
|
|
|
match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
|
2018-06-22 20:46:16 +08:00
|
|
|
ConstantsAreOp1 = false;
|
|
|
|
else
|
2018-06-22 04:15:09 +08:00
|
|
|
return nullptr;
|
|
|
|
|
2018-06-29 01:48:04 +08:00
|
|
|
// We need matching binops to fold the lanes together.
|
|
|
|
BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
|
|
|
|
BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
|
|
|
|
bool DropNSW = false;
|
|
|
|
if (ConstantsAreOp1 && Opc0 != Opc1) {
|
|
|
|
// TODO: We drop "nsw" if shift is converted into multiply because it may
|
|
|
|
// not be correct when the shift amount is BitWidth - 1. We could examine
|
|
|
|
// each vector element to determine if it is safe to keep that flag.
|
2018-07-03 01:42:29 +08:00
|
|
|
if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
|
2018-06-29 01:48:04 +08:00
|
|
|
DropNSW = true;
|
2018-07-03 01:42:29 +08:00
|
|
|
if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
|
|
|
|
assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
|
|
|
|
Opc0 = AltB0.Opcode;
|
|
|
|
C0 = cast<Constant>(AltB0.Op1);
|
|
|
|
} else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
|
|
|
|
assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
|
|
|
|
Opc1 = AltB1.Opcode;
|
|
|
|
C1 = cast<Constant>(AltB1.Op1);
|
2018-06-29 01:48:04 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Opc0 != Opc1)
|
2018-06-22 07:56:59 +08:00
|
|
|
return nullptr;
|
|
|
|
|
2018-06-29 01:48:04 +08:00
|
|
|
// The opcodes must be the same. Use a new name to make that clear.
|
|
|
|
BinaryOperator::BinaryOps BOpc = Opc0;
|
|
|
|
|
2018-07-10 21:33:26 +08:00
|
|
|
// Select the constant elements needed for the single binop.
|
2020-04-01 04:08:59 +08:00
|
|
|
ArrayRef<int> Mask = Shuf.getShuffleMask();
|
2018-07-10 21:33:26 +08:00
|
|
|
Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
|
|
|
|
|
2018-07-09 21:21:46 +08:00
|
|
|
// We are moving a binop after a shuffle. When a shuffle has an undefined
|
|
|
|
// mask element, the result is undefined, but it is not poison or undefined
|
|
|
|
// behavior. That is not necessarily true for div/rem/shift.
|
|
|
|
bool MightCreatePoisonOrUB =
|
2020-04-01 04:08:59 +08:00
|
|
|
is_contained(Mask, UndefMaskElem) &&
|
2018-07-09 21:21:46 +08:00
|
|
|
(Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
|
2018-07-10 21:33:26 +08:00
|
|
|
if (MightCreatePoisonOrUB)
|
|
|
|
NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
|
2018-07-09 21:21:46 +08:00
|
|
|
|
2018-06-29 21:44:06 +08:00
|
|
|
Value *V;
|
|
|
|
if (X == Y) {
|
|
|
|
// Remove a binop and the shuffle by rearranging the constant:
|
|
|
|
// shuffle (op V, C0), (op V, C1), M --> op V, C'
|
|
|
|
// shuffle (op C0, V), (op C1, V), M --> op C', V
|
|
|
|
V = X;
|
2018-07-09 21:21:46 +08:00
|
|
|
} else {
|
2018-06-29 21:44:06 +08:00
|
|
|
// If there are 2 different variable operands, we must create a new shuffle
|
|
|
|
// (select) first, so check uses to ensure that we don't end up with more
|
|
|
|
// instructions than we started with.
|
2018-07-09 21:21:46 +08:00
|
|
|
if (!B0->hasOneUse() && !B1->hasOneUse())
|
|
|
|
return nullptr;
|
|
|
|
|
2018-07-10 21:33:26 +08:00
|
|
|
// If we use the original shuffle mask and op1 is *variable*, we would be
|
|
|
|
// putting an undef into operand 1 of div/rem/shift. This is either UB or
|
|
|
|
// poison. We do not have to guard against UB when *constants* are op1
|
|
|
|
// because safe constants guarantee that we do not overflow sdiv/srem (and
|
|
|
|
// there's no danger for other opcodes).
|
|
|
|
// TODO: To allow this case, create a new shuffle mask with no undefs.
|
|
|
|
if (MightCreatePoisonOrUB && !ConstantsAreOp1)
|
2018-07-09 21:21:46 +08:00
|
|
|
return nullptr;
|
|
|
|
|
2018-06-29 21:44:06 +08:00
|
|
|
// Note: In general, we do not create new shuffles in InstCombine because we
|
|
|
|
// do not know if a target can lower an arbitrary shuffle optimally. In this
|
|
|
|
// case, the shuffle uses the existing mask, so there is no additional risk.
|
|
|
|
|
|
|
|
// Select the variable vectors first, then perform the binop:
|
|
|
|
// shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
|
|
|
|
// shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
|
2018-07-09 21:21:46 +08:00
|
|
|
V = Builder.CreateShuffleVector(X, Y, Mask);
|
2018-06-29 21:44:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
|
|
|
|
BinaryOperator::Create(BOpc, NewC, V);
|
2018-06-22 04:15:09 +08:00
|
|
|
|
2018-07-09 21:21:46 +08:00
|
|
|
// Flags are intersected from the 2 source binops. But there are 2 exceptions:
|
|
|
|
// 1. If we changed an opcode, poison conditions might have changed.
|
|
|
|
// 2. If the shuffle had undef mask elements, the new binop might have undefs
|
2018-07-11 00:09:49 +08:00
|
|
|
// where the original code did not. But if we already made a safe constant,
|
|
|
|
// then there's no danger.
|
2018-06-22 04:15:09 +08:00
|
|
|
NewBO->copyIRFlags(B0);
|
|
|
|
NewBO->andIRFlags(B1);
|
2018-06-29 01:48:04 +08:00
|
|
|
if (DropNSW)
|
|
|
|
NewBO->setHasNoSignedWrap(false);
|
2020-04-01 04:08:59 +08:00
|
|
|
if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
|
2018-07-09 21:21:46 +08:00
|
|
|
NewBO->dropPoisonGeneratingFlags();
|
2018-06-22 04:15:09 +08:00
|
|
|
return NewBO;
|
|
|
|
}
|
|
|
|
|
2020-04-05 21:46:22 +08:00
|
|
|
/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
|
|
|
|
/// Example (little endian):
|
|
|
|
/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
|
|
|
|
static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
|
|
|
|
bool IsBigEndian) {
|
|
|
|
// This must be a bitcasted shuffle of 1 vector integer operand.
|
|
|
|
Type *DestType = Shuf.getType();
|
|
|
|
Value *X;
|
|
|
|
if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
|
|
|
|
!match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// The source type must have the same number of elements as the shuffle,
|
|
|
|
// and the source element type must be larger than the shuffle element type.
|
|
|
|
Type *SrcType = X->getType();
|
|
|
|
if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
|
2020-04-09 01:42:22 +08:00
|
|
|
cast<VectorType>(SrcType)->getNumElements() !=
|
|
|
|
cast<VectorType>(DestType)->getNumElements() ||
|
2020-04-05 21:46:22 +08:00
|
|
|
SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
assert(Shuf.changesLength() && !Shuf.increasesLength() &&
|
|
|
|
"Expected a shuffle that decreases length");
|
|
|
|
|
|
|
|
// Last, check that the mask chooses the correct low bits for each narrow
|
|
|
|
// element in the result.
|
|
|
|
uint64_t TruncRatio =
|
|
|
|
SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
|
|
|
|
ArrayRef<int> Mask = Shuf.getShuffleMask();
|
|
|
|
for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
|
|
|
|
if (Mask[i] == UndefMaskElem)
|
|
|
|
continue;
|
|
|
|
uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
|
|
|
|
assert(LSBIndex <= std::numeric_limits<int32_t>::max() &&
|
|
|
|
"Overflowed 32-bits");
|
|
|
|
if (Mask[i] != (int)LSBIndex)
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
return new TruncInst(X, DestType);
|
|
|
|
}
|
|
|
|
|
[InstCombine] narrow vector select with padded condition and extracted result (PR38691)
shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
The motivating case from:
https://bugs.llvm.org/show_bug.cgi?id=38691
...is the last regression test. In that case, we're just left with the narrow select.
Note that if we do create new shuffles, they use the existing extraction identity mask,
so there's no danger that this transform creates arbitrary shuffles.
Differential Revision: https://reviews.llvm.org/D51496
llvm-svn: 341708
2018-09-08 05:03:34 +08:00
|
|
|
/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
|
|
|
|
/// narrowing (concatenating with undef and extracting back to the original
|
|
|
|
/// length). This allows replacing the wide select with a narrow select.
|
2018-10-09 23:29:26 +08:00
|
|
|
static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
|
|
|
|
InstCombiner::BuilderTy &Builder) {
|
[InstCombine] narrow vector select with padded condition and extracted result (PR38691)
shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
The motivating case from:
https://bugs.llvm.org/show_bug.cgi?id=38691
...is the last regression test. In that case, we're just left with the narrow select.
Note that if we do create new shuffles, they use the existing extraction identity mask,
so there's no danger that this transform creates arbitrary shuffles.
Differential Revision: https://reviews.llvm.org/D51496
llvm-svn: 341708
2018-09-08 05:03:34 +08:00
|
|
|
// This must be a narrowing identity shuffle. It extracts the 1st N elements
|
|
|
|
// of the 1st vector operand of a shuffle.
|
|
|
|
if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// The vector being shuffled must be a vector select that we can eliminate.
|
|
|
|
// TODO: The one-use requirement could be eased if X and/or Y are constants.
|
|
|
|
Value *Cond, *X, *Y;
|
|
|
|
if (!match(Shuf.getOperand(0),
|
|
|
|
m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// We need a narrow condition value. It must be extended with undef elements
|
|
|
|
// and have the same number of elements as this shuffle.
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned NarrowNumElts = Shuf.getType()->getNumElements();
|
[InstCombine] narrow vector select with padded condition and extracted result (PR38691)
shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
The motivating case from:
https://bugs.llvm.org/show_bug.cgi?id=38691
...is the last regression test. In that case, we're just left with the narrow select.
Note that if we do create new shuffles, they use the existing extraction identity mask,
so there's no danger that this transform creates arbitrary shuffles.
Differential Revision: https://reviews.llvm.org/D51496
llvm-svn: 341708
2018-09-08 05:03:34 +08:00
|
|
|
Value *NarrowCond;
|
2020-05-23 22:13:50 +08:00
|
|
|
if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
|
2020-04-09 01:42:22 +08:00
|
|
|
cast<VectorType>(NarrowCond->getType())->getNumElements() !=
|
|
|
|
NarrowNumElts ||
|
[InstCombine] narrow vector select with padded condition and extracted result (PR38691)
shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
The motivating case from:
https://bugs.llvm.org/show_bug.cgi?id=38691
...is the last regression test. In that case, we're just left with the narrow select.
Note that if we do create new shuffles, they use the existing extraction identity mask,
so there's no danger that this transform creates arbitrary shuffles.
Differential Revision: https://reviews.llvm.org/D51496
llvm-svn: 341708
2018-09-08 05:03:34 +08:00
|
|
|
!cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
|
|
|
|
// sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
|
|
|
|
Value *Undef = UndefValue::get(X->getType());
|
2020-04-01 04:08:59 +08:00
|
|
|
Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getShuffleMask());
|
|
|
|
Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getShuffleMask());
|
[InstCombine] narrow vector select with padded condition and extracted result (PR38691)
shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
The motivating case from:
https://bugs.llvm.org/show_bug.cgi?id=38691
...is the last regression test. In that case, we're just left with the narrow select.
Note that if we do create new shuffles, they use the existing extraction identity mask,
so there's no danger that this transform creates arbitrary shuffles.
Differential Revision: https://reviews.llvm.org/D51496
llvm-svn: 341708
2018-09-08 05:03:34 +08:00
|
|
|
return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
|
|
|
|
}
|
|
|
|
|
2018-10-14 23:25:06 +08:00
|
|
|
/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
|
|
|
|
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
|
|
|
|
Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
|
|
|
|
if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Value *X, *Y;
|
2020-04-01 04:08:59 +08:00
|
|
|
ArrayRef<int> Mask;
|
2020-05-23 22:13:50 +08:00
|
|
|
if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
|
2018-10-14 23:25:06 +08:00
|
|
|
return nullptr;
|
|
|
|
|
2019-02-06 06:58:45 +08:00
|
|
|
// Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
|
|
|
|
// then combining may result in worse codegen.
|
|
|
|
if (!Op0->hasOneUse())
|
|
|
|
return nullptr;
|
|
|
|
|
2018-10-14 23:25:06 +08:00
|
|
|
// We are extracting a subvector from a shuffle. Remove excess elements from
|
|
|
|
// the 1st shuffle mask to eliminate the extract.
|
|
|
|
//
|
|
|
|
// This transform is conservatively limited to identity extracts because we do
|
|
|
|
// not allow arbitrary shuffle mask creation as a target-independent transform
|
|
|
|
// (because we can't guarantee that will lower efficiently).
|
|
|
|
//
|
|
|
|
// If the extracting shuffle has an undef mask element, it transfers to the
|
|
|
|
// new shuffle mask. Otherwise, copy the original mask element. Example:
|
|
|
|
// shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
|
|
|
|
// shuf X, Y, <C0, undef, C2, undef>
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned NumElts = Shuf.getType()->getNumElements();
|
2020-04-01 04:08:59 +08:00
|
|
|
SmallVector<int, 16> NewMask(NumElts);
|
|
|
|
assert(NumElts < Mask.size() &&
|
2018-10-14 23:25:06 +08:00
|
|
|
"Identity with extract must have less elements than its inputs");
|
|
|
|
|
|
|
|
for (unsigned i = 0; i != NumElts; ++i) {
|
2020-04-01 04:08:59 +08:00
|
|
|
int ExtractMaskElt = Shuf.getMaskValue(i);
|
|
|
|
int MaskElt = Mask[i];
|
|
|
|
NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt;
|
2018-10-14 23:25:06 +08:00
|
|
|
}
|
2020-04-01 04:08:59 +08:00
|
|
|
return new ShuffleVectorInst(X, Y, NewMask);
|
2018-10-14 23:25:06 +08:00
|
|
|
}
|
|
|
|
|
2019-12-10 23:10:05 +08:00
|
|
|
/// Try to replace a shuffle with an insertelement or try to replace a shuffle
|
|
|
|
/// operand with the operand of an insertelement.
|
2020-02-04 04:17:36 +08:00
|
|
|
static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
|
|
|
|
InstCombiner &IC) {
|
2018-10-30 23:26:39 +08:00
|
|
|
Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
|
2020-04-01 04:08:59 +08:00
|
|
|
SmallVector<int, 16> Mask;
|
|
|
|
Shuf.getShuffleMask(Mask);
|
2018-10-30 23:26:39 +08:00
|
|
|
|
|
|
|
// The shuffle must not change vector sizes.
|
|
|
|
// TODO: This restriction could be removed if the insert has only one use
|
|
|
|
// (because the transform would require a new length-changing shuffle).
|
|
|
|
int NumElts = Mask.size();
|
2020-04-09 01:42:22 +08:00
|
|
|
if (NumElts != (int)(cast<VectorType>(V0->getType())->getNumElements()))
|
2018-10-30 23:26:39 +08:00
|
|
|
return nullptr;
|
|
|
|
|
2019-12-10 23:10:05 +08:00
|
|
|
// This is a specialization of a fold in SimplifyDemandedVectorElts. We may
|
|
|
|
// not be able to handle it there if the insertelement has >1 use.
|
|
|
|
// If the shuffle has an insertelement operand but does not choose the
|
|
|
|
// inserted scalar element from that value, then we can replace that shuffle
|
|
|
|
// operand with the source vector of the insertelement.
|
|
|
|
Value *X;
|
|
|
|
uint64_t IdxC;
|
2020-05-23 22:13:50 +08:00
|
|
|
if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
|
2019-12-10 23:10:05 +08:00
|
|
|
// shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
|
2020-02-04 04:17:36 +08:00
|
|
|
if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; }))
|
|
|
|
return IC.replaceOperand(Shuf, 0, X);
|
2019-12-10 23:10:05 +08:00
|
|
|
}
|
2020-05-23 22:13:50 +08:00
|
|
|
if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
|
2019-12-10 23:10:05 +08:00
|
|
|
// Offset the index constant by the vector width because we are checking for
|
|
|
|
// accesses to the 2nd vector input of the shuffle.
|
|
|
|
IdxC += NumElts;
|
|
|
|
// shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
|
2020-02-04 04:17:36 +08:00
|
|
|
if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; }))
|
|
|
|
return IC.replaceOperand(Shuf, 1, X);
|
2019-12-10 23:10:05 +08:00
|
|
|
}
|
|
|
|
|
2018-10-30 23:26:39 +08:00
|
|
|
// shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
|
|
|
|
auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
|
|
|
|
// We need an insertelement with a constant index.
|
2020-05-23 22:13:50 +08:00
|
|
|
if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
|
|
|
|
m_ConstantInt(IndexC))))
|
2018-10-30 23:26:39 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Test the shuffle mask to see if it splices the inserted scalar into the
|
|
|
|
// operand 1 vector of the shuffle.
|
|
|
|
int NewInsIndex = -1;
|
|
|
|
for (int i = 0; i != NumElts; ++i) {
|
|
|
|
// Ignore undef mask elements.
|
|
|
|
if (Mask[i] == -1)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// The shuffle takes elements of operand 1 without lane changes.
|
|
|
|
if (Mask[i] == NumElts + i)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// The shuffle must choose the inserted scalar exactly once.
|
|
|
|
if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// The shuffle is placing the inserted scalar into element i.
|
|
|
|
NewInsIndex = i;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
|
|
|
|
|
|
|
|
// Index is updated to the potentially translated insertion lane.
|
|
|
|
IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
|
|
|
|
return true;
|
|
|
|
};
|
|
|
|
|
|
|
|
// If the shuffle is unnecessary, insert the scalar operand directly into
|
|
|
|
// operand 1 of the shuffle. Example:
|
|
|
|
// shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
|
|
|
|
Value *Scalar;
|
|
|
|
ConstantInt *IndexC;
|
|
|
|
if (isShufflingScalarIntoOp1(Scalar, IndexC))
|
|
|
|
return InsertElementInst::Create(V1, Scalar, IndexC);
|
|
|
|
|
|
|
|
// Try again after commuting shuffle. Example:
|
|
|
|
// shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
|
|
|
|
// shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
|
|
|
|
std::swap(V0, V1);
|
|
|
|
ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
|
|
|
|
if (isShufflingScalarIntoOp1(Scalar, IndexC))
|
|
|
|
return InsertElementInst::Create(V1, Scalar, IndexC);
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2019-05-22 08:32:25 +08:00
|
|
|
/// Fold a shuffle of two identity-with-padding (widening) shuffles into a
/// shuffle of the narrow source vectors:
///   shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
///
/// \returns the new narrow-operand shuffle, or nullptr if the operands are not
///          both widening shuffles of same-typed, non-undef, power-of-2 sized
///          sources.
static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
  // Match the operands as identity with padding (also known as concatenation
  // with undef) shuffles of the same source type. The backend is expected to
  // recreate these concatenations from a shuffle of narrow operands.
  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding())
    return nullptr;
  if (!Shuffle1 || !Shuffle1->isIdentityWithPadding())
    return nullptr;

  // We limit this transform to power-of-2 types because we expect that the
  // backend can convert the simplified IR patterns to identical nodes as the
  // original IR.
  // TODO: If we can verify the same behavior for arbitrary types, the
  //       power-of-2 checks can be removed.
  Value *X = Shuffle0->getOperand(0);
  Value *Y = Shuffle1->getOperand(0);
  if (X->getType() != Y->getType())
    return nullptr;
  if (!isPowerOf2_32(Shuf.getType()->getNumElements()) ||
      !isPowerOf2_32(Shuffle0->getType()->getNumElements()) ||
      !isPowerOf2_32(cast<VectorType>(X->getType())->getNumElements()))
    return nullptr;
  if (isa<UndefValue>(X) || isa<UndefValue>(Y))
    return nullptr;

  assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
         isa<UndefValue>(Shuffle1->getOperand(1)) &&
         "Unexpected operand for identity shuffle");

  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
  // operands directly by adjusting the shuffle mask to account for the narrower
  // types.
  int NarrowElts = cast<VectorType>(X->getType())->getNumElements();
  int WideElts = Shuffle0->getType()->getNumElements();
  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");

  ArrayRef<int> Mask = Shuf.getShuffleMask();
  SmallVector<int, 16> NewMask(Mask.size(), -1);
  for (int i = 0, e = Mask.size(); i != e; ++i) {
    const int MaskElt = Mask[i];
    if (MaskElt == -1)
      continue;

    // Which widened operand does this element read, and which lane of it?
    const bool FromOp0 = MaskElt < WideElts;
    ShuffleVectorInst *WideSrc = FromOp0 ? Shuffle0 : Shuffle1;
    const int WideLane = FromOp0 ? MaskElt : MaskElt - WideElts;

    // If this shuffle is choosing an undef element from 1 of the sources, that
    // element is undef (NewMask[i] keeps its initial -1).
    if (WideSrc->getMaskValue(WideLane) == -1)
      continue;

    // If this shuffle is choosing from the 1st narrow op, the mask element is
    // the same. If this shuffle is choosing from the 2nd narrow op, the mask
    // element is offset down to adjust for the narrow vector widths.
    if (FromOp0) {
      assert(MaskElt < NarrowElts && "Unexpected shuffle mask");
      NewMask[i] = MaskElt;
    } else {
      assert(MaskElt < (WideElts + NarrowElts) && "Unexpected shuffle mask");
      NewMask[i] = MaskElt - (WideElts - NarrowElts);
    }
  }
  return new ShuffleVectorInst(X, Y, NewMask);
}
|
|
|
|
|
2010-01-05 13:36:20 +08:00
|
|
|
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
|
|
|
|
Value *LHS = SVI.getOperand(0);
|
|
|
|
Value *RHS = SVI.getOperand(1);
|
2020-04-03 01:44:50 +08:00
|
|
|
SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
|
|
|
|
if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
|
|
|
|
SVI.getType(), ShufQuery))
|
2017-04-04 12:47:57 +08:00
|
|
|
return replaceInstUsesWith(SVI, V);
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2019-11-26 02:30:45 +08:00
|
|
|
// shuffle x, x, mask --> shuffle x, undef, mask'
|
2020-04-09 01:42:22 +08:00
|
|
|
unsigned VWidth = SVI.getType()->getNumElements();
|
|
|
|
unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
|
2020-04-01 04:08:59 +08:00
|
|
|
ArrayRef<int> Mask = SVI.getShuffleMask();
|
2018-08-29 22:42:12 +08:00
|
|
|
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
|
2020-04-03 01:44:50 +08:00
|
|
|
|
|
|
|
// Peek through a bitcasted shuffle operand by scaling the mask. If the
|
|
|
|
// simulated shuffle can simplify, then this shuffle is unnecessary:
|
|
|
|
// shuf (bitcast X), undef, Mask --> bitcast X'
|
2020-04-15 02:41:35 +08:00
|
|
|
// TODO: This could be extended to allow length-changing shuffles.
|
|
|
|
// The transform might also be obsoleted if we allowed canonicalization
|
|
|
|
// of bitcasted shuffles.
|
2020-04-03 01:44:50 +08:00
|
|
|
Value *X;
|
|
|
|
if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
|
2020-04-15 02:41:35 +08:00
|
|
|
X->getType()->isVectorTy() && VWidth == LHSWidth) {
|
|
|
|
// Try to create a scaled mask constant.
|
2020-04-09 01:42:22 +08:00
|
|
|
auto *XType = cast<VectorType>(X->getType());
|
|
|
|
unsigned XNumElts = XType->getNumElements();
|
2020-04-03 01:44:50 +08:00
|
|
|
SmallVector<int, 16> ScaledMask;
|
2020-04-15 02:41:35 +08:00
|
|
|
if (XNumElts >= VWidth) {
|
|
|
|
assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
|
|
|
|
narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
|
|
|
|
} else {
|
|
|
|
assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast");
|
|
|
|
if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
|
|
|
|
ScaledMask.clear();
|
|
|
|
}
|
|
|
|
if (!ScaledMask.empty()) {
|
|
|
|
// If the shuffled source vector simplifies, cast that value to this
|
|
|
|
// shuffle's type.
|
|
|
|
if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType),
|
|
|
|
ScaledMask, XType, ShufQuery))
|
|
|
|
return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
|
|
|
|
}
|
2020-04-03 01:44:50 +08:00
|
|
|
}
|
|
|
|
|
2019-11-26 02:30:45 +08:00
|
|
|
if (LHS == RHS) {
|
2019-11-25 23:54:18 +08:00
|
|
|
assert(!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?");
|
2010-01-05 13:36:20 +08:00
|
|
|
// Remap any references to RHS to use LHS.
|
2020-04-01 04:08:59 +08:00
|
|
|
SmallVector<int, 16> Elts;
|
2019-11-25 23:40:21 +08:00
|
|
|
for (unsigned i = 0; i != VWidth; ++i) {
|
2019-11-26 02:30:45 +08:00
|
|
|
// Propagate undef elements or force mask to LHS.
|
|
|
|
if (Mask[i] < 0)
|
2020-04-01 04:08:59 +08:00
|
|
|
Elts.push_back(UndefMaskElem);
|
2019-11-26 00:11:12 +08:00
|
|
|
else
|
2020-04-01 04:08:59 +08:00
|
|
|
Elts.push_back(Mask[i] % LHSWidth);
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
2020-04-01 04:08:59 +08:00
|
|
|
return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts);
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2019-11-26 02:30:45 +08:00
|
|
|
// shuffle undef, x, mask --> shuffle x, undef, mask'
|
|
|
|
if (isa<UndefValue>(LHS)) {
|
|
|
|
SVI.commute();
|
|
|
|
return &SVI;
|
|
|
|
}
|
|
|
|
|
2019-07-09 00:26:48 +08:00
|
|
|
if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
|
|
|
|
return I;
|
|
|
|
|
2019-03-30 00:49:38 +08:00
|
|
|
if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
|
|
|
|
return I;
|
|
|
|
|
2020-04-05 21:46:22 +08:00
|
|
|
if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
|
|
|
|
return I;
|
|
|
|
|
2019-03-30 00:49:38 +08:00
|
|
|
if (Instruction *I = narrowVectorSelect(SVI, Builder))
|
|
|
|
return I;
|
|
|
|
|
|
|
|
APInt UndefElts(VWidth, 0);
|
|
|
|
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
|
|
|
|
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
|
|
|
|
if (V != &SVI)
|
|
|
|
return replaceInstUsesWith(SVI, V);
|
|
|
|
return &SVI;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Instruction *I = foldIdentityExtractShuffle(SVI))
|
|
|
|
return I;
|
|
|
|
|
2019-05-22 08:32:25 +08:00
|
|
|
// These transforms have the potential to lose undef knowledge, so they are
|
2019-03-30 00:49:38 +08:00
|
|
|
// intentionally placed after SimplifyDemandedVectorElts().
|
2020-02-04 04:17:36 +08:00
|
|
|
if (Instruction *I = foldShuffleWithInsert(SVI, *this))
|
2019-03-30 00:49:38 +08:00
|
|
|
return I;
|
2019-05-22 08:32:25 +08:00
|
|
|
if (Instruction *I = foldIdentityPaddedShuffles(SVI))
|
|
|
|
return I;
|
2019-03-30 00:49:38 +08:00
|
|
|
|
2018-09-30 21:50:42 +08:00
|
|
|
if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
|
2018-09-29 23:05:24 +08:00
|
|
|
Value *V = evaluateInDifferentElementOrder(LHS, Mask);
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(SVI, V);
|
2013-05-31 08:59:42 +08:00
|
|
|
}
|
|
|
|
|
2015-02-26 06:30:51 +08:00
|
|
|
// SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
|
|
|
|
// a non-vector type. We can instead bitcast the original vector followed by
|
|
|
|
// an extract of the desired element:
|
|
|
|
//
|
|
|
|
// %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
|
|
|
|
// <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
|
|
// %1 = bitcast <4 x i8> %sroa to i32
|
|
|
|
// Becomes:
|
|
|
|
// %bc = bitcast <16 x i8> %in to <4 x i32>
|
|
|
|
// %ext = extractelement <4 x i32> %bc, i32 0
|
|
|
|
//
|
|
|
|
// If the shuffle is extracting a contiguous range of values from the input
|
|
|
|
// vector then each use which is a bitcast of the extracted size can be
|
|
|
|
// replaced. This will work if the vector types are compatible, and the begin
|
|
|
|
// index is aligned to a value in the casted vector type. If the begin index
|
|
|
|
// isn't aligned then we can shuffle the original vector (keeping the same
|
|
|
|
// vector type) before extracting.
|
|
|
|
//
|
|
|
|
// This code will bail out if the target type is fundamentally incompatible
|
|
|
|
// with vectors of the source type.
|
|
|
|
//
|
|
|
|
// Example of <16 x i8>, target type i32:
|
|
|
|
// Index range [4,8): v-----------v Will work.
|
|
|
|
// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
|
|
|
|
// <16 x i8>: | | | | | | | | | | | | | | | | |
|
|
|
|
// <4 x i32>: | | | | |
|
|
|
|
// +-----------+-----------+-----------+-----------+
|
|
|
|
// Index range [6,10): ^-----------^ Needs an extra shuffle.
|
|
|
|
// Target type i40: ^--------------^ Won't work, bail.
|
2019-03-30 00:49:38 +08:00
|
|
|
bool MadeChange = false;
|
2015-02-26 06:30:51 +08:00
|
|
|
if (isShuffleExtractingFromLHS(SVI, Mask)) {
|
|
|
|
Value *V = LHS;
|
|
|
|
unsigned MaskElems = Mask.size();
|
|
|
|
VectorType *SrcTy = cast<VectorType>(V->getType());
|
2020-04-15 04:46:27 +08:00
|
|
|
unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize();
|
2015-04-04 04:18:40 +08:00
|
|
|
unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
|
2015-02-26 06:30:51 +08:00
|
|
|
assert(SrcElemBitWidth && "vector elements must have a bitwidth");
|
|
|
|
unsigned SrcNumElems = SrcTy->getNumElements();
|
|
|
|
SmallVector<BitCastInst *, 8> BCs;
|
|
|
|
DenseMap<Type *, Value *> NewBCs;
|
|
|
|
for (User *U : SVI.users())
|
|
|
|
if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
|
|
|
|
if (!BC->use_empty())
|
|
|
|
// Only visit bitcasts that weren't previously handled.
|
|
|
|
BCs.push_back(BC);
|
|
|
|
for (BitCastInst *BC : BCs) {
|
2017-02-17 15:36:03 +08:00
|
|
|
unsigned BegIdx = Mask.front();
|
2015-02-26 06:30:51 +08:00
|
|
|
Type *TgtTy = BC->getDestTy();
|
2015-04-04 04:18:40 +08:00
|
|
|
unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
|
2015-02-26 06:30:51 +08:00
|
|
|
if (!TgtElemBitWidth)
|
|
|
|
continue;
|
|
|
|
unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
|
|
|
|
bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
|
|
|
|
bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
|
|
|
|
if (!VecBitWidthsEqual)
|
|
|
|
continue;
|
|
|
|
if (!VectorType::isValidElementType(TgtTy))
|
|
|
|
continue;
|
2020-05-30 06:24:15 +08:00
|
|
|
auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
|
2015-02-26 06:30:51 +08:00
|
|
|
if (!BegIsAligned) {
|
|
|
|
// Shuffle the input so [0,NumElements) contains the output, and
|
|
|
|
// [NumElems,SrcNumElems) is undef.
|
2020-04-15 18:41:54 +08:00
|
|
|
SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
|
2015-02-26 06:30:51 +08:00
|
|
|
for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
|
2020-04-15 18:41:54 +08:00
|
|
|
ShuffleMask[I] = Idx;
|
2017-07-08 07:16:26 +08:00
|
|
|
V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
|
2020-04-15 18:41:54 +08:00
|
|
|
ShuffleMask,
|
2017-07-08 07:16:26 +08:00
|
|
|
SVI.getName() + ".extract");
|
2015-02-26 06:30:51 +08:00
|
|
|
BegIdx = 0;
|
|
|
|
}
|
|
|
|
unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
|
|
|
|
assert(SrcElemsPerTgtElem);
|
|
|
|
BegIdx /= SrcElemsPerTgtElem;
|
|
|
|
bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
|
|
|
|
auto *NewBC =
|
|
|
|
BCAlreadyExists
|
|
|
|
? NewBCs[CastSrcTy]
|
2017-07-08 07:16:26 +08:00
|
|
|
: Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
|
2015-02-26 06:30:51 +08:00
|
|
|
if (!BCAlreadyExists)
|
|
|
|
NewBCs[CastSrcTy] = NewBC;
|
2017-07-08 07:16:26 +08:00
|
|
|
auto *Ext = Builder.CreateExtractElement(
|
2015-02-26 06:30:51 +08:00
|
|
|
NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
|
|
|
|
// The shufflevector isn't being replaced: the bitcast that used it
|
|
|
|
// is. InstCombine will visit the newly-created instructions.
|
2016-02-02 06:23:39 +08:00
|
|
|
replaceInstUsesWith(*BC, Ext);
|
2015-02-26 06:30:51 +08:00
|
|
|
MadeChange = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-08-18 06:55:27 +08:00
|
|
|
// If the LHS is a shufflevector itself, see if we can combine it with this
|
2011-10-22 03:06:29 +08:00
|
|
|
// one without producing an unusual shuffle.
|
|
|
|
// Cases that might be simplified:
|
|
|
|
// 1.
|
|
|
|
// x1=shuffle(v1,v2,mask1)
|
|
|
|
// x=shuffle(x1,undef,mask)
|
|
|
|
// ==>
|
|
|
|
// x=shuffle(v1,undef,newMask)
|
|
|
|
// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
|
|
|
|
// 2.
|
|
|
|
// x1=shuffle(v1,undef,mask1)
|
|
|
|
// x=shuffle(x1,x2,mask)
|
|
|
|
// where v1.size() == mask1.size()
|
|
|
|
// ==>
|
|
|
|
// x=shuffle(v1,x2,newMask)
|
|
|
|
// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
|
|
|
|
// 3.
|
|
|
|
// x2=shuffle(v2,undef,mask2)
|
|
|
|
// x=shuffle(x1,x2,mask)
|
|
|
|
// where v2.size() == mask2.size()
|
|
|
|
// ==>
|
|
|
|
// x=shuffle(x1,v2,newMask)
|
|
|
|
// newMask[i] = (mask[i] < x1.size())
|
|
|
|
// ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
|
|
|
|
// 4.
|
|
|
|
// x1=shuffle(v1,undef,mask1)
|
|
|
|
// x2=shuffle(v2,undef,mask2)
|
|
|
|
// x=shuffle(x1,x2,mask)
|
|
|
|
// where v1.size() == v2.size()
|
|
|
|
// ==>
|
|
|
|
// x=shuffle(v1,v2,newMask)
|
|
|
|
// newMask[i] = (mask[i] < x1.size())
|
|
|
|
// ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
|
|
|
|
//
|
|
|
|
// Here we are really conservative:
|
2010-08-18 06:55:27 +08:00
|
|
|
// we are absolutely afraid of producing a shuffle mask not in the input
|
|
|
|
// program, because the code gen may not be smart enough to turn a merged
|
|
|
|
// shuffle into two specific shuffles: it may produce worse code. As such,
|
2013-05-01 08:25:27 +08:00
|
|
|
// we only merge two shuffles if the result is either a splat or one of the
|
|
|
|
// input shuffle masks. In this case, merging the shuffles just removes
|
|
|
|
// one instruction, which we know is safe. This is good for things like
|
2011-10-22 03:06:29 +08:00
|
|
|
// turning: (splat(splat)) -> splat, or
|
|
|
|
// merge(V[0..n], V[n+1..2n]) -> V[0..2n]
|
|
|
|
ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
|
|
|
|
ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
|
|
|
|
if (LHSShuffle)
|
|
|
|
if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
|
2014-04-25 13:29:35 +08:00
|
|
|
LHSShuffle = nullptr;
|
2011-10-22 03:06:29 +08:00
|
|
|
if (RHSShuffle)
|
|
|
|
if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
|
2014-04-25 13:29:35 +08:00
|
|
|
RHSShuffle = nullptr;
|
2011-10-22 03:06:29 +08:00
|
|
|
if (!LHSShuffle && !RHSShuffle)
|
2014-04-25 13:29:35 +08:00
|
|
|
return MadeChange ? &SVI : nullptr;
|
2011-10-22 03:06:29 +08:00
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
Value* LHSOp0 = nullptr;
|
|
|
|
Value* LHSOp1 = nullptr;
|
|
|
|
Value* RHSOp0 = nullptr;
|
2011-10-22 03:06:29 +08:00
|
|
|
unsigned LHSOp0Width = 0;
|
|
|
|
unsigned RHSOp0Width = 0;
|
|
|
|
if (LHSShuffle) {
|
|
|
|
LHSOp0 = LHSShuffle->getOperand(0);
|
|
|
|
LHSOp1 = LHSShuffle->getOperand(1);
|
2020-04-09 01:42:22 +08:00
|
|
|
LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
|
2011-10-22 03:06:29 +08:00
|
|
|
}
|
|
|
|
if (RHSShuffle) {
|
|
|
|
RHSOp0 = RHSShuffle->getOperand(0);
|
2020-04-09 01:42:22 +08:00
|
|
|
RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
|
2011-10-22 03:06:29 +08:00
|
|
|
}
|
|
|
|
Value* newLHS = LHS;
|
|
|
|
Value* newRHS = RHS;
|
|
|
|
if (LHSShuffle) {
|
|
|
|
// case 1
|
2010-08-18 06:55:27 +08:00
|
|
|
if (isa<UndefValue>(RHS)) {
|
2011-10-22 03:06:29 +08:00
|
|
|
newLHS = LHSOp0;
|
|
|
|
newRHS = LHSOp1;
|
|
|
|
}
|
|
|
|
// case 2 or 4
|
|
|
|
else if (LHSOp0Width == LHSWidth) {
|
|
|
|
newLHS = LHSOp0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// case 3 or 4
|
|
|
|
if (RHSShuffle && RHSOp0Width == LHSWidth) {
|
|
|
|
newRHS = RHSOp0;
|
|
|
|
}
|
|
|
|
// case 4
|
|
|
|
if (LHSOp0 == RHSOp0) {
|
|
|
|
newLHS = LHSOp0;
|
2014-04-25 13:29:35 +08:00
|
|
|
newRHS = nullptr;
|
2011-10-22 03:06:29 +08:00
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2011-10-22 03:06:29 +08:00
|
|
|
if (newLHS == LHS && newRHS == RHS)
|
2014-04-25 13:29:35 +08:00
|
|
|
return MadeChange ? &SVI : nullptr;
|
2011-10-22 03:06:29 +08:00
|
|
|
|
2020-04-01 04:08:59 +08:00
|
|
|
ArrayRef<int> LHSMask;
|
|
|
|
ArrayRef<int> RHSMask;
|
2012-01-26 08:42:34 +08:00
|
|
|
if (newLHS != LHS)
|
|
|
|
LHSMask = LHSShuffle->getShuffleMask();
|
|
|
|
if (RHSShuffle && newRHS != RHS)
|
|
|
|
RHSMask = RHSShuffle->getShuffleMask();
|
|
|
|
|
2011-10-22 03:06:29 +08:00
|
|
|
unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
|
|
|
|
SmallVector<int, 16> newMask;
|
|
|
|
bool isSplat = true;
|
|
|
|
int SplatElt = -1;
|
|
|
|
// Create a new mask for the new ShuffleVectorInst so that the new
|
|
|
|
// ShuffleVectorInst is equivalent to the original one.
|
|
|
|
for (unsigned i = 0; i < VWidth; ++i) {
|
|
|
|
int eltMask;
|
2013-01-18 13:30:07 +08:00
|
|
|
if (Mask[i] < 0) {
|
2011-10-22 03:06:29 +08:00
|
|
|
// This element is an undef value.
|
|
|
|
eltMask = -1;
|
|
|
|
} else if (Mask[i] < (int)LHSWidth) {
|
|
|
|
// This element is from left hand side vector operand.
|
2013-01-18 13:09:16 +08:00
|
|
|
//
|
2011-10-22 03:06:29 +08:00
|
|
|
// If LHS is going to be replaced (case 1, 2, or 4), calculate the
|
|
|
|
// new mask value for the element.
|
|
|
|
if (newLHS != LHS) {
|
|
|
|
eltMask = LHSMask[Mask[i]];
|
|
|
|
// If the value selected is an undef value, explicitly specify it
|
|
|
|
// with a -1 mask value.
|
|
|
|
if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
|
|
|
|
eltMask = -1;
|
2013-01-18 13:09:16 +08:00
|
|
|
} else
|
2011-10-22 03:06:29 +08:00
|
|
|
eltMask = Mask[i];
|
|
|
|
} else {
|
|
|
|
// This element is from right hand side vector operand
|
|
|
|
//
|
|
|
|
// If the value selected is an undef value, explicitly specify it
|
|
|
|
// with a -1 mask value. (case 1)
|
|
|
|
if (isa<UndefValue>(RHS))
|
|
|
|
eltMask = -1;
|
|
|
|
// If RHS is going to be replaced (case 3 or 4), calculate the
|
|
|
|
// new mask value for the element.
|
|
|
|
else if (newRHS != RHS) {
|
|
|
|
eltMask = RHSMask[Mask[i]-LHSWidth];
|
|
|
|
// If the value selected is an undef value, explicitly specify it
|
|
|
|
// with a -1 mask value.
|
|
|
|
if (eltMask >= (int)RHSOp0Width) {
|
|
|
|
assert(isa<UndefValue>(RHSShuffle->getOperand(1))
|
|
|
|
&& "should have been check above");
|
|
|
|
eltMask = -1;
|
2010-08-13 08:17:53 +08:00
|
|
|
}
|
2013-01-18 13:09:16 +08:00
|
|
|
} else
|
2011-10-22 03:06:29 +08:00
|
|
|
eltMask = Mask[i]-LHSWidth;
|
|
|
|
|
|
|
|
// If LHS's width is changed, shift the mask value accordingly.
|
2017-10-25 05:24:53 +08:00
|
|
|
// If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
|
[InstCombine] Teach InstCombine how to handle an obfuscated splat.
An obfuscated splat is where the frontend poorly generates code for a splat
using several different shuffles to create the splat, i.e.,
%A = load <4 x float>* %in_ptr, align 16
%B = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
%C = shufflevector <4 x float> %B, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 4, i32 undef>
%D = shufflevector <4 x float> %C, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
llvm-svn: 166061
2012-10-17 05:29:38 +08:00
|
|
|
// references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
|
|
|
|
// If newRHS == newLHS, we want to remap any references from newRHS to
|
|
|
|
// newLHS so that we can properly identify splats that may occur due to
|
2014-01-25 01:20:08 +08:00
|
|
|
// obfuscation across the two vectors.
|
2014-04-25 13:29:35 +08:00
|
|
|
if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
|
2011-10-22 03:06:29 +08:00
|
|
|
eltMask += newLHSWidth;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if this could still be a splat.
|
|
|
|
if (eltMask >= 0) {
|
|
|
|
if (SplatElt >= 0 && SplatElt != eltMask)
|
|
|
|
isSplat = false;
|
|
|
|
SplatElt = eltMask;
|
|
|
|
}
|
|
|
|
|
|
|
|
newMask.push_back(eltMask);
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the result mask is equal to one of the original shuffle masks,
|
2013-05-01 08:25:27 +08:00
|
|
|
// or is a splat, do the replacement.
|
|
|
|
if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
|
2011-10-22 03:06:29 +08:00
|
|
|
SmallVector<Constant*, 16> Elts;
|
|
|
|
for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
|
|
|
|
if (newMask[i] < 0) {
|
|
|
|
Elts.push_back(UndefValue::get(Int32Ty));
|
|
|
|
} else {
|
|
|
|
Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
|
|
|
|
}
|
2010-08-13 08:17:53 +08:00
|
|
|
}
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!newRHS)
|
2011-10-22 03:06:29 +08:00
|
|
|
newRHS = UndefValue::get(newLHS->getType());
|
|
|
|
return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
|
2010-08-13 08:17:53 +08:00
|
|
|
}
|
2010-10-30 06:20:43 +08:00
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
return MadeChange ? &SVI : nullptr;
|
2010-01-05 13:36:20 +08:00
|
|
|
}
|