Reapply: [SVE] Fix bug in simplification of scalable vector instructions
This reverts commit a05441038a, reapplying commit 31574d38ac.
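
Context for the diff below: a scalable vector type such as <vscale x 4 x i32> contains 4 * vscale elements, where vscale is a positive integer that is fixed for a given machine but unknown at compile time. The patch therefore threads LLVM's ElementCount type through code that previously assumed a plain unsigned element count. As a minimal sketch, the standalone program below models the {Min, Scalable} semantics this diff relies on; the struct is a simplified stand-in for llvm::ElementCount written for illustration, not the real definition from the LLVM headers.

#include <cassert>

// Simplified stand-in for llvm::ElementCount, for illustration only.
struct ElementCount {
  unsigned Min;  // minimum number of elements
  bool Scalable; // true for <vscale x Min x Ty>, false for <Min x Ty>
  ElementCount(unsigned Min, bool Scalable) : Min(Min), Scalable(Scalable) {}
  bool operator==(const ElementCount &RHS) const {
    return Min == RHS.Min && Scalable == RHS.Scalable;
  }
  bool operator!=(const ElementCount &RHS) const { return !(*this == RHS); }
};

int main() {
  ElementCount Fixed(4, false); // <4 x i32>: exactly 4 elements
  ElementCount Scaled(4, true); // <vscale x 4 x i32>: 4 * vscale elements
  assert(Fixed != Scaled);      // same Min, but the types are not equivalent
  // Only the fixed count permits element-by-element reasoning; the scalable
  // one does not, which is exactly what the guards in this patch enforce.
  return 0;
}
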
@@ -1060,13 +1060,13 @@ public:
                            Ptr->getType()->getPointerAddressSpace());
     // Vector GEP
     if (Ptr->getType()->isVectorTy()) {
-      unsigned NumElem = Ptr->getType()->getVectorNumElements();
-      return VectorType::get(PtrTy, NumElem);
+      ElementCount EltCount = Ptr->getType()->getVectorElementCount();
+      return VectorType::get(PtrTy, EltCount);
     }
     for (Value *Index : IdxList)
       if (Index->getType()->isVectorTy()) {
-        unsigned NumElem = Index->getType()->getVectorNumElements();
-        return VectorType::get(PtrTy, NumElem);
+        ElementCount EltCount = Index->getType()->getVectorElementCount();
+        return VectorType::get(PtrTy, EltCount);
       }
     // Scalar GEP
     return PtrTy;

@@ -4074,9 +4074,9 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
   Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1));
   Type *GEPTy = PointerType::get(LastType, AS);
   if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
-    GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+    GEPTy = VectorType::get(GEPTy, VT->getElementCount());
   else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType()))
-    GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+    GEPTy = VectorType::get(GEPTy, VT->getElementCount());

   if (isa<UndefValue>(Ops[0]))
     return UndefValue::get(GEPTy);

@@ -4445,52 +4445,66 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
     return UndefValue::get(RetTy);

   Type *InVecTy = Op0->getType();
-  unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
-  unsigned InVecNumElts = InVecTy->getVectorNumElements();
+  ElementCount MaskEltCount = Mask->getType()->getVectorElementCount();
+  ElementCount InVecEltCount = InVecTy->getVectorElementCount();
+
+  assert(MaskEltCount.Scalable == InVecEltCount.Scalable &&
+         "vscale mismatch between input vector and mask");
+
+  bool Scalable = MaskEltCount.Scalable;

   SmallVector<int, 32> Indices;
-  ShuffleVectorInst::getShuffleMask(Mask, Indices);
-  assert(MaskNumElts == Indices.size() &&
-         "Size of Indices not same as number of mask elements?");
-
-  // Canonicalization: If mask does not select elements from an input vector,
-  // replace that input vector with undef.
-  bool MaskSelects0 = false, MaskSelects1 = false;
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
-    if (Indices[i] == -1)
-      continue;
-    if ((unsigned)Indices[i] < InVecNumElts)
-      MaskSelects0 = true;
-    else
-      MaskSelects1 = true;
+  if (!Scalable) {
+    ShuffleVectorInst::getShuffleMask(Mask, Indices);
+    assert(MaskEltCount.Min == Indices.size() &&
+           "Size of Indices not same as number of mask elements?");
+  }
+
+  if (!Scalable) {
+    // Canonicalization: If mask does not select elements from an input
+    // vector, replace that input vector with undef.
+    bool MaskSelects0 = false, MaskSelects1 = false;
+    for (unsigned i = 0; i != MaskEltCount.Min; ++i) {
+      if (Indices[i] == -1)
+        continue;
+      if ((unsigned)Indices[i] < InVecEltCount.Min)
+        MaskSelects0 = true;
+      else
+        MaskSelects1 = true;
+    }
+    if (!MaskSelects0)
+      Op0 = UndefValue::get(InVecTy);
+    if (!MaskSelects1)
+      Op1 = UndefValue::get(InVecTy);
   }
-  if (!MaskSelects0)
-    Op0 = UndefValue::get(InVecTy);
-  if (!MaskSelects1)
-    Op1 = UndefValue::get(InVecTy);

   auto *Op0Const = dyn_cast<Constant>(Op0);
   auto *Op1Const = dyn_cast<Constant>(Op1);

-  // If all operands are constant, constant fold the shuffle.
-  if (Op0Const && Op1Const)
+  // If all operands are constant, constant fold the shuffle. This
+  // transformation depends on the value of the mask, which is not known at
+  // compile time for scalable vectors.
+  if (!Scalable && Op0Const && Op1Const)
     return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask);

   // Canonicalization: if only one input vector is constant, it shall be the
-  // second one.
-  if (Op0Const && !Op1Const) {
+  // second one. This transformation depends on the value of the mask, which
+  // is not known at compile time for scalable vectors.
+  if (!Scalable && Op0Const && !Op1Const) {
     std::swap(Op0, Op1);
-    ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts);
+    ShuffleVectorInst::commuteShuffleMask(Indices, InVecEltCount.Min);
   }

   // A splat of an inserted scalar constant becomes a vector constant:
   // shuf (inselt ?, C, IndexC), undef, <IndexC, IndexC...> --> <C, C...>
   // NOTE: We may have commuted above, so analyze the updated Indices, not the
   // original mask constant.
+  // NOTE: This transformation depends on the value of the mask, which is not
+  // known at compile time for scalable vectors.
   Constant *C;
   ConstantInt *IndexC;
-  if (match(Op0, m_InsertElement(m_Value(), m_Constant(C),
-                                 m_ConstantInt(IndexC)))) {
+  if (!Scalable && match(Op0, m_InsertElement(m_Value(), m_Constant(C),
+                                              m_ConstantInt(IndexC)))) {
     // Match a splat shuffle mask of the insert index allowing undef elements.
     int InsertIndex = IndexC->getZExtValue();
     if (all_of(Indices, [InsertIndex](int MaskElt) {

@@ -4499,8 +4513,8 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
       assert(isa<UndefValue>(Op1) && "Expected undef operand 1 for splat");

       // Shuffle mask undefs become undefined constant result elements.
-      SmallVector<Constant *, 16> VecC(MaskNumElts, C);
-      for (unsigned i = 0; i != MaskNumElts; ++i)
+      SmallVector<Constant *, 16> VecC(MaskEltCount.Min, C);
+      for (unsigned i = 0; i != MaskEltCount.Min; ++i)
         if (Indices[i] == -1)
           VecC[i] = UndefValue::get(C->getType());
       return ConstantVector::get(VecC);

@@ -4514,6 +4528,11 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
       OpShuf->getMask()->getSplatValue())
     return Op0;

+  // All remaining transformations depend on the value of the mask, which is
+  // not known at compile time for scalable vectors.
+  if (Scalable)
+    return nullptr;
+
   // Don't fold a shuffle with undef mask elements. This may get folded in a
   // better way using demanded bits or other analysis.
   // TODO: Should we allow this?

@@ -4525,7 +4544,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
   // shuffle. This handles simple identity shuffles as well as chains of
   // shuffles that may widen/narrow and/or move elements across lanes and back.
   Value *RootVec = nullptr;
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
+  for (unsigned i = 0; i != MaskEltCount.Min; ++i) {
     // Note that recursion is limited for each vector element, so if any element
     // exceeds the limit, this will fail to simplify.
     RootVec =

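Every shuffle fold touched above follows the same pattern: reading concrete mask elements via getShuffleMask is only legal for fixed-length masks, so each value-dependent transformation is now gated on !Scalable. Below is a minimal caller-side sketch of that pattern, using only APIs that appear in this diff; the helper name tryGetMaskIndices is hypothetical, not part of the patch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: fetch the concrete mask indices only when that is
// legal, i.e. when the mask is fixed-length. For a scalable mask the element
// values are unknown until runtime, so the caller must skip any fold that
// would inspect them; this is the same bail-out the patch adds.
static bool tryGetMaskIndices(const Constant *Mask,
                              SmallVectorImpl<int> &Indices) {
  if (Mask->getType()->getVectorElementCount().Scalable)
    return false; // <vscale x N x ...>: no concrete indices exist
  ShuffleVectorInst::getShuffleMask(Mask, Indices);
  return true;
}
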
@@ -166,6 +166,11 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
 static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
                                    const APInt &DemandedElts,
                                    APInt &DemandedLHS, APInt &DemandedRHS) {
+  // The length of scalable vectors is unknown at compile time, thus we
+  // cannot check their values.
+  if (Shuf->getMask()->getType()->getVectorElementCount().Scalable)
+    return false;
+
   int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
   int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
   DemandedLHS = DemandedRHS = APInt::getNullValue(NumElts);

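To make the demanded-elements guard above concrete: for a fixed-length shuffle, each demanded element of the result demands exactly one element of the left or right source operand, as selected by the mask; for a scalable shuffle no such mapping can be computed, hence the early return false. Here is a standalone sketch with plain bitmasks standing in for APInt (the real LLVM version is more general):

#include <cassert>
#include <cstdint>
#include <vector>

// Mask convention: -1 is undef, [0, NumElts) selects the left operand,
// [NumElts, 2*NumElts) selects the right operand.
static void getShuffleDemandedElts(const std::vector<int> &Mask, int NumElts,
                                   uint64_t DemandedElts, uint64_t &DemandedLHS,
                                   uint64_t &DemandedRHS) {
  DemandedLHS = DemandedRHS = 0;
  for (size_t I = 0; I != Mask.size(); ++I) {
    if (!(DemandedElts & (1ull << I)) || Mask[I] == -1)
      continue; // result element not demanded, or undef mask element
    if (Mask[I] < NumElts)
      DemandedLHS |= 1ull << Mask[I];
    else
      DemandedRHS |= 1ull << (Mask[I] - NumElts);
  }
}

int main() {
  // shufflevector <4 x i32> %a, <4 x i32> %b, mask <0, 5, 2, 7>
  uint64_t LHS, RHS;
  getShuffleDemandedElts({0, 5, 2, 7}, 4, /*DemandedElts=*/0b1111, LHS, RHS);
  assert(LHS == 0b0101 && RHS == 0b1010); // elts 0,2 of %a; elts 1,3 of %b
  return 0;
}
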
@@ -7224,8 +7224,8 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
   bool AteExtraComma = false;
   // GEP returns a vector of pointers if at least one of the parameters is a
   // vector. All vector parameters should have the same vector width.
-  unsigned GEPWidth = BaseType->isVectorTy() ?
-    BaseType->getVectorNumElements() : 0;
+  ElementCount GEPWidth = BaseType->isVectorTy() ?
+    BaseType->getVectorElementCount() : ElementCount(0, false);

   while (EatIfPresent(lltok::comma)) {
     if (Lex.getKind() == lltok::MetadataVar) {

@@ -7237,8 +7237,8 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
       return Error(EltLoc, "getelementptr index must be an integer");

     if (Val->getType()->isVectorTy()) {
-      unsigned ValNumEl = Val->getType()->getVectorNumElements();
-      if (GEPWidth && GEPWidth != ValNumEl)
+      ElementCount ValNumEl = Val->getType()->getVectorElementCount();
+      if (GEPWidth != ElementCount(0, false) && GEPWidth != ValNumEl)
         return Error(EltLoc,
             "getelementptr vector index has a wrong number of elements");
       GEPWidth = ValNumEl;

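The comparison style above is worth noting: GEPWidth can no longer be tested for truthiness, so the sentinel "no vector operand seen yet" becomes ElementCount(0, false), and inequality compares both fields, which also rejects mixing <4 x i64> and <vscale x 4 x i64> indices in one GEP. A standalone sketch of the check, with std::pair as a simplified stand-in for ElementCount:

#include <cassert>
#include <utility>

// Simplified stand-in for llvm::ElementCount: {Min, Scalable}.
using ElementCount = std::pair<unsigned, bool>;

int main() {
  const ElementCount None{0, false}; // sentinel: no vector operand seen yet
  ElementCount GEPWidth = None;      // e.g. a scalar base pointer

  // The first vector index fixes the width: <vscale x 4 x i64>.
  ElementCount Idx{4, true};
  assert(!(GEPWidth != None && GEPWidth != Idx) && "width mismatch");
  GEPWidth = Idx;

  // A later fixed-length <4 x i64> index must now be rejected: the minimum
  // widths match, but the scalable flags differ.
  ElementCount Fixed{4, false};
  assert(GEPWidth != None && GEPWidth != Fixed);
  return 0;
}
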
@@ -863,12 +863,12 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
 Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
                                                      Constant *V2,
                                                      Constant *Mask) {
-  unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
+  ElementCount MaskEltCount = Mask->getType()->getVectorElementCount();
   Type *EltTy = V1->getType()->getVectorElementType();

   // Undefined shuffle mask -> undefined value.
   if (isa<UndefValue>(Mask))
-    return UndefValue::get(VectorType::get(EltTy, MaskNumElts));
+    return UndefValue::get(VectorType::get(EltTy, MaskEltCount));

   // Don't break the bitcode reader hack.
   if (isa<ConstantExpr>(Mask)) return nullptr;

@@ -879,6 +879,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
   if (ValTy->isScalable())
     return nullptr;

+  unsigned MaskNumElts = MaskEltCount.Min;
   unsigned SrcNumElts = V1->getType()->getVectorNumElements();

   // Loop over the shuffle mask, evaluating each element.

@@ -1887,6 +1887,8 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,

 int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) {
   assert(i < Mask->getType()->getVectorNumElements() && "Index out of range");
+  assert(!Mask->getType()->getVectorElementCount().Scalable &&
+         "Length of scalable vectors unknown at compile time");
   if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask))
     return CDS->getElementAsInteger(i);
   Constant *C = Mask->getAggregateElement(i);

@@ -1897,6 +1899,8 @@ int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) {

 void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
                                        SmallVectorImpl<int> &Result) {
+  assert(!Mask->getType()->getVectorElementCount().Scalable &&
+         "Length of scalable vectors unknown at compile time");
   unsigned NumElts = Mask->getType()->getVectorNumElements();

   if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) {

@@ -0,0 +1,32 @@
+; RUN: opt -early-cse -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; CHECK-LABEL: define <4 x i32*> @fixed_length_version_first() {
+; CHECK-NEXT: ret <4 x i32*> undef
+define <4 x i32*> @fixed_length_version_first() {
+  %ptr = getelementptr i32, <4 x i32*> undef, <4 x i64> undef
+  ret <4 x i32*> %ptr
+}
+
+; CHECK-LABEL: define <4 x <4 x i32>*> @fixed_length_version_second() {
+; CHECK-NEXT: ret <4 x <4 x i32>*> undef
+define <4 x <4 x i32>*> @fixed_length_version_second() {
+  %ptr = getelementptr <4 x i32>, <4 x i32>* undef, <4 x i64> undef
+  ret <4 x <4 x i32>*> %ptr
+}
+
+; CHECK-LABEL: define <vscale x 4 x i32*> @vscale_version_first() {
+; CHECK-NEXT: ret <vscale x 4 x i32*> undef
+define <vscale x 4 x i32*> @vscale_version_first() {
+  %ptr = getelementptr i32, <vscale x 4 x i32*> undef, <vscale x 4 x i64> undef
+  ret <vscale x 4 x i32*> %ptr
+}
+
+; CHECK-LABEL: define <vscale x 4 x <vscale x 4 x i32>*> @vscale_version_second() {
+; CHECK-NEXT: ret <vscale x 4 x <vscale x 4 x i32>*> undef
+define <vscale x 4 x <vscale x 4 x i32>*> @vscale_version_second() {
+  %ptr = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* undef, <vscale x 4 x i64> undef
+  ret <vscale x 4 x <vscale x 4 x i32>*> %ptr
+}

@@ -0,0 +1,41 @@
+; RUN: opt -early-cse -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; This test checks that SimplifyInstruction does not blow up in the face of
+; a scalable shufflevector. vscale is a constant value known only at runtime.
+; Therefore, it is not possible to know the concrete value of the mask, or
+; even its length, at compile time. Simplifications that depend on the value
+; of the mask cannot be performed.
+
+; Given that the value of the mask is unknown at compile time for scalable
+; vectors, very few simplifications will be done. Here, we want to see that
+; the instruction can be passed to SimplifyInstruction without crashing the
+; compiler. It happens to be the case that the input is also the result.
+
+; CHECK-LABEL: define <vscale x 8 x i1> @vscale_version()
+; CHECK-NEXT: %splatter = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+; CHECK-NEXT: %foo = shufflevector <vscale x 8 x i1> %splatter, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 8 x i1> %foo
+
+define <vscale x 8 x i1> @vscale_version() {
+  %splatter = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+  %foo = shufflevector <vscale x 8 x i1> %splatter,
+                       <vscale x 8 x i1> undef,
+                       <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x i1> %foo
+}
+
+; The non-scalable version should be optimized as normal.
+
+; CHECK-LABEL: define <8 x i1> @fixed_length_version() {
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+define <8 x i1> @fixed_length_version() {
+  %splatter = insertelement <8 x i1> undef, i1 true, i32 0
+  %foo = shufflevector <8 x i1> %splatter,
+                       <8 x i1> undef,
+                       <8 x i32> zeroinitializer
+  ret <8 x i1> %foo
+}
+