[InstCombine] Handle vector gep with scalar argument in evaluateInDifferentElementOrder

Summary:
This fixes PR41270.

The recursive function evaluateInDifferentElementOrder expects to be called
on a vector Value, so when we call it on a vector GEP's arguments, we must
first check that the argument is indeed a vector.

Reviewers: reames, spatel

Reviewed By: spatel

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60058

llvm-svn: 357389
This commit is contained in:
Mikael Holmen 2019-04-01 14:10:10 +00:00
parent ebf90db084
commit 150a7ec2dc
2 changed files with 24 additions and 1 deletions

View File

@ -1171,7 +1171,14 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
SmallVector<Value*, 8> NewOps;
bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
Value *V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
Value *V;
// Recursively call evaluateInDifferentElementOrder on vector arguments
// as well. E.g. GetElementPtr may have scalar operands even if the
// return value is a vector, so we need to examine the operand type.
if (I->getOperand(i)->getType()->isVectorTy())
V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
else
V = I->getOperand(i);
NewOps.push_back(V);
NeedsRebuild |= (V != I->getOperand(i));
}

View File

@ -0,0 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s
; Regression test for PR41270. The getelementptr below uses a vector of
; pointers (a splat of %x) as its base but plain scalar i32 indices, so its
; operands mix vector and scalar values. Only lane 3 of the GEP result is
; extracted, and it is re-inserted into lane 0 of the returned vector.
define <4 x i16*> @PR41270([4 x i16]* %x) {
; CHECK-LABEL: @PR41270(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x [4 x i16]*> undef, [4 x i16]* [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i16], <4 x [4 x i16]*> [[TMP1]], i64 0, i64 3
; CHECK-NEXT: ret <4 x i16*> [[TMP2]]
;
%ins = insertelement <4 x [4 x i16]*> undef, [4 x i16]* %x, i32 0
%splat = shufflevector <4 x [4 x i16]*> %ins, <4 x [4 x i16]*> undef, <4 x i32> zeroinitializer
%t2 = getelementptr inbounds [4 x i16], <4 x [4 x i16]*> %splat, i32 0, i32 3
%t3 = extractelement <4 x i16*> %t2, i32 3
%ins2 = insertelement <4 x i16*> undef, i16* %t3, i32 0
ret <4 x i16*> %ins2
}