[RS4GC] Be slightly less conservative for gep vector_base, scalar_idx

After submitting https://reviews.llvm.org/D57138, I realized it was slightly more conservative than needed. The scalar indices don't appear to be a problem on a vector gep; we even had a test for that.

Differential Revision: https://reviews.llvm.org/D57161

llvm-svn: 352061
This commit is contained in:
Philip Reames 2019-01-24 16:34:00 +00:00
parent 68686a9f7a
commit 4d683ee7e3
2 changed files with 11 additions and 12 deletions

View File

@ -2611,21 +2611,20 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
continue;
unsigned VF = 0;
bool HasScalarOperand = false;
for (unsigned i = 0; i < I.getNumOperands(); i++)
if (I.getOperand(i)->getType()->isVectorTy())
if (I.getOperand(i)->getType()->isVectorTy()) {
assert(VF == 0 ||
VF == I.getOperand(i)->getType()->getVectorNumElements());
VF = I.getOperand(i)->getType()->getVectorNumElements();
else
HasScalarOperand = true;
}
if (HasScalarOperand && VF != 0) {
// It's the vector to scalar traversal through the pointer operand which
// confuses base pointer rewriting, so limit ourselves to that case.
if (!I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
IRBuilder<> B(&I);
for (unsigned i = 0; i < I.getNumOperands(); i++)
if (!I.getOperand(i)->getType()->isVectorTy()) {
auto *Splat = B.CreateVectorSplat(VF, I.getOperand(i));
I.setOperand(i, Splat);
MadeChange = true;
}
auto *Splat = B.CreateVectorSplat(VF, I.getOperand(0));
I.setOperand(0, Splat);
MadeChange = true;
}
}

View File

@ -253,7 +253,7 @@ define void @test11(<4 x i64 addrspace(1)*> %vec1) gc "statepoint-example" {
; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf{{.*}}<4 x i64 addrspace(1)*> %vec1)
; CHECK: %vec1.relocated = call coldcc <4 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v4p1i8
; CHECK: %vec1.relocated.casted = bitcast <4 x i8 addrspace(1)*> %vec1.relocated to <4 x i64 addrspace(1)*>
; CHECK: %vec2.remat = getelementptr i64, <4 x i64 addrspace(1)*> %vec1.relocated.casted, <4 x i32> <i32 1024, i32 1024, i32 1024, i32 1024>
; CHECK: %vec2.remat = getelementptr i64, <4 x i64 addrspace(1)*> %vec1.relocated.casted, i32 1024
; CHECK: call void @use_vec(<4 x i64 addrspace(1)*> %vec2.remat)
entry:
%vec2 = getelementptr i64, <4 x i64 addrspace(1)*> %vec1, i32 1024