forked from OSchip/llvm-project
[LAA] Add recursive IR walker for forked pointers
This builds on the previous forked-pointers patch, which only accepted a single select as the pointer to check. A recursive function that walks the IR has been added; it searches for either a loop-invariant or an addrec SCEV. At present this handles only a single fork, so selects of selects, or a GEP with a select for both the base and the offset, will be rejected. There is also a recursion limit, with a CLI option to change it. Reviewed By: fhahn, david-arm Differential Revision: https://reviews.llvm.org/D108699
This commit is contained in:
parent
ca2e3ffbc1
commit
db8fcb2c25
|
@ -130,6 +130,11 @@ static cl::opt<bool> EnableForwardingConflictDetection(
|
|||
cl::desc("Enable conflict detection in loop-access analysis"),
|
||||
cl::init(true));
|
||||
|
||||
// Bounds the recursion in findForkedSCEVs() when walking back through the IR
// looking for a loop-invariant or addrec term; see -max-forked-scev-depth.
static cl::opt<unsigned> MaxForkedSCEVDepth(
    "max-forked-scev-depth", cl::Hidden,
    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
    cl::init(5));
|
||||
|
||||
bool VectorizerParams::isInterleaveForced() {
|
||||
return ::VectorizationInterleave.getNumOccurrences() > 0;
|
||||
}
|
||||
|
@ -778,6 +783,142 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
|
|||
}
|
||||
}
|
||||
|
||||
// Walk back through the IR for a pointer, looking for a select like the
|
||||
// following:
|
||||
//
|
||||
// %offset = select i1 %cmp, i64 %a, i64 %b
|
||||
// %addr = getelementptr double, double* %base, i64 %offset
|
||||
// %ld = load double, double* %addr, align 8
|
||||
//
|
||||
// We won't be able to form a single SCEVAddRecExpr from this since the
|
||||
// address for each loop iteration depends on %cmp. We could potentially
|
||||
// produce multiple valid SCEVAddRecExprs, though, and check all of them for
|
||||
// memory safety/aliasing if needed.
|
||||
//
|
||||
// If we encounter some IR we don't yet handle, or something obviously fine
|
||||
// like a constant, then we just add the SCEV for that term to the list passed
|
||||
// in by the caller. If we have a node that may potentially yield a valid
|
||||
// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
|
||||
// ourselves before adding to the list.
|
||||
static void
|
||||
findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
|
||||
SmallVectorImpl<std::pair<const SCEV *, bool>> &ScevList,
|
||||
unsigned Depth) {
|
||||
// If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
|
||||
// we've exceeded our limit on recursion, just return whatever we have
|
||||
// regardless of whether it can be used for a forked pointer or not, along
|
||||
// with an indication of whether it might be a poison or undef value.
|
||||
const SCEV *Scev = SE->getSCEV(Ptr);
|
||||
if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
|
||||
!isa<Instruction>(Ptr) || Depth == 0) {
|
||||
ScevList.push_back(
|
||||
std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
|
||||
return;
|
||||
}
|
||||
|
||||
Depth--;
|
||||
|
||||
auto UndefPoisonCheck = [](std::pair<const SCEV *, bool> S) -> bool {
|
||||
return S.second;
|
||||
};
|
||||
|
||||
Instruction *I = cast<Instruction>(Ptr);
|
||||
unsigned Opcode = I->getOpcode();
|
||||
switch (Opcode) {
|
||||
case Instruction::GetElementPtr: {
|
||||
GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
|
||||
Type *SourceTy = GEP->getSourceElementType();
|
||||
// We only handle base + single offset GEPs here for now.
|
||||
// Not dealing with preexisting gathers yet, so no vectors.
|
||||
if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
|
||||
ScevList.push_back(
|
||||
std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP)));
|
||||
break;
|
||||
}
|
||||
SmallVector<std::pair<const SCEV *, bool>, 2> BaseScevs;
|
||||
SmallVector<std::pair<const SCEV *, bool>, 2> OffsetScevs;
|
||||
findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
|
||||
findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
|
||||
|
||||
// See if we need to freeze our fork...
|
||||
bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
|
||||
any_of(OffsetScevs, UndefPoisonCheck);
|
||||
|
||||
// Check that we only have a single fork, on either the base or the offset.
|
||||
// Copy the SCEV across for the one without a fork in order to generate
|
||||
// the full SCEV for both sides of the GEP.
|
||||
if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
|
||||
BaseScevs.push_back(BaseScevs[0]);
|
||||
else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
|
||||
OffsetScevs.push_back(OffsetScevs[0]);
|
||||
else {
|
||||
ScevList.push_back(std::make_pair(Scev, NeedsFreeze));
|
||||
break;
|
||||
}
|
||||
|
||||
// Find the pointer type we need to extend to.
|
||||
Type *IntPtrTy = SE->getEffectiveSCEVType(
|
||||
SE->getSCEV(GEP->getPointerOperand())->getType());
|
||||
|
||||
// Find the size of the type being pointed to. We only have a single
|
||||
// index term (guarded above) so we don't need to index into arrays or
|
||||
// structures, just get the size of the scalar value.
|
||||
const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
|
||||
|
||||
// Scale up the offsets by the size of the type, then add to the bases.
|
||||
const SCEV *Scaled1 = SE->getMulExpr(
|
||||
Size, SE->getTruncateOrSignExtend(OffsetScevs[0].first, IntPtrTy));
|
||||
const SCEV *Scaled2 = SE->getMulExpr(
|
||||
Size, SE->getTruncateOrSignExtend(OffsetScevs[1].first, IntPtrTy));
|
||||
ScevList.push_back(std::make_pair(
|
||||
SE->getAddExpr(BaseScevs[0].first, Scaled1), NeedsFreeze));
|
||||
ScevList.push_back(std::make_pair(
|
||||
SE->getAddExpr(BaseScevs[1].first, Scaled2), NeedsFreeze));
|
||||
break;
|
||||
}
|
||||
case Instruction::Select: {
|
||||
SmallVector<std::pair<const SCEV *, bool>, 2> ChildScevs;
|
||||
// A select means we've found a forked pointer, but we currently only
|
||||
// support a single select per pointer so if there's another behind this
|
||||
// then we just bail out and return the generic SCEV.
|
||||
findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
|
||||
findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
|
||||
if (ChildScevs.size() == 2) {
|
||||
ScevList.push_back(ChildScevs[0]);
|
||||
ScevList.push_back(ChildScevs[1]);
|
||||
} else
|
||||
ScevList.push_back(
|
||||
std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// Just return the current SCEV if we haven't handled the instruction yet.
|
||||
LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
|
||||
ScevList.push_back(
|
||||
std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
|
||||
break;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Entry point for forked-pointer analysis: returns either the two candidate
// SCEVs for \p Ptr (when exactly one fork was found within the recursion
// budget), or a single-element list containing the usual symbolic-stride-
// rewritten SCEV. The bool in each pair indicates whether the value may be
// undef/poison and would need freezing before use in a runtime check.
static SmallVector<std::pair<const SCEV *, bool>>
findForkedPointer(PredicatedScalarEvolution &PSE,
                  const ValueToValueMap &StridesMap, Value *Ptr,
                  const Loop *L) {
  ScalarEvolution *SE = PSE.getSE();
  assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
  SmallVector<std::pair<const SCEV *, bool>, 2> Scevs;
  findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);

  // For now, we will only accept a forked pointer with two possible SCEVs.
  if (Scevs.size() == 2)
    return Scevs;

  return {
      std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)};
}
|
||||
|
||||
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
|
||||
MemAccessInfo Access, Type *AccessTy,
|
||||
const ValueToValueMap &StridesMap,
|
||||
|
@ -787,19 +928,8 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
|
|||
bool Assume) {
|
||||
Value *Ptr = Access.getPointer();
|
||||
|
||||
ScalarEvolution &SE = *PSE.getSE();
|
||||
SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs;
|
||||
auto *SI = dyn_cast<SelectInst>(Ptr);
|
||||
// Look through selects in the current loop.
|
||||
if (SI && !TheLoop->isLoopInvariant(SI)) {
|
||||
TranslatedPtrs = {
|
||||
std::make_pair(SE.getSCEV(SI->getOperand(1)),
|
||||
!isGuaranteedNotToBeUndefOrPoison(SI->getOperand(1))),
|
||||
std::make_pair(SE.getSCEV(SI->getOperand(2)),
|
||||
!isGuaranteedNotToBeUndefOrPoison(SI->getOperand(2)))};
|
||||
} else
|
||||
TranslatedPtrs = {
|
||||
std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)};
|
||||
SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs =
|
||||
findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
|
||||
|
||||
for (auto &P : TranslatedPtrs) {
|
||||
const SCEV *PtrExpr = P.first;
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: opt -disable-output -passes='print-access-info' %s 2>&1 | FileCheck %s
|
||||
; RUN: opt -disable-output -passes='print-access-info' -max-forked-scev-depth=2 %s 2>&1 | FileCheck -check-prefix=RECURSE %s
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
|
||||
|
@ -59,17 +60,59 @@ exit:
|
|||
}
|
||||
|
||||
; CHECK-LABEL: function 'forked_ptrs_different_base_same_offset':
|
||||
; CHECK-NEXT: for.body:
|
||||
; CHECK-NEXT: Report: cannot identify array bounds
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: for.body:
|
||||
; CHECK-NEXT: Memory dependences are safe with run-time checks
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Check 0:
|
||||
; CHECK-NEXT: Comparing group ([[G1:.+]]):
|
||||
; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv
|
||||
; CHECK-NEXT: Against group ([[G2:.+]]):
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
|
||||
; CHECK-NEXT: Check 1:
|
||||
; CHECK-NEXT: Comparing group ([[G1]]):
|
||||
; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv
|
||||
; CHECK-NEXT: Against group ([[G3:.+]]):
|
||||
; CHECK-NEXT: %.sink.in = getelementptr inbounds float, ptr %spec.select, i64 %indvars.iv
|
||||
; CHECK-NEXT: Check 2:
|
||||
; CHECK-NEXT: Comparing group ([[G1]]):
|
||||
; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv
|
||||
; CHECK-NEXT: Against group ([[G4:.+]]):
|
||||
; CHECK-NEXT: %.sink.in = getelementptr inbounds float, ptr %spec.select, i64 %indvars.iv
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Group [[G1]]:
|
||||
; CHECK-NEXT: (Low: %Dest High: (400 + %Dest))
|
||||
; CHECK-NEXT: Member: {%Dest,+,4}<nuw><%for.body>
|
||||
; CHECK-NEXT: Group [[G2]]:
|
||||
; CHECK-NEXT: (Low: %Preds High: (400 + %Preds))
|
||||
; CHECK-NEXT: Member: {%Preds,+,4}<nuw><%for.body>
|
||||
; CHECK-NEXT: Group [[G3]]:
|
||||
; CHECK-NEXT: (Low: %Base2 High: (400 + %Base2))
|
||||
; CHECK-NEXT: Member: {%Base2,+,4}<nw><%for.body>
|
||||
; CHECK-NEXT: Group [[G4]]:
|
||||
; CHECK-NEXT: (Low: %Base1 High: (400 + %Base1))
|
||||
; CHECK-NEXT: Member: {%Base1,+,4}<nw><%for.body>
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
|
||||
; CHECK-NEXT: SCEV assumptions:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Expressions re-written:
|
||||
|
||||
;; We have a limit on the recursion depth for finding a loop invariant or
|
||||
;; addrec term; confirm we won't exceed that depth by forcing a lower
|
||||
;; limit via -max-forked-scev-depth=2
|
||||
; RECURSE-LABEL: Loop access info in function 'forked_ptrs_same_base_different_offset':
|
||||
; RECURSE-NEXT: for.body:
|
||||
; RECURSE-NEXT: Report: cannot identify array bounds
|
||||
; RECURSE-NEXT: Dependences:
|
||||
; RECURSE-NEXT: Run-time memory checks:
|
||||
; RECURSE-NEXT: Grouped accesses:
|
||||
; RECURSE-EMPTY:
|
||||
; RECURSE-NEXT: Non vectorizable stores to invariant address were not found in loop.
|
||||
; RECURSE-NEXT: SCEV assumptions:
|
||||
; RECURSE-EMPTY:
|
||||
; RECURSE-NEXT: Expressions re-written:
|
||||
|
||||
;;;; Derived from the following C code
|
||||
;; void forked_ptrs_different_base_same_offset(float *A, float *B, float *C, int *D) {
|
||||
;; for (int i=0; i<100; i++) {
|
||||
|
@ -104,11 +147,38 @@ for.body:
|
|||
}
|
||||
|
||||
; CHECK-LABEL: function 'forked_ptrs_different_base_same_offset_possible_poison':
|
||||
; CHECK-NEXT: for.body:
|
||||
; CHECK-NEXT: Report: cannot identify array bounds
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: for.body:
|
||||
; CHECK-NEXT: Memory dependences are safe with run-time checks
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Check 0:
|
||||
; CHECK-NEXT: Comparing group ([[G1:.+]]):
|
||||
; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv
|
||||
; CHECK-NEXT: Against group ([[G2:.+]]):
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
|
||||
; CHECK-NEXT: Check 1:
|
||||
; CHECK-NEXT: Comparing group ([[G1]]):
|
||||
; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv
|
||||
; CHECK-NEXT: Against group ([[G3:.+]]):
|
||||
; CHECK-NEXT: %.sink.in = getelementptr inbounds float, ptr %spec.select, i64 %indvars.iv
|
||||
; CHECK-NEXT: Check 2:
|
||||
; CHECK-NEXT: Comparing group ([[G1]]):
|
||||
; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv
|
||||
; CHECK-NEXT: Against group ([[G4:.+]]):
|
||||
; CHECK-NEXT: %.sink.in = getelementptr inbounds float, ptr %spec.select, i64 %indvars.iv
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Group [[G1]]:
|
||||
; CHECK-NEXT: (Low: %Dest High: (400 + %Dest))
|
||||
; CHECK-NEXT: Member: {%Dest,+,4}<nw><%for.body>
|
||||
; CHECK-NEXT: Group [[G2]]:
|
||||
; CHECK-NEXT: (Low: %Preds High: (400 + %Preds))
|
||||
; CHECK-NEXT: Member: {%Preds,+,4}<nuw><%for.body>
|
||||
; CHECK-NEXT: Group [[G3]]:
|
||||
; CHECK-NEXT: (Low: %Base2 High: (400 + %Base2))
|
||||
; CHECK-NEXT: Member: {%Base2,+,4}<nw><%for.body>
|
||||
; CHECK-NEXT: Group [[G4]]:
|
||||
; CHECK-NEXT: (Low: %Base1 High: (400 + %Base1))
|
||||
; CHECK-NEXT: Member: {%Base1,+,4}<nw><%for.body>
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
|
||||
; CHECK-NEXT: SCEV assumptions:
|
||||
|
|
|
@ -17,22 +17,82 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|||
define dso_local void @forked_ptrs_different_base_same_offset(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) {
|
||||
; CHECK-LABEL: @forked_ptrs_different_base_same_offset(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[BASE1_FR:%.*]] = freeze float* [[BASE1:%.*]]
|
||||
; CHECK-NEXT: [[BASE2_FR:%.*]] = freeze float* [[BASE2:%.*]]
|
||||
; CHECK-NEXT: [[DEST_FR:%.*]] = freeze float* [[DEST:%.*]]
|
||||
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
|
||||
; CHECK: vector.memcheck:
|
||||
; CHECK-NEXT: [[DEST1:%.*]] = ptrtoint float* [[DEST_FR]] to i64
|
||||
; CHECK-NEXT: [[PREDS2:%.*]] = ptrtoint i32* [[PREDS:%.*]] to i64
|
||||
; CHECK-NEXT: [[BASE23:%.*]] = ptrtoint float* [[BASE2_FR]] to i64
|
||||
; CHECK-NEXT: [[BASE15:%.*]] = ptrtoint float* [[BASE1_FR]] to i64
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[DEST1]], [[PREDS2]]
|
||||
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[DEST1]], [[BASE23]]
|
||||
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 16
|
||||
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DEST1]], [[BASE15]]
|
||||
; CHECK-NEXT: [[DIFF_CHECK7:%.*]] = icmp ult i64 [[TMP2]], 16
|
||||
; CHECK-NEXT: [[CONFLICT_RDX8:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK7]]
|
||||
; CHECK-NEXT: br i1 [[CONFLICT_RDX8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float*> poison, float* [[BASE2_FR]], i64 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float*> [[BROADCAST_SPLATINSERT]], <4 x float*> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x float*> poison, float* [[BASE1_FR]], i64 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x float*> [[BROADCAST_SPLATINSERT9]], <4 x float*> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x float*> [[BROADCAST_SPLAT]], <4 x float*> [[BROADCAST_SPLAT10]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float*> [[TMP9]], i64 0
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP10]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float*> [[TMP9]], i64 1
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float*> [[TMP9]], i64 2
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float*> [[TMP9]], i64 3
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[TMP11]], align 4
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP13]], align 4
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP15]], align 4
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP17]], align 4
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x float> poison, float [[TMP18]], i64 0
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP22]], float [[TMP19]], i64 1
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP20]], i64 2
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP21]], i64 3
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[DEST_FR]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <4 x float>*
|
||||
; CHECK-NEXT: store <4 x float> [[TMP25]], <4 x float>* [[TMP27]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
|
||||
; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
|
||||
; CHECK: scalar.ph:
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.cond.cleanup:
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS:%.*]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
||||
; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
|
||||
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], float* [[BASE2:%.*]], float* [[BASE1:%.*]]
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
||||
; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP29]], 0
|
||||
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], float* [[BASE2_FR]], float* [[BASE1_FR]]
|
||||
; CHECK-NEXT: [[DOTSINK_IN:%.*]] = getelementptr inbounds float, float* [[SPEC_SELECT]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[DOTSINK:%.*]] = load float, float* [[DOTSINK_IN]], align 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: store float [[DOTSINK]], float* [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[DEST_FR]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: store float [[DOTSINK]], float* [[TMP30]], align 4
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
|
||||
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
|
Loading…
Reference in New Issue