forked from OSchip/llvm-project
[SLP] Explicit track required stacksave/alloca dependency
The semantics of an inalloca alloca instruction requires that it not be reordered with a preceding stacksave intrinsic call. Unfortunately, there's no def/use edge or memory dependence edge. (The memory point is slightly subtle, but in general a new allocation can't alias with a call which executes strictly before it comes into existence.)
I'd tried to tackle this same case previously in 689babdf6
, but the fix chosen there turned out to be incomplete. As such, this change contains a full revert of the first fix attempt.
This was noticed when investigating problems which surfaced with D118538, but this is definitely an existing bug. This time around, I managed to reduce a couple of additional cases, including one which was being actively miscompiled even without the new scheduling change. (See test diffs)
Compile time wise, we only spend extra time when seeing a stacksave (rare), and even then we walk the block at most once per schedule window extension. Likely a non-issue.
This commit is contained in:
parent
c1a31ee65b
commit
b7806c8b37
|
@ -4023,16 +4023,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
return;
|
||||
}
|
||||
|
||||
// Avoid attempting to schedule allocas; there are unmodeled dependencies
|
||||
// for "static" alloca status and for reordering with stacksave calls.
|
||||
for (Value *V : VL) {
|
||||
if (isa<AllocaInst>(V)) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to alloca.\n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
|
||||
if (SI->getValueOperand()->getType()->isVectorTy()) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
|
||||
|
@ -8086,6 +8076,33 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
|
|||
}
|
||||
}
|
||||
|
||||
// If we have an inalloca alloca instruction, it needs to be scheduled
|
||||
// after any preceding stacksave.
|
||||
if (match(BundleMember->Inst, m_Intrinsic<Intrinsic::stacksave>())) {
|
||||
for (Instruction *I = BundleMember->Inst->getNextNode();
|
||||
I != ScheduleEnd; I = I->getNextNode()) {
|
||||
if (match(I, m_Intrinsic<Intrinsic::stacksave>()))
|
||||
// Any allocas past here must be control dependent on I, and I
|
||||
// must be memory dependent on BundleMember->Inst.
|
||||
break;
|
||||
|
||||
if (!isa<AllocaInst>(I))
|
||||
continue;
|
||||
|
||||
// Add the dependency
|
||||
auto *DepDest = getScheduleData(I);
|
||||
assert(DepDest && "must be in schedule window");
|
||||
DepDest->ControlDependencies.push_back(BundleMember);
|
||||
BundleMember->Dependencies++;
|
||||
ScheduleData *DestBundle = DepDest->FirstInBundle;
|
||||
if (!DestBundle->IsScheduled)
|
||||
BundleMember->incrementUnscheduledDeps(1);
|
||||
if (!DestBundle->hasValidDependencies())
|
||||
WorkList.push_back(DestBundle);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Handle the memory dependencies (if any).
|
||||
ScheduleData *DepDest = BundleMember->NextLoadStore;
|
||||
if (!DepDest)
|
||||
|
@ -8180,10 +8197,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
|
|||
// For the real scheduling we use a more sophisticated ready-list: it is
|
||||
// sorted by the original instruction location. This lets the final schedule
|
||||
// be as close as possible to the original instruction order.
|
||||
// WARNING: This is required for correctness in the following case:
|
||||
// * We must prevent reordering of allocas with stacksave intrinsic calls.
|
||||
// We rely on two instructions which are both ready (per the subgraph) not
|
||||
// to be reordered.
|
||||
// WARNING: If changing this order causes a correctness issue, that means
|
||||
// there is some missing dependence edge in the schedule data graph.
|
||||
struct ScheduleDataCompare {
|
||||
bool operator()(ScheduleData *SD1, ScheduleData *SD2) const {
|
||||
return SD2->SchedulingPriority < SD1->SchedulingPriority;
|
||||
|
|
|
@ -89,22 +89,22 @@ define void @allocas_speculation(i8** %a, i8** %b, i8** %c) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; FIXME: This is wrong because we lift the alloca out of the region
|
||||
; We must be careful not to lift the inalloca alloc above the stacksave here.
|
||||
; We used to miscompile this example before adding explicit dependency handling
|
||||
; for stacksave.
|
||||
define void @stacksave(i8** %a, i8** %b, i8** %c) {
|
||||
; CHECK-LABEL: @stacksave(
|
||||
; CHECK-NEXT: [[V1:%.*]] = alloca i8, align 1
|
||||
; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8*> poison, i8* [[V1]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8*> [[TMP1]], i8* [[V2]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> <i32 1, i32 1>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8*> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: store i8* [[TMP4]], i8** [[A:%.*]], align 8
|
||||
; CHECK-NEXT: [[ADD1:%.*]] = getelementptr i8, i8* [[V1]], i32 1
|
||||
; CHECK-NEXT: store i8* [[ADD1]], i8** [[A:%.*]], align 8
|
||||
; CHECK-NEXT: [[STACK:%.*]] = call i8* @llvm.stacksave()
|
||||
; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1
|
||||
; CHECK-NEXT: call void @use(i8* inalloca(i8) [[V2]]) #[[ATTR4:[0-9]+]]
|
||||
; CHECK-NEXT: call void @llvm.stackrestore(i8* [[STACK]])
|
||||
; CHECK-NEXT: [[B2:%.*]] = getelementptr i8*, i8** [[B:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8** [[B]] to <2 x i8*>*
|
||||
; CHECK-NEXT: store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP5]], align 8
|
||||
; CHECK-NEXT: [[ADD2:%.*]] = getelementptr i8, i8* [[V2]], i32 1
|
||||
; CHECK-NEXT: store i8* [[ADD1]], i8** [[B:%.*]], align 8
|
||||
; CHECK-NEXT: [[B2:%.*]] = getelementptr i8*, i8** [[B]], i32 1
|
||||
; CHECK-NEXT: store i8* [[ADD2]], i8** [[B2]], align 8
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
|
||||
|
@ -295,15 +295,13 @@ define void @ham() #1 {
|
|||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[VAR32]] to <4 x i8*>*
|
||||
; CHECK-NEXT: store <4 x i8*> [[SHUFFLE]], <4 x i8*>* [[TMP2]], align 8
|
||||
; CHECK-NEXT: [[VAR36:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 4
|
||||
; CHECK-NEXT: store i8* [[VAR4]], i8** [[VAR36]], align 8
|
||||
; CHECK-NEXT: [[VAR37:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 5
|
||||
; CHECK-NEXT: [[VAR38:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 6
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i8*> poison, i8* [[VAR5]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i8*> [[TMP3]], i8* [[VAR5]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8** [[VAR37]] to <2 x i8*>*
|
||||
; CHECK-NEXT: store <2 x i8*> [[TMP4]], <2 x i8*>* [[TMP5]], align 8
|
||||
; CHECK-NEXT: [[VAR39:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 7
|
||||
; CHECK-NEXT: store i8* [[VAR5]], i8** [[VAR39]], align 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8*> [[TMP1]], i8* [[VAR5]], i32 1
|
||||
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i8*> [[TMP3]], <4 x i8*> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[VAR36]] to <4 x i8*>*
|
||||
; CHECK-NEXT: store <4 x i8*> [[SHUFFLE1]], <4 x i8*>* [[TMP4]], align 8
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%var2 = alloca i8
|
||||
|
@ -343,15 +341,14 @@ define void @spam() #1 {
|
|||
; CHECK-NEXT: [[VAR5:%.*]] = alloca i8, align 1
|
||||
; CHECK-NEXT: [[VAR12:%.*]] = alloca [12 x i8*], align 8
|
||||
; CHECK-NEXT: [[VAR36:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 4
|
||||
; CHECK-NEXT: store i8* [[VAR4]], i8** [[VAR36]], align 8
|
||||
; CHECK-NEXT: [[VAR37:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 5
|
||||
; CHECK-NEXT: [[VAR38:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 6
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8*> poison, i8* [[VAR5]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8*> [[TMP1]], i8* [[VAR5]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8** [[VAR37]] to <2 x i8*>*
|
||||
; CHECK-NEXT: store <2 x i8*> [[TMP2]], <2 x i8*>* [[TMP3]], align 8
|
||||
; CHECK-NEXT: [[VAR39:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 7
|
||||
; CHECK-NEXT: store i8* [[VAR5]], i8** [[VAR39]], align 8
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8*> poison, i8* [[VAR4]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8*> [[TMP1]], i8* [[VAR5]], i32 1
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i8*> [[TMP2]], <4 x i8*> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8** [[VAR36]] to <4 x i8*>*
|
||||
; CHECK-NEXT: store <4 x i8*> [[SHUFFLE]], <4 x i8*>* [[TMP3]], align 8
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%var4 = alloca i8
|
||||
|
|
Loading…
Reference in New Issue