[X86] Fix assert fails in pass X86AvoidSFBPass

Fixes https://bugs.llvm.org/show_bug.cgi?id=38743

The function removeRedundantBlockingStores is supposed to remove any blocking stores that are contained in one another in BlockingStoresDispSizeMap.
However, it currently looks only at the immediately preceding entry, so it misses some cases, which results in an assertion failure.

This patch refines the function to check all previous layouts until it finds one that does not contain the current store, so that all redundant stores are removed.

Patch by Pengfei Wang

Differential Revision: https://reviews.llvm.org/D55642

llvm-svn: 349660
This commit is contained in:
Craig Topper 2018-12-19 18:45:57 +00:00
parent 4b7396e25f
commit 291470347a
2 changed files with 108 additions and 13 deletions

View File

@ -641,21 +641,22 @@ removeRedundantBlockingStores(DisplacementSizeMap &BlockingStoresDispSizeMap) {
if (BlockingStoresDispSizeMap.size() <= 1)
return;
int64_t PrevDisp = BlockingStoresDispSizeMap.begin()->first;
unsigned PrevSize = BlockingStoresDispSizeMap.begin()->second;
SmallVector<int64_t, 2> ForRemoval;
for (auto DispSizePair = std::next(BlockingStoresDispSizeMap.begin());
DispSizePair != BlockingStoresDispSizeMap.end(); ++DispSizePair) {
int64_t CurrDisp = DispSizePair->first;
unsigned CurrSize = DispSizePair->second;
if (CurrDisp + CurrSize <= PrevDisp + PrevSize) {
ForRemoval.push_back(PrevDisp);
SmallVector<std::pair<int64_t, unsigned>, 0> DispSizeStack;
for (auto DispSizePair : BlockingStoresDispSizeMap) {
int64_t CurrDisp = DispSizePair.first;
unsigned CurrSize = DispSizePair.second;
while (DispSizeStack.size()) {
int64_t PrevDisp = DispSizeStack.back().first;
unsigned PrevSize = DispSizeStack.back().second;
if (CurrDisp + CurrSize > PrevDisp + PrevSize)
break;
DispSizeStack.pop_back();
}
PrevDisp = CurrDisp;
PrevSize = CurrSize;
DispSizeStack.push_back(DispSizePair);
}
for (auto Disp : ForRemoval)
BlockingStoresDispSizeMap.erase(Disp);
BlockingStoresDispSizeMap.clear();
for (auto Disp : DispSizeStack)
BlockingStoresDispSizeMap.insert(Disp);
}
bool X86AvoidSFBPass::runOnMachineFunction(MachineFunction &MF) {

View File

@ -0,0 +1,94 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
%0 = type { %1 }
%1 = type { %2 }
%2 = type { %3 }
%3 = type { %4 }
%4 = type { %5 }
%5 = type { i64, i64, i8* }
%6 = type { %7, [23 x i8] }
%7 = type { i8 }
@.str.16 = external dso_local unnamed_addr constant [16 x i8], align 1
@.str.17 = external dso_local unnamed_addr constant [12 x i8], align 1
@.str.18 = external dso_local unnamed_addr constant [15 x i8], align 1
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #0
define void @pr38743() #1 align 2 {
; CHECK-LABEL: pr38743:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: cmpl $3, %eax
; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.1: # %bb
; CHECK-NEXT: cmpl $1, %eax
; CHECK-NEXT: je .LBB0_2
; CHECK-NEXT: # %bb.3: # %bb5
; CHECK-NEXT: movzwl .str.17+{{.*}}(%rip), %eax
; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq {{.*}}(%rip), %rax
; CHECK-NEXT: jmp .LBB0_5
; CHECK-NEXT: .LBB0_4: # %bb8
; CHECK-NEXT: movq .str.18+{{.*}}(%rip), %rax
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq {{.*}}(%rip), %rax
; CHECK-NEXT: jmp .LBB0_5
; CHECK-NEXT: .LBB0_2: # %bb2
; CHECK-NEXT: movq .str.16+{{.*}}(%rip), %rax
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq {{.*}}(%rip), %rax
; CHECK-NEXT: .LBB0_5: # %bb12
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movq %rax, (%rax)
; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %dil
; CHECK-NEXT: movb %al, (%rax)
; CHECK-NEXT: movq %rcx, 1(%rax)
; CHECK-NEXT: movw %dx, 9(%rax)
; CHECK-NEXT: movl %esi, 11(%rax)
; CHECK-NEXT: movb %dil, 15(%rax)
; CHECK-NEXT: retq
bb:
%tmp = alloca %0, align 16
%tmp1 = bitcast %0* %tmp to i8*
switch i32 undef, label %bb11 [
i32 1, label %bb2
i32 4, label %bb5
i32 2, label %bb5
i32 3, label %bb8
]
bb2: ; preds = %bb
%tmp3 = bitcast %0* %tmp to %6*
%tmp4 = getelementptr inbounds %6, %6* %tmp3, i64 0, i32 1, i64 0
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp4, i8* align 1 getelementptr inbounds ([16 x i8], [16 x i8]* @.str.16, i64 0, i64 0), i64 15, i1 false)
br label %bb12
bb5: ; preds = %bb, %bb
%tmp6 = bitcast %0* %tmp to %6*
%tmp7 = getelementptr inbounds %6, %6* %tmp6, i64 0, i32 1, i64 0
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp7, i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str.17, i64 0, i64 0), i64 10, i1 false)
br label %bb12
bb8: ; preds = %bb
%tmp9 = bitcast %0* %tmp to %6*
%tmp10 = getelementptr inbounds %6, %6* %tmp9, i64 0, i32 1, i64 0
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp10, i8* align 1 getelementptr inbounds ([15 x i8], [15 x i8]* @.str.18, i64 0, i64 0), i64 14, i1 false)
br label %bb12
bb11: ; preds = %bb
unreachable
bb12: ; preds = %bb8, %bb5, %bb2
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 undef, i8* nonnull align 16 %tmp1, i64 24, i1 false) #2
ret void
}
attributes #0 = { argmemonly nounwind }
attributes #1 = { "target-features"="+sse,+sse2,+sse3,+sse4.2" }
attributes #2 = { nounwind }