2016-08-14 17:34:39 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2016-09-12 14:26:03 +08:00
|
|
|
; Two llc invocations read this file on stdin. The first enables only
; +avx512f; the second additionally enables +avx512vl, +avx512bw and
; +avx512dq. Each pipes its assembly into FileCheck with the shared CHECK
; prefix plus one per-configuration prefix (KNL or SKX respectively).
; NOTE(review): no directive in the visible part of this file uses the KNL or
; SKX prefix, so both configurations are held to identical output - confirm
; whether the extra prefixes are still wanted.
; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
|
|
|
|
; RUN: llc < %s -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
|
2014-12-11 18:21:12 +08:00
|
|
|
|
|
|
|
; Module-level target description. Neither llc invocation above passes
; -mtriple, so the triple below is what selects x86-64 Linux code generation.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
|
|
|
|
; Test function @func - takes no arguments and returns void.
; Every conditional branch in the body is on `undef` or on the constant
; `false`, and the only memory access is a load through an `undef` pointer,
; so the assertions below pin the block layout and branch sequence that llc
; emits rather than any computed value.
; Control flow: bb1 -> L_10 -> (L_30 | bb56); bb56 enters a loop made of
; bb33, bb35, bb37, L_19, bb42 and bb51, with bb51 branching back to bb33.
; NOTE(review): the lines consisting of `|`, bare timestamps and commit-message
; text inside this function are neither LLVM IR nor comments; they look like
; residue from a blame view and presumably must be stripped before llc can
; parse this file.
define void @func() {
|
2016-08-14 17:34:39 +08:00
|
|
|
; CHECK-LABEL: func:
|
Generalize MergeBlockIntoPredecessor. Replace uses of MergeBasicBlockIntoOnlyPred.
Summary:
Two utils methods have essentially the same functionality. This is an attempt to merge them into one.
1. lib/Transforms/Utils/Local.cpp : MergeBasicBlockIntoOnlyPred
2. lib/Transforms/Utils/BasicBlockUtils.cpp : MergeBlockIntoPredecessor
Prior to the patch:
1. MergeBasicBlockIntoOnlyPred
Updates either DomTree or DeferredDominance
Moves all instructions from Pred to BB, deletes Pred
Asserts BB has single predecessor
If address was taken, replace the block address with constant 1 (?)
2. MergeBlockIntoPredecessor
Updates DomTree, LoopInfo and MemoryDependenceResults
Moves all instruction from BB to Pred, deletes BB
Returns if doesn't have a single predecessor
Returns if BB's address was taken
After the patch:
Method 2. MergeBlockIntoPredecessor is attempting to become the new default:
Updates DomTree or DeferredDominance, and LoopInfo and MemoryDependenceResults
Moves all instruction from BB to Pred, deletes BB
Returns if doesn't have a single predecessor
Returns if BB's address was taken
Uses of MergeBasicBlockIntoOnlyPred that need to be replaced:
1. lib/Transforms/Scalar/LoopSimplifyCFG.cpp
Updated in this patch. No challenges.
2. lib/CodeGen/CodeGenPrepare.cpp
Updated in this patch.
i. eliminateFallThrough is straightforward, but I added using a temporary array to avoid the iterator invalidation.
ii. eliminateMostlyEmptyBlock(s) methods also now use a temporary array for blocks
Some interesting aspects:
- Since Pred is not deleted (BB is), the entry block does not need updating.
- The entry block was being updated with the deleted block in eliminateMostlyEmptyBlock. Added assert to make obvious that BB=SinglePred.
- isMergingEmptyBlockProfitable assumes BB is the one to be deleted.
- eliminateMostlyEmptyBlock(BB) does not delete BB on one path, it deletes its unique predecessor instead.
- adding some test owner as subscribers for the interesting tests modified:
test/CodeGen/X86/avx-cmp.ll
test/CodeGen/AMDGPU/nested-loop-conditions.ll
test/CodeGen/AMDGPU/si-annotate-cf.ll
test/CodeGen/X86/hoist-spill.ll
test/CodeGen/X86/2006-11-17-IllegalMove.ll
3. lib/Transforms/Scalar/JumpThreading.cpp
Not covered in this patch. It is the only use case using the DeferredDominance.
I would defer to Brian Rzycki to make this replacement.
Reviewers: chandlerc, spatel, davide, brzycki, bkramer, javed.absar
Subscribers: qcolombet, sanjoy, nemanjai, nhaehnle, jlebar, tpr, kbarton, RKSimon, wmi, arsenm, llvm-commits
Differential Revision: https://reviews.llvm.org/D48202
llvm-svn: 335183
2018-06-21 06:01:04 +08:00
|
|
|
; CHECK: # %bb.0: # %bb1
|
2016-09-04 22:03:52 +08:00
|
|
|
; CHECK-NEXT: xorl %eax, %eax
|
|
|
|
; CHECK-NEXT: testb %al, %al
|
2016-09-12 14:26:03 +08:00
|
|
|
; CHECK-NEXT: je .LBB0_1
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK-NEXT: # %bb.4: # %L_30
|
2016-08-14 17:34:39 +08:00
|
|
|
; CHECK-NEXT: retq
|
2016-09-12 14:26:03 +08:00
|
|
|
; CHECK-NEXT: .LBB0_1: # %bb56
|
2016-09-04 22:03:52 +08:00
|
|
|
; CHECK-NEXT: xorl %eax, %eax
|
2016-09-12 14:26:03 +08:00
|
|
|
; CHECK-NEXT: jmp .LBB0_2
|
2016-08-14 17:34:39 +08:00
|
|
|
; CHECK-NEXT: .p2align 4, 0x90
|
2016-09-12 14:26:03 +08:00
|
|
|
; CHECK-NEXT: .LBB0_3: # %bb35
|
|
|
|
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
|
2016-09-04 22:03:52 +08:00
|
|
|
; CHECK-NEXT: testb %al, %al
|
2016-09-12 14:26:03 +08:00
|
|
|
; CHECK-NEXT: .LBB0_2: # %bb33
|
|
|
|
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
2016-09-04 22:03:52 +08:00
|
|
|
; CHECK-NEXT: testb %al, %al
|
2016-09-12 14:26:03 +08:00
|
|
|
; CHECK-NEXT: jne .LBB0_2
|
|
|
|
; CHECK-NEXT: jmp .LBB0_3
|
2014-12-11 18:21:12 +08:00
|
|
|
bb1:
|
|
|
|
; Both successors of this conditional branch are the same block, L_10.
br i1 undef, label %L_10, label %L_10
|
|
|
|
|
|
|
|
L_10: ; preds = %bb1, %bb1
|
|
|
|
br i1 undef, label %L_30, label %bb56
|
|
|
|
|
|
|
|
bb56: ; preds = %L_10
|
|
|
|
br label %bb33
|
|
|
|
|
|
|
|
; Loop header: entered from bb56 and re-entered from bb51.
bb33: ; preds = %bb51, %bb56
|
2015-02-28 05:17:42 +08:00
|
|
|
%r111 = load i64, i64* undef, align 8
|
2014-12-11 18:21:12 +08:00
|
|
|
br i1 undef, label %bb51, label %bb35
|
|
|
|
|
|
|
|
bb35: ; preds = %bb33
|
|
|
|
br i1 undef, label %L_19, label %bb37
|
|
|
|
|
|
|
|
bb37: ; preds = %bb35
|
|
|
|
; 576460752303423488 is 2^59, so this mask isolates bit 59 of the loaded value.
%r128 = and i64 %r111, 576460752303423488
|
|
|
|
%phitmp = icmp eq i64 %r128, 0
|
|
|
|
br label %L_19
|
|
|
|
|
|
|
|
L_19: ; preds = %bb37, %bb35
|
|
|
|
; i1 merge: the bit-59 test result when arriving from bb37, constant true when
; arriving directly from bb35.
%"$V_S25.0" = phi i1 [ %phitmp, %bb37 ], [ true, %bb35 ]
|
|
|
|
br i1 undef, label %bb51, label %bb42
|
|
|
|
|
|
|
|
bb42: ; preds = %L_19
|
|
|
|
; Both select operands are undef and %r136 has no use in this function; the
; select is the only consumer of the i1 phi above.
%r136 = select i1 %"$V_S25.0", i32* undef, i32* undef
|
|
|
|
br label %bb51
|
|
|
|
|
|
|
|
bb51: ; preds = %bb42, %L_19, %bb33
|
|
|
|
; The condition is the constant false, so this always takes the second
; successor (bb33, the loop back edge); the L_30 edge is never taken from here.
br i1 false, label %L_30, label %bb33
|
|
|
|
|
|
|
|
L_30: ; preds = %bb51, %L_10
|
|
|
|
ret void
|
|
|
|
}
|
2016-09-12 14:26:03 +08:00
|
|
|
|
|
|
|
; The following test generates suboptimal code on AVX-512
|
|
|
|
; PR 28175
|
|
|
|
; Test function @func2 - takes a zero-extended i1 %i and an i32 %j, returns i64.
; When %j is non-zero it returns the result of a tail call to @bar; when %j is
; zero it returns zext(%i) OR'ed with -2. The assertions below expect the
; non-zero path to become a `jmp bar` tail call and the zero path to be a
; movzbl / orq $-2 / retq sequence.
; NOTE(review): the lines consisting of `|`, bare timestamps and commit-message
; text inside this function are neither LLVM IR nor comments; they look like
; residue from a blame view and presumably must be stripped before llc can
; parse this file.
define i64 @func2(i1 zeroext %i, i32 %j) {
|
|
|
|
; CHECK-LABEL: func2:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: # %bb.0: # %entry
|
2016-09-12 14:26:03 +08:00
|
|
|
; CHECK-NEXT: testl %esi, %esi
|
|
|
|
; CHECK-NEXT: je .LBB1_1
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK-NEXT: # %bb.2: # %if.then
|
2016-09-12 14:26:03 +08:00
|
|
|
; CHECK-NEXT: jmp bar # TAILCALL
|
Generalize MergeBlockIntoPredecessor. Replace uses of MergeBasicBlockIntoOnlyPred.
Summary:
Two utils methods have essentially the same functionality. This is an attempt to merge them into one.
1. lib/Transforms/Utils/Local.cpp : MergeBasicBlockIntoOnlyPred
2. lib/Transforms/Utils/BasicBlockUtils.cpp : MergeBlockIntoPredecessor
Prior to the patch:
1. MergeBasicBlockIntoOnlyPred
Updates either DomTree or DeferredDominance
Moves all instructions from Pred to BB, deletes Pred
Asserts BB has single predecessor
If address was taken, replace the block address with constant 1 (?)
2. MergeBlockIntoPredecessor
Updates DomTree, LoopInfo and MemoryDependenceResults
Moves all instruction from BB to Pred, deletes BB
Returns if doesn't have a single predecessor
Returns if BB's address was taken
After the patch:
Method 2. MergeBlockIntoPredecessor is attempting to become the new default:
Updates DomTree or DeferredDominance, and LoopInfo and MemoryDependenceResults
Moves all instruction from BB to Pred, deletes BB
Returns if doesn't have a single predecessor
Returns if BB's address was taken
Uses of MergeBasicBlockIntoOnlyPred that need to be replaced:
1. lib/Transforms/Scalar/LoopSimplifyCFG.cpp
Updated in this patch. No challenges.
2. lib/CodeGen/CodeGenPrepare.cpp
Updated in this patch.
i. eliminateFallThrough is straightforward, but I added using a temporary array to avoid the iterator invalidation.
ii. eliminateMostlyEmptyBlock(s) methods also now use a temporary array for blocks
Some interesting aspects:
- Since Pred is not deleted (BB is), the entry block does not need updating.
- The entry block was being updated with the deleted block in eliminateMostlyEmptyBlock. Added assert to make obvious that BB=SinglePred.
- isMergingEmptyBlockProfitable assumes BB is the one to be deleted.
- eliminateMostlyEmptyBlock(BB) does not delete BB on one path, it deletes its unique predecessor instead.
- adding some test owner as subscribers for the interesting tests modified:
test/CodeGen/X86/avx-cmp.ll
test/CodeGen/AMDGPU/nested-loop-conditions.ll
test/CodeGen/AMDGPU/si-annotate-cf.ll
test/CodeGen/X86/hoist-spill.ll
test/CodeGen/X86/2006-11-17-IllegalMove.ll
3. lib/Transforms/Scalar/JumpThreading.cpp
Not covered in this patch. It is the only use case using the DeferredDominance.
I would defer to Brian Rzycki to make this replacement.
Reviewers: chandlerc, spatel, davide, brzycki, bkramer, javed.absar
Subscribers: qcolombet, sanjoy, nemanjai, nhaehnle, jlebar, tpr, kbarton, RKSimon, wmi, arsenm, llvm-commits
Differential Revision: https://reviews.llvm.org/D48202
llvm-svn: 335183
2018-06-21 06:01:04 +08:00
|
|
|
; CHECK-NEXT: .LBB1_1: # %if.end
|
2017-05-19 20:35:15 +08:00
|
|
|
; CHECK-NEXT: movzbl %dil, %eax
|
|
|
|
; CHECK-NEXT: orq $-2, %rax
|
2016-09-12 14:26:03 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
entry:
|
|
|
|
; %tobool is true when %j == 0; the true edge of the branch below is if.end,
; so the call path (if.then) is taken only for non-zero %j.
%tobool = icmp eq i32 %j, 0
|
|
|
|
br i1 %tobool, label %if.end, label %if.then
|
|
|
|
|
|
|
|
if.then: ; preds = %entry
|
|
|
|
%call = tail call i64 @bar()
|
|
|
|
br label %return
|
|
|
|
|
|
|
|
if.end: ; preds = %entry
|
|
|
|
; Widen the i1 argument to i64, then OR with -2 (all bits set except bit 0).
%conv = zext i1 %i to i64
|
|
|
|
%or = or i64 %conv, -2
|
|
|
|
br label %return
|
|
|
|
|
|
|
|
return: ; preds = %if.end, %if.then
|
|
|
|
; Single exit: merges the OR result from if.end with the call result from
; if.then.
%or.sink = phi i64 [ %or, %if.end ], [ %call, %if.then ]
|
|
|
|
ret i64 %or.sink
|
|
|
|
}
|
|
|
|
|
|
|
|
; External function taking no parameters and returning i64. It is only
; declared here (no body is visible in this file); @func2 tail-calls it from
; its if.then block.
declare i64 @bar()
|