llvm-project/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll

52 lines
1.8 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
; This tests interaction between MergeICmp and ExpandMemCmp.
%"struct.std::pair" = type { i32, i32 }
define zeroext i1 @opeq1(
; X86-LABEL: opeq1:
Generalize MergeBlockIntoPredecessor. Replace uses of MergeBasicBlockIntoOnlyPred. Summary: Two utils methods have essentially the same functionality. This is an attempt to merge them into one. 1. lib/Transforms/Utils/Local.cpp : MergeBasicBlockIntoOnlyPred 2. lib/Transforms/Utils/BasicBlockUtils.cpp : MergeBlockIntoPredecessor Prior to the patch: 1. MergeBasicBlockIntoOnlyPred Updates either DomTree or DeferredDominance Moves all instructions from Pred to BB, deletes Pred Asserts BB has single predecessor If address was taken, replace the block address with constant 1 (?) 2. MergeBlockIntoPredecessor Updates DomTree, LoopInfo and MemoryDependenceResults Moves all instruction from BB to Pred, deletes BB Returns if doesn't have a single predecessor Returns if BB's address was taken After the patch: Method 2. MergeBlockIntoPredecessor is attempting to become the new default: Updates DomTree or DeferredDominance, and LoopInfo and MemoryDependenceResults Moves all instruction from BB to Pred, deletes BB Returns if doesn't have a single predecessor Returns if BB's address was taken Uses of MergeBasicBlockIntoOnlyPred that need to be replaced: 1. lib/Transforms/Scalar/LoopSimplifyCFG.cpp Updated in this patch. No challenges. 2. lib/CodeGen/CodeGenPrepare.cpp Updated in this patch. i. eliminateFallThrough is straightforward, but I added using a temporary array to avoid the iterator invalidation. ii. eliminateMostlyEmptyBlock(s) methods also now use a temporary array for blocks Some interesting aspects: - Since Pred is not deleted (BB is), the entry block does not need updating. - The entry block was being updated with the deleted block in eliminateMostlyEmptyBlock. Added assert to make obvious that BB=SinglePred. - isMergingEmptyBlockProfitable assumes BB is the one to be deleted. - eliminateMostlyEmptyBlock(BB) does not delete BB on one path, it deletes its unique predecessor instead. - adding some test owner as subscribers for the interesting tests modified: test/CodeGen/X86/avx-cmp.ll test/CodeGen/AMDGPU/nested-loop-conditions.ll test/CodeGen/AMDGPU/si-annotate-cf.ll test/CodeGen/X86/hoist-spill.ll test/CodeGen/X86/2006-11-17-IllegalMove.ll 3. lib/Transforms/Scalar/JumpThreading.cpp Not covered in this patch. It is the only use case using the DeferredDominance. I would defer to Brian Rzycki to make this replacement. Reviewers: chandlerc, spatel, davide, brzycki, bkramer, javed.absar Subscribers: qcolombet, sanjoy, nemanjai, nhaehnle, jlebar, tpr, kbarton, RKSimon, wmi, arsenm, llvm-commits Differential Revision: https://reviews.llvm.org/D48202 llvm-svn: 335183
2018-06-21 06:01:04 +08:00
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: movl 4(%ecx), %ecx
; X86-NEXT: xorl (%eax), %edx
; X86-NEXT: xorl 4(%eax), %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: opeq1:
Generalize MergeBlockIntoPredecessor. Replace uses of MergeBasicBlockIntoOnlyPred. Summary: Two utils methods have essentially the same functionality. This is an attempt to merge them into one. 1. lib/Transforms/Utils/Local.cpp : MergeBasicBlockIntoOnlyPred 2. lib/Transforms/Utils/BasicBlockUtils.cpp : MergeBlockIntoPredecessor Prior to the patch: 1. MergeBasicBlockIntoOnlyPred Updates either DomTree or DeferredDominance Moves all instructions from Pred to BB, deletes Pred Asserts BB has single predecessor If address was taken, replace the block address with constant 1 (?) 2. MergeBlockIntoPredecessor Updates DomTree, LoopInfo and MemoryDependenceResults Moves all instruction from BB to Pred, deletes BB Returns if doesn't have a single predecessor Returns if BB's address was taken After the patch: Method 2. MergeBlockIntoPredecessor is attempting to become the new default: Updates DomTree or DeferredDominance, and LoopInfo and MemoryDependenceResults Moves all instruction from BB to Pred, deletes BB Returns if doesn't have a single predecessor Returns if BB's address was taken Uses of MergeBasicBlockIntoOnlyPred that need to be replaced: 1. lib/Transforms/Scalar/LoopSimplifyCFG.cpp Updated in this patch. No challenges. 2. lib/CodeGen/CodeGenPrepare.cpp Updated in this patch. i. eliminateFallThrough is straightforward, but I added using a temporary array to avoid the iterator invalidation. ii. eliminateMostlyEmptyBlock(s) methods also now use a temporary array for blocks Some interesting aspects: - Since Pred is not deleted (BB is), the entry block does not need updating. - The entry block was being updated with the deleted block in eliminateMostlyEmptyBlock. Added assert to make obvious that BB=SinglePred. - isMergingEmptyBlockProfitable assumes BB is the one to be deleted. - eliminateMostlyEmptyBlock(BB) does not delete BB on one path, it deletes its unique predecessor instead. - adding some test owner as subscribers for the interesting tests modified: test/CodeGen/X86/avx-cmp.ll test/CodeGen/AMDGPU/nested-loop-conditions.ll test/CodeGen/AMDGPU/si-annotate-cf.ll test/CodeGen/X86/hoist-spill.ll test/CodeGen/X86/2006-11-17-IllegalMove.ll 3. lib/Transforms/Scalar/JumpThreading.cpp Not covered in this patch. It is the only use case using the DeferredDominance. I would defer to Brian Rzycki to make this replacement. Reviewers: chandlerc, spatel, davide, brzycki, bkramer, javed.absar Subscribers: qcolombet, sanjoy, nemanjai, nhaehnle, jlebar, tpr, kbarton, RKSimon, wmi, arsenm, llvm-commits Differential Revision: https://reviews.llvm.org/D48202 llvm-svn: 335183
2018-06-21 06:01:04 +08:00
; X64: # %bb.0: # %entry
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
%"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
%"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
entry:
%first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
%0 = load i32, i32* %first.i, align 4
%first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
%1 = load i32, i32* %first1.i, align 4
%cmp.i = icmp eq i32 %0, %1
br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
land.rhs.i:
%second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
%2 = load i32, i32* %second.i, align 4
%second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
%3 = load i32, i32* %second2.i, align 4
%cmp3.i = icmp eq i32 %2, %3
br label %opeq1.exit
opeq1.exit:
%4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
ret i1 %4
}