; Source path: llvm-project/llvm/test/CodeGen/X86/split-store.ll
; Viewer metadata: 279 lines, 8.0 KiB, LLVM (Raw / Normal View / History).

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -force-split-store < %s | FileCheck %s
define void @int32_float_pair(i32 %tmp1, float %tmp2, i64* %ref.tmp) {
; CHECK-LABEL: int32_float_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: movss %xmm0, 4(%rsi)
; CHECK-NEXT: retq
%t0 = bitcast float %tmp2 to i32
%t1 = zext i32 %t0 to i64
%t2 = shl nuw i64 %t1, 32
%t3 = zext i32 %tmp1 to i64
%t4 = or i64 %t2, %t3
store i64 %t4, i64* %ref.tmp, align 8
ret void
}
define void @float_int32_pair(float %tmp1, i32 %tmp2, i64* %ref.tmp) {
; CHECK-LABEL: float_int32_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: movss %xmm0, (%rsi)
; CHECK-NEXT: movl %edi, 4(%rsi)
; CHECK-NEXT: retq
%t0 = bitcast float %tmp1 to i32
%t1 = zext i32 %tmp2 to i64
%t2 = shl nuw i64 %t1, 32
%t3 = zext i32 %t0 to i64
%t4 = or i64 %t2, %t3
store i64 %t4, i64* %ref.tmp, align 8
ret void
}
define void @int16_float_pair(i16 signext %tmp1, float %tmp2, i64* %ref.tmp) {
; CHECK-LABEL: int16_float_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl %di, %eax
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movss %xmm0, 4(%rsi)
; CHECK-NEXT: retq
%t0 = bitcast float %tmp2 to i32
%t1 = zext i32 %t0 to i64
%t2 = shl nuw i64 %t1, 32
%t3 = zext i16 %tmp1 to i64
%t4 = or i64 %t2, %t3
store i64 %t4, i64* %ref.tmp, align 8
ret void
}
define void @int8_float_pair(i8 signext %tmp1, float %tmp2, i64* %ref.tmp) {
; CHECK-LABEL: int8_float_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movss %xmm0, 4(%rsi)
; CHECK-NEXT: retq
%t0 = bitcast float %tmp2 to i32
%t1 = zext i32 %t0 to i64
%t2 = shl nuw i64 %t1, 32
%t3 = zext i8 %tmp1 to i64
%t4 = or i64 %t2, %t3
store i64 %t4, i64* %ref.tmp, align 8
ret void
}
define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, i64* %ref.tmp) {
; CHECK-LABEL: int32_int32_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, (%rdx)
; CHECK-NEXT: movl %esi, 4(%rdx)
; CHECK-NEXT: retq
%t1 = zext i32 %tmp2 to i64
%t2 = shl nuw i64 %t1, 32
%t3 = zext i32 %tmp1 to i64
%t4 = or i64 %t2, %t3
store i64 %t4, i64* %ref.tmp, align 8
ret void
}
define void @int16_int16_pair(i16 signext %tmp1, i16 signext %tmp2, i32* %ref.tmp) {
; CHECK-LABEL: int16_int16_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: movw %di, (%rdx)
; CHECK-NEXT: movw %si, 2(%rdx)
; CHECK-NEXT: retq
%t1 = zext i16 %tmp2 to i32
%t2 = shl nuw i32 %t1, 16
%t3 = zext i16 %tmp1 to i32
%t4 = or i32 %t2, %t3
store i32 %t4, i32* %ref.tmp, align 4
ret void
}
define void @int8_int8_pair(i8 signext %tmp1, i8 signext %tmp2, i16* %ref.tmp) {
; CHECK-LABEL: int8_int8_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: movb %dil, (%rdx)
; CHECK-NEXT: movb %sil, 1(%rdx)
; CHECK-NEXT: retq
%t1 = zext i8 %tmp2 to i16
%t2 = shl nuw i16 %t1, 8
%t3 = zext i8 %tmp1 to i16
%t4 = or i16 %t2, %t3
store i16 %t4, i16* %ref.tmp, align 2
ret void
}
define void @int31_int31_pair(i31 %tmp1, i31 %tmp2, i64* %ref.tmp) {
; CHECK-LABEL: int31_int31_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
; CHECK-NEXT: movl %edi, (%rdx)
; CHECK-NEXT: andl $2147483647, %esi # imm = 0x7FFFFFFF
; CHECK-NEXT: movl %esi, 4(%rdx)
; CHECK-NEXT: retq
%t1 = zext i31 %tmp2 to i64
%t2 = shl nuw i64 %t1, 32
%t3 = zext i31 %tmp1 to i64
%t4 = or i64 %t2, %t3
store i64 %t4, i64* %ref.tmp, align 8
ret void
}
define void @int31_int17_pair(i31 %tmp1, i17 %tmp2, i64* %ref.tmp) {
; CHECK-LABEL: int31_int17_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
; CHECK-NEXT: movl %edi, (%rdx)
; CHECK-NEXT: andl $131071, %esi # imm = 0x1FFFF
; CHECK-NEXT: movl %esi, 4(%rdx)
; CHECK-NEXT: retq
%t1 = zext i17 %tmp2 to i64
%t2 = shl nuw i64 %t1, 32
%t3 = zext i31 %tmp1 to i64
%t4 = or i64 %t2, %t3
store i64 %t4, i64* %ref.tmp, align 8
ret void
}
define void @int7_int3_pair(i7 signext %tmp1, i3 signext %tmp2, i16* %ref.tmp) {
; CHECK-LABEL: int7_int3_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: andb $127, %dil
; CHECK-NEXT: movb %dil, (%rdx)
; CHECK-NEXT: andb $7, %sil
; CHECK-NEXT: movb %sil, 1(%rdx)
; CHECK-NEXT: retq
%t1 = zext i3 %tmp2 to i16
%t2 = shl nuw i16 %t1, 8
%t3 = zext i7 %tmp1 to i16
%t4 = or i16 %t2, %t3
store i16 %t4, i16* %ref.tmp, align 2
ret void
}
define void @int24_int24_pair(i24 signext %tmp1, i24 signext %tmp2, i48* %ref.tmp) {
; CHECK-LABEL: int24_int24_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: movw %di, (%rdx)
; CHECK-NEXT: shrl $16, %edi
; CHECK-NEXT: movb %dil, 2(%rdx)
; CHECK-NEXT: movw %si, 4(%rdx)
; CHECK-NEXT: shrl $16, %esi
; CHECK-NEXT: movb %sil, 6(%rdx)
; CHECK-NEXT: retq
%t1 = zext i24 %tmp2 to i48
%t2 = shl nuw i48 %t1, 24
%t3 = zext i24 %tmp1 to i48
%t4 = or i48 %t2, %t3
store i48 %t4, i48* %ref.tmp, align 2
ret void
}
; getTypeSizeInBits(i12) != getTypeStoreSizeInBits(i12), so store split doesn't kick in.
define void @int12_int12_pair(i12 signext %tmp1, i12 signext %tmp2, i24* %ref.tmp) {
; CHECK-LABEL: int12_int12_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: shll $12, %eax
; CHECK-NEXT: andl $4095, %edi # imm = 0xFFF
; CHECK-NEXT: orl %eax, %edi
; CHECK-NEXT: shrl $4, %esi
; CHECK-NEXT: movb %sil, 2(%rdx)
; CHECK-NEXT: movw %di, (%rdx)
; CHECK-NEXT: retq
%t1 = zext i12 %tmp2 to i24
%t2 = shl nuw i24 %t1, 12
%t3 = zext i12 %tmp1 to i24
%t4 = or i24 %t2, %t3
store i24 %t4, i24* %ref.tmp, align 2
ret void
}
; getTypeSizeInBits(i14) != getTypeStoreSizeInBits(i14), so store split doesn't kick in.
define void @int7_int7_pair(i7 signext %tmp1, i7 signext %tmp2, i14* %ref.tmp) {
; CHECK-LABEL: int7_int7_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: shll $7, %esi
; CHECK-NEXT: andl $127, %edi
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: andl $16383, %edi # imm = 0x3FFF
; CHECK-NEXT: movw %di, (%rdx)
; CHECK-NEXT: retq
%t1 = zext i7 %tmp2 to i14
%t2 = shl nuw i14 %t1, 7
%t3 = zext i7 %tmp1 to i14
%t4 = or i14 %t2, %t3
store i14 %t4, i14* %ref.tmp, align 2
ret void
}
; getTypeSizeInBits(i2) != getTypeStoreSizeInBits(i2), so store split doesn't kick in.
define void @int1_int1_pair(i1 signext %tmp1, i1 signext %tmp2, i2* %ref.tmp) {
; CHECK-LABEL: int1_int1_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: addb %sil, %sil
; CHECK-NEXT: andb $1, %dil
; CHECK-NEXT: orb %sil, %dil
; CHECK-NEXT: andb $3, %dil
; CHECK-NEXT: movb %dil, (%rdx)
; CHECK-NEXT: retq
%t1 = zext i1 %tmp2 to i2
%t2 = shl nuw i2 %t1, 1
%t3 = zext i1 %tmp1 to i2
%t4 = or i2 %t2, %t3
store i2 %t4, i2* %ref.tmp, align 1
ret void
}
; Multi-basic-block variant of the i32/float pair: the float-to-i32 bitcast
; lives in %entry while the zext/shl/or chain and the i64 store live in %next.
; The checks expect the same result as the single-block case: one 4-byte
; integer store at offset 0 and the float stored from %xmm0 at offset 4, all
; emitted in a single machine block.
define void @mbb_int32_float_pair(i32 %tmp1, float %tmp2, i64* %ref.tmp) {
; CHECK-LABEL: mbb_int32_float_pair:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, (%rsi)
; CHECK-NEXT:    movss %xmm0, 4(%rsi)
; CHECK-NEXT:    retq
entry:
  %t0 = bitcast float %tmp2 to i32
  br label %next

next:
  %t1 = zext i32 %t0 to i64
  %t2 = shl nuw i64 %t1, 32
  %t3 = zext i32 %tmp1 to i64
  %t4 = or i64 %t2, %t3
  store i64 %t4, i64* %ref.tmp, align 8
  ret void
}
; Multi-basic-block variant with two stores of the same packed value: the
; bitcast is in %entry, the value is built and stored to %ref.tmp in %bb1, and
; it is stored again to %ref.tmp1 in %bb2, which is reached only when %cmp is
; true. The checks expect both i64 stores to be emitted as an integer 4-byte
; store at offset 0 plus a float store at offset 4, with the second pair
; guarded by a test of the low bit of %cl.
define void @mbb_int32_float_multi_stores(i32 %tmp1, float %tmp2, i64* %ref.tmp, i64* %ref.tmp1, i1 %cmp) {
; CHECK-LABEL: mbb_int32_float_multi_stores:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, (%rsi)
; CHECK-NEXT:    movss %xmm0, 4(%rsi)
; CHECK-NEXT:    testb $1, %cl
; CHECK-NEXT:    je .LBB15_2
; CHECK-NEXT:  # %bb.1: # %bb2
; CHECK-NEXT:    movl %edi, (%rdx)
; CHECK-NEXT:    movss %xmm0, 4(%rdx)
; CHECK-NEXT:  .LBB15_2: # %exitbb
; CHECK-NEXT:    retq
entry:
  %t0 = bitcast float %tmp2 to i32
  br label %bb1

bb1:
  %t1 = zext i32 %t0 to i64
  %t2 = shl nuw i64 %t1, 32
  %t3 = zext i32 %tmp1 to i64
  %t4 = or i64 %t2, %t3
  store i64 %t4, i64* %ref.tmp, align 8
  br i1 %cmp, label %bb2, label %exitbb

bb2:
  store i64 %t4, i64* %ref.tmp1, align 8
  br label %exitbb

exitbb:
  ret void
}