forked from OSchip/llvm-project
[x86] add test for possible load scalarization fold; NFC
This is a minimal test to show a transform proposed in D118376.
This commit is contained in:
parent
6277b90992
commit
e9768a2a44
|
@ -300,3 +300,38 @@ define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* no
|
||||||
store i16 %10, i16* %2, align 2
|
store i16 %10, i16* %2, align 2
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Baseline test for a possible load-scalarization fold (see D118376).
; The loaded vector %v has two uses: a vector add whose result is stored back
; through %p, and an extract of element 0 that is returned. The assertions
; below record the current codegen, in which element 0 is moved out of the
; vector register (movd/vmovd) after the full vector load.
; NOTE(review): presumably the proposed transform would change how element 0
; is obtained — confirm against D118376 and regenerate the checks if it lands.
define i32 @multi_use_load_scalarization(<4 x i32>* %p) {
|
||||||
|
; X32-SSE2-LABEL: multi_use_load_scalarization:
|
||||||
|
; X32-SSE2: # %bb.0:
|
||||||
|
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
|
; X32-SSE2-NEXT: movdqu (%ecx), %xmm0
|
||||||
|
; X32-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
|
||||||
|
; X32-SSE2-NEXT: movd %xmm0, %eax
|
||||||
|
; X32-SSE2-NEXT: psubd %xmm1, %xmm0
|
||||||
|
; X32-SSE2-NEXT: movdqa %xmm0, (%ecx)
|
||||||
|
; X32-SSE2-NEXT: retl
|
||||||
|
;
|
||||||
|
; X64-SSSE3-LABEL: multi_use_load_scalarization:
|
||||||
|
; X64-SSSE3: # %bb.0:
|
||||||
|
; X64-SSSE3-NEXT: movdqu (%rdi), %xmm0
|
||||||
|
; X64-SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
|
||||||
|
; X64-SSSE3-NEXT: movd %xmm0, %eax
|
||||||
|
; X64-SSSE3-NEXT: psubd %xmm1, %xmm0
|
||||||
|
; X64-SSSE3-NEXT: movdqa %xmm0, (%rdi)
|
||||||
|
; X64-SSSE3-NEXT: retq
|
||||||
|
;
|
||||||
|
; X64-AVX-LABEL: multi_use_load_scalarization:
|
||||||
|
; X64-AVX: # %bb.0:
|
||||||
|
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
|
||||||
|
; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||||
|
; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm1
|
||||||
|
; X64-AVX-NEXT: vmovdqa %xmm1, (%rdi)
|
||||||
|
; X64-AVX-NEXT: vmovd %xmm0, %eax
|
||||||
|
; X64-AVX-NEXT: retq
|
||||||
|
||||||
|
; The load is explicitly under-aligned (align 1); the checks expect an
; unaligned vector load (movdqu/vmovdqu).
%v = load <4 x i32>, <4 x i32>* %p, align 1
|
||||||
|
; First use of %v: add 1 to every lane.
%v1 = add <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>
|
||||||
|
; The store carries no explicit alignment; the checks expect an aligned
; vector store (movdqa/vmovdqa) back to the same address.
store <4 x i32> %v1, <4 x i32>* %p
|
||||||
|
; Second use of %v: lane 0 of the originally loaded value (not of %v1).
%r = extractelement <4 x i32> %v, i64 0
|
||||||
|
ret i32 %r
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue