llvm-project/llvm/test/CodeGen/X86/dagcombine-buildvector.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mcpu=penryn | FileCheck %s

; Regression test for a DAG combine bug that generated an illegal build_vector:
; a v2i64 build_vector whose operands were i32, i32.

; Extracts elements 0 and 2 of the <4 x double> argument %src and stores the
; resulting <2 x double> through %dst. The assertions below expect this to
; lower to a single unpcklpd pairing the low lanes of xmm0 and xmm1, followed
; by one aligned vector store.
define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
; CHECK-LABEL: test:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    movapd %xmm0, (%eax)
; CHECK-NEXT:    retl
entry:
        ; Mask <0, 2> only indexes into %src; the undef second operand is never selected.
        %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> <i32 0, i32 2>
        store <2 x double> %tmp7.i, <2 x double>* %dst
        ret void
}

; Loads a <4 x i16> from %src, widens it to <8 x i16> with the upper four lanes
; undef, passes it to the SSE4.1 pmovzxwd intrinsic, and stores the <4 x i32>
; result through %dest. The assertions below expect the load to be folded into
; pmovzxwd as a memory operand rather than going through a separate vector load.
define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
; CHECK-LABEL: test2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; CHECK-NEXT:    movdqa %xmm0, (%eax)
; CHECK-NEXT:    retl
entry:
        %tmp1 = load <4 x i16>, <4 x i16>* %src
        ; Keep lanes 0-3 of the loaded vector in the low half; lanes 4-7 are undef.
        %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
        %0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
        store <4 x i32> %0, <4 x i32>* %dest
        ret void
}

; SSE4.1 pmovzxwd intrinsic called by @test2: takes <8 x i16>, returns <4 x i32>.
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
[X86][SSE] Regenerate 32-bit buildvector test llvm-svn: 279389 2016-08-21 07:09:57 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
[llvm] Remove redundant --check-prefix=CHECK from tests Reviewers: MatzeB, mcrosier, rengolin Differential Revision: https://reviews.llvm.org/D25894 llvm-svn: 285003 2016-10-25 02:57:55 +08:00			`; RUN: llc < %s -mtriple=i686-unknown -mcpu=penryn \| FileCheck %s`
Fix a problem with DAGCombine where we were building an illegal build vector shuffle mask. Forced the mask to be built using i32. Note: this will be irrelevant once vector_shuffle no longer takes a build vector for the shuffle mask. llvm-svn: 67076 2009-03-17 14:33:10 +08:00
			`; Shows a dag combine bug that will generate an illegal build vector`
			`; with v2i64 build_vector i32, i32.`

Adapt the x86 build_vector dagcombine to the current state of the legalizer. build vectors with i64 elements will only appear on 32b x86 before legalize. Since vector widening occurs during legalize, and produces i64 build_vector elements, the dag combiner is never run on these before legalize splits them into 32b elements. Teach the build_vector dag combine in x86 back end to recognize consecutive loads producing the low part of the vector. Convert the two uses of TLI's consecutive load recognizer to pass LoadSDNodes since that was required implicitly. Add a testcase for the transform. Old: subl $28, %esp movl 32(%esp), %eax movl 4(%eax), %ecx movl %ecx, 4(%esp) movl (%eax), %eax movl %eax, (%esp) movaps (%esp), %xmm0 pmovzxwd %xmm0, %xmm0 movl 36(%esp), %eax movaps %xmm0, (%eax) addl $28, %esp ret New: movl 4(%esp), %eax pmovzxwd (%eax), %xmm0 movl 8(%esp), %eax movaps %xmm0, (%eax) ret llvm-svn: 72957 2009-06-06 05:37:30 +08:00			`define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {`
[X86][SSE] Regenerate 32-bit buildvector test llvm-svn: 279389 2016-08-21 07:09:57 +08:00			`; CHECK-LABEL: test:`
			`; CHECK: # BB#0: # %entry`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]`
			`; CHECK-NEXT: movapd %xmm0, (%eax)`
			`; CHECK-NEXT: retl`
Fix a problem with DAGCombine where we were building an illegal build vector shuffle mask. Forced the mask to be built using i32. Note: this will be irrelevant once vector_shuffle no longer takes a build vector for the shuffle mask. llvm-svn: 67076 2009-03-17 14:33:10 +08:00			`entry:`
[X86][SSE] Regenerate 32-bit buildvector test llvm-svn: 279389 2016-08-21 07:09:57 +08:00			`%tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> <i32 0, i32 2>`
Fix a problem with DAGCombine where we were building an illegal build vector shuffle mask. Forced the mask to be built using i32. Note: this will be irrelevant once vector_shuffle no longer takes a build vector for the shuffle mask. llvm-svn: 67076 2009-03-17 14:33:10 +08:00			`store <2 x double> %tmp7.i, <2 x double>* %dst`
			`ret void`
Add newline at end of file. llvm-svn: 67085 2009-03-18 01:08:25 +08:00			`}`
Adapt the x86 build_vector dagcombine to the current state of the legalizer. build vectors with i64 elements will only appear on 32b x86 before legalize. Since vector widening occurs during legalize, and produces i64 build_vector elements, the dag combiner is never run on these before legalize splits them into 32b elements. Teach the build_vector dag combine in x86 back end to recognize consecutive loads producing the low part of the vector. Convert the two uses of TLI's consecutive load recognizer to pass LoadSDNodes since that was required implicitly. Add a testcase for the transform. Old: subl $28, %esp movl 32(%esp), %eax movl 4(%eax), %ecx movl %ecx, 4(%esp) movl (%eax), %eax movl %eax, (%esp) movaps (%esp), %xmm0 pmovzxwd %xmm0, %xmm0 movl 36(%esp), %eax movaps %xmm0, (%eax) addl $28, %esp ret New: movl 4(%esp), %eax pmovzxwd (%eax), %xmm0 movl 8(%esp), %eax movaps %xmm0, (%eax) ret llvm-svn: 72957 2009-06-06 05:37:30 +08:00
			`define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {`
[X86][SSE] Regenerate 32-bit buildvector test llvm-svn: 279389 2016-08-21 07:09:57 +08:00			`; CHECK-LABEL: test2:`
			`; CHECK: # BB#0: # %entry`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx`
			`; CHECK-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero`
			`; CHECK-NEXT: movdqa %xmm0, (%eax)`
			`; CHECK-NEXT: retl`
Adapt the x86 build_vector dagcombine to the current state of the legalizer. build vectors with i64 elements will only appear on 32b x86 before legalize. Since vector widening occurs during legalize, and produces i64 build_vector elements, the dag combiner is never run on these before legalize splits them into 32b elements. Teach the build_vector dag combine in x86 back end to recognize consecutive loads producing the low part of the vector. Convert the two uses of TLI's consecutive load recognizer to pass LoadSDNodes since that was required implicitly. Add a testcase for the transform. Old: subl $28, %esp movl 32(%esp), %eax movl 4(%eax), %ecx movl %ecx, 4(%esp) movl (%eax), %eax movl %eax, (%esp) movaps (%esp), %xmm0 pmovzxwd %xmm0, %xmm0 movl 36(%esp), %eax movaps %xmm0, (%eax) addl $28, %esp ret New: movl 4(%esp), %eax pmovzxwd (%eax), %xmm0 movl 8(%esp), %eax movaps %xmm0, (%eax) ret llvm-svn: 72957 2009-06-06 05:37:30 +08:00			`entry:`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%tmp1 = load <4 x i16>, <4 x i16>* %src`
Adapt the x86 build_vector dagcombine to the current state of the legalizer. build vectors with i64 elements will only appear on 32b x86 before legalize. Since vector widening occurs during legalize, and produces i64 build_vector elements, the dag combiner is never run on these before legalize splits them into 32b elements. Teach the build_vector dag combine in x86 back end to recognize consecutive loads producing the low part of the vector. Convert the two uses of TLI's consecutive load recognizer to pass LoadSDNodes since that was required implicitly. Add a testcase for the transform. Old: subl $28, %esp movl 32(%esp), %eax movl 4(%eax), %ecx movl %ecx, 4(%esp) movl (%eax), %eax movl %eax, (%esp) movaps (%esp), %xmm0 pmovzxwd %xmm0, %xmm0 movl 36(%esp), %eax movaps %xmm0, (%eax) addl $28, %esp ret New: movl 4(%esp), %eax pmovzxwd (%eax), %xmm0 movl 8(%esp), %eax movaps %xmm0, (%eax) ret llvm-svn: 72957 2009-06-06 05:37:30 +08:00			`%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>`
			`%0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)`
			`store <4 x i32> %0, <4 x i32>* %dest`
			`ret void`
			`}`

			`declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone`