llvm-project/llvm/test/CodeGen/X86/avx-basic.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx | FileCheck %s

; Global vectors used as store destinations by @zero128 and @zero256.
; Each is declared with an alignment equal to its size in bytes
; (32 for the two 256-bit vectors, 16 for the 128-bit one).
@x = common global <8 x float> zeroinitializer, align 32
@y = common global <4 x double> zeroinitializer, align 32
@z = common global <4 x float> zeroinitializer, align 16

; Store an all-zero <4 x float> to the global @z with 16-byte alignment.
; The assertions below expect the zero to be produced by a 128-bit vxorps on
; xmm0 and written to memory with an aligned vmovaps.
define void @zero128() nounwind ssp {
; CHECK-LABEL: zero128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    movq _z@{{.*}}(%rip), %rax
; CHECK-NEXT:    vmovaps %xmm0, (%rax)
; CHECK-NEXT:    retq
  store <4 x float> zeroinitializer, <4 x float>* @z, align 16
  ret void
}

; Store all-zero 256-bit vectors to @x (<8 x float>) and @y (<4 x double>),
; both 32-byte aligned. The assertions below expect one 256-bit vxorps whose
; ymm0 result is reused for both vmovaps stores, and a vzeroupper before the
; return.
define void @zero256() nounwind ssp {
; CHECK-LABEL: zero256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movq _x@{{.*}}(%rip), %rax
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    vmovaps %ymm0, (%rax)
; CHECK-NEXT:    movq _y@{{.*}}(%rip), %rax
; CHECK-NEXT:    vmovaps %ymm0, (%rax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  store <8 x float> zeroinitializer, <8 x float>* @x, align 32
  store <4 x double> zeroinitializer, <4 x double>* @y, align 32
  ret void
}

; Store an <8 x float> whose every lane is the constant 0xFFFFFFFFE0000000
; through %RET (reinterpreted as a pointer to <8 x float>, 32-byte aligned).
; The assertions below expect the value to be built in registers: vpcmpeqd on
; xmm0, then vinsertf128 to copy it into the upper half of ymm0.
; %aFOO is not used by the body.
define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind {
; CHECK-LABEL: ones:
; CHECK:       ## BB#0: ## %allocas
; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    vmovaps %ymm0, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
allocas:
  %dst = bitcast [0 x float]* %RET to <8 x float>*
  store <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
                     float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
                     float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
                     float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>,
        <8 x float>* %dst, align 32
  ret void
}

; Integer counterpart of @ones: store an <8 x i32> with every lane equal to -1
; through %RET (reinterpreted as a pointer to <8 x i32>, 32-byte aligned).
; The assertions below expect the same vpcmpeqd + vinsertf128 sequence.
; %aFOO is not used by the body.
define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind {
; CHECK-LABEL: ones2:
; CHECK:       ## BB#0: ## %allocas
; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    vmovaps %ymm0, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
allocas:
  %dst = bitcast [0 x i32]* %RET to <8 x i32>*
  store <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1,
                   i32 -1, i32 -1, i32 -1, i32 -1>,
        <8 x i32>* %dst, align 32
  ret void
}

;;; Crash regression test: this shuffle only has to get through codegen.
; Mask <2, 3, 4, 4> places lanes 2 and 3 of %a in the low half of the result;
; index 4 selects lane 0 of the undef second operand, so the upper two result
; lanes are unspecified. The assertions below expect a single vextractf128.
define <4 x i64> @ISelCrash(<4 x i64> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: ISelCrash:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    retq
  %upper = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 4>
  ret <4 x i64> %upper
}

;;; Crash regression test for movd selection.
; Loads element 0 of %aFOO as an i32 and inserts it into lane 1 of an
; otherwise-undef <8 x i32>. Element 1 is also loaded (%elt1) but its value is
; never used; the load is kept because it is part of the original reproducer.
; The assertions below expect a vmovd load followed by a vpshufd.
define <8 x i32> @VMOVZQI2PQI([0 x float]* nocapture %aFOO) nounwind {
; CHECK-LABEL: VMOVZQI2PQI:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; CHECK-NEXT:    retq
  %elt0.addr = bitcast [0 x float]* %aFOO to i32*
  %elt0 = load i32, i32* %elt0.addr, align 4
  %elt1.fptr = getelementptr [0 x float], [0 x float]* %aFOO, i64 0, i64 1
  %elt1.addr = bitcast float* %elt1.fptr to i32*
  %elt1 = load i32, i32* %elt1.addr, align 4
  %result = insertelement <8 x i32> undef, i32 %elt0, i32 1
  ret <8 x i32> %result
}

;;; Crash regression test for fneg lowering.
; rdar://10566486
; Negates a <16 x float> by subtracting it from a splat of -0.0. The
; assertions below expect one 8-lane constant of 2147483648 (0x80000000, the
; float sign bit) loaded into ymm2 and XORed into both ymm0 and ymm1.
define <16 x float> @fneg(<16 x float> %a) nounwind {
; CHECK-LABEL: fneg:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps {{.*#+}} ymm2 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; CHECK-NEXT:    vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vxorps %ymm2, %ymm1, %ymm1
; CHECK-NEXT:    retq
  %neg = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>,
                           %a
  ret <16 x float> %neg
}

;;; Crash regression test for build-vector lowering of <16 x i16>.
; Inserts %a into lane 0 of a constant vector whose lane 0 is undef and whose
; remaining fifteen lanes are zero. The assertions below expect %a to be
; zero-extended with movzwl and moved into xmm0 with vmovd.
define <16 x i16> @build_vec_16x16(i16 %a) nounwind readonly {
; CHECK-LABEL: build_vec_16x16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzwl %di, %eax
; CHECK-NEXT:    vmovd %eax, %xmm0
; CHECK-NEXT:    retq
  %vec = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %a, i32 0
  ret <16 x i16> %vec
}

;;; VMOVPQIto64rr must be printed with the "vmovq" mnemonic. An incorrect
;;; "movd" mnemonic used to be printed for this instruction.
; Extracts lane 0 of a <2 x i64> into a scalar i64 return value.
define i64 @VMOVPQIto64rr(<2 x i64> %a) {
; CHECK-LABEL: VMOVPQIto64rr:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovq %xmm0, %rax
; CHECK-NEXT:    retq
  %lo = extractelement <2 x i64> %a, i32 0
  ret i64 %lo
}

; PR22685
; Loads one float from %ptr and inserts it into lane 0 of an all-zero
; <8 x float>. The assertions below expect the whole operation to be a single
; vmovss load.
define <8 x float> @mov00_8f32(float* %ptr) {
; CHECK-LABEL: mov00_8f32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq
  %scalar = load float, float* %ptr
  %widened = insertelement <8 x float> zeroinitializer, float %scalar, i32 0
  ret <8 x float> %widened
}
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx \| FileCheck %s`
Begin to support some vector operations for AVX 256-bit instructions. The long term goal here is to be able to match enough of vector_shuffle and build_vector so all avx intrinsics which aren't mapped to their own built-ins but to shufflevector calls can be codegen'd. This is the first (baby) step, support building zeroed vectors. llvm-svn: 110897 2010-08-12 10:06:36 +08:00
			`@x = common global <8 x float> zeroinitializer, align 32`
			`@y = common global <4 x double> zeroinitializer, align 32`
Rename and tidy up tests llvm-svn: 137103 2011-08-09 11:04:23 +08:00			`@z = common global <4 x float> zeroinitializer, align 16`
Begin to support some vector operations for AVX 256-bit instructions. The long term goal here is to be able to match enough of vector_shuffle and build_vector so all avx intrinsics which aren't mapped to their own built-ins but to shufflevector calls can be codegen'd. This is the first (baby) step, support building zeroed vectors. llvm-svn: 110897 2010-08-12 10:06:36 +08:00
Rename and tidy up tests llvm-svn: 137103 2011-08-09 11:04:23 +08:00			`define void @zero128() nounwind ssp {`
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; CHECK-LABEL: zero128:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0`
			`; CHECK-NEXT: movq _z@{{.*}}(%rip), %rax`
			`; CHECK-NEXT: vmovaps %xmm0, (%rax)`
			`; CHECK-NEXT: retq`
Rename and tidy up tests llvm-svn: 137103 2011-08-09 11:04:23 +08:00			`store <4 x float> zeroinitializer, <4 x float>* @z, align 16`
			`ret void`
			`}`

			`define void @zero256() nounwind ssp {`
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; CHECK-LABEL: zero256:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: movq _x@{{.*}}(%rip), %rax`
[X86] Lower 256-bit vector all-zero constants to v8i32 even with AVX1 only. Either way a 256-bit VXORPS will be used. llvm-svn: 268873 2016-05-08 15:10:54 +08:00			`; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0`
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; CHECK-NEXT: vmovaps %ymm0, (%rax)`
			`; CHECK-NEXT: movq _y@{{.*}}(%rip), %rax`
			`; CHECK-NEXT: vmovaps %ymm0, (%rax)`
			`; CHECK-NEXT: vzeroupper`
			`; CHECK-NEXT: retq`
Begin to support some vector operations for AVX 256-bit instructions. The long term goal here is to be able to match enough of vector_shuffle and build_vector so all avx intrinsics which aren't mapped to their own built-ins but to shufflevector calls can be codegen'd. This is the first (baby) step, support building zeroed vectors. llvm-svn: 110897 2010-08-12 10:06:36 +08:00			`store <8 x float> zeroinitializer, <8 x float>* @x, align 32`
			`store <4 x double> zeroinitializer, <4 x double>* @y, align 32`
			`ret void`
			`}`
Codegen allonesvector better while using AVX: vpcmpeqd + vinsertf128 This also fixes PR10452 llvm-svn: 136004 2011-07-26 07:05:32 +08:00
			`define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind {`
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; CHECK-LABEL: ones:`
			`; CHECK: ## BB#0: ## %allocas`
			`; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0`
			`; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0`
			`; CHECK-NEXT: vmovaps %ymm0, (%rdi)`
			`; CHECK-NEXT: vzeroupper`
			`; CHECK-NEXT: retq`
Codegen allonesvector better while using AVX: vpcmpeqd + vinsertf128 This also fixes PR10452 llvm-svn: 136004 2011-07-26 07:05:32 +08:00			`allocas:`
			`%ptr2vec615 = bitcast [0 x float]* %RET to <8 x float>*`
			`store <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float`
			`0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float`
			`0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <8 x`
			`float>* %ptr2vec615, align 32`
			`ret void`
			`}`
Since vectors with all ones can't be created with a 256-bit instruction, avoid returning early for v8i32 types, which would only be valid for vector with all zeros. Also split the handling of zeros and ones into separate checking logic since they are handled differently. This fixes PR10547 llvm-svn: 136642 2011-08-02 03:51:53 +08:00
			`define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind {`
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; CHECK-LABEL: ones2:`
			`; CHECK: ## BB#0: ## %allocas`
			`; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0`
			`; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0`
			`; CHECK-NEXT: vmovaps %ymm0, (%rdi)`
			`; CHECK-NEXT: vzeroupper`
			`; CHECK-NEXT: retq`
Since vectors with all ones can't be created with a 256-bit instruction, avoid returning early for v8i32 types, which would only be valid for vector with all zeros. Also split the handling of zeros and ones into separate checking logic since they are handled differently. This fixes PR10547 llvm-svn: 136642 2011-08-02 03:51:53 +08:00			`allocas:`
			`%ptr2vec615 = bitcast [0 x i32]* %RET to <8 x i32>*`
			`store <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32>* %ptr2vec615, align 32`
			`ret void`
			`}`
Fix PR10492 by teaching MOVHLPS and MOVLPS mask matching to be more strict. llvm-svn: 137324 2011-08-12 02:59:13 +08:00
			`;;; Just make sure this doesn't crash`
			`define <4 x i64> @ISelCrash(<4 x i64> %a) nounwind uwtable readnone ssp {`
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; CHECK-LABEL: ISelCrash:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0`
			`; CHECK-NEXT: retq`
Fix PR10492 by teaching MOVHLPS and MOVLPS mask matching to be more strict. llvm-svn: 137324 2011-08-12 02:59:13 +08:00			`%shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 4>`
			`ret <4 x i64> %shuffle`
			`}`
Instead of always leaving the work to the generic legalizer when there is no support for native 256-bit shuffles, be smarter in some cases, for example, when you can extract specific 128-bit parts and use regular 128-bit shuffles for them. Example: For this shuffle: shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6> This was expanded to: vextractf128 $1, %ymm1, %xmm2 vpextrq $0, %xmm2, %rax vmovd %rax, %xmm1 vpextrq $1, %xmm2, %rax vmovd %rax, %xmm2 vpunpcklqdq %xmm1, %xmm2, %xmm1 vpextrq $0, %xmm0, %rax vmovd %rax, %xmm2 vpextrq $1, %xmm0, %rax vmovd %rax, %xmm0 vpunpcklqdq %xmm2, %xmm0, %xmm0 vinsertf128 $1, %xmm1, %ymm0, %ymm0 ret Now we get: vshufpd $1, %xmm0, %xmm0, %xmm0 vextractf128 $1, %ymm1, %xmm1 vshufpd $1, %xmm1, %xmm1, %xmm1 vinsertf128 $1, %xmm1, %ymm0, %ymm0 llvm-svn: 137733 2011-08-17 02:21:54 +08:00
Fix PR10845. SUBREG_TO_REG shouldn't be used when the input and destination types are equal! llvm-svn: 139553 2011-09-13 06:59:23 +08:00			`;;; Don't crash on movd`
			`define <8 x i32> @VMOVZQI2PQI([0 x float]* nocapture %aFOO) nounwind {`
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; CHECK-LABEL: VMOVZQI2PQI:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero`
			`; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]`
			`; CHECK-NEXT: retq`
Fix PR10845. SUBREG_TO_REG shouldn't be used when the input and destination types are equal! llvm-svn: 139553 2011-09-13 06:59:23 +08:00			`%ptrcast.i33.i = bitcast [0 x float]* %aFOO to i32*`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%val.i34.i = load i32, i32* %ptrcast.i33.i, align 4`
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction One of several parallel first steps to remove the target type of pointers, replacing them with a single opaque pointer type. This adds an explicit type parameter to the gep instruction so that when the first parameter becomes an opaque pointer type, the type to gep through is still available to the instructions. * This doesn't modify gep operators, only instructions (operators will be handled separately) * Textual IR changes only. Bitcode (including upgrade) and changing the in-memory representation will be in separate changes. * geps of vectors are transformed as: getelementptr <4 x float> %x, ... ->getelementptr float, <4 x float> %x, ... Then, once the opaque pointer type is introduced, this will ultimately look like: getelementptr float, <4 x ptr> %x with the unambiguous interpretation that it is a vector of pointers to float. * address spaces remain on the pointer, not the type: getelementptr float addrspace(1)* %x ->getelementptr float, float addrspace(1)* %x Then, eventually: getelementptr float, ptr addrspace(1) %x Importantly, the massive amount of test case churn has been automated by same crappy python code. I had to manually update a few test cases that wouldn't fit the script's model (r228970,r229196,r229197,r229198). The python script just massages stdin and writes the result to stdout, I then wrapped that in a shell script to handle replacing files, then using the usual find+xargs to migrate all the files. 
update.py: import fileinput import sys import re ibrep = re.compile(r"(^.?[^%\w]getelementptr inbounds )(((?:<\d x )?)(.?)(\| addrspace\(\d\)) \(\|>)(?:$\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$))") normrep = re.compile( r"(^.?[^%\w]getelementptr )(((?:<\d* x )?)(.?)(\| addrspace\(\d\)) \(\|>)(?:$\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$))") def conv(match, line): if not match: return line line = match.groups()[0] if len(match.groups()[5]) == 0: line += match.groups()[2] line += match.groups()[3] line += ", " line += match.groups()[1] line += "\n" return line for line in sys.stdin: if line.find("getelementptr ") == line.find("getelementptr inbounds"): if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("): line = conv(re.match(ibrep, line), line) elif line.find("getelementptr ") != line.find("getelementptr ("): line = conv(re.match(normrep, line), line) sys.stdout.write(line) apply.sh: for name in "$@" do python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name" rm -f "$name.tmp" done The actual commands: From llvm/src: find test/ -name .ll \| xargs ./apply.sh From llvm/src/tools/clang: find test/ -name .mm -o -name .m -o -name .cpp -o -name .c \| xargs -I '{}' ../../apply.sh "{}" From llvm/src/tools/polly: find test/ -name *.ll \| xargs ./apply.sh After that, check-all (with llvm, clang, clang-tools-extra, lld, compiler-rt, and polly all checked out). The extra 'rm' in the apply.sh script is due to a few files in clang's test suite using interesting unicode stuff that my python script was throwing exceptions on. None of those files needed to be migrated, so it seemed sufficient to ignore those cases. 
Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7636 llvm-svn: 230786 2015-02-28 03:29:02 +08:00			`%ptroffset.i22.i992 = getelementptr [0 x float], [0 x float]* %aFOO, i64 0, i64 1`
Fix PR10845. SUBREG_TO_REG shouldn't be used when the input and destination types are equal! llvm-svn: 139553 2011-09-13 06:59:23 +08:00			`%ptrcast.i23.i = bitcast float* %ptroffset.i22.i992 to i32*`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%val.i24.i = load i32, i32* %ptrcast.i23.i, align 4`
Fix PR10845. SUBREG_TO_REG shouldn't be used when the input and destination types are equal! llvm-svn: 139553 2011-09-13 06:59:23 +08:00			`%updatedret.i30.i = insertelement <8 x i32> undef, i32 %val.i34.i, i32 1`
			`ret <8 x i32> %updatedret.i30.i`
			`}`

Add support for lowering fneg when AVX is enabled. rdar://10566486 llvm-svn: 146625 2011-12-15 09:02:25 +08:00			`;;;; Don't crash on fneg`
			`; rdar://10566486`
Teach DAG combiner to constant fold fneg of a BUILD_VECTOR of constants. llvm-svn: 163483 2012-09-10 06:58:45 +08:00			`define <16 x float> @fneg(<16 x float> %a) nounwind {`
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; CHECK-LABEL: fneg:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]`
			`; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0`
			`; CHECK-NEXT: vxorps %ymm2, %ymm1, %ymm1`
			`; CHECK-NEXT: retq`
Teach DAG combiner to constant fold fneg of a BUILD_VECTOR of constants. llvm-svn: 163483 2012-09-10 06:58:45 +08:00			`%1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a`
Add support for lowering fneg when AVX is enabled. rdar://10566486 llvm-svn: 146625 2011-12-15 09:02:25 +08:00			`ret <16 x float> %1`
			`}`
Fix assert in LowerBUILD_VECTOR for v16i16 type on AVX. Patch by Elena Demikhovsky <elena.demikhovsky@intel.com>! llvm-svn: 146684 2011-12-16 05:34:44 +08:00
			`;;; Don't crash on build vector`
			`define <16 x i16> @build_vec_16x16(i16 %a) nounwind readonly {`
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; CHECK-LABEL: build_vec_16x16:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: movzwl %di, %eax`
			`; CHECK-NEXT: vmovd %eax, %xmm0`
			`; CHECK-NEXT: retq`
Fix assert in LowerBUILD_VECTOR for v16i16 type on AVX. Patch by Elena Demikhovsky <elena.demikhovsky@intel.com>! llvm-svn: 146684 2011-12-16 05:34:44 +08:00			`%res = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %a, i32 0`
			`ret <16 x i16> %res`
			`}`
Put VMOVPQIto64rr in the VRPDI class. Patch by Joshua Magee. llvm-svn: 180842 2013-05-01 21:00:16 +08:00
Remove some instructions that existed to provide aliases to the assembler. Can be done with InstAlias instead. Unfortunately, this was causing printer to use 'vmovq' or 'vmovd' based on what was parsed. To cleanup the inconsistencies convert all 'vmovd' with 64-bit registers to 'vmovq', but provide an alias so that 'vmovd' will still parse. llvm-svn: 192171 2013-10-08 13:53:50 +08:00			`;;; Check that VMOVPQIto64rr generates the assembly string "vmovq". Previously`
Put VMOVPQIto64rr in the VRPDI class. Patch by Joshua Magee. llvm-svn: 180842 2013-05-01 21:00:16 +08:00			`;;; an incorrect mnemonic of "movd" was printed for this instruction.`
			`define i64 @VMOVPQIto64rr(<2 x i64> %a) {`
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; CHECK-LABEL: VMOVPQIto64rr:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: vmovq %xmm0, %rax`
			`; CHECK-NEXT: retq`
Put VMOVPQIto64rr in the VRPDI class. Patch by Joshua Magee. llvm-svn: 180842 2013-05-01 21:00:16 +08:00			`%vecext.i = extractelement <2 x i64> %a, i32 0`
			`ret i64 %vecext.i`
			`}`
[X86] Use vmovss to handle inserting an element into index 0 of a v8f32 vector of zeros. llvm-svn: 231354 2015-03-05 14:38:42 +08:00
			`; PR22685`
			`define <8 x float> @mov00_8f32(float* %ptr) {`
[x86, AVX] tighten checks llvm-svn: 258828 2016-01-27 02:22:50 +08:00			`; CHECK-LABEL: mov00_8f32:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero`
			`; CHECK-NEXT: retq`
[X86] Use vmovss to handle inserting an element into index 0 of a v8f32 vector of zeros. llvm-svn: 231354 2015-03-05 14:38:42 +08:00			`%val = load float, float* %ptr`
			`%vec = insertelement <8 x float> zeroinitializer, float %val, i32 0`
			`ret <8 x float> %vec`
			`}`