llvm-project/clang/test/CodeGen/avx-builtins.c

// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <immintrin.h>

//
// Test LLVM IR codegen of AVX intrinsics: unaligned loads, element extract/insert, blends and undefined values
//

// Unaligned 256-bit single-precision load through _mm256_loadu_ps.
// The expected IR is a native <8 x float> load with alignment 1. The label
// directive anchors the load pattern to this function, matching the other
// tests in this file.
__m256 test__mm256_loadu_ps(void *p) {
  // CHECK-LABEL: @test__mm256_loadu_ps
  // CHECK: load <8 x float>, <8 x float>* %{{.*}}, align 1
  return _mm256_loadu_ps(p);
}

// Unaligned 256-bit double-precision load through _mm256_loadu_pd.
// The expected IR is a native <4 x double> load with alignment 1. The label
// directive anchors the load pattern to this function, matching the other
// tests in this file.
__m256d test__mm256_loadu_pd(void *p) {
  // CHECK-LABEL: @test__mm256_loadu_pd
  // CHECK: load <4 x double>, <4 x double>* %{{.*}}, align 1
  return _mm256_loadu_pd(p);
}

// Unaligned 256-bit integer load through _mm256_loadu_si256.
// The expected IR is a native <4 x i64> load with alignment 1. The label
// directive anchors the load pattern to this function, matching the other
// tests in this file.
__m256i test__mm256_loadu_si256(void *p) {
  // CHECK-LABEL: @test__mm256_loadu_si256
  // CHECK: load <4 x i64>, <4 x i64>* %{{.+}}, align 1
  return _mm256_loadu_si256(p);
}

// Extracts a 32-bit element using index 8, which is one past the last of the
// eight i32 lanes. The expected IR masks the index with 7 and feeds the masked
// value to the extractelement on <8 x i32>.
int test_extract_epi32(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi32
  // CHECK: [[SHIFT1:%[^ ]+]] = and i32 %{{.*}}, 7
  // CHECK: extractelement <8 x i32> %{{.*}}, i32 [[SHIFT1]]
  return _mm256_extract_epi32(__a, 8);
}

// Extracts a 16-bit element using index 16, which is one past the last of the
// sixteen i16 lanes. The expected IR masks the index with 15 and feeds the
// masked value to the extractelement on <16 x i16>.
int test_extract_epi16(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi16
  // CHECK: [[SHIFT2:%[^ ]+]] = and i32 %{{.*}}, 15
  // CHECK: extractelement <16 x i16> %{{.*}}, i32 [[SHIFT2]]
  return _mm256_extract_epi16(__a, 16);
}

// Extracts an 8-bit element using index 32, which is one past the last of the
// thirty-two i8 lanes. The expected IR masks the index with 31 and feeds the
// masked value to the extractelement on <32 x i8>.
int test_extract_epi8(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi8
  // CHECK: [[SHIFT3:%[^ ]+]] = and i32 %{{.*}}, 31
  // CHECK: extractelement <32 x i8> %{{.*}}, i32 [[SHIFT3]]
  return _mm256_extract_epi8(__a, 32);
}

// Blends two <4 x double> vectors with immediate 0x35. The expected IR is a
// shufflevector whose mask <4, 1, 6, 3> takes lanes 0 and 2 from the second
// operand and lanes 1 and 3 from the first, i.e. the low four bits of the
// immediate (0b0101).
__m256d test_256_blend_pd(__m256d __a, __m256d __b) {
  // CHECK-LABEL: @test_256_blend_pd
  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  return _mm256_blend_pd(__a, __b, 0x35);
}

// Blends two <8 x float> vectors with immediate 0x35 (0b00110101). The
// expected IR is a shufflevector whose mask <8, 1, 10, 3, 12, 13, 6, 7> takes
// lanes 0, 2, 4 and 5 from the second operand and the remaining lanes from
// the first.
__m256 test_256_blend_ps(__m256 __a, __m256 __b) {
  // CHECK-LABEL: @test_256_blend_ps
  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
  return _mm256_blend_ps(__a, __b, 0x35);
}

// Inserts the value 42 at index 3 via _mm256_insert_epi8. The expected IR
// only pins the instruction shape: an insertelement on <32 x i8> with an i8
// value and an i32 index; the operands themselves are wildcarded.
__m256i test_256_insert_epi8(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi8
  // CHECK: insertelement <32 x i8> {{.*}}, i8 {{.*}}, i32 {{.*}}
  return _mm256_insert_epi8(__a, 42, 3);
}

// Inserts the value 42 at index 3 via _mm256_insert_epi16. The expected IR
// only pins the instruction shape: an insertelement on <16 x i16> with an i16
// value and an i32 index; the operands themselves are wildcarded.
__m256i test_256_insert_epi16(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi16
  // CHECK: insertelement <16 x i16> {{.*}}, i16 {{.*}}, i32 {{.*}}
  return _mm256_insert_epi16(__a, 42, 3);
}

// Inserts the value 42 at index 3 via _mm256_insert_epi32. The expected IR
// only pins the instruction shape: an insertelement on <8 x i32> with an i32
// value and an i32 index; the operands themselves are wildcarded.
__m256i test_256_insert_epi32(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi32
  // CHECK: insertelement <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}}
  return _mm256_insert_epi32(__a, 42, 3);
}

// Inserts the value 42 at index 3 via _mm256_insert_epi64. The expected IR
// requires the inserted value to be an i64 on a <4 x i64> vector with an i32
// index; the operands themselves are wildcarded.
__m256i test_256_insert_epi64(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi64
  // CHECK: insertelement <4 x i64> {{.*}}, i64 {{.*}}, i32 {{.*}}
  return _mm256_insert_epi64(__a, 42, 3);
}

// Returns the result of _mm256_undefined_ps(); the expected IR returns an
// undef <8 x float>. Declared with (void) so the definition carries a real
// prototype instead of an unprototyped empty parameter list.
__m256 test_mm256_undefined_ps(void) {
  // CHECK-LABEL: @test_mm256_undefined_ps
  // CHECK: ret <8 x float> undef
  return _mm256_undefined_ps();
}

// Returns the result of _mm256_undefined_pd(); the expected IR returns an
// undef <4 x double>. Declared with (void) so the definition carries a real
// prototype instead of an unprototyped empty parameter list.
__m256d test_mm256_undefined_pd(void) {
  // CHECK-LABEL: @test_mm256_undefined_pd
  // CHECK: ret <4 x double> undef
  return _mm256_undefined_pd();
}

// Returns the result of _mm256_undefined_si256(); the expected IR returns an
// undef <4 x i64>. Declared with (void) so the definition carries a real
// prototype instead of an unprototyped empty parameter list.
__m256i test_mm256_undefined_si256(void) {
  // CHECK-LABEL: @test_mm256_undefined_si256
  // CHECK: ret <4 x i64> undef
  return _mm256_undefined_si256();
}
Canonicalize some of the x86 builtin tests and either remove or comment about optimization options. llvm-svn: 250271 2015-10-14 13:40:21 +08:00			`// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s`
Represent 256-bit unaligned loads natively and remove the builtins. Similar change was made for 128-bit versions a while back. llvm-svn: 148919 2012-01-25 12:26:17 +08:00
			`// Don't include mm_malloc.h, it's system specific.`
			`#define __MM_MALLOC_H`

			`#include <immintrin.h>`

			`//`
			`// Test LLVM IR codegen of shuffle instructions`
			`//`

			`__m256 test__mm256_loadu_ps(void* p) {`
Update Clang tests to handle explicitly typed load changes in LLVM. llvm-svn: 230795 2015-02-28 05:19:58 +08:00			`// CHECK: load <8 x float>, <8 x float>* %{{.*}}, align 1`
Represent 256-bit unaligned loads natively and remove the builtins. Similar change was made for 128-bit versions a while back. llvm-svn: 148919 2012-01-25 12:26:17 +08:00			`return _mm256_loadu_ps(p);`
			`}`

			`__m256d test__mm256_loadu_pd(void* p) {`
Update Clang tests to handle explicitly typed load changes in LLVM. llvm-svn: 230795 2015-02-28 05:19:58 +08:00			`// CHECK: load <4 x double>, <4 x double>* %{{.*}}, align 1`
Represent 256-bit unaligned loads natively and remove the builtins. Similar change was made for 128-bit versions a while back. llvm-svn: 148919 2012-01-25 12:26:17 +08:00			`return _mm256_loadu_pd(p);`
			`}`

			`__m256i test__mm256_loadu_si256(void* p) {`
Update Clang tests to handle explicitly typed load changes in LLVM. llvm-svn: 230795 2015-02-28 05:19:58 +08:00			`// CHECK: load <4 x i64>, <4 x i64>* %{{.+}}, align 1`
Represent 256-bit unaligned loads natively and remove the builtins. Similar change was made for 128-bit versions a while back. llvm-svn: 148919 2012-01-25 12:26:17 +08:00			`return _mm256_loadu_si256(p);`
			`}`
Re-enable pcmpistri/pcmpestri builtins in clang now that llvm supports them properly. llvm-svn: 161319 2012-08-06 15:07:06 +08:00
Intrinsics: fix extract & insert when index is out of bound. Now, all extract & insert intrinsics should have the correct and operation to ignore higher bits. rdar://15250497 llvm-svn: 193267 2013-10-24 04:33:14 +08:00			`int test_extract_epi32(__m256i __a) {`
			`// CHECK-LABEL: @test_extract_epi32`
Canonicalize some of the x86 builtin tests and either remove or comment about optimization options. llvm-svn: 250271 2015-10-14 13:40:21 +08:00			`// CHECK: [[SHIFT1:%[^ ]+]] = and i32 %{{.*}}, 7`
			`// CHECK: extractelement <8 x i32> %{{.*}}, i32 [[SHIFT1]]`
Intrinsics: fix extract & insert when index is out of bound. Now, all extract & insert intrinsics should have the correct and operation to ignore higher bits. rdar://15250497 llvm-svn: 193267 2013-10-24 04:33:14 +08:00			`return _mm256_extract_epi32(__a, 8);`
			`}`

			`int test_extract_epi16(__m256i __a) {`
			`// CHECK-LABEL: @test_extract_epi16`
Canonicalize some of the x86 builtin tests and either remove or comment about optimization options. llvm-svn: 250271 2015-10-14 13:40:21 +08:00			`// CHECK: [[SHIFT2:%[^ ]+]] = and i32 %{{.*}}, 15`
			`// CHECK: extractelement <16 x i16> %{{.*}}, i32 [[SHIFT2]]`
Intrinsics: fix extract & insert when index is out of bound. Now, all extract & insert intrinsics should have the correct and operation to ignore higher bits. rdar://15250497 llvm-svn: 193267 2013-10-24 04:33:14 +08:00			`return _mm256_extract_epi16(__a, 16);`
			`}`

			`int test_extract_epi8(__m256i __a) {`
			`// CHECK-LABEL: @test_extract_epi8`
Canonicalize some of the x86 builtin tests and either remove or comment about optimization options. llvm-svn: 250271 2015-10-14 13:40:21 +08:00			`// CHECK: [[SHIFT3:%[^ ]+]] = and i32 %{{.*}}, 31`
			`// CHECK: extractelement <32 x i8> %{{.*}}, i32 [[SHIFT3]]`
Intrinsics: fix extract & insert when index is out of bound. Now, all extract & insert intrinsics should have the correct and operation to ignore higher bits. rdar://15250497 llvm-svn: 193267 2013-10-24 04:33:14 +08:00			`return _mm256_extract_epi8(__a, 32);`
			`}`
Patched clang to emit x86 blends as shufflevectors. Summary: Most of the clang header patch by Simon Pilgrim @ SCEE. Also fixed (or added) clang tests for these intrinsics. LLVM tests to make sure we get the blend instruction out of these shufflevectors are at http://reviews.llvm.org/D3600 Reviewers: eli.friedman, craig.topper, rafael Subscribers: cfe-commits Differential Revision: http://reviews.llvm.org/D3601 llvm-svn: 208664 2014-05-13 10:37:02 +08:00
			`__m256d test_256_blend_pd(__m256d __a, __m256d __b) {`
			`// CHECK-LABEL: @test_256_blend_pd`
			`// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>`
			`return _mm256_blend_pd(__a, __b, 0x35);`
			`}`

			`__m256 test_256_blend_ps(__m256 __a, __m256 __b) {`
			`// CHECK-LABEL: @test_256_blend_ps`
			`// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>`
			`return _mm256_blend_ps(__a, __b, 0x35);`
			`}`
[Headers] Add tests for _mm256_insert_epi64 and fix its definition Summary: The definition for _mm256_insert_epi64 was taking an int, which would get truncated before being inserted in the vector. Original patch by Joshua Magee! Reviewers: bruno, craig.topper Subscribers: cfe-commits Differential Revision: http://reviews.llvm.org/D7179 llvm-svn: 229811 2015-02-19 11:02:33 +08:00
			`__m256i test_256_insert_epi8(__m256i __a) {`
			`// CHECK-LABEL: @test_256_insert_epi8`
			`// CHECK: insertelement <32 x i8> {{.*}}, i8 {{.*}}, i32 {{.*}}`
			`return _mm256_insert_epi8(__a, 42, 3);`
			`}`

			`__m256i test_256_insert_epi16(__m256i __a) {`
			`// CHECK-LABEL: @test_256_insert_epi16`
			`// CHECK: insertelement <16 x i16> {{.*}}, i16 {{.*}}, i32 {{.*}}`
			`return _mm256_insert_epi16(__a, 42, 3);`
			`}`

			`__m256i test_256_insert_epi32(__m256i __a) {`
			`// CHECK-LABEL: @test_256_insert_epi32`
			`// CHECK: insertelement <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}}`
			`return _mm256_insert_epi32(__a, 42, 3);`
			`}`

			`__m256i test_256_insert_epi64(__m256i __a) {`
			`// CHECK-LABEL: @test_256_insert_epi64`
			`// CHECK: insertelement <4 x i64> {{.*}}, i64 {{.*}}, i32 {{.*}}`
			`return _mm256_insert_epi64(__a, 42, 3);`
			`}`
[X86][SSE] Add _mm_undefined_* intrinsics Added missing SSE/AVX 'undefined' intrinsics (PR24040): _mm_undefined_pd, _mm_undefined_ps + _mm_undefined_si128 _mm256_undefined_pd, _mm256_undefined_ps + _mm256_undefined_si256 _mm512_undefined, _mm512_undefined_ps, _mm512_undefined_pd + _mm512_undefined_epi32 Added builtin intrinsicss: __builtin_ia32_undef128, __builtin_ia32_undef256 + __builtin_ia32_undef512 Differential Revision: http://reviews.llvm.org/D12052 llvm-svn: 246083 2015-08-27 05:17:12 +08:00
			`__m256 test_mm256_undefined_ps() {`
			`// CHECK-LABEL: @test_mm256_undefined_ps`
			`// CHECK: ret <8 x float> undef`
			`return _mm256_undefined_ps();`
			`}`

			`__m256d test_mm256_undefined_pd() {`
			`// CHECK-LABEL: @test_mm256_undefined_pd`
			`// CHECK: ret <4 x double> undef`
			`return _mm256_undefined_pd();`
			`}`

			`__m256i test_mm256_undefined_si256() {`
			`// CHECK-LABEL: @test_mm256_undefined_si256`
			`// CHECK: ret <4 x i64> undef`
			`return _mm256_undefined_si256();`
			`}`