From 7dfaaf3891993fb8bbce1c509d263064c7301a19 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 2 Oct 2015 23:29:26 +0000 Subject: [PATCH] [Headers][X86] Fix stream_load (movntdqa) to accept const*. Per Intel intrinsics guide: - _mm256_stream_load_si256 takes `__m256i const *' - _mm_stream_load_si128 takes `__m128i *', for no good reason. Let's accept const* for both. llvm-svn: 249213 --- clang/include/clang/Basic/BuiltinsX86.def | 4 ++-- clang/lib/Headers/avx2intrin.h | 4 ++-- clang/lib/Headers/smmintrin.h | 4 ++-- clang/test/CodeGen/avx2-builtins.c | 2 +- clang/test/CodeGen/sse41-builtins.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 12c27e98bfda..53f7fe6f3277 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -390,7 +390,7 @@ TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2di", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2di", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_movntdqa, "V2LLiV2LLi*", "", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_movntdqa, "V2LLiV2LLiC*", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "", "sse4.1") TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "", "sse4.1") @@ -594,7 +594,7 @@ TARGET_BUILTIN(__builtin_ia32_psrldi256, "V8iV8ii", "", "avx2") TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "", "avx2") TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "", "avx2") TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_movntdqa256, "V4LLiV4LLi*", "", "avx2") +TARGET_BUILTIN(__builtin_ia32_movntdqa256, "V4LLiV4LLiC*", "", "avx2") TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "", "avx2") TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8f", "", "avx2") TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "", "avx2") diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 90b8530126d0..b2a92f12b0f9 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -754,9 +754,9 @@ _mm256_xor_si256(__m256i __a, __m256i __b) } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_stream_load_si256(__m256i *__V) +_mm256_stream_load_si256(__m256i const *__V) { - return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V); + return (__m256i)__builtin_ia32_movntdqa256((const __v4di *)__V); } static __inline__ __m128 __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 90ba9970cdb4..89db27f20c2d 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -151,9 +151,9 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2) /* SSE4 Streaming Load Hint Instruction. */ static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_stream_load_si128 (__m128i *__V) +_mm_stream_load_si128 (__m128i const *__V) { - return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __V); + return (__m128i) __builtin_ia32_movntdqa ((const __v2di *) __V); } /* SSE4 Packed Integer Min/Max Instructions. */ diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c index a29e59ff2240..0fc0a74776fb 100644 --- a/clang/test/CodeGen/avx2-builtins.c +++ b/clang/test/CodeGen/avx2-builtins.c @@ -728,7 +728,7 @@ __m256i test_mm256_unpacklo_epi64(__m256i a, __m256i b) { return _mm256_unpacklo_epi64(a, b); } -__m256i test_mm256_stream_load_si256(__m256i *a) { +__m256i test_mm256_stream_load_si256(__m256i const *a) { // CHECK: @llvm.x86.avx2.movntdqa // CHECK-ASM: vmovntdqa (%rdi), %ymm{{.*}} return _mm256_stream_load_si256(a); diff --git a/clang/test/CodeGen/sse41-builtins.c b/clang/test/CodeGen/sse41-builtins.c index 486c8a30a923..0acb31a0859e 100644 --- a/clang/test/CodeGen/sse41-builtins.c +++ b/clang/test/CodeGen/sse41-builtins.c @@ -385,7 +385,7 @@ __m128 test_mm_round_ss(__m128 x, __m128 y) { return _mm_round_ss(x, y, 2); } -__m128i test_mm_stream_load_si128(__m128i *a) { +__m128i test_mm_stream_load_si128(__m128i const *a) { // CHECK-LABEL: test_mm_stream_load_si128 // CHECK: call <2 x i64> @llvm.x86.sse41.movntdqa // CHECK-ASM: movntdqa