2010-08-21 02:04:07 +08:00
|
|
|
/*===---- immintrin.h - Intel intrinsics -----------------------------------===
|
|
|
|
*
|
2019-04-09 04:51:30 +08:00
|
|
|
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
* See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2010-08-21 02:04:07 +08:00
|
|
|
*
|
|
|
|
*===-----------------------------------------------------------------------===
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __IMMINTRIN_H
|
|
|
|
#define __IMMINTRIN_H
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <mmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <xmmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <emmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <pmmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <tmmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__SSE4_2__) || defined(__SSE4_1__))
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <smmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__AES__) || defined(__PCLMUL__))
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <wmmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2016-07-05 23:56:03 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
|
|
|
|
#include <clflushoptintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2017-10-13 02:57:15 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
|
|
|
|
#include <clwbintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <avxintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
|
2011-12-19 13:04:33 +08:00
|
|
|
#include <avx2intrin.h>
|
2018-05-23 04:33:04 +08:00
|
|
|
#endif
|
2011-12-19 13:04:33 +08:00
|
|
|
|
2018-05-23 02:54:19 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
|
|
|
|
#include <f16cintrin.h>
|
2018-05-23 04:33:04 +08:00
|
|
|
#endif
|
2015-12-03 02:41:52 +08:00
|
|
|
|
2017-12-27 17:00:31 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
|
|
|
|
#include <vpclmulqdqintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
|
2011-12-25 14:25:37 +08:00
|
|
|
#include <bmiintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2011-12-25 14:25:37 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
|
2011-12-26 10:31:10 +08:00
|
|
|
#include <bmi2intrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2011-12-26 10:31:10 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
|
2011-12-25 14:25:37 +08:00
|
|
|
#include <lzcntintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2011-12-25 14:25:37 +08:00
|
|
|
|
2018-05-24 02:32:58 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
|
|
|
|
#include <popcntintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
|
2012-06-04 11:42:47 +08:00
|
|
|
#include <fmaintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2012-06-04 11:42:47 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
|
2014-07-22 19:31:39 +08:00
|
|
|
#include <avx512fintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2014-07-22 19:31:39 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
|
2014-10-09 01:18:13 +08:00
|
|
|
#include <avx512vlintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2014-10-09 01:18:13 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
|
2014-10-09 01:18:13 +08:00
|
|
|
#include <avx512bwintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2014-10-09 01:18:13 +08:00
|
|
|
|
2017-12-27 18:01:00 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
|
|
|
|
#include <avx512bitalgintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
|
2015-06-29 20:51:53 +08:00
|
|
|
#include <avx512cdintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-06-29 20:51:53 +08:00
|
|
|
|
2017-05-25 21:44:11 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
|
|
|
|
#include <avx512vpopcntdqintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2017-12-16 14:02:31 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
|
|
|
|
#include <avx512vpopcntdqvlintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2017-12-27 18:37:51 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
|
|
|
|
#include <avx512vnniintrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__AVX512VL__) && defined(__AVX512VNNI__))
|
|
|
|
#include <avx512vlvnniintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
|
2015-04-30 17:24:29 +08:00
|
|
|
#include <avx512dqintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-04-30 17:24:29 +08:00
|
|
|
|
2017-12-27 18:01:00 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__AVX512VL__) && defined(__AVX512BITALG__))
|
|
|
|
#include <avx512vlbitalgintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__AVX512VL__) && defined(__AVX512BW__))
|
2014-10-09 01:18:13 +08:00
|
|
|
#include <avx512vlbwintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2014-10-09 01:18:13 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__AVX512VL__) && defined(__AVX512CD__))
|
2016-04-27 19:43:14 +08:00
|
|
|
#include <avx512vlcdintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-04-27 19:43:14 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__AVX512VL__) && defined(__AVX512DQ__))
|
2015-04-30 17:24:29 +08:00
|
|
|
#include <avx512vldqintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-04-30 17:24:29 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
|
2014-07-22 19:31:39 +08:00
|
|
|
#include <avx512erintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2014-07-22 19:31:39 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
|
2016-03-07 17:55:55 +08:00
|
|
|
#include <avx512ifmaintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-03-07 17:55:55 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__AVX512IFMA__) && defined(__AVX512VL__))
|
2016-03-07 17:55:55 +08:00
|
|
|
#include <avx512ifmavlintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-03-07 17:55:55 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
|
2016-03-08 01:04:11 +08:00
|
|
|
#include <avx512vbmiintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-03-08 01:04:11 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__AVX512VBMI__) && defined(__AVX512VL__))
|
2016-03-08 01:04:11 +08:00
|
|
|
#include <avx512vbmivlintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-03-08 01:04:11 +08:00
|
|
|
|
[x86][icelake][vbmi2]
added vbmi2 feature recognition
added intrinsics support for vbmi2 instructions
_mm[128,256,512]_mask[z]_compress_epi[16,32]
_mm[128,256,512]_mask_compressstoreu_epi[16,32]
_mm[128,256,512]_mask[z]_expand_epi[16,32]
_mm[128,256,512]_mask[z]_expandloadu_epi[16,32]
_mm[128,256,512]_mask[z]_sh[l,r]di_epi[16,32,64]
_mm[128,256,512]_mask_sh[l,r]dv_epi[16,32,64]
matching a similar work on the backend (D40206)
Differential Revision: https://reviews.llvm.org/D41557
llvm-svn: 321487
2017-12-27 19:25:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
|
|
|
|
#include <avx512vbmi2intrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__AVX512VBMI2__) && defined(__AVX512VL__))
|
|
|
|
#include <avx512vlvbmi2intrin.h>
|
|
|
|
#endif
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
|
2016-04-21 20:47:27 +08:00
|
|
|
#include <avx512pfintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-04-21 20:47:27 +08:00
|
|
|
|
Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake
Summary:
1. Enable infrastructure of AVX512_BF16, which is supported for BFLOAT16 in Cooper Lake;
2. Enable intrinsics for VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision.
For more details about BF16 intrinsic, please refer to the latest ISE document: https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference
Patch by LiuTianle
Reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, annita.zhang, spatel, RKSimon
Reviewed By: craig.topper
Subscribers: mgorny, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D60552
llvm-svn: 360018
2019-05-06 16:25:11 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__)
|
|
|
|
#include <avx512bf16intrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
(defined(__AVX512VL__) && defined(__AVX512BF16__))
|
|
|
|
#include <avx512vlbf16intrin.h>
|
|
|
|
#endif
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
|
2015-12-31 22:14:07 +08:00
|
|
|
#include <pkuintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-12-31 22:14:07 +08:00
|
|
|
|
2017-12-27 16:16:54 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
|
|
|
|
#include <vaesintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2017-12-27 16:37:47 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
|
|
|
|
#include <gfniintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2018-01-21 02:36:52 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
|
2018-05-09 09:00:01 +08:00
|
|
|
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The unsigned 32-bit value produced by the __builtin_ia32_rdpid
///    builtin.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  unsigned int __aux = __builtin_ia32_rdpid();
  return __aux;
}
|
|
|
|
#endif // __RDPID__
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Requests a 16-bit hardware random value through the
/// __builtin_ia32_rdrand16_step builtin (requires the "rdrnd" target feature).
///
/// \param __p
///    Destination pointer, handed to the builtin unchanged.
/// \returns The builtin's status converted to int.
///    NOTE(review): Intel documents the status as 1 on success and 0 when no
///    random value was available -- confirm against the intrinsics guide.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p) {
  int __status = __builtin_ia32_rdrand16_step(__p);
  return __status;
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Requests a 32-bit hardware random value through the
/// __builtin_ia32_rdrand32_step builtin (requires the "rdrnd" target feature).
///
/// \param __p
///    Destination pointer, handed to the builtin unchanged.
/// \returns The builtin's status converted to int.
///    NOTE(review): Intel documents the status as 1 on success and 0 when no
///    random value was available -- confirm against the intrinsics guide.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p) {
  int __status = __builtin_ia32_rdrand32_step(__p);
  return __status;
}
|
|
|
|
|
2017-07-10 15:13:56 +08:00
|
|
|
#ifdef __x86_64__
|
|
|
|
/// Requests a 64-bit hardware random value through the
/// __builtin_ia32_rdrand64_step builtin (requires the "rdrnd" target feature).
///
/// \param __p
///    Destination pointer, handed to the builtin unchanged.
/// \returns The builtin's status converted to int.
///    NOTE(review): Intel documents the status as 1 on success and 0 when no
///    random value was available -- confirm against the intrinsics guide.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
  int __status = __builtin_ia32_rdrand64_step(__p);
  return __status;
}
|
|
|
|
#endif
|
|
|
|
#endif /* __RDRND__ */
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
|
2014-11-03 14:51:41 +08:00
|
|
|
#ifdef __x86_64__
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Returns the 32-bit result of the __builtin_ia32_rdfsbase32 builtin
/// (requires the "fsgsbase" target feature).
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdfsbase32();
  return __base;
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Returns the 64-bit result of the __builtin_ia32_rdfsbase64 builtin
/// (requires the "fsgsbase" target feature).
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdfsbase64();
  return __base;
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Returns the 32-bit result of the __builtin_ia32_rdgsbase32 builtin
/// (requires the "fsgsbase" target feature).
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void) {
  unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Returns the 64-bit result of the __builtin_ia32_rdgsbase64 builtin
/// (requires the "fsgsbase" target feature).
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void) {
  unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Passes \a __V to the __builtin_ia32_wrfsbase32 builtin (requires the
/// "fsgsbase" target feature).
///
/// \param __V
///    32-bit value forwarded to the builtin.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V) {
  __builtin_ia32_wrfsbase32(__V);
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Passes \a __V to the __builtin_ia32_wrfsbase64 builtin (requires the
/// "fsgsbase" target feature).
///
/// \param __V
///    64-bit value forwarded to the builtin.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrfsbase64(__V);
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Passes \a __V to the __builtin_ia32_wrgsbase32 builtin (requires the
/// "fsgsbase" target feature).
///
/// \param __V
///    32-bit value forwarded to the builtin.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V) {
  __builtin_ia32_wrgsbase32(__V);
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Passes \a __V to the __builtin_ia32_wrgsbase64 builtin (requires the
/// "fsgsbase" target feature).
///
/// \param __V
///    64-bit value forwarded to the builtin.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrgsbase64(__V);
}
|
2016-06-01 20:21:00 +08:00
|
|
|
|
2014-11-03 14:51:41 +08:00
|
|
|
#endif
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif /* __FSGSBASE__ */
|
2014-11-03 14:51:41 +08:00
|
|
|
|
2018-09-29 01:09:51 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)
|
|
|
|
|
|
|
|
/* The structs used below are to force the load/store to be unaligned. This
|
|
|
|
* is accomplished with the __packed__ attribute. The __may_alias__ prevents
|
|
|
|
* tbaa metadata from being generated based on the struct and the type of the
|
|
|
|
* field inside of it.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/// Reads a 16-bit value from \a __P and returns it with its two bytes swapped.
///
/// \param __P
///    Address of the 2 bytes to read. The read goes through a packed,
///    may_alias struct, so no particular alignment is required.
/// \returns The loaded value after __builtin_bswap16, as a short.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  struct __loadu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  /* This is a pure read: keep the const qualifier of __P in the cast, and make
   * the unsigned-to-signed narrowing of the bswap result explicit. */
  return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
|
|
|
|
|
|
|
|
/// Stores \a __D at \a __P with its two bytes swapped.
///
/// \param __P
///    Destination address. The write goes through a packed, may_alias struct,
///    so no particular alignment is required.
/// \param __D
///    16-bit value to byte-swap with __builtin_bswap16 and store.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  struct __storeu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i16 *__dst = (struct __storeu_i16 *)__P;
  __dst->__v = __builtin_bswap16(__D);
}
|
|
|
|
|
|
|
|
/// Reads a 32-bit value from \a __P and returns it with its four bytes
/// reversed.
///
/// \param __P
///    Address of the 4 bytes to read. The read goes through a packed,
///    may_alias struct, so no particular alignment is required.
/// \returns The loaded value after __builtin_bswap32, as an int.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  struct __loadu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  /* This is a pure read: keep the const qualifier of __P in the cast, and make
   * the unsigned-to-signed conversion of the bswap result explicit. */
  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
|
|
|
|
|
|
|
|
/// Stores \a __D at \a __P with its four bytes reversed.
///
/// \param __P
///    Destination address. The write goes through a packed, may_alias struct,
///    so no particular alignment is required.
/// \param __D
///    32-bit value to byte-swap with __builtin_bswap32 and store.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  struct __storeu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i32 *__dst = (struct __storeu_i32 *)__P;
  __dst->__v = __builtin_bswap32(__D);
}
|
|
|
|
|
|
|
|
#ifdef __x86_64__
|
|
|
|
/// Reads a 64-bit value from \a __P and returns it with its eight bytes
/// reversed.
///
/// \param __P
///    Address of the 8 bytes to read. The read goes through a packed,
///    may_alias struct, so no particular alignment is required.
/// \returns The loaded value after __builtin_bswap64, as a long long.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  struct __loadu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  /* This is a pure read: keep the const qualifier of __P in the cast, and make
   * the unsigned-to-signed conversion of the bswap result explicit. */
  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
|
|
|
|
|
|
|
|
/// Stores \a __D at \a __P with its eight bytes reversed.
///
/// \param __P
///    Destination address. The write goes through a packed, may_alias struct,
///    so no particular alignment is required.
/// \param __D
///    64-bit value to byte-swap with __builtin_bswap64 and store.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  struct __storeu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i64 *__dst = (struct __storeu_i64 *)__P;
  __dst->__v = __builtin_bswap64(__D);
}
|
|
|
|
#endif
|
|
|
|
#endif /* __MOVBE__ */
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
|
2012-11-10 13:17:46 +08:00
|
|
|
#include <rtmintrin.h>
|
2015-06-18 02:42:07 +08:00
|
|
|
#include <xtestintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2013-03-29 13:14:06 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
|
2013-09-19 22:00:22 +08:00
|
|
|
#include <shaintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2013-09-19 22:00:22 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
|
2015-06-30 17:45:38 +08:00
|
|
|
#include <fxsrintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-06-30 17:45:38 +08:00
|
|
|
|
2019-01-17 06:56:25 +08:00
|
|
|
/* No feature check desired due to internal MSC_VER checks */
|
2015-10-13 20:29:35 +08:00
|
|
|
#include <xsaveintrin.h>
|
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
|
2015-10-13 20:29:35 +08:00
|
|
|
#include <xsaveoptintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-10-13 20:29:35 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
|
2015-10-13 20:29:35 +08:00
|
|
|
#include <xsavecintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-10-13 20:29:35 +08:00
|
|
|
|
2016-05-17 02:14:07 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
|
2015-10-13 20:29:35 +08:00
|
|
|
#include <xsavesintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-10-13 20:29:35 +08:00
|
|
|
|
2017-11-26 20:34:54 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
|
|
|
|
#include <cetintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2015-06-18 02:42:03 +08:00
|
|
|
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
|
|
|
|
* whereas others are also available at all times. */
|
2014-09-19 18:17:06 +08:00
|
|
|
#include <adxintrin.h>
|
|
|
|
|
2018-05-24 02:32:58 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
|
|
|
|
#include <rdseedintrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
|
|
|
|
#include <wbnoinvdintrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
|
|
|
|
#include <cldemoteintrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
|
|
|
|
#include <waitpkgintrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
|
|
|
defined(__MOVDIRI__) || defined(__MOVDIR64B__)
|
|
|
|
#include <movdirintrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
|
|
|
|
#include <pconfigintrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
|
|
|
|
#include <sgxintrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
|
|
|
|
#include <ptwriteintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2018-05-25 14:34:42 +08:00
|
|
|
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)
|
|
|
|
#include <invpcidintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2018-06-15 02:43:52 +08:00
|
|
|
#ifdef _MSC_VER
|
|
|
|
/* Define the default attributes for these intrinsics */
|
|
|
|
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------*\
|
|
|
|
|* Interlocked Exchange HLE
|
|
|
|
\*----------------------------------------------------------------------------*/
|
|
|
|
#if defined(__i386__) || defined(__x86_64__)
|
|
|
|
static __inline__ long __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
|
|
|
|
__asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
|
|
|
|
: "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
|
|
return _Value;
|
|
|
|
}
|
|
|
|
static __inline__ long __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
|
|
|
|
__asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
|
|
|
|
: "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
|
|
return _Value;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if defined(__x86_64__)
|
|
|
|
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
|
|
|
|
__asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
|
|
|
|
: "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
|
|
return _Value;
|
|
|
|
}
|
|
|
|
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
|
|
|
|
__asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
|
|
|
|
: "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
|
|
return _Value;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------*\
|
|
|
|
|* Interlocked Compare Exchange HLE
|
|
|
|
\*----------------------------------------------------------------------------*/
|
|
|
|
#if defined(__i386__) || defined(__x86_64__)
|
|
|
|
static __inline__ long __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
|
|
|
|
long _Exchange, long _Comparand) {
|
|
|
|
__asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
|
|
|
|
: "+a" (_Comparand), "+m" (*_Destination)
|
|
|
|
: "r" (_Exchange) : "memory");
|
|
|
|
return _Comparand;
|
|
|
|
}
|
|
|
|
static __inline__ long __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
|
|
|
|
long _Exchange, long _Comparand) {
|
|
|
|
__asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
|
|
|
|
: "+a" (_Comparand), "+m" (*_Destination)
|
|
|
|
: "r" (_Exchange) : "memory");
|
|
|
|
return _Comparand;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if defined(__x86_64__)
|
|
|
|
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
|
|
|
|
__int64 _Exchange, __int64 _Comparand) {
|
|
|
|
__asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
|
|
|
|
: "+a" (_Comparand), "+m" (*_Destination)
|
|
|
|
: "r" (_Exchange) : "memory");
|
|
|
|
return _Comparand;
|
|
|
|
}
|
|
|
|
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
|
|
|
|
__int64 _Exchange, __int64 _Comparand) {
|
|
|
|
__asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
|
|
|
|
: "+a" (_Comparand), "+m" (*_Destination)
|
|
|
|
: "r" (_Exchange) : "memory");
|
|
|
|
return _Comparand;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#undef __DEFAULT_FN_ATTRS
|
|
|
|
|
|
|
|
#endif /* _MSC_VER */
|
|
|
|
|
2010-08-21 02:04:07 +08:00
|
|
|
#endif /* __IMMINTRIN_H */
|