2010-08-21 02:04:07 +08:00
|
|
|
/*===---- immintrin.h - Intel intrinsics -----------------------------------===
|
|
|
|
*
|
2019-04-09 04:51:30 +08:00
|
|
|
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
* See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2010-08-21 02:04:07 +08:00
|
|
|
*
|
|
|
|
*===-----------------------------------------------------------------------===
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __IMMINTRIN_H
|
|
|
|
#define __IMMINTRIN_H
|
|
|
|
|
2021-09-13 21:27:24 +08:00
|
|
|
#if !defined(__i386__) && !defined(__x86_64__)
|
|
|
|
#error "This header is only meant to be used on x86 and x64 architecture"
|
|
|
|
#endif
|
|
|
|
|
2020-10-13 08:42:46 +08:00
|
|
|
#include <x86gprintrin.h>
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__MMX__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <mmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__SSE__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <xmmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__SSE2__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <emmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__SSE3__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <pmmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__SSSE3__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <tmmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
2016-05-17 02:14:07 +08:00
|
|
|
(defined(__SSE4_2__) || defined(__SSE4_1__))
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <smmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
2016-05-17 02:14:07 +08:00
|
|
|
(defined(__AES__) || defined(__PCLMUL__))
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <wmmintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__CLFLUSHOPT__)
|
2016-07-05 23:56:03 +08:00
|
|
|
#include <clflushoptintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__CLWB__)
|
2017-10-13 02:57:15 +08:00
|
|
|
#include <clwbintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX__)
|
2010-08-21 02:04:07 +08:00
|
|
|
#include <avxintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2010-08-21 02:04:07 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX2__)
|
2011-12-19 13:04:33 +08:00
|
|
|
#include <avx2intrin.h>
|
2018-05-23 04:33:04 +08:00
|
|
|
#endif
|
2011-12-19 13:04:33 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__F16C__)
|
2018-05-23 02:54:19 +08:00
|
|
|
#include <f16cintrin.h>
|
2018-05-23 04:33:04 +08:00
|
|
|
#endif
|
2015-12-03 02:41:52 +08:00
|
|
|
|
2019-10-11 14:07:53 +08:00
|
|
|
/* No feature check desired due to internal checks */
|
2011-12-25 14:25:37 +08:00
|
|
|
#include <bmiintrin.h>
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__BMI2__)
|
2011-12-26 10:31:10 +08:00
|
|
|
#include <bmi2intrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2011-12-26 10:31:10 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__LZCNT__)
|
2011-12-25 14:25:37 +08:00
|
|
|
#include <lzcntintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2011-12-25 14:25:37 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__POPCNT__)
|
2018-05-24 02:32:58 +08:00
|
|
|
#include <popcntintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__FMA__)
|
2012-06-04 11:42:47 +08:00
|
|
|
#include <fmaintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2012-06-04 11:42:47 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512F__)
|
2014-07-22 19:31:39 +08:00
|
|
|
#include <avx512fintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2014-07-22 19:31:39 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512VL__)
|
2014-10-09 01:18:13 +08:00
|
|
|
#include <avx512vlintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2014-10-09 01:18:13 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512BW__)
|
2014-10-09 01:18:13 +08:00
|
|
|
#include <avx512bwintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2014-10-09 01:18:13 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512BITALG__)
|
2017-12-27 18:01:00 +08:00
|
|
|
#include <avx512bitalgintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512CD__)
|
2015-06-29 20:51:53 +08:00
|
|
|
#include <avx512cdintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-06-29 20:51:53 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512VPOPCNTDQ__)
|
2017-05-25 21:44:11 +08:00
|
|
|
#include <avx512vpopcntdqintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
2017-12-16 14:02:31 +08:00
|
|
|
(defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
|
|
|
|
#include <avx512vpopcntdqvlintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512VNNI__)
|
2017-12-27 18:37:51 +08:00
|
|
|
#include <avx512vnniintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
2017-12-27 18:37:51 +08:00
|
|
|
(defined(__AVX512VL__) && defined(__AVX512VNNI__))
|
|
|
|
#include <avx512vlvnniintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-10-30 12:58:05 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVXVNNI__)
|
|
|
|
#include <avxvnniintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512DQ__)
|
2015-04-30 17:24:29 +08:00
|
|
|
#include <avx512dqintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-04-30 17:24:29 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
2017-12-27 18:01:00 +08:00
|
|
|
(defined(__AVX512VL__) && defined(__AVX512BITALG__))
|
|
|
|
#include <avx512vlbitalgintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
2016-05-17 02:14:07 +08:00
|
|
|
(defined(__AVX512VL__) && defined(__AVX512BW__))
|
2014-10-09 01:18:13 +08:00
|
|
|
#include <avx512vlbwintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2014-10-09 01:18:13 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
2016-05-17 02:14:07 +08:00
|
|
|
(defined(__AVX512VL__) && defined(__AVX512CD__))
|
2016-04-27 19:43:14 +08:00
|
|
|
#include <avx512vlcdintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-04-27 19:43:14 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
2016-05-17 02:14:07 +08:00
|
|
|
(defined(__AVX512VL__) && defined(__AVX512DQ__))
|
2015-04-30 17:24:29 +08:00
|
|
|
#include <avx512vldqintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-04-30 17:24:29 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512ER__)
|
2014-07-22 19:31:39 +08:00
|
|
|
#include <avx512erintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2014-07-22 19:31:39 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512IFMA__)
|
2016-03-07 17:55:55 +08:00
|
|
|
#include <avx512ifmaintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-03-07 17:55:55 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
2016-05-17 02:14:07 +08:00
|
|
|
(defined(__AVX512IFMA__) && defined(__AVX512VL__))
|
2016-03-07 17:55:55 +08:00
|
|
|
#include <avx512ifmavlintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-03-07 17:55:55 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512VBMI__)
|
2016-03-08 01:04:11 +08:00
|
|
|
#include <avx512vbmiintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-03-08 01:04:11 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
2016-05-17 02:14:07 +08:00
|
|
|
(defined(__AVX512VBMI__) && defined(__AVX512VL__))
|
2016-03-08 01:04:11 +08:00
|
|
|
#include <avx512vbmivlintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-03-08 01:04:11 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512VBMI2__)
|
[x86][icelake][vbmi2]
added vbmi2 feature recognition
added intrinsics support for vbmi2 instructions
_mm[128,256,512]_mask[z]_compress_epi[16,32]
_mm[128,256,512]_mask_compressstoreu_epi[16,32]
_mm[128,256,512]_mask[z]_expand_epi[16,32]
_mm[128,256,512]_mask[z]_expandloadu_epi[16,32]
_mm[128,256,512]_mask[z]_sh[l,r]di_epi[16,32,64]
_mm[128,256,512]_mask_sh[l,r]dv_epi[16,32,64]
matching a similar work on the backend (D40206)
Differential Revision: https://reviews.llvm.org/D41557
llvm-svn: 321487
2017-12-27 19:25:07 +08:00
|
|
|
#include <avx512vbmi2intrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
[x86][icelake][vbmi2]
added vbmi2 feature recognition
added intrinsics support for vbmi2 instructions
_mm[128,256,512]_mask[z]_compress_epi[16,32]
_mm[128,256,512]_mask_compressstoreu_epi[16,32]
_mm[128,256,512]_mask[z]_expand_epi[16,32]
_mm[128,256,512]_mask[z]_expandloadu_epi[16,32]
_mm[128,256,512]_mask[z]_sh[l,r]di_epi[16,32,64]
_mm[128,256,512]_mask_sh[l,r]dv_epi[16,32,64]
matching a similar work on the backend (D40206)
Differential Revision: https://reviews.llvm.org/D41557
llvm-svn: 321487
2017-12-27 19:25:07 +08:00
|
|
|
(defined(__AVX512VBMI2__) && defined(__AVX512VL__))
|
|
|
|
#include <avx512vlvbmi2intrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512PF__)
|
2016-04-21 20:47:27 +08:00
|
|
|
#include <avx512pfintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2016-04-21 20:47:27 +08:00
|
|
|
|
2021-08-10 11:18:40 +08:00
|
|
|
/*
|
|
|
|
* FIXME: _Float16 type is legal only when the hardware supports float16 operations.
|
|
|
|
* We use __AVX512FP16__ to identify if float16 is supported or not, so
|
|
|
|
* when float16 is not supported, the related header is not included.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
#if defined(__AVX512FP16__)
|
|
|
|
#include <avx512fp16intrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(__AVX512FP16__) && defined(__AVX512VL__)
|
|
|
|
#include <avx512vlfp16intrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512BF16__)
|
Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake
Summary:
1. Enable infrastructure of AVX512_BF16, which is supported for BFLOAT16 in Cooper Lake;
2. Enable intrinsics for VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision.
For more details about BF16 intrinsic, please refer to the latest ISE document: https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference
Patch by LiuTianle
Reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, annita.zhang, spatel, RKSimon
Reviewed By: craig.topper
Subscribers: mgorny, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D60552
llvm-svn: 360018
2019-05-06 16:25:11 +08:00
|
|
|
#include <avx512bf16intrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake
Summary:
1. Enable infrastructure of AVX512_BF16, which is supported for BFLOAT16 in Cooper Lake;
2. Enable intrinsics for VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision.
For more details about BF16 intrinsic, please refer to the latest ISE document: https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference
Patch by LiuTianle
Reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, annita.zhang, spatel, RKSimon
Reviewed By: craig.topper
Subscribers: mgorny, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D60552
llvm-svn: 360018
2019-05-06 16:25:11 +08:00
|
|
|
(defined(__AVX512VL__) && defined(__AVX512BF16__))
|
|
|
|
#include <avx512vlbf16intrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__PKU__)
|
2015-12-31 22:14:07 +08:00
|
|
|
#include <pkuintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-12-31 22:14:07 +08:00
|
|
|
|
2021-03-29 02:25:21 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__VPCLMULQDQ__)
|
|
|
|
#include <vpclmulqdqintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__VAES__)
|
2017-12-27 16:16:54 +08:00
|
|
|
#include <vaesintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__GFNI__)
|
2017-12-27 16:37:47 +08:00
|
|
|
#include <gfniintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__RDPID__)
|
2018-05-09 09:00:01 +08:00
|
|
|
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The unsigned 32-bit value produced by
///    \c __builtin_ia32_rdpid(), passed through unchanged.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
|
|
|
|
#endif // __RDPID__
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__RDRND__)
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Thin wrapper around \c __builtin_ia32_rdrand16_step.
///
/// \param __p
///    Pointer to an unsigned 16-bit location; handed to the builtin as-is.
/// \returns The builtin's \c int result, unchanged.
///
/// NOTE(review): Intel documents the result as 1 when a random value was
/// stored through \a __p and 0 otherwise -- confirm against the builtin.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return __builtin_ia32_rdrand16_step(__p);
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Thin wrapper around \c __builtin_ia32_rdrand32_step.
///
/// \param __p
///    Pointer to an unsigned 32-bit location; handed to the builtin as-is.
/// \returns The builtin's \c int result, unchanged.
///
/// NOTE(review): Intel documents the result as 1 when a random value was
/// stored through \a __p and 0 otherwise -- confirm against the builtin.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return __builtin_ia32_rdrand32_step(__p);
}
|
|
|
|
|
2017-07-10 15:13:56 +08:00
|
|
|
#ifdef __x86_64__
|
|
|
|
/// Thin wrapper around \c __builtin_ia32_rdrand64_step.
///
/// \param __p
///    Pointer to an unsigned 64-bit location; handed to the builtin as-is.
/// \returns The builtin's \c int result, unchanged.
///
/// NOTE(review): Intel documents the result as 1 when a random value was
/// stored through \a __p and 0 otherwise -- confirm against the builtin.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  return __builtin_ia32_rdrand64_step(__p);
}
|
|
|
|
#endif
|
|
|
|
#endif /* __RDRND__ */
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__FSGSBASE__)
|
2014-11-03 14:51:41 +08:00
|
|
|
#ifdef __x86_64__
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Returns the result of \c __builtin_ia32_rdfsbase32() unchanged.
///
/// NOTE(review): presumably the low 32 bits of the FS segment base
/// (RDFSBASE) -- confirm against the builtin's definition.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Returns the result of \c __builtin_ia32_rdfsbase64() unchanged.
///
/// NOTE(review): presumably the full 64-bit FS segment base (RDFSBASE) --
/// confirm against the builtin's definition.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Returns the result of \c __builtin_ia32_rdgsbase32() unchanged.
///
/// NOTE(review): presumably the low 32 bits of the GS segment base
/// (RDGSBASE) -- confirm against the builtin's definition.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Returns the result of \c __builtin_ia32_rdgsbase64() unchanged.
///
/// NOTE(review): presumably the full 64-bit GS segment base (RDGSBASE) --
/// confirm against the builtin's definition.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Passes \a __V to \c __builtin_ia32_wrfsbase32 and returns nothing.
///
/// \param __V
///    Unsigned 32-bit value forwarded to the builtin.
///
/// NOTE(review): presumably written to the FS segment base (WRFSBASE) --
/// confirm against the builtin's definition.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Passes \a __V to \c __builtin_ia32_wrfsbase64 and returns nothing.
///
/// \param __V
///    Unsigned 64-bit value forwarded to the builtin.
///
/// NOTE(review): presumably written to the FS segment base (WRFSBASE) --
/// confirm against the builtin's definition.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Passes \a __V to \c __builtin_ia32_wrgsbase32 and returns nothing.
///
/// \param __V
///    Unsigned 32-bit value forwarded to the builtin.
///
/// NOTE(review): presumably written to the GS segment base (WRGSBASE) --
/// confirm against the builtin's definition.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}
|
|
|
|
|
2015-06-17 15:09:32 +08:00
|
|
|
/// Passes \a __V to \c __builtin_ia32_wrgsbase64 and returns nothing.
///
/// \param __V
///    Unsigned 64-bit value forwarded to the builtin.
///
/// NOTE(review): presumably written to the GS segment base (WRGSBASE) --
/// confirm against the builtin's definition.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
|
2016-06-01 20:21:00 +08:00
|
|
|
|
2014-11-03 14:51:41 +08:00
|
|
|
#endif
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif /* __FSGSBASE__ */
|
2014-11-03 14:51:41 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__MOVBE__)
|
2018-09-29 01:09:51 +08:00
|
|
|
|
|
|
|
/* The structs used below are to force the load/store to be unaligned. This
|
|
|
|
* is accomplished with the __packed__ attribute. The __may_alias__ prevents
|
|
|
|
* tbaa metadata from being generated based on the struct and the type of the
|
|
|
|
* field inside of it.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/// Loads a 16-bit value from \a __P and returns it with its two bytes
/// swapped.
///
/// \param __P
///    Address to read from. The read goes through a packed, may_alias
///    wrapper struct, so no particular alignment of \a __P is required.
/// \returns \c __builtin_bswap16 of the loaded value, converted to \c short.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  /* Packed + may_alias wrapper: forces an unaligned-safe load that is exempt
   * from type-based alias analysis. */
  struct __loadu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
|
|
|
|
|
|
|
|
/// Stores \a __D to \a __P with its two bytes swapped.
///
/// \param __P
///    Address to write to. The write goes through a packed, may_alias
///    wrapper struct, so no particular alignment of \a __P is required.
/// \param __D
///    16-bit value; \c __builtin_bswap16 is applied before it is stored.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  /* Packed + may_alias wrapper: forces an unaligned-safe store that is exempt
   * from type-based alias analysis. */
  struct __storeu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);
}
|
|
|
|
|
|
|
|
/// Loads a 32-bit value from \a __P and returns it with its four bytes
/// reversed.
///
/// \param __P
///    Address to read from. The read goes through a packed, may_alias
///    wrapper struct, so no particular alignment of \a __P is required.
/// \returns \c __builtin_bswap32 of the loaded value, converted to \c int.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  /* Packed + may_alias wrapper: forces an unaligned-safe load that is exempt
   * from type-based alias analysis. */
  struct __loadu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
|
|
|
|
|
|
|
|
/// Stores \a __D to \a __P with its four bytes reversed.
///
/// \param __P
///    Address to write to. The write goes through a packed, may_alias
///    wrapper struct, so no particular alignment of \a __P is required.
/// \param __D
///    32-bit value; \c __builtin_bswap32 is applied before it is stored.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  /* Packed + may_alias wrapper: forces an unaligned-safe store that is exempt
   * from type-based alias analysis. */
  struct __storeu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);
}
|
|
|
|
|
|
|
|
#ifdef __x86_64__
|
|
|
|
/// Loads a 64-bit value from \a __P and returns it with its eight bytes
/// reversed.
///
/// \param __P
///    Address to read from. The read goes through a packed, may_alias
///    wrapper struct, so no particular alignment of \a __P is required.
/// \returns \c __builtin_bswap64 of the loaded value, converted to
///    \c long \c long.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  /* Packed + may_alias wrapper: forces an unaligned-safe load that is exempt
   * from type-based alias analysis. */
  struct __loadu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
|
|
|
|
|
|
|
|
/// Stores \a __D to \a __P with its eight bytes reversed.
///
/// \param __P
///    Address to write to. The write goes through a packed, may_alias
///    wrapper struct, so no particular alignment of \a __P is required.
/// \param __D
///    64-bit value; \c __builtin_bswap64 is applied before it is stored.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  /* Packed + may_alias wrapper: forces an unaligned-safe store that is exempt
   * from type-based alias analysis. */
  struct __storeu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);
}
|
|
|
|
#endif
|
|
|
|
#endif /* __MOVBE__ */
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__RTM__)
|
2012-11-10 13:17:46 +08:00
|
|
|
#include <rtmintrin.h>
|
2015-06-18 02:42:07 +08:00
|
|
|
#include <xtestintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2013-03-29 13:14:06 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__SHA__)
|
2013-09-19 22:00:22 +08:00
|
|
|
#include <shaintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2013-09-19 22:00:22 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__FXSR__)
|
2015-06-30 17:45:38 +08:00
|
|
|
#include <fxsrintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-06-30 17:45:38 +08:00
|
|
|
|
2019-01-17 06:56:25 +08:00
|
|
|
/* No feature check desired due to internal MSC_VER checks */
|
2015-10-13 20:29:35 +08:00
|
|
|
#include <xsaveintrin.h>
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__XSAVEOPT__)
|
2015-10-13 20:29:35 +08:00
|
|
|
#include <xsaveoptintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-10-13 20:29:35 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__XSAVEC__)
|
2015-10-13 20:29:35 +08:00
|
|
|
#include <xsavecintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-10-13 20:29:35 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__XSAVES__)
|
2015-10-13 20:29:35 +08:00
|
|
|
#include <xsavesintrin.h>
|
2016-05-17 02:14:07 +08:00
|
|
|
#endif
|
2015-10-13 20:29:35 +08:00
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__SHSTK__)
|
2017-11-26 20:34:54 +08:00
|
|
|
#include <cetintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2015-06-18 02:42:03 +08:00
|
|
|
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
|
|
|
|
* whereas others are available at all times. */
|
2014-09-19 18:17:06 +08:00
|
|
|
#include <adxintrin.h>
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__RDSEED__)
|
2018-05-24 02:32:58 +08:00
|
|
|
#include <rdseedintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__WBNOINVD__)
|
2018-05-24 02:32:58 +08:00
|
|
|
#include <wbnoinvdintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__CLDEMOTE__)
|
2018-05-24 02:32:58 +08:00
|
|
|
#include <cldemoteintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__WAITPKG__)
|
2018-05-24 02:32:58 +08:00
|
|
|
#include <waitpkgintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__MOVDIRI__) || defined(__MOVDIR64B__)
|
2018-05-24 02:32:58 +08:00
|
|
|
#include <movdirintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__PCONFIG__)
|
2018-05-24 02:32:58 +08:00
|
|
|
#include <pconfigintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__SGX__)
|
2018-05-24 02:32:58 +08:00
|
|
|
#include <sgxintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__PTWRITE__)
|
2018-05-24 02:32:58 +08:00
|
|
|
#include <ptwriteintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__INVPCID__)
|
2018-05-25 14:34:42 +08:00
|
|
|
#include <invpcidintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-09-30 18:01:15 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
2020-10-05 03:09:21 +08:00
|
|
|
defined(__KL__) || defined(__WIDEKL__)
|
2020-09-30 18:01:15 +08:00
|
|
|
#include <keylockerintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-07-07 09:50:17 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AMXTILE__) || defined(__AMXINT8__) || defined(__AMXBF16__)
|
|
|
|
#include <amxintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__AVX512VP2INTERSECT__)
|
2019-05-31 14:09:35 +08:00
|
|
|
#include <avx512vp2intersectintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
(defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
|
2019-05-31 14:09:35 +08:00
|
|
|
#include <avx512vlvp2intersectintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__ENQCMD__)
|
2019-06-06 16:28:42 +08:00
|
|
|
#include <enqcmdintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__SERIALIZE__)
|
2020-04-02 16:15:34 +08:00
|
|
|
#include <serializeintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2020-05-01 07:17:34 +08:00
|
|
|
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
|
|
|
defined(__TSXLDTRK__)
|
2020-04-09 13:15:42 +08:00
|
|
|
#include <tsxldtrkintrin.h>
|
|
|
|
#endif
|
|
|
|
|
[X86] Make `x86intrin.h`, `immintrin.h` includable with `-fno-gnu-inline-asm`.
Currently `immintrin.h` includes `pconfigintrin.h` and `sgxintrin.h`
which contain inline assembly. It causes failures when building with the
flag `-fno-gnu-inline-asm`.
Fix by excluding functions with inline assembly when this extension is
disabled. So far there was no need to support `_pconfig_u32`,
`_enclu_u32`, `_encls_u32`, `_enclv_u32` on platforms that require
`-fno-gnu-inline-asm`. But if developers start using these functions,
they'll have compile-time undeclared identifier errors which is
preferrable to runtime errors.
rdar://problem/49540880
Reviewers: craig.topper, GBuella, rnk, echristo
Reviewed By: rnk
Subscribers: jkorous, dexonsmith, cfe-commits
Differential Revision: https://reviews.llvm.org/D61621
llvm-svn: 360630
2019-05-14 06:40:11 +08:00
|
|
|
#if defined(_MSC_VER) && __has_extension(gnu_asm)
|
2018-06-15 02:43:52 +08:00
|
|
|
/* Define the default attributes for these intrinsics */
|
|
|
|
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------*\
|
|
|
|
|* Interlocked Exchange HLE
|
|
|
|
\*----------------------------------------------------------------------------*/
|
|
|
|
#if defined(__i386__) || defined(__x86_64__)
|
|
|
|
/*
 * Atomically exchanges _Value with the long at _Target and returns whatever
 * the instruction leaves in _Value (for XCHG, the previous contents of
 * the target location).
 *
 * _Target: location to swap; read and written by the asm ("+m" operand 1).
 * _Value:  value to store; held in a general register that the asm also
 *          overwrites with the result ("+r" operand 0).
 *
 * The instruction is `lock xchg` preceded by the raw prefix byte 0xf2, which
 * is the encoding of the HLE acquire (XACQUIRE) hint this function is named
 * for. The `{%0, %1|%1, %0}` template gives the operand order for both the
 * AT&T and the Intel inline-asm dialect. The "memory" clobber makes the
 * statement a compiler-level barrier for memory accesses.
 */
static __inline__ long __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
|
[clang] Make -masm=intel affect inline asm style
With this,
void f() { __asm__("mov eax, ebx"); }
now compiles with clang with -masm=intel.
This matches gcc.
The flag is not accepted in clang-cl mode. It has no effect on
MSVC-style `__asm {}` blocks, which are unconditionally in intel
mode both before and after this change.
One difference to gcc is that in clang, inline asm strings are
"local" while they're "global" in gcc. Building the following with
-masm=intel works with clang, but not with gcc where the ".att_syntax"
from the 2nd __asm__() is in effect until file end (or until a
".intel_syntax" somewhere later in the file):
__asm__("mov eax, ebx");
__asm__(".att_syntax\nmovl %ebx, %eax");
__asm__("mov eax, ebx");
This also updates clang's intrinsic headers to work both in
-masm=att (the default) and -masm=intel modes.
The official solution for this according to "Multiple assembler dialects in asm
templates" in gcc docs->Extensions->Inline Assembly->Extended Asm
is to write every inline asm snippet twice:
bt{l %[Offset],%[Base] | %[Base],%[Offset]}
This works in LLVM after D113932 and D113894, so use that.
(Just putting `.att_syntax` at the start of the snippet works in some but not
all cases: When LLVM interpolates in parameters like `%0`, it uses at&t or
intel syntax according to the inline asm snippet's flavor, so the `.att_syntax`
within the snippet happens to late: The interpolated-in parameter is already
in intel style, and then won't parse in the switched `.att_syntax`.)
It might be nice to invent a `#pragma clang asm_dialect push "att"` /
`#pragma clang asm_dialect pop` to be able to force asm style per snippet,
so that the inline asm string doesn't contain the same code in two variants,
but let's leave that for a follow-up.
Fixes PR21401 and PR20241.
Differential Revision: https://reviews.llvm.org/D113707
2021-11-12 03:20:02 +08:00
|
|
|
__asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
|
2018-06-15 02:43:52 +08:00
|
|
|
: "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
|
|
return _Value;
|
|
|
|
}
|
|
|
|
/*
 * Release-side counterpart of the HLE exchange: atomically swaps _Value with
 * the long at _Target and returns whatever the instruction leaves in _Value
 * (for XCHG, the previous contents of the target location).
 *
 * _Target: location to swap; read and written by the asm ("+m" operand 1).
 * _Value:  value to store; held in a general register that the asm also
 *          overwrites with the result ("+r" operand 0).
 *
 * The only difference from the acquire form is the raw prefix byte: 0xf3,
 * the encoding of the HLE release (XRELEASE) hint. The `{%0, %1|%1, %0}`
 * template covers both AT&T and Intel operand order, and the "memory"
 * clobber makes the statement a compiler-level barrier for memory accesses.
 */
static __inline__ long __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
|
[clang] Make -masm=intel affect inline asm style
With this,
void f() { __asm__("mov eax, ebx"); }
now compiles with clang with -masm=intel.
This matches gcc.
The flag is not accepted in clang-cl mode. It has no effect on
MSVC-style `__asm {}` blocks, which are unconditionally in intel
mode both before and after this change.
One difference to gcc is that in clang, inline asm strings are
"local" while they're "global" in gcc. Building the following with
-masm=intel works with clang, but not with gcc where the ".att_syntax"
from the 2nd __asm__() is in effect until file end (or until a
".intel_syntax" somewhere later in the file):
__asm__("mov eax, ebx");
__asm__(".att_syntax\nmovl %ebx, %eax");
__asm__("mov eax, ebx");
This also updates clang's intrinsic headers to work both in
-masm=att (the default) and -masm=intel modes.
The official solution for this according to "Multiple assembler dialects in asm
templates" in gcc docs->Extensions->Inline Assembly->Extended Asm
is to write every inline asm snippet twice:
bt{l %[Offset],%[Base] | %[Base],%[Offset]}
This works in LLVM after D113932 and D113894, so use that.
(Just putting `.att_syntax` at the start of the snippet works in some but not
all cases: When LLVM interpolates in parameters like `%0`, it uses at&t or
intel syntax according to the inline asm snippet's flavor, so the `.att_syntax`
within the snippet happens to late: The interpolated-in parameter is already
in intel style, and then won't parse in the switched `.att_syntax`.)
It might be nice to invent a `#pragma clang asm_dialect push "att"` /
`#pragma clang asm_dialect pop` to be able to force asm style per snippet,
so that the inline asm string doesn't contain the same code in two variants,
but let's leave that for a follow-up.
Fixes PR21401 and PR20241.
Differential Revision: https://reviews.llvm.org/D113707
2021-11-12 03:20:02 +08:00
|
|
|
__asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
|
2018-06-15 02:43:52 +08:00
|
|
|
: "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
|
|
return _Value;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if defined(__x86_64__)
|
|
|
|
/*
 * __int64 variant of the HLE-acquire exchange; it sits inside an
 * `#if defined(__x86_64__)` region, so it only exists for x86-64 builds.
 * Atomically swaps _Value with the __int64 at _Target and returns whatever
 * the instruction leaves in _Value (for XCHG, the previous contents of the
 * target location).
 *
 * _Target: location to swap; read and written by the asm ("+m" operand 1).
 * _Value:  value to store; held in a general register that the asm also
 *          overwrites with the result ("+r" operand 0).
 *
 * Emitted as `lock xchg` behind the raw prefix byte 0xf2 (the HLE acquire,
 * XACQUIRE, encoding). The `{%0, %1|%1, %0}` template covers both AT&T and
 * Intel operand order, and the "memory" clobber makes the statement a
 * compiler-level barrier for memory accesses.
 */
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
|
[clang] Make -masm=intel affect inline asm style
With this,
void f() { __asm__("mov eax, ebx"); }
now compiles with clang with -masm=intel.
This matches gcc.
The flag is not accepted in clang-cl mode. It has no effect on
MSVC-style `__asm {}` blocks, which are unconditionally in intel
mode both before and after this change.
One difference to gcc is that in clang, inline asm strings are
"local" while they're "global" in gcc. Building the following with
-masm=intel works with clang, but not with gcc where the ".att_syntax"
from the 2nd __asm__() is in effect until file end (or until a
".intel_syntax" somewhere later in the file):
__asm__("mov eax, ebx");
__asm__(".att_syntax\nmovl %ebx, %eax");
__asm__("mov eax, ebx");
This also updates clang's intrinsic headers to work both in
-masm=att (the default) and -masm=intel modes.
The official solution for this according to "Multiple assembler dialects in asm
templates" in gcc docs->Extensions->Inline Assembly->Extended Asm
is to write every inline asm snippet twice:
bt{l %[Offset],%[Base] | %[Base],%[Offset]}
This works in LLVM after D113932 and D113894, so use that.
(Just putting `.att_syntax` at the start of the snippet works in some but not
all cases: When LLVM interpolates in parameters like `%0`, it uses at&t or
intel syntax according to the inline asm snippet's flavor, so the `.att_syntax`
within the snippet happens to late: The interpolated-in parameter is already
in intel style, and then won't parse in the switched `.att_syntax`.)
It might be nice to invent a `#pragma clang asm_dialect push "att"` /
`#pragma clang asm_dialect pop` to be able to force asm style per snippet,
so that the inline asm string doesn't contain the same code in two variants,
but let's leave that for a follow-up.
Fixes PR21401 and PR20241.
Differential Revision: https://reviews.llvm.org/D113707
2021-11-12 03:20:02 +08:00
|
|
|
__asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
|
2018-06-15 02:43:52 +08:00
|
|
|
: "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
|
|
return _Value;
|
|
|
|
}
|
|
|
|
/*
 * __int64 variant of the HLE-release exchange; it sits inside an
 * `#if defined(__x86_64__)` region, so it only exists for x86-64 builds.
 * Atomically swaps _Value with the __int64 at _Target and returns whatever
 * the instruction leaves in _Value (for XCHG, the previous contents of the
 * target location).
 *
 * _Target: location to swap; read and written by the asm ("+m" operand 1).
 * _Value:  value to store; held in a general register that the asm also
 *          overwrites with the result ("+r" operand 0).
 *
 * Emitted as `lock xchg` behind the raw prefix byte 0xf3 (the HLE release,
 * XRELEASE, encoding). The `{%0, %1|%1, %0}` template covers both AT&T and
 * Intel operand order, and the "memory" clobber makes the statement a
 * compiler-level barrier for memory accesses.
 */
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
|
[clang] Make -masm=intel affect inline asm style
With this,
void f() { __asm__("mov eax, ebx"); }
now compiles with clang with -masm=intel.
This matches gcc.
The flag is not accepted in clang-cl mode. It has no effect on
MSVC-style `__asm {}` blocks, which are unconditionally in intel
mode both before and after this change.
One difference to gcc is that in clang, inline asm strings are
"local" while they're "global" in gcc. Building the following with
-masm=intel works with clang, but not with gcc where the ".att_syntax"
from the 2nd __asm__() is in effect until file end (or until a
".intel_syntax" somewhere later in the file):
__asm__("mov eax, ebx");
__asm__(".att_syntax\nmovl %ebx, %eax");
__asm__("mov eax, ebx");
This also updates clang's intrinsic headers to work both in
-masm=att (the default) and -masm=intel modes.
The official solution for this according to "Multiple assembler dialects in asm
templates" in gcc docs->Extensions->Inline Assembly->Extended Asm
is to write every inline asm snippet twice:
bt{l %[Offset],%[Base] | %[Base],%[Offset]}
This works in LLVM after D113932 and D113894, so use that.
(Just putting `.att_syntax` at the start of the snippet works in some but not
all cases: When LLVM interpolates in parameters like `%0`, it uses at&t or
intel syntax according to the inline asm snippet's flavor, so the `.att_syntax`
within the snippet happens to late: The interpolated-in parameter is already
in intel style, and then won't parse in the switched `.att_syntax`.)
It might be nice to invent a `#pragma clang asm_dialect push "att"` /
`#pragma clang asm_dialect pop` to be able to force asm style per snippet,
so that the inline asm string doesn't contain the same code in two variants,
but let's leave that for a follow-up.
Fixes PR21401 and PR20241.
Differential Revision: https://reviews.llvm.org/D113707
2021-11-12 03:20:02 +08:00
|
|
|
__asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
|
2018-06-15 02:43:52 +08:00
|
|
|
: "+r" (_Value), "+m" (*_Target) :: "memory");
|
|
|
|
return _Value;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------*\
|
|
|
|
|* Interlocked Compare Exchange HLE
|
|
|
|
\*----------------------------------------------------------------------------*/
|
|
|
|
#if defined(__i386__) || defined(__x86_64__)
|
|
|
|
/*
 * Atomic compare-and-exchange on a long with the HLE acquire hint.
 *
 * _Destination: location to compare and possibly update; read and written
 *               by the asm ("+m" operand 1).
 * _Exchange:    replacement value, passed in a general register
 *               ("r" operand 2).
 * _Comparand:   expected value; pinned to the accumulator register by the
 *               "+a" constraint (operand 0) and overwritten by the asm.
 *
 * Returns _Comparand as left by the instruction. Per the x86 CMPXCHG
 * definition that is the value observed at the destination: equal to the
 * original _Comparand when the store of _Exchange happened, otherwise the
 * differing current value.
 *
 * Emitted as `lock cmpxchg` behind the raw prefix byte 0xf2 (the HLE
 * acquire, XACQUIRE, encoding). The `{%2, %1|%1, %2}` template covers both
 * AT&T and Intel operand order, and the "memory" clobber makes the statement
 * a compiler-level barrier for memory accesses.
 */
static __inline__ long __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
|
|
|
|
long _Exchange, long _Comparand) {
|
[clang] Make -masm=intel affect inline asm style
With this,
void f() { __asm__("mov eax, ebx"); }
now compiles with clang with -masm=intel.
This matches gcc.
The flag is not accepted in clang-cl mode. It has no effect on
MSVC-style `__asm {}` blocks, which are unconditionally in intel
mode both before and after this change.
One difference to gcc is that in clang, inline asm strings are
"local" while they're "global" in gcc. Building the following with
-masm=intel works with clang, but not with gcc where the ".att_syntax"
from the 2nd __asm__() is in effect until file end (or until a
".intel_syntax" somewhere later in the file):
__asm__("mov eax, ebx");
__asm__(".att_syntax\nmovl %ebx, %eax");
__asm__("mov eax, ebx");
This also updates clang's intrinsic headers to work both in
-masm=att (the default) and -masm=intel modes.
The official solution for this according to "Multiple assembler dialects in asm
templates" in gcc docs->Extensions->Inline Assembly->Extended Asm
is to write every inline asm snippet twice:
bt{l %[Offset],%[Base] | %[Base],%[Offset]}
This works in LLVM after D113932 and D113894, so use that.
(Just putting `.att_syntax` at the start of the snippet works in some but not
all cases: When LLVM interpolates in parameters like `%0`, it uses at&t or
intel syntax according to the inline asm snippet's flavor, so the `.att_syntax`
within the snippet happens to late: The interpolated-in parameter is already
in intel style, and then won't parse in the switched `.att_syntax`.)
It might be nice to invent a `#pragma clang asm_dialect push "att"` /
`#pragma clang asm_dialect pop` to be able to force asm style per snippet,
so that the inline asm string doesn't contain the same code in two variants,
but let's leave that for a follow-up.
Fixes PR21401 and PR20241.
Differential Revision: https://reviews.llvm.org/D113707
2021-11-12 03:20:02 +08:00
|
|
|
__asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
|
2018-06-15 02:43:52 +08:00
|
|
|
: "+a" (_Comparand), "+m" (*_Destination)
|
|
|
|
: "r" (_Exchange) : "memory");
|
|
|
|
return _Comparand;
|
|
|
|
}
|
|
|
|
/*
 * Atomic compare-and-exchange on a long with the HLE release hint.
 *
 * _Destination: location to compare and possibly update; read and written
 *               by the asm ("+m" operand 1).
 * _Exchange:    replacement value, passed in a general register
 *               ("r" operand 2).
 * _Comparand:   expected value; pinned to the accumulator register by the
 *               "+a" constraint (operand 0) and overwritten by the asm.
 *
 * Returns _Comparand as left by the instruction. Per the x86 CMPXCHG
 * definition that is the value observed at the destination: equal to the
 * original _Comparand when the store of _Exchange happened, otherwise the
 * differing current value.
 *
 * Emitted as `lock cmpxchg` behind the raw prefix byte 0xf3 (the HLE
 * release, XRELEASE, encoding). The `{%2, %1|%1, %2}` template covers both
 * AT&T and Intel operand order, and the "memory" clobber makes the statement
 * a compiler-level barrier for memory accesses.
 */
static __inline__ long __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
|
|
|
|
long _Exchange, long _Comparand) {
|
[clang] Make -masm=intel affect inline asm style
With this,
void f() { __asm__("mov eax, ebx"); }
now compiles with clang with -masm=intel.
This matches gcc.
The flag is not accepted in clang-cl mode. It has no effect on
MSVC-style `__asm {}` blocks, which are unconditionally in intel
mode both before and after this change.
One difference to gcc is that in clang, inline asm strings are
"local" while they're "global" in gcc. Building the following with
-masm=intel works with clang, but not with gcc where the ".att_syntax"
from the 2nd __asm__() is in effect until file end (or until a
".intel_syntax" somewhere later in the file):
__asm__("mov eax, ebx");
__asm__(".att_syntax\nmovl %ebx, %eax");
__asm__("mov eax, ebx");
This also updates clang's intrinsic headers to work both in
-masm=att (the default) and -masm=intel modes.
The official solution for this according to "Multiple assembler dialects in asm
templates" in gcc docs->Extensions->Inline Assembly->Extended Asm
is to write every inline asm snippet twice:
bt{l %[Offset],%[Base] | %[Base],%[Offset]}
This works in LLVM after D113932 and D113894, so use that.
(Just putting `.att_syntax` at the start of the snippet works in some but not
all cases: When LLVM interpolates in parameters like `%0`, it uses at&t or
intel syntax according to the inline asm snippet's flavor, so the `.att_syntax`
within the snippet happens to late: The interpolated-in parameter is already
in intel style, and then won't parse in the switched `.att_syntax`.)
It might be nice to invent a `#pragma clang asm_dialect push "att"` /
`#pragma clang asm_dialect pop` to be able to force asm style per snippet,
so that the inline asm string doesn't contain the same code in two variants,
but let's leave that for a follow-up.
Fixes PR21401 and PR20241.
Differential Revision: https://reviews.llvm.org/D113707
2021-11-12 03:20:02 +08:00
|
|
|
__asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
|
2018-06-15 02:43:52 +08:00
|
|
|
: "+a" (_Comparand), "+m" (*_Destination)
|
|
|
|
: "r" (_Exchange) : "memory");
|
|
|
|
return _Comparand;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if defined(__x86_64__)
|
|
|
|
/*
 * __int64 variant of the HLE-acquire compare-and-exchange; it sits inside an
 * `#if defined(__x86_64__)` region, so it only exists for x86-64 builds.
 *
 * _Destination: location to compare and possibly update; read and written
 *               by the asm ("+m" operand 1).
 * _Exchange:    replacement value, passed in a general register
 *               ("r" operand 2).
 * _Comparand:   expected value; pinned to the accumulator register by the
 *               "+a" constraint (operand 0) and overwritten by the asm.
 *
 * Returns _Comparand as left by the instruction. Per the x86 CMPXCHG
 * definition that is the value observed at the destination: equal to the
 * original _Comparand when the store of _Exchange happened, otherwise the
 * differing current value.
 *
 * Emitted as `lock cmpxchg` behind the raw prefix byte 0xf2 (the HLE
 * acquire, XACQUIRE, encoding). The `{%2, %1|%1, %2}` template covers both
 * AT&T and Intel operand order, and the "memory" clobber makes the statement
 * a compiler-level barrier for memory accesses.
 */
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
|
|
|
|
__int64 _Exchange, __int64 _Comparand) {
|
[clang] Make -masm=intel affect inline asm style
With this,
void f() { __asm__("mov eax, ebx"); }
now compiles with clang with -masm=intel.
This matches gcc.
The flag is not accepted in clang-cl mode. It has no effect on
MSVC-style `__asm {}` blocks, which are unconditionally in intel
mode both before and after this change.
One difference to gcc is that in clang, inline asm strings are
"local" while they're "global" in gcc. Building the following with
-masm=intel works with clang, but not with gcc where the ".att_syntax"
from the 2nd __asm__() is in effect until file end (or until a
".intel_syntax" somewhere later in the file):
__asm__("mov eax, ebx");
__asm__(".att_syntax\nmovl %ebx, %eax");
__asm__("mov eax, ebx");
This also updates clang's intrinsic headers to work both in
-masm=att (the default) and -masm=intel modes.
The official solution for this according to "Multiple assembler dialects in asm
templates" in gcc docs->Extensions->Inline Assembly->Extended Asm
is to write every inline asm snippet twice:
bt{l %[Offset],%[Base] | %[Base],%[Offset]}
This works in LLVM after D113932 and D113894, so use that.
(Just putting `.att_syntax` at the start of the snippet works in some but not
all cases: When LLVM interpolates in parameters like `%0`, it uses at&t or
intel syntax according to the inline asm snippet's flavor, so the `.att_syntax`
within the snippet happens to late: The interpolated-in parameter is already
in intel style, and then won't parse in the switched `.att_syntax`.)
It might be nice to invent a `#pragma clang asm_dialect push "att"` /
`#pragma clang asm_dialect pop` to be able to force asm style per snippet,
so that the inline asm string doesn't contain the same code in two variants,
but let's leave that for a follow-up.
Fixes PR21401 and PR20241.
Differential Revision: https://reviews.llvm.org/D113707
2021-11-12 03:20:02 +08:00
|
|
|
__asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
|
2018-06-15 02:43:52 +08:00
|
|
|
: "+a" (_Comparand), "+m" (*_Destination)
|
|
|
|
: "r" (_Exchange) : "memory");
|
|
|
|
return _Comparand;
|
|
|
|
}
|
|
|
|
/*
 * __int64 variant of the HLE-release compare-and-exchange; it sits inside an
 * `#if defined(__x86_64__)` region, so it only exists for x86-64 builds.
 *
 * _Destination: location to compare and possibly update; read and written
 *               by the asm ("+m" operand 1).
 * _Exchange:    replacement value, passed in a general register
 *               ("r" operand 2).
 * _Comparand:   expected value; pinned to the accumulator register by the
 *               "+a" constraint (operand 0) and overwritten by the asm.
 *
 * Returns _Comparand as left by the instruction. Per the x86 CMPXCHG
 * definition that is the value observed at the destination: equal to the
 * original _Comparand when the store of _Exchange happened, otherwise the
 * differing current value.
 *
 * Emitted as `lock cmpxchg` behind the raw prefix byte 0xf3 (the HLE
 * release, XRELEASE, encoding). The `{%2, %1|%1, %2}` template covers both
 * AT&T and Intel operand order, and the "memory" clobber makes the statement
 * a compiler-level barrier for memory accesses.
 */
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
|
|
|
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
|
|
|
|
__int64 _Exchange, __int64 _Comparand) {
|
[clang] Make -masm=intel affect inline asm style
With this,
void f() { __asm__("mov eax, ebx"); }
now compiles with clang with -masm=intel.
This matches gcc.
The flag is not accepted in clang-cl mode. It has no effect on
MSVC-style `__asm {}` blocks, which are unconditionally in intel
mode both before and after this change.
One difference to gcc is that in clang, inline asm strings are
"local" while they're "global" in gcc. Building the following with
-masm=intel works with clang, but not with gcc where the ".att_syntax"
from the 2nd __asm__() is in effect until file end (or until a
".intel_syntax" somewhere later in the file):
__asm__("mov eax, ebx");
__asm__(".att_syntax\nmovl %ebx, %eax");
__asm__("mov eax, ebx");
This also updates clang's intrinsic headers to work both in
-masm=att (the default) and -masm=intel modes.
The official solution for this according to "Multiple assembler dialects in asm
templates" in gcc docs->Extensions->Inline Assembly->Extended Asm
is to write every inline asm snippet twice:
bt{l %[Offset],%[Base] | %[Base],%[Offset]}
This works in LLVM after D113932 and D113894, so use that.
(Just putting `.att_syntax` at the start of the snippet works in some but not
all cases: When LLVM interpolates in parameters like `%0`, it uses at&t or
intel syntax according to the inline asm snippet's flavor, so the `.att_syntax`
within the snippet happens to late: The interpolated-in parameter is already
in intel style, and then won't parse in the switched `.att_syntax`.)
It might be nice to invent a `#pragma clang asm_dialect push "att"` /
`#pragma clang asm_dialect pop` to be able to force asm style per snippet,
so that the inline asm string doesn't contain the same code in two variants,
but let's leave that for a follow-up.
Fixes PR21401 and PR20241.
Differential Revision: https://reviews.llvm.org/D113707
2021-11-12 03:20:02 +08:00
|
|
|
__asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
|
2018-06-15 02:43:52 +08:00
|
|
|
: "+a" (_Comparand), "+m" (*_Destination)
|
|
|
|
: "r" (_Exchange) : "memory");
|
|
|
|
return _Comparand;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#undef __DEFAULT_FN_ATTRS
|
|
|
|
|
[X86] Make `x86intrin.h`, `immintrin.h` includable with `-fno-gnu-inline-asm`.
Currently `immintrin.h` includes `pconfigintrin.h` and `sgxintrin.h`
which contain inline assembly. It causes failures when building with the
flag `-fno-gnu-inline-asm`.
Fix by excluding functions with inline assembly when this extension is
disabled. So far there was no need to support `_pconfig_u32`,
`_enclu_u32`, `_encls_u32`, `_enclv_u32` on platforms that require
`-fno-gnu-inline-asm`. But if developers start using these functions,
they'll have compile-time undeclared identifier errors which is
preferable to runtime errors.
rdar://problem/49540880
Reviewers: craig.topper, GBuella, rnk, echristo
Reviewed By: rnk
Subscribers: jkorous, dexonsmith, cfe-commits
Differential Revision: https://reviews.llvm.org/D61621
llvm-svn: 360630
2019-05-14 06:40:11 +08:00
|
|
|
#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
|
2018-06-15 02:43:52 +08:00
|
|
|
|
2010-08-21 02:04:07 +08:00
|
|
|
#endif /* __IMMINTRIN_H */
|