forked from OSchip/llvm-project
[x86][icelake][vaes]
added vaes feature recognition added intrinsics support for vaes instructions, matching a similar work on the backend (D40078) _mm256_aesenc_epi128 _mm512_aesenc_epi128 _mm256_aesenclast_epi128 _mm512_aesenclast_epi128 _mm256_aesdec_epi128 _mm512_aesdec_epi128 _mm256_aesdeclast_epi128 _mm512_aesdeclast_epi128 llvm-svn: 321474
This commit is contained in:
parent
a0ab8d7a58
commit
a1e5f0c339
|
@ -429,6 +429,16 @@ TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "", "aes")
|
|||
TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "", "aes")
|
||||
TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "", "aes")
|
||||
|
||||
// VAES
|
||||
TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "", "vaes")
|
||||
TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")
|
||||
TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "", "vaes")
|
||||
TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")
|
||||
TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "", "vaes")
|
||||
TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")
|
||||
TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "", "vaes")
|
||||
TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")
|
||||
|
||||
// CLMUL
|
||||
TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "", "pclmul")
|
||||
|
||||
|
|
|
@ -2543,6 +2543,8 @@ def msha : Flag<["-"], "msha">, Group<m_x86_Features_Group>;
|
|||
def mno_sha : Flag<["-"], "mno-sha">, Group<m_x86_Features_Group>;
|
||||
def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>;
|
||||
def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>;
|
||||
def mvaes : Flag<["-"], "mvaes">, Group<m_x86_Features_Group>;
|
||||
def mno_vaes : Flag<["-"], "mno-vaes">, Group<m_x86_Features_Group>;
|
||||
def mxop : Flag<["-"], "mxop">, Group<m_x86_Features_Group>;
|
||||
def mno_xop : Flag<["-"], "mno-xop">, Group<m_x86_Features_Group>;
|
||||
def mxsave : Flag<["-"], "mxsave">, Group<m_x86_Features_Group>;
|
||||
|
|
|
@ -132,6 +132,7 @@ bool X86TargetInfo::initFeatureMap(
|
|||
break;
|
||||
|
||||
case CK_Icelake:
|
||||
setFeatureEnabledImpl(Features, "vaes", true);
|
||||
// TODO: Add icelake features here.
|
||||
LLVM_FALLTHROUGH;
|
||||
case CK_Cannonlake:
|
||||
|
@ -460,7 +461,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
|
|||
LLVM_FALLTHROUGH;
|
||||
case AVX:
|
||||
Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] =
|
||||
Features["xsaveopt"] = false;
|
||||
Features["xsaveopt"] = Features["vaes"] = false;
|
||||
setXOPLevel(Features, FMA4, false);
|
||||
LLVM_FALLTHROUGH;
|
||||
case AVX2:
|
||||
|
@ -572,6 +573,13 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
|
|||
} else if (Name == "aes") {
|
||||
if (Enabled)
|
||||
setSSELevel(Features, SSE2, Enabled);
|
||||
else
|
||||
Features["vaes"] = false;
|
||||
} else if (Name == "vaes") {
|
||||
if (Enabled) {
|
||||
setSSELevel(Features, AVX, Enabled);
|
||||
Features["aes"] = true;
|
||||
}
|
||||
} else if (Name == "pclmul") {
|
||||
if (Enabled)
|
||||
setSSELevel(Features, SSE2, Enabled);
|
||||
|
@ -636,6 +644,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
|
|||
|
||||
if (Feature == "+aes") {
|
||||
HasAES = true;
|
||||
} else if (Feature == "+vaes") {
|
||||
HasVAES = true;
|
||||
} else if (Feature == "+pclmul") {
|
||||
HasPCLMUL = true;
|
||||
} else if (Feature == "+lzcnt") {
|
||||
|
@ -934,6 +944,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
if (HasAES)
|
||||
Builder.defineMacro("__AES__");
|
||||
|
||||
if (HasVAES)
|
||||
Builder.defineMacro("__VAES__");
|
||||
|
||||
if (HasPCLMUL)
|
||||
Builder.defineMacro("__PCLMUL__");
|
||||
|
||||
|
@ -1185,6 +1198,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
|
|||
.Case("sse4.2", true)
|
||||
.Case("sse4a", true)
|
||||
.Case("tbm", true)
|
||||
.Case("vaes", true)
|
||||
.Case("x87", true)
|
||||
.Case("xop", true)
|
||||
.Case("xsave", true)
|
||||
|
@ -1249,6 +1263,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
|
|||
.Case("sse4.2", SSELevel >= SSE42)
|
||||
.Case("sse4a", XOPLevel >= SSE4A)
|
||||
.Case("tbm", HasTBM)
|
||||
.Case("vaes", HasVAES)
|
||||
.Case("x86", true)
|
||||
.Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
|
||||
.Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
|
||||
|
|
|
@ -48,6 +48,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
|
|||
enum XOPEnum { NoXOP, SSE4A, FMA4, XOP } XOPLevel = NoXOP;
|
||||
|
||||
bool HasAES = false;
|
||||
bool HasVAES = false;
|
||||
bool HasPCLMUL = false;
|
||||
bool HasLZCNT = false;
|
||||
bool HasRDRND = false;
|
||||
|
|
|
@ -82,6 +82,7 @@ set(files
|
|||
tmmintrin.h
|
||||
unwind.h
|
||||
vadefs.h
|
||||
vaesintrin.h
|
||||
varargs.h
|
||||
vecintrin.h
|
||||
wmmintrin.h
|
||||
|
|
|
@ -208,6 +208,10 @@ _mm256_cvtph_ps(__m128i __a)
|
|||
#include <pkuintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
|
||||
#include <vaesintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
|
||||
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
|
||||
_rdrand16_step(unsigned short *__p)
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <vaesintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __VAESINTRIN_H
|
||||
#define __VAESINTRIN_H
|
||||
|
||||
/* Default attributes for YMM forms. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes")))
|
||||
|
||||
/* Default attributes for ZMM forms. */
|
||||
#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes")))
|
||||
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesenc_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesenc256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesenc_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesenc512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesdec_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesdec256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesdec_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesdec512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesenclast_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesenclast_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesdeclast_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesdeclast_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS_F
|
||||
|
||||
#endif
|
|
@ -38,9 +38,9 @@ int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; }
|
|||
// CHECK: lake{{.*}} #7
|
||||
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87"
|
||||
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
|
||||
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt"
|
||||
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-xop,-xsave,-xsaveopt"
|
||||
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
|
||||
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-xop,-xsave,-xsaveopt"
|
||||
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes"
|
||||
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-xop,-xsave,-xsaveopt"
|
||||
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
|
||||
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx"
|
||||
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx"
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vaes -emit-llvm -o - | FileCheck %s --check-prefix AVX
|
||||
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX512 -target-feature +vaes -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes AVX,AVX512
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
__m256i test_mm256_aesenc_epi128(__m256i __A, __m256i __B) {
|
||||
// AVX-LABEL: @test_mm256_aesenc_epi128
|
||||
// AVX: @llvm.x86.aesni.aesenc.256
|
||||
return _mm256_aesenc_epi128(__A, __B);
|
||||
}
|
||||
|
||||
__m256i test_mm256_aesenclast_epi128(__m256i __A, __m256i __B) {
|
||||
// AVX-LABEL: @test_mm256_aesenclast_epi128
|
||||
// AVX: @llvm.x86.aesni.aesenclast.256
|
||||
return _mm256_aesenclast_epi128(__A, __B);
|
||||
}
|
||||
|
||||
__m256i test_mm256_aesdec_epi128(__m256i __A, __m256i __B) {
|
||||
// AVX-LABEL: @test_mm256_aesdec_epi128
|
||||
// AVX: @llvm.x86.aesni.aesdec.256
|
||||
return _mm256_aesdec_epi128(__A, __B);
|
||||
}
|
||||
|
||||
__m256i test_mm256_aesdeclast_epi128(__m256i __A, __m256i __B) {
|
||||
// AVX-LABEL: @test_mm256_aesdeclast_epi128
|
||||
// AVX: @llvm.x86.aesni.aesdeclast.256
|
||||
return _mm256_aesdeclast_epi128(__A, __B);
|
||||
}
|
||||
|
||||
#ifdef AVX512
|
||||
__m512i test_mm512_aesenc_epi128(__m512i __A, __m512i __B) {
|
||||
// AVX512-LABEL: @test_mm512_aesenc_epi128
|
||||
// AVX512: @llvm.x86.aesni.aesenc.512
|
||||
return _mm512_aesenc_epi128(__A, __B);
|
||||
}
|
||||
|
||||
__m512i test_mm512_aesenclast_epi128(__m512i __A, __m512i __B) {
|
||||
// AVX512-LABEL: @test_mm512_aesenclast_epi128
|
||||
// AVX512: @llvm.x86.aesni.aesenclast.512
|
||||
return _mm512_aesenclast_epi128(__A, __B);
|
||||
}
|
||||
|
||||
__m512i test_mm512_aesdec_epi128(__m512i __A, __m512i __B) {
|
||||
// AVX512-LABEL: @test_mm512_aesdec_epi128
|
||||
// AVX512: @llvm.x86.aesni.aesdec.512
|
||||
return _mm512_aesdec_epi128(__A, __B);
|
||||
}
|
||||
|
||||
__m512i test_mm512_aesdeclast_epi128(__m512i __A, __m512i __B) {
|
||||
// AVX512-LABEL: @test_mm512_aesdeclast_epi128
|
||||
// AVX512: @llvm.x86.aesni.aesdeclast.512
|
||||
return _mm512_aesdeclast_epi128(__A, __B);
|
||||
}
|
||||
#endif
|
||||
|
|
@ -94,3 +94,9 @@
|
|||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-clzero %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CLZERO %s
|
||||
// CLZERO: "-target-feature" "+clzero"
|
||||
// NO-CLZERO: "-target-feature" "-clzero"
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mvaes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VAES %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-vaes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VAES %s
|
||||
// VAES: "-target-feature" "+vaes"
|
||||
// NO-VAES: "-target-feature" "-vaes"
|
||||
|
||||
|
|
|
@ -1079,6 +1079,7 @@
|
|||
// CHECK_ICL_M32: #define __SSE4_2__ 1
|
||||
// CHECK_ICL_M32: #define __SSE__ 1
|
||||
// CHECK_ICL_M32: #define __SSSE3__ 1
|
||||
// CHECK_ICL_M32: #define __VAES__ 1
|
||||
// CHECK_ICL_M32: #define __XSAVEC__ 1
|
||||
// CHECK_ICL_M32: #define __XSAVEOPT__ 1
|
||||
// CHECK_ICL_M32: #define __XSAVES__ 1
|
||||
|
@ -1124,6 +1125,7 @@
|
|||
// CHECK_ICL_M64: #define __SSE4_2__ 1
|
||||
// CHECK_ICL_M64: #define __SSE__ 1
|
||||
// CHECK_ICL_M64: #define __SSSE3__ 1
|
||||
// CHECK_ICL_M64: #define __VAES__ 1
|
||||
// CHECK_ICL_M64: #define __XSAVEC__ 1
|
||||
// CHECK_ICL_M64: #define __XSAVEOPT__ 1
|
||||
// CHECK_ICL_M64: #define __XSAVES__ 1
|
||||
|
|
|
@ -368,3 +368,14 @@
|
|||
// RUN: %clang -target i386-unknown-unknown -march=atom -mclflushopt -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=CLFLUSHOPT %s
|
||||
|
||||
// CLFLUSHOPT: #define __CLFLUSHOPT__ 1
|
||||
|
||||
// RUN: %clang -target i386-unknown-unknown -march=atom -mvaes -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VAES %s
|
||||
|
||||
// VAES: #define __AES__ 1
|
||||
// VAES: #define __VAES__ 1
|
||||
|
||||
// RUN: %clang -target i386-unknown-unknown -march=atom -mvaes -mno-aes -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VAESNOAES %s
|
||||
|
||||
// VAESNOAES-NOT: #define __AES__ 1
|
||||
// VAESNOAES-NOT: #define __VAES__ 1
|
||||
|
||||
|
|
Loading…
Reference in New Issue