[AArch64] Add SHA3 intrinsics support

This patch adds the following SHA3 intrinsics:

    vsha512hq_u64, vsha512h2q_u64, vsha512su0q_u64, vsha512su1q_u64,
    veor3q_u8, veor3q_u16, veor3q_u32, veor3q_u64,
    veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64,
    vrax1q_u64, vxarq_u64,
    vbcaxq_u8, vbcaxq_u16, vbcaxq_u32, vbcaxq_u64,
    vbcaxq_s8, vbcaxq_s16, vbcaxq_s32, vbcaxq_s64

Note: the +sha3 and +crypto target features must be enabled when building
from the front end.

Reviewed By: DavidSpickett

Differential Revision: https://reviews.llvm.org/D96381
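As a quick usage sketch (not part of the patch): the intrinsics come from
<arm_neon.h>, and the -march string below is an assumed example; any AArch64
target with the sha3 feature should work. The per-lane semantics in the
comments follow the Armv8 SHA3 instruction descriptions.

// clang --target=aarch64-linux-gnu -march=armv8.2-a+sha3+crypto -O2 -c sha3_demo.c
#include <arm_neon.h>

uint64x2_t sha3_demo(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
  uint64x2_t t = veor3q_u64(a, b, c); // EOR3: a ^ b ^ c
  t = vbcaxq_u64(t, b, c);            // BCAX: t ^ (b & ~c)
  t = vrax1q_u64(t, a);               // RAX1: t ^ rol64(a, 1), per 64-bit lane
  return vxarq_u64(t, b, 10);         // XAR: ror64(t ^ b, 10), per 64-bit lane
}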
@@ -1135,6 +1135,19 @@ def SHA256H : SInst<"vsha256h", "....", "QUi">;
def SHA256H2 : SInst<"vsha256h2", "....", "QUi">;
def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">;

def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">;
def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
def RAX1 : SInst<"vrax1", "...", "QUl">;

let isVXAR = 1 in {
def XAR : SInst<"vxar", "...I", "QUl">;
}

def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">;
def SHA512SU1 : SInst<"vsha512su1", "....", "QUl">;
def SHA512H : SInst<"vsha512h", "....", "QUl">;
def SHA512H2 : SInst<"vsha512h2", "....", "QUl">;

def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">;
def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">;
def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">;

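A note on the type strings (a sketch, as I read the NeonEmitter type codes):
each letter group in "QUcQUsQUiQUlQcQsQiQl" is one overload, where Q means a
128-bit vector, U unsigned, and c/s/i/l 8/16/32/64-bit elements. Two of the
eight veor3 overloads, spelled out in C:

#include <arm_neon.h>

uint8x16_t veor3_qu8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return veor3q_u8(a, b, c);  // "QUc": unsigned 8-bit x 16 lanes
}

int64x2_t veor3_qs64(int64x2_t a, int64x2_t b, int64x2_t c) {
  return veor3q_s64(a, b, c); // "Ql": signed 64-bit x 2 lanes
}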
@@ -272,6 +272,7 @@ class Inst <string n, string p, string t, Operation o> {
  bit isScalarShift = 0;
  bit isScalarNarrowShift = 0;
  bit isVCVT_N = 0;
  bit isVXAR = 0;
  // For immediate checks: the immediate will be assumed to specify the lane of
  // a Q register. Only used for intrinsics which end up calling polymorphic
  // builtins.

@@ -5676,6 +5676,7 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP2(vbcaxq_v, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vbfdot_v, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfdotq_v, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_v, aarch64_neon_bfmlalb, 0),

@@ -5745,6 +5746,7 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
  NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
  NEONMAP2(veor3q_v, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),

@@ -5810,6 +5812,7 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP1(vrax1q_v, aarch64_crypto_rax1, 0),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),

@@ -5833,6 +5836,10 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP1(vsha512h2q_v, aarch64_crypto_sha512h2, 0),
  NEONMAP1(vsha512hq_v, aarch64_crypto_sha512h, 0),
  NEONMAP1(vsha512su0q_v, aarch64_crypto_sha512su0, 0),
  NEONMAP1(vsha512su1q_v, aarch64_crypto_sha512su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),

@@ -5862,6 +5869,7 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vusdot_v, aarch64_neon_usdot, 0),
  NEONMAP1(vusdotq_v, aarch64_neon_usdot, 0),
  NEONMAP1(vusmmlaq_v, aarch64_neon_usmmla, 0),
  NEONMAP1(vxarq_v, aarch64_crypto_xar, 0),
};

static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {

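A sketch of what the NEONMAP2 entries buy: one polymorphic builtin resolves to
either the unsigned or the signed LLVM intrinsic via UnsignedAlts, so the
signed and unsigned C spellings diverge only at the IR level. The intrinsic
names in the comments are the ones the map above selects:

#include <arm_neon.h>

uint8x16_t eor3_unsigned(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return veor3q_u8(a, b, c);  // lowers to @llvm.aarch64.crypto.eor3u.v16i8
}

int8x16_t eor3_signed(int8x16_t a, int8x16_t b, int8x16_t c) {
  return veor3q_s8(a, b, c);  // lowers to @llvm.aarch64.crypto.eor3s.v16i8
}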
@@ -6688,6 +6696,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vsha512hq_v:
  case NEON::BI__builtin_neon_vsha512h2q_v:
  case NEON::BI__builtin_neon_vsha512su0q_v:
  case NEON::BI__builtin_neon_vsha512su1q_v: {
    Function *F = CGM.getIntrinsic(Int);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);

@@ -6833,6 +6848,11 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vxarq_v: {
    Function *F = CGM.getIntrinsic(Int);
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));

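The CreateZExt in the vxarq case exists because the rotate amount reaches
CodeGen as a 32-bit constant while the intrinsic's immediate operand is
llvm_i64_ty. A sketch of the resulting IR for a sample call (abbreviated by
hand, not autogenerated):

#include <arm_neon.h>

uint64x2_t xar_demo(uint64x2_t a, uint64x2_t b) {
  // expected IR, roughly:
  //   %r = call <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64> %a, <2 x i64> %b, i64 10)
  return vxarq_u64(a, b, 10);
}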
@@ -30,3 +30,10 @@ void test_range_check_vsm3tt2b(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  vsm3tt2bq_u32(a, b, c, 0);
}

void test_range_check_xar(uint64x2_t a, uint64x2_t b) {
  vxarq_u64(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 63]}}
  vxarq_u64(a, b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
  vxarq_u64(a, b, 0);
  vxarq_u64(a, b, 63);
}

@@ -0,0 +1,162 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
// RUN:   -target-feature +crypto -S -emit-llvm -o - %s \
// RUN:   | FileCheck %s

#include <arm_neon.h>

// CHECK-LABEL: @test_vsha512h(
// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512h
//
void test_vsha512h(uint64x2_t hash_ed, uint64x2_t hash_gf, uint64x2_t kwh_kwh2) {
  uint64x2_t result = vsha512hq_u64(hash_ed, hash_gf, kwh_kwh2);
}

// CHECK-LABEL: @test_vsha512h2(
// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512h2
//
void test_vsha512h2(uint64x2_t sum_ab, uint64x2_t hash_c_, uint64x2_t hash_ab) {
  uint64x2_t result = vsha512h2q_u64(sum_ab, hash_c_, hash_ab);
}

// CHECK-LABEL: @test_vsha512su0(
// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512su0
//
void test_vsha512su0(uint64x2_t w0_1, uint64x2_t w2_) {
  uint64x2_t result = vsha512su0q_u64(w0_1, w2_);
}

// CHECK-LABEL: @test_vsha512su1(
// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512su1
//
void test_vsha512su1(uint64x2_t s01_s02, uint64x2_t w14_15, uint64x2_t w9_10) {
  uint64x2_t result = vsha512su1q_u64(s01_s02, w14_15, w9_10);
}

// CHECK-LABEL: @test_vrax1(
// CHECK: call <2 x i64> @llvm.aarch64.crypto.rax1
//
void test_vrax1(uint64x2_t a, uint64x2_t b) {
  uint64x2_t result = vrax1q_u64(a, b);
}

// CHECK-LABEL: @test_xar(
// CHECK: call <2 x i64> @llvm.aarch64.crypto.xar
//
void test_xar(uint64x2_t a, uint64x2_t b) {
  uint64x2_t result = vxarq_u64(a, b, 10);
}

// CHECK-LABEL: @test_vbcax_u8(
// CHECK: call <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8
//
void test_vbcax_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  uint8x16_t result = vbcaxq_u8(a, b, c);
}

// CHECK-LABEL: @test_vbcax_u16(
// CHECK: call <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16
//
void test_vbcax_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  uint16x8_t result = vbcaxq_u16(a, b, c);
}

// CHECK-LABEL: @test_vbcax_u32(
// CHECK: call <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32
//
void test_vbcax_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  uint32x4_t result = vbcaxq_u32(a, b, c);
}

// CHECK-LABEL: @test_vbcax_u64(
// CHECK: call <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64
//
void test_vbcax_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
  uint64x2_t result = vbcaxq_u64(a, b, c);
}

// CHECK-LABEL: @test_vbcax_s8(
// CHECK: call <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8
//
void test_vbcax_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  int8x16_t result = vbcaxq_s8(a, b, c);
}

// CHECK-LABEL: @test_vbcax_s16(
// CHECK: call <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16
//
void test_vbcax_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  int16x8_t result = vbcaxq_s16(a, b, c);
}

// CHECK-LABEL: @test_vbcax_s32(
// CHECK: call <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32
//
void test_vbcax_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  int32x4_t result = vbcaxq_s32(a, b, c);
}

// CHECK-LABEL: @test_vbcax_s64(
// CHECK: call <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64
//
void test_vbcax_s64(int64x2_t a, int64x2_t b, int64x2_t c) {
  int64x2_t result = vbcaxq_s64(a, b, c);
}

// CHECK-LABEL: @test_veor3_u8(
// CHECK: call <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8
//
void test_veor3_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  uint8x16_t result = veor3q_u8(a, b, c);
}

// CHECK-LABEL: @test_veor3_u16(
// CHECK: call <8 x i16> @llvm.aarch64.crypto.eor3u.v8i16
//
void test_veor3_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  uint16x8_t result = veor3q_u16(a, b, c);
}

// CHECK-LABEL: @test_veor3_u32(
// CHECK: call <4 x i32> @llvm.aarch64.crypto.eor3u.v4i32
//
void test_veor3_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  uint32x4_t result = veor3q_u32(a, b, c);
}

// CHECK-LABEL: @test_veor3_u64(
// CHECK: call <2 x i64> @llvm.aarch64.crypto.eor3u.v2i64
//
void test_veor3_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
  uint64x2_t result = veor3q_u64(a, b, c);
}

// CHECK-LABEL: @test_veor3_s8(
// CHECK: call <16 x i8> @llvm.aarch64.crypto.eor3s.v16i8
//
void test_veor3_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  int8x16_t result = veor3q_s8(a, b, c);
}

// CHECK-LABEL: @test_veor3_s16(
// CHECK: call <8 x i16> @llvm.aarch64.crypto.eor3s.v8i16
//
void test_veor3_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  int16x8_t result = veor3q_s16(a, b, c);
}

// CHECK-LABEL: @test_veor3_s32(
// CHECK: call <4 x i32> @llvm.aarch64.crypto.eor3s.v4i32
//
void test_veor3_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  int32x4_t result = veor3q_s32(a, b, c);
}

// CHECK-LABEL: @test_veor3_s64(
// CHECK: call <2 x i64> @llvm.aarch64.crypto.eor3s.v2i64
//
void test_veor3_s64(int64x2_t a, int64x2_t b, int64x2_t c) {
  int64x2_t result = veor3q_s64(a, b, c);
}

@@ -2115,7 +2115,11 @@ void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
  std::string LowerBound, UpperBound;

  Record *R = Def->getRecord();
  if (R->getValueAsBit("isVXAR")) {
    // VXAR takes an immediate in the range [0, 63].
    LowerBound = "0";
    UpperBound = "63";
  } else if (R->getValueAsBit("isVCVT_N")) {
    // VCVT between floating- and fixed-point values takes an immediate
    // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
    LowerBound = "1";

@@ -715,6 +715,31 @@ let TargetPrefix = "aarch64" in {
      : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
                              [IntrNoMem]>;

  // SHA512 intrinsic taking 2 arguments
  class Crypto_SHA512_2Arg_Intrinsic
      : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;

  // SHA512 intrinsic taking 3 arguments
  class Crypto_SHA512_3Arg_Intrinsic
      : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
                              [IntrNoMem]>;

  // SHA3 intrinsics taking 3 arguments
  class Crypto_SHA3_3Arg_Intrinsic
      : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                              [IntrNoMem]>;

  // SHA3 intrinsic taking 2 arguments
  class Crypto_SHA3_2Arg_Intrinsic
      : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
                              [IntrNoMem]>;

  // SHA3 intrinsic taking 2 arguments and 1 immediate
  class Crypto_SHA3_2ArgImm_Intrinsic
      : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i64_ty],
                              [IntrNoMem, ImmArg<ArgIndex<2>>]>;

  class Crypto_SM3_3Vector_Intrinsic
      : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
                  [IntrNoMem]>;

@@ -748,6 +773,20 @@ def int_aarch64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic;
def int_aarch64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic;
def int_aarch64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic;

// SHA3
def int_aarch64_crypto_eor3s : Crypto_SHA3_3Arg_Intrinsic;
def int_aarch64_crypto_eor3u : Crypto_SHA3_3Arg_Intrinsic;
def int_aarch64_crypto_bcaxs : Crypto_SHA3_3Arg_Intrinsic;
def int_aarch64_crypto_bcaxu : Crypto_SHA3_3Arg_Intrinsic;
def int_aarch64_crypto_rax1 : Crypto_SHA3_2Arg_Intrinsic;
def int_aarch64_crypto_xar : Crypto_SHA3_2ArgImm_Intrinsic;

// SHA512
def int_aarch64_crypto_sha512h : Crypto_SHA512_3Arg_Intrinsic;
def int_aarch64_crypto_sha512h2 : Crypto_SHA512_3Arg_Intrinsic;
def int_aarch64_crypto_sha512su0 : Crypto_SHA512_2Arg_Intrinsic;
def int_aarch64_crypto_sha512su1 : Crypto_SHA512_3Arg_Intrinsic;

// SM3 & SM4
def int_aarch64_crypto_sm3partw1 : Crypto_SM3_3Vector_Intrinsic;
def int_aarch64_crypto_sm3partw2 : Crypto_SM3_3Vector_Intrinsic;

@@ -890,6 +890,12 @@ def imm0_63 : Operand<i64>, ImmLeaf<i64, [{
  let ParserMatchClass = Imm0_63Operand;
}

def timm0_63 : Operand<i64>, TImmLeaf<i64, [{
  return ((uint64_t)Imm) < 64;
}]> {
  let ParserMatchClass = Imm0_63Operand;
}

// imm0_31 predicate - True if the immediate is in the range [0,31]
def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
  return ((uint64_t)Imm) < 32;

@@ -924,6 +924,45 @@ def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">;
def EOR3 : CryptoRRRR_16B<0b00, "eor3">;
def BCAX : CryptoRRRR_16B<0b01, "bcax">;
def XAR : CryptoRRRi6<"xar">;

class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;

def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;

} // HasSHA3

let Predicates = [HasSM4] in {

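Because EOR3 and BCAX are pure bitwise operations, every element width selects
the same 16B-form instruction; the SHA3_pattern instances above only vary the
vector type being matched. A sketch of the expected lowering (the asm comment
mirrors the llc test below):

#include <arm_neon.h>

uint16x8_t eor3_any_width(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  // expected asm: eor3 v0.16b, v0.16b, v1.16b, v2.16b
  return veor3q_u16(a, b, c);
}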
@@ -0,0 +1,246 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+sha3 -o - | FileCheck %s

define <2 x i64> @test_vsha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_vsha512h:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sha512h q0, q1, v2.2d
; CHECK-NEXT:    ret
entry:
  %vsha512h.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
  ret <2 x i64> %vsha512h.i
}

define <2 x i64> @test_vsha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_vsha512h2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sha512h2 q0, q1, v2.2d
; CHECK-NEXT:    ret
entry:
  %vsha512h2.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
  ret <2 x i64> %vsha512h2.i
}

define <2 x i64> @test_vsha512su0(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsha512su0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sha512su0 v0.2d, v1.2d
; CHECK-NEXT:    ret
entry:
  %vsha512su0.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %vsha512su0.i
}

define <2 x i64> @test_vsha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_vsha512su1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sha512su1 v0.2d, v1.2d, v2.2d
; CHECK-NEXT:    ret
entry:
  %vsha512su1.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
  ret <2 x i64> %vsha512su1.i
}

define <2 x i64> @test_vrax1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vrax1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rax1 v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
entry:
  %vrax1.i = tail call <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %vrax1.i
}

define <2 x i64> @test_vxar(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vxar:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    xar v0.2d, v0.2d, v1.2d, #1
; CHECK-NEXT:    ret
entry:
  %vxar.i = tail call <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64> %a, <2 x i64> %b, i64 1)
  ret <2 x i64> %vxar.i
}

define <16 x i8> @test_bcax_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_bcax_8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
  ret <16 x i8> %vbcax_8.i
}

define <16 x i8> @test_eor3_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_eor3_8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
  ret <16 x i8> %veor3_8.i
}

define <16 x i8> @test_bcax_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_bcax_s8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
  ret <16 x i8> %vbcax_8.i
}

define <16 x i8> @test_eor3_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_eor3_s8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3s.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
  ret <16 x i8> %veor3_8.i
}

define <8 x i16> @test_bcax_16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_bcax_16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vbcax_16.i = tail call <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
  ret <8 x i16> %vbcax_16.i
}

define <8 x i16> @test_eor3_16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_eor3_16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %veor3_16.i = tail call <8 x i16> @llvm.aarch64.crypto.eor3u.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
  ret <8 x i16> %veor3_16.i
}

define <8 x i16> @test_bcax_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_bcax_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vbcax_16.i = tail call <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
  ret <8 x i16> %vbcax_16.i
}

define <8 x i16> @test_eor3_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_eor3_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %veor3_16.i = tail call <8 x i16> @llvm.aarch64.crypto.eor3s.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
  ret <8 x i16> %veor3_16.i
}

define <4 x i32> @test_bcax_32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_bcax_32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vbcax_32.i = tail call <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  ret <4 x i32> %vbcax_32.i
}

define <4 x i32> @test_eor3_32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_eor3_32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %veor3_32.i = tail call <4 x i32> @llvm.aarch64.crypto.eor3u.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  ret <4 x i32> %veor3_32.i
}

define <4 x i32> @test_bcax_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_bcax_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vbcax_32.i = tail call <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  ret <4 x i32> %vbcax_32.i
}

define <4 x i32> @test_eor3_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_eor3_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %veor3_32.i = tail call <4 x i32> @llvm.aarch64.crypto.eor3s.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  ret <4 x i32> %veor3_32.i
}

define <2 x i64> @test_bcax_64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_bcax_64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vbcax_64.i = tail call <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
  ret <2 x i64> %vbcax_64.i
}

define <2 x i64> @test_eor3_64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_eor3_64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %veor3_64.i = tail call <2 x i64> @llvm.aarch64.crypto.eor3u.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
  ret <2 x i64> %veor3_64.i
}

define <2 x i64> @test_bcax_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_bcax_s64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vbcax_64.i = tail call <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
  ret <2 x i64> %vbcax_64.i
}

define <2 x i64> @test_eor3_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: test_eor3_s64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %veor3_64.i = tail call <2 x i64> @llvm.aarch64.crypto.eor3s.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
  ret <2 x i64> %veor3_64.i
}

declare <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64>, <2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64>, <2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64>, <2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64>, <2 x i64>, i64 immarg)
declare <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.crypto.eor3u.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.crypto.eor3u.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.crypto.eor3u.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.aarch64.crypto.eor3s.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.crypto.eor3s.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.crypto.eor3s.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.crypto.eor3s.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)