diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index cb8e30445a76..0b0105c665f5 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -1135,6 +1135,19 @@ def SHA256H : SInst<"vsha256h", "....", "QUi">;
 def SHA256H2 : SInst<"vsha256h2", "....", "QUi">;
 def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">;
 
+def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">;
+def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
+def RAX1 : SInst<"vrax1", "...", "QUl">;
+
+let isVXAR = 1 in {
+def XAR : SInst<"vxar", "...I", "QUl">;
+}
+
+def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">;
+def SHA512su1 : SInst<"vsha512su1", "....", "QUl">;
+def SHA512H : SInst<"vsha512h", "....", "QUl">;
+def SHA512H2 : SInst<"vsha512h2", "....", "QUl">;
+
 def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">;
 def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">;
 def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">;
diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td
index dd20b70433ef..60dbea627d58 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -272,6 +272,7 @@ class Inst <string n, string p, string t, Operation o> {
   bit isScalarShift = 0;
   bit isScalarNarrowShift = 0;
   bit isVCVT_N = 0;
+  bit isVXAR = 0;
   // For immediate checks: the immediate will be assumed to specify the lane of
   // a Q register. Only used for intrinsics which end up calling polymorphic
   // builtins.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c1eaba49ce91..04289bf885b7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5676,6 +5676,7 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
+  NEONMAP2(vbcaxq_v, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
   NEONMAP1(vbfdot_v, aarch64_neon_bfdot, 0),
   NEONMAP1(vbfdotq_v, aarch64_neon_bfdot, 0),
   NEONMAP1(vbfmlalbq_v, aarch64_neon_bfmlalb, 0),
@@ -5745,6 +5746,7 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
   NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
   NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
+  NEONMAP2(veor3q_v, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
   NEONMAP0(vext_v),
   NEONMAP0(vextq_v),
   NEONMAP0(vfma_v),
@@ -5810,6 +5812,7 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
+  NEONMAP1(vrax1q_v, aarch64_crypto_rax1, 0),
   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
@@ -5833,6 +5836,10 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
+  NEONMAP1(vsha512h2q_v, aarch64_crypto_sha512h2, 0),
+  NEONMAP1(vsha512hq_v, aarch64_crypto_sha512h, 0),
+  NEONMAP1(vsha512su0q_v, aarch64_crypto_sha512su0, 0),
+  NEONMAP1(vsha512su1q_v, aarch64_crypto_sha512su1, 0),
   NEONMAP0(vshl_n_v),
   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
   NEONMAP0(vshll_n_v),
@@ -5862,6 +5869,7 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP1(vusdot_v, aarch64_neon_usdot, 0),
   NEONMAP1(vusdotq_v, aarch64_neon_usdot, 0),
   NEONMAP1(vusmmlaq_v, aarch64_neon_usmmla, 0),
+  NEONMAP1(vxarq_v, aarch64_crypto_xar, 0),
 };
 
 static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
@@ -6688,6 +6696,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
   case NEON::BI__builtin_neon_vrshr_n_v:
   case NEON::BI__builtin_neon_vrshrq_n_v:
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
+  case NEON::BI__builtin_neon_vsha512hq_v:
+  case NEON::BI__builtin_neon_vsha512h2q_v:
+  case NEON::BI__builtin_neon_vsha512su0q_v:
+  case NEON::BI__builtin_neon_vsha512su1q_v: {
+    Function *F = CGM.getIntrinsic(Int);
+    return EmitNeonCall(F, Ops, "");
+  }
   case NEON::BI__builtin_neon_vshl_n_v:
   case NEON::BI__builtin_neon_vshlq_n_v:
     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
@@ -6833,6 +6848,11 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
     }
     return SV;
   }
+  case NEON::BI__builtin_neon_vxarq_v: {
+    Function *F = CGM.getIntrinsic(Int);
+    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
+    return EmitNeonCall(F, Ops, "");
+  }
   case NEON::BI__builtin_neon_vzip_v:
   case NEON::BI__builtin_neon_vzipq_v: {
     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
diff --git a/clang/test/CodeGen/aarch64-neon-range-checks.c b/clang/test/CodeGen/aarch64-neon-range-checks.c
index fa5454b955e6..25718ea7868b 100644
--- a/clang/test/CodeGen/aarch64-neon-range-checks.c
+++ b/clang/test/CodeGen/aarch64-neon-range-checks.c
@@ -30,3 +30,10 @@ void test_range_check_vsm3tt2b(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
   vsm3tt2bq_u32(a, b, c, 0);
 }
 
+void test_range_check_xar(uint64x2_t a, uint64x2_t b) {
+  vxarq_u64(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 63]}}
+  vxarq_u64(a, b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  vxarq_u64(a, b, 0);
+  vxarq_u64(a, b, 63);
+}
+
diff --git a/clang/test/CodeGen/aarch64-neon-sha3.c b/clang/test/CodeGen/aarch64-neon-sha3.c
new file mode 100644
index 000000000000..6790563ee74c
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-neon-sha3.c
@@ -0,0 +1,162 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
+// RUN:   -target-feature +crypto -S -emit-llvm -o - %s \
+// RUN:   | FileCheck %s
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: @test_vsha512h(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512h
+//
+void test_vsha512h(uint64x2_t hash_ed, uint64x2_t hash_gf, uint64x2_t kwh_kwh2) {
+  uint64x2_t result = vsha512hq_u64(hash_ed, hash_gf, kwh_kwh2);
+}
+
+// CHECK-LABEL: @test_vsha512h2(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512h2
+//
+void test_vsha512h2(uint64x2_t sum_ab, uint64x2_t hash_c_, uint64x2_t hash_ab) {
+  uint64x2_t result = vsha512h2q_u64(sum_ab, hash_c_, hash_ab);
+}
+
+// CHECK-LABEL: @test_vsha512su0(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512su0
+//
+void test_vsha512su0(uint64x2_t w0_1, uint64x2_t w2_) {
+  uint64x2_t result = vsha512su0q_u64(w0_1, w2_);
+}
+
+// CHECK-LABEL: @test_vsha512su1(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512su1
+//
+void test_vsha512su1(uint64x2_t s01_s02, uint64x2_t w14_15, uint64x2_t w9_10) {
+  uint64x2_t result = vsha512su1q_u64(s01_s02, w14_15, w9_10);
+}
+
+// CHECK-LABEL: @test_vrax1(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.rax1
+//
+void test_vrax1(uint64x2_t a, uint64x2_t b) {
+  uint64x2_t result = vrax1q_u64(a, b);
+}
+
+
+// CHECK-LABEL: @test_xar(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.xar
+//
+void test_xar(uint64x2_t a, uint64x2_t b) {
+  uint64x2_t result = vxarq_u64(a, b, 10);
+}
+
+
+// CHECK-LABEL: @test_vbcax_u8(
+// CHECK: call <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8
+//
+void test_vbcax_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
+  uint8x16_t result = vbcaxq_u8(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_u16(
+// CHECK: call <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16
+//
+void test_vbcax_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
+  uint16x8_t result = vbcaxq_u16(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_u32(
+// CHECK: call <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32
+//
+void test_vbcax_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
+  uint32x4_t result = vbcaxq_u32(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_u64(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64
+//
+void test_vbcax_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
+  uint64x2_t result = vbcaxq_u64(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_s8(
+// CHECK: call <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8
+//
+void test_vbcax_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
+  int8x16_t result = vbcaxq_s8(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_s16(
+// CHECK: call <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16
+//
+void test_vbcax_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+  int16x8_t result = vbcaxq_s16(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_s32(
+// CHECK: call <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32
+//
+void test_vbcax_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+  int32x4_t result = vbcaxq_s32(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_s64(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64
+//
+void test_vbcax_s64(int64x2_t a, int64x2_t b, int64x2_t c) {
+  int64x2_t result = vbcaxq_s64(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_u8(
+// CHECK: call <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8
+//
+void test_veor3_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
+  uint8x16_t result = veor3q_u8(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_u16(
+// CHECK: call <8 x i16> @llvm.aarch64.crypto.eor3u.v8i16
+//
+void test_veor3_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
+  uint16x8_t result = veor3q_u16(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_u32(
+// CHECK: call <4 x i32> @llvm.aarch64.crypto.eor3u.v4i32
+//
+void test_veor3_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
+  uint32x4_t result = veor3q_u32(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_u64(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.eor3u.v2i64
+//
+void test_veor3_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
+  uint64x2_t result = veor3q_u64(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_s8(
+// CHECK: call <16 x i8> @llvm.aarch64.crypto.eor3s.v16i8
+//
+void test_veor3_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
+  int8x16_t result = veor3q_s8(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_s16(
+// CHECK: call <8 x i16> @llvm.aarch64.crypto.eor3s.v8i16
+//
+void test_veor3_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+  int16x8_t result = veor3q_s16(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_s32(
+// CHECK: call <4 x i32> @llvm.aarch64.crypto.eor3s.v4i32
+//
+void test_veor3_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+  int32x4_t result = veor3q_s32(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_s64(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.eor3s.v2i64
+//
+void test_veor3_s64(int64x2_t a, int64x2_t b, int64x2_t c) {
+  int64x2_t result = veor3q_s64(a, b, c);
+}
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index ba952f220037..f0da1a7d2f4e 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2115,7 +2115,11 @@ void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
     std::string LowerBound, UpperBound;
 
     Record *R = Def->getRecord();
-    if (R->getValueAsBit("isVCVT_N")) {
+    if (R->getValueAsBit("isVXAR")) {
+      //VXAR takes an immediate in the range [0, 63]
+      LowerBound = "0";
+      UpperBound = "63";
+    } else if (R->getValueAsBit("isVCVT_N")) {
       // VCVT between floating- and fixed-point values takes an immediate
       // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
       LowerBound = "1";
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 9ee478c485eb..4d9a0682c477 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -715,6 +715,31 @@ let TargetPrefix = "aarch64" in {
       : DefaultAttrsIntrinsic<[llvm_v4i32_ty],
                               [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
                               [IntrNoMem]>;
+  // SHA512 intrinsic taking 2 arguments
+  class Crypto_SHA512_2Arg_Intrinsic
+      : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+  // SHA512 intrinsic taking 3 Arguments
+  class Crypto_SHA512_3Arg_Intrinsic
+      : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+                              [IntrNoMem]>;
+
+  // SHA3 Intrinsics taking 3 arguments
+  class Crypto_SHA3_3Arg_Intrinsic
+      : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                              [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+                              [IntrNoMem]>;
+
+  // SHA3 Intrinsic taking 2 arguments
+  class Crypto_SHA3_2Arg_Intrinsic
+      : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                              [IntrNoMem]>;
+
+  // SHA3 Intrinsic taking 3 Arguments 1 immediate
+  class Crypto_SHA3_2ArgImm_Intrinsic
+      : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i64_ty],
+                              [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
   class Crypto_SM3_3Vector_Intrinsic
       : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
                   [IntrNoMem]>;
@@ -748,6 +773,20 @@ def int_aarch64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic;
 def int_aarch64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic;
 def int_aarch64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic;
 
+//SHA3
+def int_aarch64_crypto_eor3s : Crypto_SHA3_3Arg_Intrinsic;
+def int_aarch64_crypto_eor3u : Crypto_SHA3_3Arg_Intrinsic;
+def int_aarch64_crypto_bcaxs : Crypto_SHA3_3Arg_Intrinsic;
+def int_aarch64_crypto_bcaxu : Crypto_SHA3_3Arg_Intrinsic;
+def int_aarch64_crypto_rax1 : Crypto_SHA3_2Arg_Intrinsic;
+def int_aarch64_crypto_xar : Crypto_SHA3_2ArgImm_Intrinsic;
+
+// SHA512
+def int_aarch64_crypto_sha512h : Crypto_SHA512_3Arg_Intrinsic;
+def int_aarch64_crypto_sha512h2 : Crypto_SHA512_3Arg_Intrinsic;
+def int_aarch64_crypto_sha512su0 : Crypto_SHA512_2Arg_Intrinsic;
+def int_aarch64_crypto_sha512su1 : Crypto_SHA512_3Arg_Intrinsic;
+
 //SM3 & SM4
 def int_aarch64_crypto_sm3partw1 : Crypto_SM3_3Vector_Intrinsic;
 def int_aarch64_crypto_sm3partw2 : Crypto_SM3_3Vector_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index b96aa42b4f65..c8545643bdc9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -890,6 +890,12 @@ def imm0_63 : Operand<i64>, ImmLeaf<i64, [{
   let ParserMatchClass = Imm0_63Operand;
 }
 
+def timm0_63 : Operand<i64>, TImmLeaf<i64, [{
+  return ((uint64_t)Imm) < 64;
+}]> {
+  let ParserMatchClass = Imm0_63Operand;
+}
+
 // imm0_31 predicate - True if the immediate is in the range [0,31]
 def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
   return ((uint64_t)Imm) < 32;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
 def EOR3 : CryptoRRRR_16B<0b00, "eor3">;
 def BCAX : CryptoRRRR_16B<0b01, "bcax">;
 def XAR : CryptoRRRi6<"xar">;
+
+class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
+  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
+        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;
+
+def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
+          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
+
+def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
+def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
+def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;
+
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
+
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;
+
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;
+
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;
+
+def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
+          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
+
+def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
+          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
+
+
 } // HasSHA3
 
 let Predicates = [HasSM4] in {
diff --git a/llvm/test/CodeGen/AArch64/neon-sha3.ll b/llvm/test/CodeGen/AArch64/neon-sha3.ll
new file mode 100644
index 000000000000..ec99cf03e108
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-sha3.ll
@@ -0,0 +1,246 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+sha3 -o - | FileCheck %s
+
+define <2 x i64> @test_vsha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vsha512h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sha512h q0, q1, v2.2d
+; CHECK-NEXT:    ret
+entry:
+  %vsha512h.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %vsha512h.i
+}
+
+define <2 x i64> @test_vsha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vsha512h2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sha512h2 q0, q1, v2.2d
+; CHECK-NEXT:    ret
+entry:
+  %vsha512h2.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %vsha512h2.i
+}
+
+define <2 x i64> @test_vsha512su0(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsha512su0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sha512su0 v0.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %vsha512su0.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %vsha512su0.i
+}
+
+define <2 x i64> @test_vsha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vsha512su1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sha512su1 v0.2d, v1.2d, v2.2d
+; CHECK-NEXT:    ret
+entry:
+  %vsha512su1.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %vsha512su1.i
+}
+
+define <2 x i64> @test_vrax1(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vrax1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rax1 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %vrax1.i = tail call <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %vrax1.i
+}
+
+define <2 x i64> @test_vxar(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vxar:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    xar v0.2d, v0.2d, v1.2d, #1
+; CHECK-NEXT:    ret
+entry:
+  %vxar.i = tail call <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64> %a, <2 x i64> %b, i64 1)
+  ret <2 x i64> %vxar.i
+}
+
+define <16 x i8> @test_bcax_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_bcax_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %vbcax_8.i
+}
+
+define <16 x i8> @test_eor3_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_eor3_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %veor3_8.i
+}
+
+define <16 x i8> @test_bcax_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_bcax_s8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %vbcax_8.i
+}
+
+define <16 x i8> @test_eor3_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_eor3_s8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3s.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %veor3_8.i
+}
+
+define <8 x i16> @test_bcax_16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_bcax_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %vbcax_16.i = tail call <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+  ret <8 x i16> %vbcax_16.i
+}
+
+define <8 x i16> @test_eor3_16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_eor3_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %veor3_16.i = tail call <8 x i16> @llvm.aarch64.crypto.eor3u.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+  ret <8 x i16> %veor3_16.i
+}
+
+define <8 x i16> @test_bcax_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_bcax_s16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %vbcax_16.i = tail call <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+  ret <8 x i16> %vbcax_16.i
+}
+
+define <8 x i16> @test_eor3_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_eor3_s16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %veor3_16.i = tail call <8 x i16> @llvm.aarch64.crypto.eor3s.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+  ret <8 x i16> %veor3_16.i
+}
+
+define <4 x i32> @test_bcax_32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_bcax_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %vbcax_32.i = tail call <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %vbcax_32.i
+}
+
+define <4 x i32> @test_eor3_32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_eor3_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %veor3_32.i = tail call <4 x i32> @llvm.aarch64.crypto.eor3u.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %veor3_32.i
+}
+
+define <4 x i32> @test_bcax_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_bcax_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %vbcax_32.i = tail call <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %vbcax_32.i
+}
+
+define <4 x i32> @test_eor3_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_eor3_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %veor3_32.i = tail call <4 x i32> @llvm.aarch64.crypto.eor3s.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %veor3_32.i
+}
+
+define <2 x i64> @test_bcax_64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_bcax_64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %vbcax_64.i = tail call <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %vbcax_64.i
+}
+
+define <2 x i64> @test_eor3_64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_eor3_64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %veor3_64.i = tail call <2 x i64> @llvm.aarch64.crypto.eor3u.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %veor3_64.i
+}
+
+define <2 x i64> @test_bcax_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_bcax_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %vbcax_64.i = tail call <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %vbcax_64.i
+}
+
+define <2 x i64> @test_eor3_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_eor3_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %veor3_64.i = tail call <2 x i64> @llvm.aarch64.crypto.eor3s.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %veor3_64.i
+}
+
+declare <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64>, <2 x i64>, i64 immarg)
+declare <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.aarch64.crypto.eor3u.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.crypto.eor3u.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.crypto.eor3u.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.aarch64.crypto.eor3s.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.aarch64.crypto.eor3s.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.crypto.eor3s.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.crypto.eor3s.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)