From 274d72faad6741613f74a17ad9a4a516826722ab Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Thu, 24 May 2018 15:26:42 +0000
Subject: [PATCH] [InstCombine] Combine XOR and AES instructions on ARM/ARM64.

The ARM/ARM64 AESE and AESD instructions have a builtin XOR as the first
step in the instruction. Therefore, if the AES key is zero and the AES
data was previously XORed, it can be combined into a single instruction.

Differential Revision: https://reviews.llvm.org/D47239

Patch by Michael Brase!

llvm-svn: 333193
---
 .../InstCombine/InstCombineCalls.cpp          | 17 +++++++
 .../InstCombine/AArch64/aes-intrinsics.ll     | 44 +++++++++++++++++++
 .../InstCombine/ARM/aes-intrinsics.ll         | 43 ++++++++++++++++++
 3 files changed, 104 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
 create mode 100644 llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8b7c28281ca0..a5a0edcd96f1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2966,6 +2966,23 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
 
+  case Intrinsic::arm_neon_aesd:
+  case Intrinsic::arm_neon_aese:
+  case Intrinsic::aarch64_crypto_aesd:
+  case Intrinsic::aarch64_crypto_aese: {
+    Value *DataArg = II->getArgOperand(0);
+    Value *KeyArg = II->getArgOperand(1);
+
+    // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
+    Value *Data, *Key;
+    if (match(KeyArg, m_ZeroInt()) &&
+        match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
+      II->setArgOperand(0, Data);
+      II->setArgOperand(1, Key);
+      return II;
+    }
+    break;
+  }
   case Intrinsic::amdgcn_rcp: {
     Value *Src = II->getArgOperand(0);
 
diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
new file mode 100644
index 000000000000..c38385907be9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM64 AES intrinsic variants
+
+define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseZeroARM64(
+; CHECK-NEXT:   %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT:   ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAeseNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseNonZeroARM64(
+; CHECK-NEXT:   %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT:   %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:   ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdZeroARM64(
+; CHECK-NEXT:   %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT:   ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdNonZeroARM64(
+; CHECK-NEXT:   %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT:   %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:   ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  ret <16 x i8> %data.aes
+}
+
+declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>) #0
+declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #0
+
diff --git a/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
new file mode 100644
index 000000000000..56eee5468010
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM AES intrinsic variants
+
+define <16 x i8> @combineXorAeseZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseZeroARM(
+; CHECK-NEXT:   %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT:   ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAeseNonZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseNonZeroARM(
+; CHECK-NEXT:   %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT:   %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:   ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdZeroARM(
+; CHECK-NEXT:   %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT:   ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdNonZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdNonZeroARM(
+; CHECK-NEXT:   %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT:   %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:   ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  ret <16 x i8> %data.aes
+}
+
+declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #0
+declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #0
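
A minimal before/after sketch of the fold, distilled from the
combineXorAeseZeroARM64 test above (the value names come from that test; this
is an illustration only, not additional test content). Before the fold, the
data is XORed by hand and AESE receives an all-zero round key:

  ; Input IR: explicit xor feeding aese with a zero key argument.
  %data.xor = xor <16 x i8> %data, %key
  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> zeroinitializer)

After the fold, the XOR operands are handed straight to the intrinsic, whose
builtin first step performs the same XOR, so the explicit xor becomes dead:

  ; Output IR: one call, no separate xor; codegen emits a single AESE.
  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)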