[InstCombine] Combine XOR and AES instructions on ARM/ARM64.

The ARM/ARM64 AESE and AESD instructions have a builtin XOR as the first
step in the instruction. Therefore, if the AES key is zero and the AES
data was previously XORed, the XOR and the AES instruction can be
combined into a single instruction.

Differential Revision: https://reviews.llvm.org/D47239

Patch by Michael Brase!

llvm-svn: 333193
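As an illustration only (not part of the patch), here is the source-level shape of the pattern this fold targets, sketched with the standard Arm intrinsics from <arm_neon.h>; the helper function names are made up for the example. Because AESE XORs its two operands as the first step of the instruction, an explicit XOR feeding an AESE with an all-zero round key is equivalent to a single AESE of the original operands:

#include <arm_neon.h>

// Before the fold: an explicit XOR, then AESE with a zero round key.
// (Hypothetical helper names; requires a target with the AES extension.)
uint8x16_t aese_xor_then_zero_key(uint8x16_t data, uint8x16_t key) {
  return vaeseq_u8(veorq_u8(data, key), vdupq_n_u8(0));
}

// After the fold: the builtin XOR inside AESE absorbs the veorq_u8.
uint8x16_t aese_folded(uint8x16_t data, uint8x16_t key) {
  return vaeseq_u8(data, key);
}

The same reasoning applies to AESD, and to the ARM NEON variants of both intrinsics.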
@@ -2966,6 +2966,23 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
+  case Intrinsic::arm_neon_aesd:
+  case Intrinsic::arm_neon_aese:
+  case Intrinsic::aarch64_crypto_aesd:
+  case Intrinsic::aarch64_crypto_aese: {
+    Value *DataArg = II->getArgOperand(0);
+    Value *KeyArg = II->getArgOperand(1);
+
+    // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
+    Value *Data, *Key;
+    if (match(KeyArg, m_ZeroInt()) &&
+        match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
+      II->setArgOperand(0, Data);
+      II->setArgOperand(1, Key);
+      return II;
+    }
+    break;
+  }
   case Intrinsic::amdgcn_rcp: {
     Value *Src = II->getArgOperand(0);
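For readers unfamiliar with the matcher calls above: m_ZeroInt() matches any all-zero integer constant, including a vector splat of zero, and m_Xor(m_Value(Data), m_Value(Key)) both tests that the operand is an xor and binds the xor's operands. A minimal standalone sketch of that idiom, compiled against current LLVM headers (the driver scaffolding here is an assumption for demonstration, not part of the patch):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::PatternMatch;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  auto *VecTy = FixedVectorType::get(Type::getInt8Ty(Ctx), 16);
  auto *FnTy = FunctionType::get(VecTy, {VecTy, VecTy}, /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "f", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  // Build the shape the combine looks for: DataArg = xor a, b; KeyArg = 0.
  Value *DataArg = B.CreateXor(F->getArg(0), F->getArg(1), "data.xor");
  Value *KeyArg = Constant::getNullValue(VecTy);

  // The same test the hunk above performs on the intrinsic's operands.
  Value *Data, *Key;
  if (match(KeyArg, m_ZeroInt()) &&
      match(DataArg, m_Xor(m_Value(Data), m_Value(Key))))
    outs() << "matched; Data and Key now name the xor's operands\n";
  return 0;
}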
New test file: AArch64 AES intrinsic variants
@@ -0,0 +1,44 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM64 AES intrinsic variants
+
+define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseZeroARM64(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAeseNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseNonZeroARM64(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdZeroARM64(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdNonZeroARM64(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+  ret <16 x i8> %data.aes
+}
+
+declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>) #0
+declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #0
New test file: ARM (NEON) AES intrinsic variants
@@ -0,0 +1,43 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM AES intrinsic variants
+
+define <16 x i8> @combineXorAeseZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseZeroARM(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAeseNonZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseNonZeroARM(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdZeroARM(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdNonZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdNonZeroARM(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+  ret <16 x i8> %data.aes
+}
+
+declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #0
+declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #0