forked from OSchip/llvm-project
Big-endian version of vpermxor
A big-endian version of vpermxor, named vpermxor_be, is added to LLVM and Clang. vpermxor_be can be called directly on both little-endian and big-endian platforms.

Reviewed By: nemanjai
Differential Revision: https://reviews.llvm.org/D114540
This commit is contained in:
parent
858eb8fc11
commit
0850655da6
|
@@ -404,6 +404,7 @@ BUILTIN(__builtin_altivec_vbpermd, "V2ULLiV2ULLiV16Uc", "")
|
|||
// P8 Crypto built-ins.
|
||||
BUILTIN(__builtin_altivec_crypto_vsbox, "V2ULLiV2ULLi", "")
|
||||
BUILTIN(__builtin_altivec_crypto_vpermxor, "V16UcV16UcV16UcV16Uc", "")
|
||||
BUILTIN(__builtin_altivec_crypto_vpermxor_be, "V16UcV16UcV16UcV16Uc", "")
|
||||
BUILTIN(__builtin_altivec_crypto_vshasigmaw, "V4UiV4UiIiIi", "")
|
||||
BUILTIN(__builtin_altivec_crypto_vshasigmad, "V2ULLiV2ULLiIiIi", "")
|
||||
BUILTIN(__builtin_altivec_crypto_vcipher, "V2ULLiV2ULLiV2ULLi", "")
|
||||
|
|
|
@@ -132,6 +132,62 @@ vector unsigned char test_vpermxoruc(vector unsigned char a,
|
|||
// CHECK: @llvm.ppc.altivec.crypto.vpermxor
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxorb_be
|
||||
vector unsigned char test_vpermxorb_be(vector unsigned char a,
|
||||
vector unsigned char b,
|
||||
vector unsigned char c) {
|
||||
return __builtin_altivec_crypto_vpermxor_be(a, b, c);
|
||||
// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <8 x i16> @test_vpermxorh_be
|
||||
vector unsigned short test_vpermxorh_be(vector unsigned short a,
|
||||
vector unsigned short b,
|
||||
vector unsigned short c) {
|
||||
return __builtin_altivec_crypto_vpermxor_be(a, b, c);
|
||||
// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <4 x i32> @test_vpermxorw_be
|
||||
vector unsigned int test_vpermxorw_be(vector unsigned int a,
|
||||
vector unsigned int b,
|
||||
vector unsigned int c) {
|
||||
return __builtin_altivec_crypto_vpermxor_be(a, b, c);
|
||||
// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <2 x i64> @test_vpermxord_be
|
||||
vector unsigned long long test_vpermxord_be(vector unsigned long long a,
|
||||
vector unsigned long long b,
|
||||
vector unsigned long long c) {
|
||||
return __builtin_altivec_crypto_vpermxor_be(a, b, c);
|
||||
// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxorbc_be
|
||||
vector bool char test_vpermxorbc_be(vector bool char a,
|
||||
vector bool char b,
|
||||
vector bool char c) {
|
||||
return __builtin_altivec_crypto_vpermxor_be(a, b, c);
|
||||
// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxorsc_be
|
||||
vector signed char test_vpermxorsc_be(vector signed char a,
|
||||
vector signed char b,
|
||||
vector signed char c) {
|
||||
return __builtin_altivec_crypto_vpermxor_be(a, b, c);
|
||||
// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxoruc_be
|
||||
vector unsigned char test_vpermxoruc_be(vector unsigned char a,
|
||||
vector unsigned char b,
|
||||
vector unsigned char c) {
|
||||
return __builtin_altivec_crypto_vpermxor_be(a, b, c);
|
||||
// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <2 x i64> @test_vcipher
|
||||
vector unsigned long long test_vcipher(void)
|
||||
{
|
||||
|
|
|
@@ -1087,6 +1087,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
|
|||
GCCBuiltin<"__builtin_altivec_crypto_vpermxor">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
|
||||
def int_ppc_altivec_crypto_vpermxor_be :
|
||||
GCCBuiltin<"__builtin_altivec_crypto_vpermxor_be">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_ppc_altivec_crypto_vshasigmad :
|
||||
GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">,
|
||||
|
|
|
@@ -2471,6 +2471,7 @@ def DblwdCmp {
|
|||
// [HasVSX, HasP8Vector, IsLittleEndian]
|
||||
// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64]
|
||||
// [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian]
|
||||
// [HasVSX, HasP8Altivec]
|
||||
// [HasVSX, HasDirectMove]
|
||||
// [HasVSX, HasDirectMove, IsBigEndian]
|
||||
// [HasVSX, HasDirectMove, IsLittleEndian]
|
||||
|
@@ -2500,6 +2501,10 @@ let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
|
|||
def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
|
||||
v16i8:$b, v16i8:$c)),
|
||||
(v16i8 (VPERMXOR $a, $b, $c))>;
|
||||
let Predicates = [HasVSX, HasP8Altivec] in
|
||||
def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor_be v16i8:$a,
|
||||
v16i8:$b, v16i8:$c)),
|
||||
(v16i8 (VPERMXOR $a, $b, $c))>;
|
||||
|
||||
let AddedComplexity = 400 in {
|
||||
// Valid for any VSX subtarget, regardless of endianness.
|
||||
|
|
|
@@ -0,0 +1,165 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK-LE-P8
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK-P9
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK-BE-P8
|
||||
|
||||
define <16 x i8> @test_vpermxorb() local_unnamed_addr {
|
||||
; CHECK-LE-P8-LABEL: test_vpermxorb:
|
||||
; CHECK-LE-P8: # %bb.0: # %entry
|
||||
; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI0_1@toc@ha
|
||||
; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; CHECK-LE-P8-NEXT: lvx 2, 0, 3
|
||||
; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI0_1@toc@l
|
||||
; CHECK-LE-P8-NEXT: lvx 3, 0, 3
|
||||
; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-LE-P8-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LABEL: test_vpermxorb:
|
||||
; CHECK-P9: # %bb.0: # %entry
|
||||
; CHECK-P9-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; CHECK-P9-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; CHECK-P9-NEXT: lxv 34, 0(3)
|
||||
; CHECK-P9-NEXT: addis 3, 2, .LCPI0_1@toc@ha
|
||||
; CHECK-P9-NEXT: addi 3, 3, .LCPI0_1@toc@l
|
||||
; CHECK-P9-NEXT: lxv 35, 0(3)
|
||||
; CHECK-P9-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-P9-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P8-LABEL: test_vpermxorb:
|
||||
; CHECK-BE-P8: # %bb.0: # %entry
|
||||
; CHECK-BE-P8-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addis 4, 2, .LCPI0_1@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; CHECK-BE-P8-NEXT: addi 4, 4, .LCPI0_1@toc@l
|
||||
; CHECK-BE-P8-NEXT: lxvw4x 34, 0, 3
|
||||
; CHECK-BE-P8-NEXT: lxvw4x 35, 0, 4
|
||||
; CHECK-BE-P8-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-BE-P8-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>)
|
||||
ret <16 x i8> %0
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8>, <16 x i8>, <16 x i8>)
|
||||
|
||||
define <8 x i16> @test_vpermxorh() local_unnamed_addr {
|
||||
; CHECK-LE-P8-LABEL: test_vpermxorh:
|
||||
; CHECK-LE-P8: # %bb.0: # %entry
|
||||
; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha
|
||||
; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI1_1@toc@ha
|
||||
; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI1_0@toc@l
|
||||
; CHECK-LE-P8-NEXT: lvx 2, 0, 3
|
||||
; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI1_1@toc@l
|
||||
; CHECK-LE-P8-NEXT: lvx 3, 0, 3
|
||||
; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-LE-P8-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LABEL: test_vpermxorh:
|
||||
; CHECK-P9: # %bb.0: # %entry
|
||||
; CHECK-P9-NEXT: addis 3, 2, .LCPI1_0@toc@ha
|
||||
; CHECK-P9-NEXT: addi 3, 3, .LCPI1_0@toc@l
|
||||
; CHECK-P9-NEXT: lxv 34, 0(3)
|
||||
; CHECK-P9-NEXT: addis 3, 2, .LCPI1_1@toc@ha
|
||||
; CHECK-P9-NEXT: addi 3, 3, .LCPI1_1@toc@l
|
||||
; CHECK-P9-NEXT: lxv 35, 0(3)
|
||||
; CHECK-P9-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-P9-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P8-LABEL: test_vpermxorh:
|
||||
; CHECK-BE-P8: # %bb.0: # %entry
|
||||
; CHECK-BE-P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addis 4, 2, .LCPI1_1@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addi 3, 3, .LCPI1_0@toc@l
|
||||
; CHECK-BE-P8-NEXT: addi 4, 4, .LCPI1_1@toc@l
|
||||
; CHECK-BE-P8-NEXT: lxvw4x 34, 0, 3
|
||||
; CHECK-BE-P8-NEXT: lxvw4x 35, 0, 4
|
||||
; CHECK-BE-P8-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-BE-P8-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> <i8 2, i8 1, i8 4, i8 3, i8 6, i8 5, i8 8, i8 7, i8 10, i8 9, i8 12, i8 11, i8 14, i8 13, i8 16, i8 15>, <16 x i8> <i8 114, i8 113, i8 116, i8 115, i8 118, i8 117, i8 120, i8 119, i8 122, i8 121, i8 124, i8 123, i8 126, i8 125, i8 112, i8 127>, <16 x i8> <i8 114, i8 113, i8 116, i8 115, i8 118, i8 117, i8 120, i8 119, i8 122, i8 121, i8 124, i8 123, i8 126, i8 125, i8 112, i8 127>)
|
||||
%1 = bitcast <16 x i8> %0 to <8 x i16>
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_vpermxorw() local_unnamed_addr {
|
||||
; CHECK-LE-P8-LABEL: test_vpermxorw:
|
||||
; CHECK-LE-P8: # %bb.0: # %entry
|
||||
; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI2_0@toc@ha
|
||||
; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI2_1@toc@ha
|
||||
; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI2_0@toc@l
|
||||
; CHECK-LE-P8-NEXT: lvx 2, 0, 3
|
||||
; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI2_1@toc@l
|
||||
; CHECK-LE-P8-NEXT: lvx 3, 0, 3
|
||||
; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-LE-P8-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LABEL: test_vpermxorw:
|
||||
; CHECK-P9: # %bb.0: # %entry
|
||||
; CHECK-P9-NEXT: addis 3, 2, .LCPI2_0@toc@ha
|
||||
; CHECK-P9-NEXT: addi 3, 3, .LCPI2_0@toc@l
|
||||
; CHECK-P9-NEXT: lxv 34, 0(3)
|
||||
; CHECK-P9-NEXT: addis 3, 2, .LCPI2_1@toc@ha
|
||||
; CHECK-P9-NEXT: addi 3, 3, .LCPI2_1@toc@l
|
||||
; CHECK-P9-NEXT: lxv 35, 0(3)
|
||||
; CHECK-P9-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-P9-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P8-LABEL: test_vpermxorw:
|
||||
; CHECK-BE-P8: # %bb.0: # %entry
|
||||
; CHECK-BE-P8-NEXT: addis 3, 2, .LCPI2_0@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addis 4, 2, .LCPI2_1@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addi 3, 3, .LCPI2_0@toc@l
|
||||
; CHECK-BE-P8-NEXT: addi 4, 4, .LCPI2_1@toc@l
|
||||
; CHECK-BE-P8-NEXT: lxvw4x 34, 0, 3
|
||||
; CHECK-BE-P8-NEXT: lxvw4x 35, 0, 4
|
||||
; CHECK-BE-P8-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-BE-P8-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> <i8 4, i8 3, i8 2, i8 1, i8 8, i8 7, i8 6, i8 5, i8 12, i8 11, i8 10, i8 9, i8 16, i8 15, i8 14, i8 13>, <16 x i8> <i8 116, i8 115, i8 114, i8 113, i8 120, i8 119, i8 118, i8 117, i8 124, i8 123, i8 122, i8 121, i8 112, i8 127, i8 126, i8 125>, <16 x i8> <i8 116, i8 115, i8 114, i8 113, i8 120, i8 119, i8 118, i8 117, i8 124, i8 123, i8 122, i8 121, i8 112, i8 127, i8 126, i8 125>)
|
||||
%1 = bitcast <16 x i8> %0 to <4 x i32>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @test_vpermxord() local_unnamed_addr {
|
||||
; CHECK-LE-P8-LABEL: test_vpermxord:
|
||||
; CHECK-LE-P8: # %bb.0: # %entry
|
||||
; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI3_0@toc@ha
|
||||
; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI3_1@toc@ha
|
||||
; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI3_0@toc@l
|
||||
; CHECK-LE-P8-NEXT: lvx 2, 0, 3
|
||||
; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI3_1@toc@l
|
||||
; CHECK-LE-P8-NEXT: lvx 3, 0, 3
|
||||
; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-LE-P8-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LABEL: test_vpermxord:
|
||||
; CHECK-P9: # %bb.0: # %entry
|
||||
; CHECK-P9-NEXT: addis 3, 2, .LCPI3_0@toc@ha
|
||||
; CHECK-P9-NEXT: addi 3, 3, .LCPI3_0@toc@l
|
||||
; CHECK-P9-NEXT: lxv 34, 0(3)
|
||||
; CHECK-P9-NEXT: addis 3, 2, .LCPI3_1@toc@ha
|
||||
; CHECK-P9-NEXT: addi 3, 3, .LCPI3_1@toc@l
|
||||
; CHECK-P9-NEXT: lxv 35, 0(3)
|
||||
; CHECK-P9-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-P9-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P8-LABEL: test_vpermxord:
|
||||
; CHECK-BE-P8: # %bb.0: # %entry
|
||||
; CHECK-BE-P8-NEXT: addis 3, 2, .LCPI3_0@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addis 4, 2, .LCPI3_1@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addi 3, 3, .LCPI3_0@toc@l
|
||||
; CHECK-BE-P8-NEXT: addi 4, 4, .LCPI3_1@toc@l
|
||||
; CHECK-BE-P8-NEXT: lxvw4x 34, 0, 3
|
||||
; CHECK-BE-P8-NEXT: lxvw4x 35, 0, 4
|
||||
; CHECK-BE-P8-NEXT: vpermxor 2, 3, 2, 2
|
||||
; CHECK-BE-P8-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> <i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9>, <16 x i8> <i8 120, i8 119, i8 118, i8 117, i8 116, i8 115, i8 114, i8 113, i8 112, i8 127, i8 126, i8 125, i8 124, i8 123, i8 122, i8 121>, <16 x i8> <i8 120, i8 119, i8 118, i8 117, i8 116, i8 115, i8 114, i8 113, i8 112, i8 127, i8 126, i8 125, i8 124, i8 123, i8 122, i8 121>)
|
||||
%1 = bitcast <16 x i8> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
Loading…
Reference in New Issue