forked from OSchip/llvm-project
LLVM support for vector quad bit permute and gather instructions through builtins
This patch corresponds to review: http://reviews.llvm.org/D10096. This is the back end portion of the patch related to D10095. The patch adds the instructions and back end intrinsics for: vbpermq, vgbbd. llvm-svn: 239505
This commit is contained in:
parent
00be6d0ff8
commit
ea1db8a697
|
@ -608,6 +608,11 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
|
|||
def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
|
||||
llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
|
||||
// Intrinsic behind __builtin_altivec_vgbbd: takes one v16i8 operand, returns a
// v16i8 result, and is marked IntrNoMem (does not access memory).
def int_ppc_altivec_vgbbd : GCCBuiltin<"__builtin_altivec_vgbbd">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
|
||||
// Intrinsic behind __builtin_altivec_vbpermq: takes two v16i8 operands, returns
// a v2i64 result, and is marked IntrNoMem (does not access memory).
def int_ppc_altivec_vbpermq : GCCBuiltin<"__builtin_altivec_vbpermq">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
def int_ppc_altivec_vexptefp : PowerPC_Vec_FF_Intrinsic<"vexptefp">;
|
||||
|
|
|
@ -1142,7 +1142,9 @@ def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef),
|
|||
def:Pat<(vpkudum_swapped_shuffle v16i8:$vA, v16i8:$vB),
|
||||
(VPKUDUM $vB, $vA)>;
|
||||
|
||||
|
||||
// "vgbbd" instruction record tied to int_ppc_altivec_vgbbd, v16i8 result and
// v16i8 operand. NOTE(review): 1292 is presumably the extended opcode field of
// VX2_Int_Ty2 (class not visible here) -- confirm against its definition.
def VGBBD : VX2_Int_Ty2<1292, "vgbbd", int_ppc_altivec_vgbbd, v16i8, v16i8>;
|
||||
// "vbpermq" instruction record tied to int_ppc_altivec_vbpermq, v2i64 result
// and v16i8 operands. NOTE(review): 1356 is presumably the extended opcode
// field of VX1_Int_Ty2 (class not visible here) -- confirm against its definition.
def VBPERMQ : VX1_Int_Ty2<1356, "vbpermq", int_ppc_altivec_vbpermq,
|
||||
v2i64, v16i8>;
|
||||
} // end HasP8Altivec
|
||||
|
||||
// Crypto instructions (from builtins)
|
||||
|
|
|
@ -0,0 +1,91 @@
|
|||
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+power8-vector -mattr=-vsx < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-VSX
|
||||
|
||||
@vsc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, align 16
|
||||
@vuc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, align 16
|
||||
@res_vll = common global <2 x i64> zeroinitializer, align 16
|
||||
@res_vull = common global <2 x i64> zeroinitializer, align 16
|
||||
@res_vsc = common global <16 x i8> zeroinitializer, align 16
|
||||
@res_vuc = common global <16 x i8> zeroinitializer, align 16
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; test1: calls @llvm.ppc.altivec.vbpermq with both operands taken from @vsc
; and stores the <2 x i64> result to @res_vll.
define void @test1() {
|
||||
entry:
|
||||
%__a.addr.i = alloca <16 x i8>, align 16
|
||||
%__b.addr.i = alloca <16 x i8>, align 16
|
||||
; Both operands are loaded from the same global, @vsc.
%0 = load <16 x i8>, <16 x i8>* @vsc, align 16
|
||||
%1 = load <16 x i8>, <16 x i8>* @vsc, align 16
|
||||
; Each operand is stored to its stack slot and then reloaded before the call.
store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
|
||||
store <16 x i8> %1, <16 x i8>* %__b.addr.i, align 16
|
||||
%2 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
|
||||
%3 = load <16 x i8>, <16 x i8>* %__b.addr.i, align 16
|
||||
%4 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %2, <16 x i8> %3)
|
||||
store <2 x i64> %4, <2 x i64>* @res_vll, align 16
|
||||
ret void
|
||||
; Expected output: the default prefix captures the registers of two lvx loads
; and requires vbpermq to consume them in the order REG2, REG1; the VSX prefix
; only requires that a vbpermq is emitted.
; CHECK-LABEL: @test1
|
||||
; CHECK: lvx [[REG1:[0-9]+]],
|
||||
; CHECK: lvx [[REG2:[0-9]+]],
|
||||
; CHECK: vbpermq {{[0-9]+}}, [[REG2]], [[REG1]]
|
||||
; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; test2: same shape as a two-operand vbpermq call, but both operands come from
; @vuc and the <2 x i64> result is stored to @res_vull.
define void @test2() {
|
||||
entry:
|
||||
%__a.addr.i = alloca <16 x i8>, align 16
|
||||
%__b.addr.i = alloca <16 x i8>, align 16
|
||||
; Both operands are loaded from the same global, @vuc.
%0 = load <16 x i8>, <16 x i8>* @vuc, align 16
|
||||
%1 = load <16 x i8>, <16 x i8>* @vuc, align 16
|
||||
; Each operand is stored to its stack slot and then reloaded before the call.
store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
|
||||
store <16 x i8> %1, <16 x i8>* %__b.addr.i, align 16
|
||||
%2 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
|
||||
%3 = load <16 x i8>, <16 x i8>* %__b.addr.i, align 16
|
||||
%4 = call <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8> %2, <16 x i8> %3)
|
||||
store <2 x i64> %4, <2 x i64>* @res_vull, align 16
|
||||
ret void
|
||||
; Expected output: the default prefix captures the registers of two lvx loads
; and requires vbpermq to consume them in the order REG2, REG1; the VSX prefix
; only requires that a vbpermq is emitted.
; CHECK-LABEL: @test2
|
||||
; CHECK: lvx [[REG1:[0-9]+]],
|
||||
; CHECK: lvx [[REG2:[0-9]+]],
|
||||
; CHECK: vbpermq {{[0-9]+}}, [[REG2]], [[REG1]]
|
||||
; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; test3: calls @llvm.ppc.altivec.vgbbd on a vector loaded from @vsc and stores
; the <16 x i8> result to @res_vsc.
define void @test3() {
|
||||
entry:
|
||||
%__a.addr.i = alloca <16 x i8>, align 16
|
||||
%0 = load <16 x i8>, <16 x i8>* @vsc, align 16
|
||||
; The operand is stored to its stack slot and then reloaded before the call.
store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
|
||||
%1 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
|
||||
%2 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %1)
|
||||
store <16 x i8> %2, <16 x i8>* @res_vsc, align 16
|
||||
ret void
|
||||
; Expected output: the default prefix captures the register of an lvx load and
; requires vgbbd to use it as its source; the VSX prefix only requires that a
; vgbbd is emitted.
; CHECK-LABEL: @test3
|
||||
; CHECK: lvx [[REG1:[0-9]+]],
|
||||
; CHECK: vgbbd {{[0-9]+}}, [[REG1]]
|
||||
; CHECK-VSX: vgbbd {{[0-9]+}}, {{[0-9]+}}
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; test4: calls @llvm.ppc.altivec.vgbbd on a vector loaded from @vuc and stores
; the <16 x i8> result to @res_vuc.
define void @test4() {
|
||||
entry:
|
||||
%__a.addr.i = alloca <16 x i8>, align 16
|
||||
%0 = load <16 x i8>, <16 x i8>* @vuc, align 16
|
||||
; The operand is stored to its stack slot and then reloaded before the call.
store <16 x i8> %0, <16 x i8>* %__a.addr.i, align 16
|
||||
%1 = load <16 x i8>, <16 x i8>* %__a.addr.i, align 16
|
||||
%2 = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %1)
|
||||
store <16 x i8> %2, <16 x i8>* @res_vuc, align 16
|
||||
ret void
|
||||
; Expected output: the default prefix captures the register of an lvx load and
; requires vgbbd to use it as its source; the VSX prefix only requires that a
; vgbbd is emitted.
; CHECK-LABEL: @test4
|
||||
; CHECK: lvx [[REG1:[0-9]+]],
|
||||
; CHECK: vgbbd {{[0-9]+}}, [[REG1]]
|
||||
; CHECK-VSX: vgbbd {{[0-9]+}}, {{[0-9]+}}
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <2 x i64> @llvm.ppc.altivec.vbpermq(<16 x i8>, <16 x i8>)
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8>)
|
|
@ -630,6 +630,12 @@
|
|||
# CHECK: vrsqrtefp 2, 3
|
||||
0x10 0x40 0x19 0x4a
|
||||
|
||||
# CHECK: vgbbd 2, 3
|
||||
0x10 0x40 0x1d 0x0c
|
||||
|
||||
# CHECK: vbpermq 2, 5, 17
|
||||
0x10 0x45 0x8d 0x4c
|
||||
|
||||
# CHECK: vclzb 2, 3
|
||||
0x10 0x40 0x1f 0x02
|
||||
|
||||
|
|
|
@ -686,6 +686,12 @@
|
|||
# CHECK-BE: vrsqrtefp 2, 3 # encoding: [0x10,0x40,0x19,0x4a]
|
||||
# CHECK-LE: vrsqrtefp 2, 3 # encoding: [0x4a,0x19,0x40,0x10]
|
||||
vrsqrtefp 2, 3
|
||||
# CHECK-BE: vgbbd 2, 3 # encoding: [0x10,0x40,0x1d,0x0c]
|
||||
# CHECK-LE: vgbbd 2, 3 # encoding: [0x0c,0x1d,0x40,0x10]
|
||||
vgbbd 2, 3
|
||||
# CHECK-BE: vbpermq 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x4c]
|
||||
# CHECK-LE: vbpermq 2, 5, 17 # encoding: [0x4c,0x8d,0x45,0x10]
|
||||
vbpermq 2, 5, 17
|
||||
|
||||
# Vector count leading zero instructions
|
||||
# CHECK-BE: vclzb 2, 3 # encoding: [0x10,0x40,0x1f,0x02]
|
||||
|
|
Loading…
Reference in New Issue