forked from OSchip/llvm-project
AVX-512: Implemented DAG lowering for vshuff64x2/vshufi64x2 instructions (Shuffle Packed Values at 128-bit Granularity)
Tests added; the vector-shuffle-512-v8.ll test was re-generated. Differential Revision: http://reviews.llvm.org/D10300 llvm-svn: 239697
This commit is contained in:
parent
ce1ce989e2
commit
5e49697138
|
@ -9383,6 +9383,30 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
|
|||
DAG.getConstant(PermMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
/// \brief Handle lowering 4-lane 128-bit shuffles.
|
||||
static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
|
||||
SDValue V2, ArrayRef<int> WidenedMask,
|
||||
SelectionDAG &DAG) {
|
||||
|
||||
assert(WidenedMask.size() == 4 && "Unexpected mask size for 128bit shuffle!");
|
||||
// form a 128-bit permutation.
|
||||
// convert the 64-bit shuffle mask selection values into 128-bit selection
|
||||
// bits defined by a vshuf64x2 instruction's immediate control byte.
|
||||
unsigned PermMask = 0, Imm = 0;
|
||||
|
||||
for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
|
||||
if(WidenedMask[i] == SM_SentinelZero)
|
||||
return SDValue();
|
||||
|
||||
// use first element in place of undef musk
|
||||
Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
|
||||
PermMask |= (Imm % 4) << (i * 2);
|
||||
}
|
||||
|
||||
return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
|
||||
DAG.getConstant(PermMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
/// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
|
||||
/// shuffling each lane.
|
||||
///
|
||||
|
@ -10176,6 +10200,10 @@ static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
|||
ArrayRef<int> Mask = SVOp->getMask();
|
||||
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
|
||||
|
||||
SmallVector<int, 4> WidenedMask;
|
||||
if (canWidenShuffleElements(Mask, WidenedMask))
|
||||
if(SDValue Op = lowerV4X128VectorShuffle(DL, VT, V1, V2, WidenedMask, DAG))
|
||||
return Op;
|
||||
// X86 has dedicated unpack instructions that can handle specific blend
|
||||
// operations: UNPCKH and UNPCKL.
|
||||
if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
|
||||
|
|
|
@ -116,10 +116,10 @@ define <16 x i32> @test15(<16 x i32> %a) {
|
|||
ret <16 x i32> %b
|
||||
}
|
||||
; CHECK-LABEL: test16
|
||||
; CHECK: valignq $2, %zmm0, %zmm1
|
||||
; CHECK: valignq $3, %zmm0, %zmm1
|
||||
; CHECK: ret
|
||||
define <8 x double> @test16(<8 x double> %a, <8 x double> %b) nounwind {
|
||||
%c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
|
||||
%c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
|
||||
ret <8 x double> %c
|
||||
}
|
||||
|
||||
|
@ -252,6 +252,62 @@ define <8 x double> @test32(<8 x double> %a, <8 x double> %b) nounwind {
|
|||
ret <8 x double> %c
|
||||
}
|
||||
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
|
||||
define <8 x double> @test_vshuff64x2_512(<8 x double> %x, <8 x double> %x1) nounwind {
|
||||
; CHECK-LABEL: test_vshuff64x2_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vshuff64x2 $136, %zmm0, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 0, i32 1, i32 4, i32 5>
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_vshuff64x2_512_mask(<8 x double> %x, <8 x double> %x1, <8 x i1> %mask) nounwind {
|
||||
; CHECK-LABEL: test_vshuff64x2_512_mask:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmovsxwq %xmm2, %zmm1
|
||||
; CHECK-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
|
||||
; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; CHECK-NEXT: vshuff64x2 $136, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 0, i32 1, i32 4, i32 5>
|
||||
%res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> %mask) nounwind {
|
||||
; CHECK-LABEL: test_vshufi64x2_512_mask:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpmovsxwq %xmm2, %zmm1
|
||||
; CHECK-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
|
||||
; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; CHECK-NEXT: vshufi64x2 $168, %zmm0, %zmm0, %zmm0 {%k1}
|
||||
; CHECK-NEXT: retq
|
||||
%y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 4, i32 5>
|
||||
%res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_vshuff64x2_512_mem(<8 x double> %x, <8 x double> *%ptr) nounwind {
|
||||
; CHECK-LABEL: test_vshuff64x2_512_mem:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vshuff64x2 $40, %zmm0, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x1 = load <8 x double>,<8 x double> *%ptr,align 1
|
||||
%res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 0, i32 1>
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_vshuff32x4_512_mem(<16 x float> %x, <16 x float> *%ptr) nounwind {
|
||||
; CHECK-LABEL: test_vshuff32x4_512_mem:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vshuff64x2 $20, %zmm0, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x1 = load <16 x float>,<16 x float> *%ptr,align 1
|
||||
%res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x i32> @test_align_v16i32_rr(<16 x i32> %a, <16 x i32> %b) nounwind {
|
||||
; CHECK-LABEL: test_align_v16i32_rr:
|
||||
; CHECK: ## BB#0:
|
||||
|
|
|
@ -88,7 +88,7 @@ define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
|
|||
define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v8f64_01014545:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermpd $68, %zmm0, %zmm0
|
||||
; ALL-NEXT: vshuff64x2 $160, %zmm0, %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
|
||||
ret <8 x double> %shuffle
|
||||
|
@ -650,7 +650,7 @@ define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
|
|||
define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
|
||||
; ALL-LABEL: shuffle_v8i64_01014545:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermq $68, %zmm0, %zmm0
|
||||
; ALL-NEXT: vshufi64x2 $160, %zmm0, %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
|
||||
ret <8 x i64> %shuffle
|
||||
|
|
Loading…
Reference in New Issue