forked from OSchip/llvm-project
[X86] Improve lower1BitShuffle handling for KSHIFTL on narrow vectors.
We can insert the value into a larger legal type and shift that by the desired amount. llvm-svn: 369215
This commit is contained in:
parent
37860d524e
commit
9e074c06fe
|
@ -16562,20 +16562,36 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
}
|
||||
|
||||
// Try to match KSHIFTs.
|
||||
// TODO: Support narrower than legal shifts by widening and extracting.
|
||||
if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) {
|
||||
unsigned Offset = 0;
|
||||
for (SDValue V : { V1, V2 }) {
|
||||
unsigned Opcode;
|
||||
int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
|
||||
if (ShiftAmt >= 0)
|
||||
unsigned Offset = 0;
|
||||
for (SDValue V : { V1, V2 }) {
|
||||
unsigned Opcode;
|
||||
int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
|
||||
if (ShiftAmt >= 0) {
|
||||
// FIXME: We can't easily widen an illegal right shift if we need to shift
|
||||
// in zeroes.
|
||||
if (Opcode == X86ISD::KSHIFTR &&
|
||||
(NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)))
|
||||
return DAG.getNode(Opcode, DL, VT, V,
|
||||
DAG.getConstant(ShiftAmt, DL, MVT::i8));
|
||||
Offset += NumElts; // Increment for next iteration.
|
||||
if (Opcode == X86ISD::KSHIFTL) {
|
||||
// If this is a shift left we can widen the VT to a supported kshiftl.
|
||||
MVT WideVT = VT;
|
||||
if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
|
||||
WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
|
||||
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
|
||||
DAG.getUNDEF(WideVT), V,
|
||||
DAG.getIntPtrConstant(0, DL));
|
||||
Res = DAG.getNode(Opcode, DL, WideVT, V,
|
||||
DAG.getConstant(ShiftAmt, DL, MVT::i8));
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
|
||||
DAG.getIntPtrConstant(0, DL));
|
||||
}
|
||||
}
|
||||
Offset += NumElts; // Increment for next iteration.
|
||||
}
|
||||
|
||||
|
||||
|
||||
MVT ExtVT;
|
||||
switch (VT.SimpleTy) {
|
||||
default:
|
||||
|
|
|
@ -5,13 +5,9 @@
|
|||
define i8 @kshiftl_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
|
||||
; KNL-LABEL: kshiftl_v8i1_1:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
|
||||
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: movb $-2, %al
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
|
||||
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
||||
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||
; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
|
@ -177,13 +173,9 @@ define i64 @kshiftl_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
|
|||
define i8 @kshiftl_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
|
||||
; KNL-LABEL: kshiftl_v8i1_7:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
|
||||
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: movb $-128, %al
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
|
||||
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
||||
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
||||
; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
|
@ -475,13 +467,9 @@ define i64 @kshiftr_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
|
|||
define i8 @kshiftr_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
|
||||
; KNL-LABEL: kshiftr_v8i1_7:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
|
||||
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: movb $-2, %al
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
|
||||
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
||||
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||
; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
|
@ -599,14 +587,9 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
|
|||
define i8 @kshiftl_v8i1_zu123u56(<8 x i64> %x, <8 x i64> %y) {
|
||||
; KNL-LABEL: kshiftl_v8i1_zu123u56:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
|
||||
; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = <8,u,1,2,3,u,5,6>
|
||||
; KNL-NEXT: vpermi2q %zmm0, %zmm2, %zmm3
|
||||
; KNL-NEXT: vpsllq $63, %zmm3, %zmm0
|
||||
; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
|
||||
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
||||
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||
; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
|
@ -632,12 +615,9 @@ define i8 @kshiftl_v8i1_zu123u56(<8 x i64> %x, <8 x i64> %y) {
|
|||
define i8 @kshiftl_v8i1_u0123456(<8 x i64> %x, <8 x i64> %y) {
|
||||
; KNL-LABEL: kshiftl_v8i1_u0123456:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
|
||||
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: valignq {{.*#+}} zmm0 = zmm0[7,0,1,2,3,4,5,6]
|
||||
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
|
||||
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
||||
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||
; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
|
|
Loading…
Reference in New Issue