[X86] Lower extract_element from k-registers by bitcasting from v16i1 to i16 and extending/truncating.

This is equivalent to what isel was already doing, but by canonicalizing earlier we can remove some isel patterns.

llvm-svn: 326375
This commit is contained in:
Craig Topper 2018-02-28 22:23:55 +00:00
parent 4142369204
commit e31b9d1e5f
5 changed files with 33 additions and 41 deletions

View File

@ -14910,36 +14910,35 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
} }
// Canonicalize result type to MVT::i32.
if (EltVT != MVT::i32) {
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
Vec, Idx);
return DAG.getAnyExtOrTrunc(Extract, dl, EltVT);
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// Extracts from element 0 are always allowed.
if (IdxVal == 0)
return Op;
// If the kshift instructions of the correct width aren't natively supported // If the kshift instructions of the correct width aren't natively supported
// then we need to promote the vector to the native size to get the correct // then we need to promote the vector to the native size to get the correct
// zeroing behavior. // zeroing behavior.
if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) || if (VecVT.getVectorNumElements() < 16) {
(VecVT.getVectorNumElements() < 8)) {
VecVT = MVT::v16i1; VecVT = MVT::v16i1;
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
DAG.getUNDEF(VecVT), DAG.getUNDEF(VecVT), Vec,
Vec,
DAG.getIntPtrConstant(0, dl)); DAG.getIntPtrConstant(0, dl));
} }
// Use kshiftr instruction to move to the lower element. // Extracts from element 0 are always allowed.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, if (IdxVal != 0) {
DAG.getConstant(IdxVal, dl, MVT::i8)); // Use kshiftr instruction to move to the lower element.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Vec, Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getIntPtrConstant(0, dl)); DAG.getConstant(IdxVal, dl, MVT::i8));
}
// Shrink to v16i1 since that's always legal.
if (VecVT.getVectorNumElements() > 16) {
VecVT = MVT::v16i1;
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Vec,
DAG.getIntPtrConstant(0, dl));
}
// Convert to a bitcast+aext/trunc.
MVT CastVT = MVT::getIntegerVT(VecVT.getVectorNumElements());
return DAG.getAnyExtOrTrunc(DAG.getBitcast(CastVT, Vec), dl, EltVT);
} }
SDValue SDValue

View File

@ -2859,9 +2859,6 @@ let Predicates = [HasAVX512] in {
def : Pat<(maskVT (scalar_to_vector GR32:$src)), def : Pat<(maskVT (scalar_to_vector GR32:$src)),
(COPY_TO_REGCLASS GR32:$src, maskRC)>; (COPY_TO_REGCLASS GR32:$src, maskRC)>;
def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
(COPY_TO_REGCLASS maskRC:$src, GR32)>;
def : Pat<(maskVT (scalar_to_vector GR8:$src)), def : Pat<(maskVT (scalar_to_vector GR8:$src)),
(COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
} }

View File

@ -457,10 +457,6 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>; def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCVecEltisVT<1, i1>,
SDTCisPtrTy<2>]>>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;

View File

@ -269,7 +269,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
; SKX-LABEL: test14: ; SKX-LABEL: test14:
; SKX: ## %bb.0: ; SKX: ## %bb.0:
; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; SKX-NEXT: kshiftrb $4, %k0, %k0 ; SKX-NEXT: kshiftrw $4, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $1, %al ; SKX-NEXT: testb $1, %al
; SKX-NEXT: cmoveq %rsi, %rdi ; SKX-NEXT: cmoveq %rsi, %rdi

View File

@ -12,32 +12,32 @@ target triple = "x86_64-unknown-linux-gnu"
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftrw $2, %k0, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k2
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: testb $1, %al ; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld1 ; KNL-NEXT: fld1
; KNL-NEXT: fldz ; KNL-NEXT: fldz
; KNL-NEXT: fld %st(0) ; KNL-NEXT: fld %st(0)
; KNL-NEXT: fcmovne %st(2), %st(0) ; KNL-NEXT: fcmovne %st(2), %st(0)
; KNL-NEXT: testb $1, %cl ; KNL-NEXT: kshiftrw $2, %k0, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld %st(1) ; KNL-NEXT: fld %st(1)
; KNL-NEXT: fcmovne %st(3), %st(0) ; KNL-NEXT: fcmovne %st(3), %st(0)
; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb $1, %al ; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld %st(2) ; KNL-NEXT: fld %st(2)
; KNL-NEXT: fcmovne %st(4), %st(0) ; KNL-NEXT: fcmovne %st(4), %st(0)
; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: testb $1, %al ; KNL-NEXT: testb $1, %al
; KNL-NEXT: fxch %st(3) ; KNL-NEXT: fxch %st(3)
; KNL-NEXT: fcmovne %st(4), %st(0) ; KNL-NEXT: fcmovne %st(4), %st(0)
; KNL-NEXT: fstp %st(4) ; KNL-NEXT: fstp %st(4)
; KNL-NEXT: fxch %st(3) ; KNL-NEXT: fxch %st(3)
; KNL-NEXT: fstpt 20(%rdi)
; KNL-NEXT: fxch %st(1)
; KNL-NEXT: fstpt (%rdi) ; KNL-NEXT: fstpt (%rdi)
; KNL-NEXT: fxch %st(1) ; KNL-NEXT: fxch %st(1)
; KNL-NEXT: fstpt 30(%rdi) ; KNL-NEXT: fstpt 30(%rdi)
; KNL-NEXT: fxch %st(1)
; KNL-NEXT: fstpt 20(%rdi)
; KNL-NEXT: fstpt 10(%rdi) ; KNL-NEXT: fstpt 10(%rdi)
; KNL-NEXT: vzeroupper ; KNL-NEXT: vzeroupper
; KNL-NEXT: retq ; KNL-NEXT: retq
@ -54,11 +54,11 @@ target triple = "x86_64-unknown-linux-gnu"
; SKX-NEXT: fldz ; SKX-NEXT: fldz
; SKX-NEXT: fld %st(0) ; SKX-NEXT: fld %st(0)
; SKX-NEXT: fcmovne %st(2), %st(0) ; SKX-NEXT: fcmovne %st(2), %st(0)
; SKX-NEXT: kshiftrw $1, %k0, %k2 ; SKX-NEXT: kmovd %k1, %eax
; SKX-NEXT: kmovd %k2, %eax
; SKX-NEXT: testb $1, %al ; SKX-NEXT: testb $1, %al
; SKX-NEXT: fld %st(1) ; SKX-NEXT: fld %st(1)
; SKX-NEXT: fcmovne %st(3), %st(0) ; SKX-NEXT: fcmovne %st(3), %st(0)
; SKX-NEXT: kshiftrw $1, %k0, %k1
; SKX-NEXT: kmovd %k1, %eax ; SKX-NEXT: kmovd %k1, %eax
; SKX-NEXT: testb $1, %al ; SKX-NEXT: testb $1, %al
; SKX-NEXT: fld %st(2) ; SKX-NEXT: fld %st(2)
@ -71,9 +71,9 @@ target triple = "x86_64-unknown-linux-gnu"
; SKX-NEXT: fxch %st(3) ; SKX-NEXT: fxch %st(3)
; SKX-NEXT: fstpt (%rdi) ; SKX-NEXT: fstpt (%rdi)
; SKX-NEXT: fxch %st(1) ; SKX-NEXT: fxch %st(1)
; SKX-NEXT: fstpt 20(%rdi)
; SKX-NEXT: fxch %st(1)
; SKX-NEXT: fstpt 10(%rdi) ; SKX-NEXT: fstpt 10(%rdi)
; SKX-NEXT: fxch %st(1)
; SKX-NEXT: fstpt 20(%rdi)
; SKX-NEXT: fstpt 30(%rdi) ; SKX-NEXT: fstpt 30(%rdi)
; SKX-NEXT: retq ; SKX-NEXT: retq
bb: bb: