forked from OSchip/llvm-project
[X86] Lower extract_element from k-registers by bitcasting from v16i1 to i16 and extending/truncating.
This is equivalent to what isel was doing anyway, but by canonicalizing earlier we can remove some patterns. llvm-svn: 326375
This commit is contained in:
parent
4142369204
commit
e31b9d1e5f
|
@ -14910,36 +14910,35 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
|
|||
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
|
||||
}
|
||||
|
||||
// Canonicalize result type to MVT::i32.
|
||||
if (EltVT != MVT::i32) {
|
||||
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
|
||||
Vec, Idx);
|
||||
return DAG.getAnyExtOrTrunc(Extract, dl, EltVT);
|
||||
}
|
||||
|
||||
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
|
||||
// Extracts from element 0 are always allowed.
|
||||
if (IdxVal == 0)
|
||||
return Op;
|
||||
|
||||
// If the kshift instructions of the correct width aren't natively supported
|
||||
// then we need to promote the vector to the native size to get the correct
|
||||
// zeroing behavior.
|
||||
if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) ||
|
||||
(VecVT.getVectorNumElements() < 8)) {
|
||||
if (VecVT.getVectorNumElements() < 16) {
|
||||
VecVT = MVT::v16i1;
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
|
||||
DAG.getUNDEF(VecVT),
|
||||
Vec,
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
|
||||
DAG.getUNDEF(VecVT), Vec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
}
|
||||
|
||||
// Use kshiftr instruction to move to the lower element.
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Vec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
// Extracts from element 0 are always allowed.
|
||||
if (IdxVal != 0) {
|
||||
// Use kshiftr instruction to move to the lower element.
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
}
|
||||
|
||||
// Shrink to v16i1 since that's always legal.
|
||||
if (VecVT.getVectorNumElements() > 16) {
|
||||
VecVT = MVT::v16i1;
|
||||
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Vec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
}
|
||||
|
||||
// Convert to a bitcast+aext/trunc.
|
||||
MVT CastVT = MVT::getIntegerVT(VecVT.getVectorNumElements());
|
||||
return DAG.getAnyExtOrTrunc(DAG.getBitcast(CastVT, Vec), dl, EltVT);
|
||||
}
|
||||
|
||||
SDValue
|
||||
|
|
|
@ -2859,9 +2859,6 @@ let Predicates = [HasAVX512] in {
|
|||
def : Pat<(maskVT (scalar_to_vector GR32:$src)),
|
||||
(COPY_TO_REGCLASS GR32:$src, maskRC)>;
|
||||
|
||||
def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
|
||||
(COPY_TO_REGCLASS maskRC:$src, GR32)>;
|
||||
|
||||
def : Pat<(maskVT (scalar_to_vector GR8:$src)),
|
||||
(COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
|
||||
}
|
||||
|
|
|
@ -457,10 +457,6 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
|
|||
|
||||
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
|
||||
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
|
||||
def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
|
||||
SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
|
||||
SDTCVecEltisVT<1, i1>,
|
||||
SDTCisPtrTy<2>]>>;
|
||||
|
||||
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
|
||||
|
||||
|
|
|
@ -269,7 +269,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
|
|||
; SKX-LABEL: test14:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
||||
; SKX-NEXT: kshiftrb $4, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $4, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: cmoveq %rsi, %rdi
|
||||
|
|
|
@ -12,32 +12,32 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftrw $1, %k0, %k1
|
||||
; KNL-NEXT: kmovw %k1, %eax
|
||||
; KNL-NEXT: kshiftrw $2, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $1, %k1, %k2
|
||||
; KNL-NEXT: kmovw %k1, %ecx
|
||||
; KNL-NEXT: testb $1, %al
|
||||
; KNL-NEXT: fld1
|
||||
; KNL-NEXT: fldz
|
||||
; KNL-NEXT: fld %st(0)
|
||||
; KNL-NEXT: fcmovne %st(2), %st(0)
|
||||
; KNL-NEXT: testb $1, %cl
|
||||
; KNL-NEXT: kshiftrw $2, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $1, %k1, %k2
|
||||
; KNL-NEXT: kmovw %k2, %eax
|
||||
; KNL-NEXT: testb $1, %al
|
||||
; KNL-NEXT: fld %st(1)
|
||||
; KNL-NEXT: fcmovne %st(3), %st(0)
|
||||
; KNL-NEXT: kmovw %k2, %eax
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb $1, %al
|
||||
; KNL-NEXT: fld %st(2)
|
||||
; KNL-NEXT: fcmovne %st(4), %st(0)
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: kmovw %k1, %eax
|
||||
; KNL-NEXT: testb $1, %al
|
||||
; KNL-NEXT: fxch %st(3)
|
||||
; KNL-NEXT: fcmovne %st(4), %st(0)
|
||||
; KNL-NEXT: fstp %st(4)
|
||||
; KNL-NEXT: fxch %st(3)
|
||||
; KNL-NEXT: fstpt 20(%rdi)
|
||||
; KNL-NEXT: fxch %st(1)
|
||||
; KNL-NEXT: fstpt (%rdi)
|
||||
; KNL-NEXT: fxch %st(1)
|
||||
; KNL-NEXT: fstpt 30(%rdi)
|
||||
; KNL-NEXT: fxch %st(1)
|
||||
; KNL-NEXT: fstpt 20(%rdi)
|
||||
; KNL-NEXT: fstpt 10(%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
|
@ -54,11 +54,11 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||
; SKX-NEXT: fldz
|
||||
; SKX-NEXT: fld %st(0)
|
||||
; SKX-NEXT: fcmovne %st(2), %st(0)
|
||||
; SKX-NEXT: kshiftrw $1, %k0, %k2
|
||||
; SKX-NEXT: kmovd %k2, %eax
|
||||
; SKX-NEXT: kmovd %k1, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: fld %st(1)
|
||||
; SKX-NEXT: fcmovne %st(3), %st(0)
|
||||
; SKX-NEXT: kshiftrw $1, %k0, %k1
|
||||
; SKX-NEXT: kmovd %k1, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: fld %st(2)
|
||||
|
@ -71,9 +71,9 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||
; SKX-NEXT: fxch %st(3)
|
||||
; SKX-NEXT: fstpt (%rdi)
|
||||
; SKX-NEXT: fxch %st(1)
|
||||
; SKX-NEXT: fstpt 20(%rdi)
|
||||
; SKX-NEXT: fxch %st(1)
|
||||
; SKX-NEXT: fstpt 10(%rdi)
|
||||
; SKX-NEXT: fxch %st(1)
|
||||
; SKX-NEXT: fstpt 20(%rdi)
|
||||
; SKX-NEXT: fstpt 30(%rdi)
|
||||
; SKX-NEXT: retq
|
||||
bb:
|
||||
|
|
Loading…
Reference in New Issue