forked from OSchip/llvm-project
Masked gather and scatter intrinsics - enabled codegen for KNL.
llvm-svn: 236394
This commit is contained in:
parent
a63b7cee66
commit
1b60ed7069
|
@ -625,6 +625,7 @@ private:
|
|||
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
|
@ -643,6 +644,8 @@ private:
|
|||
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
|
||||
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
|
||||
SDValue SplitVecOp_VSETCC(SDNode *N);
|
||||
SDValue SplitVecOp_FP_ROUND(SDNode *N);
|
||||
|
|
|
@ -196,6 +196,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
|||
|
||||
SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
|
||||
|
||||
bool HasVectorValue = false;
|
||||
if (Op.getOpcode() == ISD::LOAD) {
|
||||
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
|
||||
ISD::LoadExtType ExtType = LD->getExtensionType();
|
||||
|
@ -243,9 +244,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
|||
Changed = true;
|
||||
return LegalizeOp(ExpandStore(Op));
|
||||
}
|
||||
}
|
||||
} else if (Op.getOpcode() == ISD::MSCATTER)
|
||||
HasVectorValue = true;
|
||||
|
||||
bool HasVectorValue = false;
|
||||
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
|
||||
J != E;
|
||||
++J)
|
||||
|
@ -330,6 +331,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
|||
case ISD::UINT_TO_FP:
|
||||
QueryType = Node->getOperand(0).getValueType();
|
||||
break;
|
||||
case ISD::MSCATTER:
|
||||
QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
|
||||
break;
|
||||
}
|
||||
|
||||
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
|
||||
|
|
|
@ -600,6 +600,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
|
|||
case ISD::MLOAD:
|
||||
SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
|
||||
break;
|
||||
case ISD::MGATHER:
|
||||
SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi);
|
||||
break;
|
||||
case ISD::SETCC:
|
||||
SplitVecRes_SETCC(N, Lo, Hi);
|
||||
break;
|
||||
|
@ -1045,6 +1048,54 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
|||
|
||||
}
|
||||
|
||||
/// Split a masked gather whose result vector type has to be split in two.
///
/// The pass-through value, the mask and the index vector of \p MGT are each
/// split into a low and a high half, and one gather is emitted per half; both
/// gathers share the incoming chain and the base pointer.
///
/// \param MGT the masked gather node being legalized.
/// \param Lo  receives the gather producing the low half of the result.
/// \param Hi  receives the gather producing the high half of the result.
///
/// Users of the original chain result (value #1 of \p MGT) are redirected to a
/// TokenFactor that joins the chains of the two new gathers.
void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
                                           SDValue &Lo, SDValue &Hi) {
  EVT LoVT, HiVT;
  SDLoc dl(MGT);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));

  SDValue Ch = MGT->getChain();
  SDValue Ptr = MGT->getBasePtr();
  SDValue Mask = MGT->getMask();
  unsigned Alignment = MGT->getOriginalAlignment();

  SDValue MaskLo, MaskHi;
  std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);

  EVT MemoryVT = MGT->getMemoryVT();
  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue Src0Lo, Src0Hi;
  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl);

  SDValue IndexHi, IndexLo;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl);

  // Memory operand describing the access performed by the low half.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MGT->getPointerInfo(),
                         MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
                         Alignment, MGT->getAAInfo(), MGT->getRanges());

  SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
                           MMO);

  // The high half gets its own memory operand sized from HiMemVT, matching
  // what SplitVecOp_MGATHER and SplitVecOp_MSCATTER do, instead of reusing the
  // operand that was sized for the low half.
  MMO = DAG.getMachineFunction().
    getMachineMemOperand(MGT->getPointerInfo(),
                         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
                         Alignment, MGT->getAAInfo(), MGT->getRanges());

  SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
                           MMO);

  // Build a factor node to remember that this load is independent of the
  // other one.
  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                   Hi.getValue(1));

  // Legalized the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(MGT, 1), Ch);
}
|
||||
|
||||
|
||||
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
|
||||
assert(N->getValueType(0).isVector() &&
|
||||
N->getOperand(0).getValueType().isVector() &&
|
||||
|
@ -1306,6 +1357,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
|
|||
case ISD::MSTORE:
|
||||
Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
|
||||
break;
|
||||
case ISD::MSCATTER:
|
||||
Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
|
||||
break;
|
||||
case ISD::MGATHER:
|
||||
Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
|
||||
break;
|
||||
case ISD::VSELECT:
|
||||
Res = SplitVecOp_VSELECT(N, OpNo);
|
||||
break;
|
||||
|
@ -1478,6 +1535,68 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
|
|||
MachinePointerInfo(), EltVT, false, false, false, 0);
|
||||
}
|
||||
|
||||
/// Split a masked gather into a low and a high gather and stitch the two
/// results back together.
///
/// The mask and the pass-through value are split unconditionally; the index is
/// split only when the node actually carries one. Each half gets a memory
/// operand sized from its own half of the memory type.
///
/// \param MGT  the masked gather node being legalized.
/// \param OpNo operand number that triggered the split (not consulted here).
/// \returns an empty SDValue; both results of \p MGT have already been
///          replaced through ReplaceValueWith by the time this returns.
SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
                                             unsigned OpNo) {
  SDLoc DL(MGT);
  const EVT ResVT = MGT->getValueType(0);

  EVT LoResVT, HiResVT;
  std::tie(LoResVT, HiResVT) = DAG.GetSplitDestVTs(ResVT);

  SDValue Chain = MGT->getChain();
  SDValue Base = MGT->getBasePtr();
  SDValue Idx = MGT->getIndex();
  const unsigned OrigAlign = MGT->getOriginalAlignment();

  SDValue LoMask, HiMask;
  std::tie(LoMask, HiMask) = DAG.SplitVector(MGT->getMask(), DL);

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MGT->getMemoryVT());

  SDValue LoSrc0, HiSrc0;
  std::tie(LoSrc0, HiSrc0) = DAG.SplitVector(MGT->getValue(), DL);

  // Without an index node both halves simply inherit the (empty) index.
  SDValue LoIdx = Idx, HiIdx = Idx;
  if (Idx.getNode())
    std::tie(LoIdx, HiIdx) = DAG.SplitVector(Idx, DL);

  // Builds the load memory operand for one half of the access.
  auto &MF = DAG.getMachineFunction();
  auto MakeLoadMMO = [&](EVT HalfMemVT) {
    return MF.getMachineMemOperand(MGT->getPointerInfo(),
                                   MachineMemOperand::MOLoad,
                                   HalfMemVT.getStoreSize(), OrigAlign,
                                   MGT->getAAInfo(), MGT->getRanges());
  };

  MachineMemOperand *LoMMO = MakeLoadMMO(LoMemVT);
  SDValue LoOps[] = {Chain, LoSrc0, LoMask, Base, LoIdx};
  SDValue LoGather = DAG.getMaskedGather(
      DAG.getVTList(LoResVT, MVT::Other), LoResVT, DL, LoOps, LoMMO);

  MachineMemOperand *HiMMO = MakeLoadMMO(HiMemVT);
  SDValue HiOps[] = {Chain, HiSrc0, HiMask, Base, HiIdx};
  SDValue HiGather = DAG.getMaskedGather(
      DAG.getVTList(HiResVT, MVT::Other), HiResVT, DL, HiOps, HiMMO);

  // The two half gathers do not depend on each other; join their chains with
  // a TokenFactor and hand that to everyone who used the old chain result.
  SDValue JoinedChain =
      DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoGather.getValue(1),
                  HiGather.getValue(1));
  ReplaceValueWith(SDValue(MGT, 1), JoinedChain);

  // Reassemble the full-width value from the two halves.
  SDValue Whole =
      DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, LoGather, HiGather);
  ReplaceValueWith(SDValue(MGT, 0), Whole);
  return SDValue();
}
|
||||
|
||||
SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
|
||||
unsigned OpNo) {
|
||||
SDValue Ch = N->getChain();
|
||||
|
@ -1523,11 +1642,64 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
|
|||
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
|
||||
N->isTruncatingStore());
|
||||
|
||||
// Build a factor node to remember that this store is independent of the
|
||||
// other one.
|
||||
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
|
||||
}
|
||||
|
||||
/// Split a masked scatter into two scatters, one per half of the stored data.
///
/// The data and the mask are fetched with GetSplitVector. The base pointer is
/// split only when it is itself a vector, and the index only when the node
/// carries one; otherwise both halves reuse the original operand.
///
/// \param N    the masked scatter node being legalized.
/// \param OpNo operand number that triggered the split (not consulted here).
/// \returns a TokenFactor joining the chains of the two new scatters.
SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
                                              unsigned OpNo) {
  SDValue Chain = N->getChain();
  SDValue Base = N->getBasePtr();
  SDValue Idx = N->getIndex();
  const unsigned OrigAlign = N->getOriginalAlignment();
  SDLoc DL(N);

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(N->getMemoryVT());

  SDValue LoData, HiData;
  GetSplitVector(N->getValue(), LoData, HiData);
  SDValue LoMask, HiMask;
  GetSplitVector(N->getMask(), LoMask, HiMask);

  // Scatter through a vector of pointers: each half takes its own pointers.
  SDValue LoBase = Base, HiBase = Base;
  if (Base.getValueType().isVector())
    std::tie(LoBase, HiBase) = DAG.SplitVector(Base, DL);

  // Without an index node both halves simply inherit the (empty) index.
  SDValue LoIdx = Idx, HiIdx = Idx;
  if (Idx.getNode())
    std::tie(LoIdx, HiIdx) = DAG.SplitVector(Idx, DL);

  // Builds the store memory operand for one half of the access.
  auto &MF = DAG.getMachineFunction();
  auto MakeStoreMMO = [&](EVT HalfMemVT) {
    return MF.getMachineMemOperand(N->getPointerInfo(),
                                   MachineMemOperand::MOStore,
                                   HalfMemVT.getStoreSize(), OrigAlign,
                                   N->getAAInfo(), N->getRanges());
  };

  MachineMemOperand *LoMMO = MakeStoreMMO(LoMemVT);
  SDValue LoOps[] = {Chain, LoData, LoMask, LoBase, LoIdx};
  SDValue LoScatter = DAG.getMaskedScatter(
      DAG.getVTList(MVT::Other), LoData.getValueType(), DL, LoOps, LoMMO);

  MachineMemOperand *HiMMO = MakeStoreMMO(HiMemVT);
  SDValue HiOps[] = {Chain, HiData, HiMask, HiBase, HiIdx};
  SDValue HiScatter = DAG.getMaskedScatter(
      DAG.getVTList(MVT::Other), HiData.getValueType(), DL, HiOps, HiMMO);

  // Neither store depends on the other, so tie their chains together with a
  // TokenFactor.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoScatter, HiScatter);
}
|
||||
|
||||
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
|
||||
|
|
|
@ -1389,6 +1389,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
// Custom lower several nodes.
|
||||
for (MVT VT : MVT::vector_valuetypes()) {
|
||||
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
|
||||
if (EltSize >= 32 && VT.getSizeInBits() <= 512) {
|
||||
setOperationAction(ISD::MGATHER, VT, Custom);
|
||||
setOperationAction(ISD::MSCATTER, VT, Custom);
|
||||
}
|
||||
// Extract subvector is special because the value type
|
||||
// (result) is 256/128-bit but the source is 512-bit wide.
|
||||
if (VT.is128BitVector() || VT.is256BitVector()) {
|
||||
|
@ -17360,6 +17364,56 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
|
|||
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
|
||||
}
|
||||
|
||||
/// Custom-lower a masked scatter for AVX-512.
///
/// A scatter node that still has a single result is rebuilt with an extra
/// result of the mask type, because the X86 scatter instruction kills its mask
/// register. While rebuilding, the index is sign-extended to v8i64 when the
/// subtarget lacks VLX and neither the data nor the index is 512 bits wide.
/// A node that already has more than one result is returned unchanged.
static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget,
                             SelectionDAG &DAG) {
  assert(Subtarget->hasAVX512() &&
         "MGATHER/MSCATTER are supported on AVX-512 arch only");

  MaskedScatterSDNode *Scatter = cast<MaskedScatterSDNode>(Op.getNode());
  EVT DataVT = Scatter->getValue().getValueType();
  assert(DataVT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
  SDLoc DL(Op);

  // Nothing to do once the node already exposes the extra mask result.
  if (Scatter->getNumValues() != 1)
    return Op;

  SDValue Idx = Scatter->getIndex();
  const bool WidenIndex =
      !(Subtarget->hasVLX() || DataVT.is512BitVector() ||
        Idx.getValueType().is512BitVector());
  if (WidenIndex)
    Idx = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::v8i64, Idx);

  // Result #0 is the clobbered mask, result #1 is the chain.
  SDVTList ResultVTs =
      DAG.getVTList(Scatter->getMask().getValueType(), MVT::Other);
  SDValue NewOps[] = { Scatter->getOperand(0), Scatter->getOperand(1),
                       Scatter->getOperand(2), Scatter->getOperand(3), Idx };

  SDValue Rebuilt = DAG.getMaskedScatter(ResultVTs, DataVT, DL, NewOps,
                                         Scatter->getMemOperand());
  SDNode *RebuiltNode = Rebuilt.getNode();

  // Users of the old chain result now hang off the new node's chain.
  DAG.ReplaceAllUsesWith(Op, SDValue(RebuiltNode, 1));
  return SDValue(RebuiltNode, 0);
}
|
||||
|
||||
/// Custom-lower a masked gather for AVX-512.
///
/// When the subtarget lacks VLX and neither the result nor the index is
/// 512 bits wide, the index operand is sign-extended to v8i64 and the node's
/// operands are updated in place. In every case \p Op itself is returned.
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget,
                            SelectionDAG &DAG) {
  assert(Subtarget->hasAVX512() &&
         "MGATHER/MSCATTER are supported on AVX-512 arch only");

  MaskedGatherSDNode *Gather = cast<MaskedGatherSDNode>(Op.getNode());
  EVT ResVT = Op.getValueType();
  assert(ResVT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
  SDLoc DL(Op);

  SDValue Idx = Gather->getIndex();

  // With VLX, or when either vector is already 512 bits wide, the node is
  // left exactly as it is.
  if (Subtarget->hasVLX() || ResVT.is512BitVector() ||
      Idx.getValueType().is512BitVector())
    return Op;

  SDValue WideIdx = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::v8i64, Idx);
  SDValue NewOps[] = { Gather->getOperand(0), Gather->getOperand(1),
                       Gather->getOperand(2), Gather->getOperand(3), WideIdx };
  DAG.UpdateNodeOperands(Gather, NewOps);
  return Op;
}
|
||||
|
||||
/// LowerOperation - Provide custom lowering hooks for some operations.
|
||||
///
|
||||
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
@ -17447,6 +17501,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
case ISD::ADD: return LowerADD(Op, DAG);
|
||||
case ISD::SUB: return LowerSUB(Op, DAG);
|
||||
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
|
||||
case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
|
||||
case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,142 @@
|
|||
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=knl < %s | FileCheck %s -check-prefix=KNL
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; KNL-LABEL: test1
|
||||
; KNL: kxnorw %k1, %k1, %k1
|
||||
; KNL: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
|
||||
; Gathers <16 x float> from %base (splatted to a vector of pointers) indexed by
; the sign-extended %ind, using a constant all-true mask.
define <16 x float> @test1(float* %base, <16 x i32> %ind) {
|
||||
|
||||
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
|
||||
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
|
||||
|
||||
%sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
%gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
|
||||
|
||||
%res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
ret <16 x float>%res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
|
||||
declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
|
||||
declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> )
|
||||
|
||||
; KNL-LABEL: test2
|
||||
; KNL: kmovw %esi, %k1
|
||||
; KNL: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
|
||||
; Same addressing as a splatted %base plus sign-extended %ind, but the gather
; mask is the i16 %mask argument bitcast to <16 x i1>.
define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
|
||||
|
||||
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
|
||||
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
|
||||
|
||||
%sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
%gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
|
||||
%imask = bitcast i16 %mask to <16 x i1>
|
||||
%res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
; KNL-LABEL: test3
|
||||
; KNL: kmovw %esi, %k1
|
||||
; KNL: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
|
||||
; Integer variant: gathers <16 x i32> from a splatted %base indexed by the
; sign-extended %ind, masked by %mask bitcast to <16 x i1>.
define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) {
|
||||
|
||||
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
|
||||
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
|
||||
|
||||
%sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
%gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i64> %sext_ind
|
||||
%imask = bitcast i16 %mask to <16 x i1>
|
||||
%res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
; KNL-LABEL: test4
|
||||
; KNL: kmovw %esi, %k1
|
||||
; KNL: kmovw
|
||||
; KNL: vpgatherdd
|
||||
; KNL: vpgatherdd
|
||||
|
||||
; Two gathers from the same addresses with the same mask. The GEP uses the
; <16 x i32> %ind directly (no explicit sext), and the second gather receives
; the first gather's result as its last operand. The results are added.
define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
|
||||
|
||||
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
|
||||
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
|
||||
|
||||
%gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
|
||||
%imask = bitcast i16 %mask to <16 x i1>
|
||||
%gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
|
||||
%gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
|
||||
%res = add <16 x i32> %gt1, %gt2
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
; KNL-LABEL: test5
|
||||
; KNL: kmovw %k1, %k2
|
||||
; KNL: vpscatterdd {{.*}}%k2
|
||||
; KNL: vpscatterdd {{.*}}%k1
|
||||
|
||||
; Two identical scatters of %val to the same addresses with the same mask.
define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
|
||||
|
||||
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
|
||||
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
|
||||
|
||||
%gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
|
||||
%imask = bitcast i16 %mask to <16 x i1>
|
||||
call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
|
||||
call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.masked.scatter.v8i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
|
||||
declare void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> )
|
||||
|
||||
; KNL-LABEL: test6
|
||||
; KNL: kxnorw %k1, %k1, %k1
|
||||
; KNL: kxnorw %k2, %k2, %k2
|
||||
; KNL: vpgatherqd (,%zmm{{.*}}), %ymm{{.*}} {%k2}
|
||||
; KNL: vpscatterqd %ymm{{.*}}, (,%zmm{{.*}}) {%k1}
|
||||
; Gather followed by a scatter of %a1 through the same <8 x i32*> vector of
; pointers, both with a constant all-true mask; the gathered value is returned.
define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
|
||||
|
||||
%a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
|
||||
|
||||
call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
|
||||
ret <8 x i32>%a
|
||||
}
|
||||
|
||||
; In this case the index should be promoted to <8 x i64> for KNL
|
||||
; KNL-LABEL: test7
|
||||
; KNL: vpmovsxdq %ymm0, %zmm0
|
||||
; KNL: kmovw %k1, %k2
|
||||
; KNL: vpgatherqd {{.*}} {%k2}
|
||||
; KNL: vpgatherqd {{.*}} {%k1}
|
||||
; Two <8 x i32> gathers from a splatted %base indexed by <8 x i32> %ind, masked
; by the i8 %mask bitcast to <8 x i1>. The second gather receives the first
; gather's result as its last operand, and the two results are added.
define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
|
||||
|
||||
%broadcast.splatinsert = insertelement <8 x i32*> undef, i32* %base, i32 0
|
||||
%broadcast.splat = shufflevector <8 x i32*> %broadcast.splatinsert, <8 x i32*> undef, <8 x i32> zeroinitializer
|
||||
|
||||
%gep.random = getelementptr i32, <8 x i32*> %broadcast.splat, <8 x i32> %ind
|
||||
%imask = bitcast i8 %mask to <8 x i1>
|
||||
%gt1 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef)
|
||||
%gt2 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1)
|
||||
%res = add <8 x i32> %gt1, %gt2
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
; No uniform base in this case, index <8 x i64> contains addresses,
|
||||
; each gather call will be split into two
|
||||
; KNL-LABEL: test8
|
||||
; KNL: kshiftrw $8, %k1, %k2
|
||||
; KNL: vpgatherqd
|
||||
; KNL: vpgatherqd
|
||||
; KNL: vinserti64x4
|
||||
; KNL: vpgatherqd
|
||||
; KNL: vpgatherqd
|
||||
; KNL: vinserti64x4
|
||||
; Two <16 x i32> gathers straight from the <16 x i32*> %ptr.random argument,
; masked by %mask bitcast to <16 x i1>; the second gather receives the first
; gather's result as its last operand. The %ind argument is not used.
define <16 x i32> @test8(<16 x i32*> %ptr.random, <16 x i32> %ind, i16 %mask) {
|
||||
%imask = bitcast i16 %mask to <16 x i1>
|
||||
%gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
|
||||
%gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
|
||||
%res = add <16 x i32> %gt1, %gt2
|
||||
ret <16 x i32> %res
|
||||
}
|
|
@ -7,8 +7,8 @@
|
|||
; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
|
||||
|
||||
; AVX2-LABEL: test1
|
||||
; AVX2: vpmaskmovd 32(%rdi)
|
||||
; AVX2: vpmaskmovd (%rdi)
|
||||
; AVX2: vpmaskmovd {{.*}}(%rdi)
|
||||
; AVX2: vpmaskmovd {{.*}}(%rdi)
|
||||
; AVX2-NOT: blend
|
||||
|
||||
; AVX_SCALAR-LABEL: test1
|
||||
|
|
Loading…
Reference in New Issue