forked from OSchip/llvm-project
[X86] Use custom isel for gather/scatter instructions.
The type profile we use for the isel patterns lied about how many operands the gather/scatter node has to skip the index and scale operands. This allowed us to expand the baseptr operand into base, displacement, and segment and then merge the index and scale with them in the final instruction during isel. This is kind of a hack that relies on isel not checking the number of operands at all. This commit switches to custom isel where we can manage this directly without relying on holes in the isel checking.
This commit is contained in:
parent
3a6bb32bd2
commit
f1b8ec3398
|
@@ -5394,6 +5394,161 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
CurDAG->RemoveDeadNode(Node);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case X86ISD::MGATHER: {
|
||||
auto *Mgt = cast<X86MaskedGatherSDNode>(Node);
|
||||
SDValue IndexOp = Mgt->getIndex();
|
||||
SDValue Mask = Mgt->getMask();
|
||||
MVT IndexVT = IndexOp.getSimpleValueType();
|
||||
MVT ValueVT = Node->getSimpleValueType(0);
|
||||
MVT MaskVT = Mask.getSimpleValueType();
|
||||
|
||||
// This is just to prevent crashes if the nodes are malformed somehow. We're
|
||||
// otherwise only doing loose type checking in here based on type what
|
||||
// a type constraint would say just like table based isel.
|
||||
if (!ValueVT.isVector() || !MaskVT.isVector())
|
||||
break;
|
||||
|
||||
unsigned NumElts = ValueVT.getVectorNumElements();
|
||||
MVT ValueSVT = ValueVT.getVectorElementType();
|
||||
|
||||
bool IsFP = ValueSVT.isFloatingPoint();
|
||||
unsigned EltSize = ValueSVT.getSizeInBits();
|
||||
|
||||
unsigned Opc = 0;
|
||||
bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1;
|
||||
if (AVX512Gather) {
|
||||
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm;
|
||||
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm;
|
||||
else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm;
|
||||
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm;
|
||||
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm;
|
||||
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm;
|
||||
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm;
|
||||
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm;
|
||||
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm;
|
||||
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm;
|
||||
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm;
|
||||
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm;
|
||||
} else {
|
||||
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm;
|
||||
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm;
|
||||
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm;
|
||||
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm;
|
||||
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm;
|
||||
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm;
|
||||
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm;
|
||||
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm;
|
||||
}
|
||||
|
||||
if (!Opc)
|
||||
break;
|
||||
|
||||
SDValue BasePtr = Mgt->getBasePtr();
|
||||
SDValue Base, Scale, Index, Disp, Segment;
|
||||
if (!selectVectorAddr(Node, BasePtr, Base, Scale, Index, Disp, Segment))
|
||||
break;
|
||||
|
||||
SDValue PassThru = Mgt->getPassThru();
|
||||
SDValue Chain = Mgt->getChain();
|
||||
SDVTList VTs = Mgt->getVTList();
|
||||
|
||||
MachineSDNode *NewNode;
|
||||
if (AVX512Gather) {
|
||||
SDValue Ops[] = {PassThru, Mask, Base, Scale,
|
||||
Index, Disp, Segment, Chain};
|
||||
NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
|
||||
} else {
|
||||
SDValue Ops[] = {PassThru, Base, Scale, Index,
|
||||
Disp, Segment, Mask, Chain};
|
||||
NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
|
||||
}
|
||||
CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()});
|
||||
ReplaceNode(Node, NewNode);
|
||||
return;
|
||||
}
|
||||
case X86ISD::MSCATTER: {
|
||||
auto *Sc = cast<X86MaskedScatterSDNode>(Node);
|
||||
SDValue Value = Sc->getValue();
|
||||
SDValue IndexOp = Sc->getIndex();
|
||||
MVT IndexVT = IndexOp.getSimpleValueType();
|
||||
MVT ValueVT = Value.getSimpleValueType();
|
||||
|
||||
// This is just to prevent crashes if the nodes are malformed somehow. We're
|
||||
// otherwise only doing loose type checking in here based on type what
|
||||
// a type constraint would say just like table based isel.
|
||||
if (!ValueVT.isVector())
|
||||
break;
|
||||
|
||||
unsigned NumElts = ValueVT.getVectorNumElements();
|
||||
MVT ValueSVT = ValueVT.getVectorElementType();
|
||||
|
||||
bool IsFP = ValueSVT.isFloatingPoint();
|
||||
unsigned EltSize = ValueSVT.getSizeInBits();
|
||||
|
||||
unsigned Opc;
|
||||
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr;
|
||||
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr;
|
||||
else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr;
|
||||
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr;
|
||||
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr;
|
||||
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr;
|
||||
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr;
|
||||
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr;
|
||||
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
|
||||
Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr;
|
||||
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr;
|
||||
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr;
|
||||
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
|
||||
Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr;
|
||||
else
|
||||
break;
|
||||
|
||||
SDValue BasePtr = Sc->getBasePtr();
|
||||
SDValue Base, Scale, Index, Disp, Segment;
|
||||
if (!selectVectorAddr(Node, BasePtr, Base, Scale, Index, Disp, Segment))
|
||||
break;
|
||||
|
||||
SDValue Mask = Sc->getMask();
|
||||
SDValue Chain = Sc->getChain();
|
||||
SDVTList VTs = Sc->getVTList();
|
||||
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain};
|
||||
|
||||
MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
|
||||
CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()});
|
||||
ReplaceNode(Node, NewNode);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -9726,54 +9726,49 @@ def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
|
|||
|
||||
// FIXME: Improve scheduling of gather/scatter instructions.
|
||||
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
X86MemOperand memop, PatFrag GatherNode,
|
||||
RegisterClass MaskRC = _.KRCWM> {
|
||||
X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
|
||||
let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
|
||||
ExeDomain = _.ExeDomain in
|
||||
ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
|
||||
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
|
||||
(ins _.RC:$src1, MaskRC:$mask, memop:$src2),
|
||||
!strconcat(OpcodeStr#_.Suffix,
|
||||
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
|
||||
[(set _.RC:$dst, MaskRC:$mask_wb,
|
||||
(GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
|
||||
vectoraddr:$src2))]>, EVEX, EVEX_K,
|
||||
EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
|
||||
[]>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
|
||||
}
|
||||
|
||||
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
|
||||
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
|
||||
defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
|
||||
vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
|
||||
vy512xmem>, EVEX_V512, VEX_W;
|
||||
defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
|
||||
vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
|
||||
vz512mem>, EVEX_V512, VEX_W;
|
||||
let Predicates = [HasVLX] in {
|
||||
defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
|
||||
vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
|
||||
vx256xmem>, EVEX_V256, VEX_W;
|
||||
defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
|
||||
vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
|
||||
vy256xmem>, EVEX_V256, VEX_W;
|
||||
defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
|
||||
vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
|
||||
vx128xmem>, EVEX_V128, VEX_W;
|
||||
defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
|
||||
vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
|
||||
vx128xmem>, EVEX_V128, VEX_W;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
|
||||
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
|
||||
defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
|
||||
mgatherv16i32>, EVEX_V512;
|
||||
defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
|
||||
mgatherv8i64>, EVEX_V512;
|
||||
defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem>,
|
||||
EVEX_V512;
|
||||
defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem>,
|
||||
EVEX_V512;
|
||||
let Predicates = [HasVLX] in {
|
||||
defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
|
||||
vy256xmem, mgatherv8i32>, EVEX_V256;
|
||||
vy256xmem>, EVEX_V256;
|
||||
defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
|
||||
vy128xmem, mgatherv4i64>, EVEX_V256;
|
||||
vy128xmem>, EVEX_V256;
|
||||
defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
|
||||
vx128xmem, mgatherv4i32>, EVEX_V128;
|
||||
vx128xmem>, EVEX_V128;
|
||||
defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
|
||||
vx64xmem, mgatherv2i64, VK2WM>,
|
||||
EVEX_V128;
|
||||
vx64xmem, VK2WM>, EVEX_V128;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -9785,55 +9780,52 @@ defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q
|
|||
avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
|
||||
|
||||
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
X86MemOperand memop, PatFrag ScatterNode,
|
||||
RegisterClass MaskRC = _.KRCWM> {
|
||||
X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
|
||||
|
||||
let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
|
||||
let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
|
||||
hasSideEffects = 0 in
|
||||
|
||||
def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
|
||||
(ins memop:$dst, MaskRC:$mask, _.RC:$src),
|
||||
!strconcat(OpcodeStr#_.Suffix,
|
||||
"\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
|
||||
[(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
|
||||
MaskRC:$mask, vectoraddr:$dst))]>,
|
||||
EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
|
||||
[]>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
|
||||
Sched<[WriteStore]>;
|
||||
}
|
||||
|
||||
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
|
||||
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
|
||||
defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
|
||||
vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
|
||||
vy512xmem>, EVEX_V512, VEX_W;
|
||||
defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
|
||||
vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
|
||||
vz512mem>, EVEX_V512, VEX_W;
|
||||
let Predicates = [HasVLX] in {
|
||||
defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
|
||||
vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
|
||||
vx256xmem>, EVEX_V256, VEX_W;
|
||||
defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
|
||||
vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
|
||||
vy256xmem>, EVEX_V256, VEX_W;
|
||||
defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
|
||||
vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
|
||||
vx128xmem>, EVEX_V128, VEX_W;
|
||||
defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
|
||||
vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
|
||||
vx128xmem>, EVEX_V128, VEX_W;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
|
||||
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
|
||||
defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
|
||||
mscatterv16i32>, EVEX_V512;
|
||||
defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
|
||||
mscatterv8i64>, EVEX_V512;
|
||||
defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem>,
|
||||
EVEX_V512;
|
||||
defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem>,
|
||||
EVEX_V512;
|
||||
let Predicates = [HasVLX] in {
|
||||
defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
|
||||
vy256xmem, mscatterv8i32>, EVEX_V256;
|
||||
vy256xmem>, EVEX_V256;
|
||||
defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
|
||||
vy128xmem, mscatterv4i64>, EVEX_V256;
|
||||
vy128xmem>, EVEX_V256;
|
||||
defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
|
||||
vx128xmem, mscatterv4i32>, EVEX_V128;
|
||||
vx128xmem>, EVEX_V128;
|
||||
defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
|
||||
vx64xmem, mscatterv2i64, VK2WM>,
|
||||
EVEX_V128;
|
||||
vx64xmem, VK2WM>, EVEX_V128;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -915,89 +915,6 @@ def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
|
|||
def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
|
||||
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
|
||||
|
||||
def X86masked_gather : SDNode<"X86ISD::MGATHER",
|
||||
SDTypeProfile<2, 3, [SDTCisVec<0>,
|
||||
SDTCisVec<1>, SDTCisInt<1>,
|
||||
SDTCisSameAs<0, 2>,
|
||||
SDTCisSameAs<1, 3>,
|
||||
SDTCisPtrTy<4>]>,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
|
||||
|
||||
def X86masked_scatter : SDNode<"X86ISD::MSCATTER",
|
||||
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
|
||||
SDTCisSameAs<0, 2>,
|
||||
SDTCVecEltisVT<0, i1>,
|
||||
SDTCisPtrTy<3>]>,
|
||||
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
|
||||
|
||||
def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_gather node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
|
||||
return Mgt->getIndex().getValueType() == MVT::v4i32;
|
||||
}]>;
|
||||
|
||||
def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_gather node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
|
||||
return Mgt->getIndex().getValueType() == MVT::v8i32;
|
||||
}]>;
|
||||
|
||||
def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_gather node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
|
||||
return Mgt->getIndex().getValueType() == MVT::v2i64;
|
||||
}]>;
|
||||
def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_gather node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
|
||||
return Mgt->getIndex().getValueType() == MVT::v4i64;
|
||||
}]>;
|
||||
def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_gather node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
|
||||
return Mgt->getIndex().getValueType() == MVT::v8i64;
|
||||
}]>;
|
||||
def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_gather node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
|
||||
return Mgt->getIndex().getValueType() == MVT::v16i32;
|
||||
}]>;
|
||||
|
||||
def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
|
||||
return Sc->getIndex().getValueType() == MVT::v2i64;
|
||||
}]>;
|
||||
|
||||
def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
|
||||
return Sc->getIndex().getValueType() == MVT::v4i32;
|
||||
}]>;
|
||||
|
||||
def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
|
||||
return Sc->getIndex().getValueType() == MVT::v4i64;
|
||||
}]>;
|
||||
|
||||
def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
|
||||
return Sc->getIndex().getValueType() == MVT::v8i32;
|
||||
}]>;
|
||||
|
||||
def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
|
||||
return Sc->getIndex().getValueType() == MVT::v8i64;
|
||||
}]>;
|
||||
def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
|
||||
X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
|
||||
return Sc->getIndex().getValueType() == MVT::v16i32;
|
||||
}]>;
|
||||
|
||||
// 128-bit bitconvert pattern fragments
|
||||
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
|
||||
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
|
||||
|
|
|
@@ -7933,57 +7933,48 @@ let Predicates = [HasAVX2, NoVLX] in {
|
|||
|
||||
// FIXME: Improve scheduling of gather instructions.
|
||||
multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
|
||||
ValueType VTy, PatFrag GatherNode128,
|
||||
PatFrag GatherNode256, RegisterClass RC256,
|
||||
ValueType VTy, RegisterClass RC256,
|
||||
X86MemOperand memop128, X86MemOperand memop256,
|
||||
ValueType MTx = VTx, ValueType MTy = VTy> {
|
||||
let mayLoad = 1, hasSideEffects = 0 in {
|
||||
def rm : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
|
||||
(ins VR128:$src1, memop128:$src2, VR128:$mask),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
|
||||
[(set (VTx VR128:$dst), (MTx VR128:$mask_wb),
|
||||
(GatherNode128 VR128:$src1, VR128:$mask,
|
||||
vectoraddr:$src2))]>,
|
||||
VEX, Sched<[WriteLoad]>;
|
||||
[]>, VEX, Sched<[WriteLoad]>;
|
||||
def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
|
||||
(ins RC256:$src1, memop256:$src2, RC256:$mask),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
|
||||
[(set (VTy RC256:$dst), (MTy RC256:$mask_wb),
|
||||
(GatherNode256 RC256:$src1, RC256:$mask,
|
||||
vectoraddr:$src2))]>,
|
||||
VEX, VEX_L, Sched<[WriteLoad]>;
|
||||
[]>, VEX, VEX_L, Sched<[WriteLoad]>;
|
||||
}
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
let mayLoad = 1, hasSideEffects = 0, Constraints
|
||||
= "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
|
||||
in {
|
||||
defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64, mgatherv4i32,
|
||||
mgatherv4i32, VR256, vx128mem, vx256mem>, VEX_W;
|
||||
defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64, mgatherv2i64,
|
||||
mgatherv4i64, VR256, vx128mem, vy256mem>, VEX_W;
|
||||
defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32, mgatherv4i32,
|
||||
mgatherv8i32, VR256, vx128mem, vy256mem>;
|
||||
defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32, mgatherv2i64,
|
||||
mgatherv4i64, VR128, vx64mem, vy128mem>;
|
||||
defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64,
|
||||
VR256, vx128mem, vx256mem>, VEX_W;
|
||||
defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64,
|
||||
VR256, vx128mem, vy256mem>, VEX_W;
|
||||
defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32,
|
||||
VR256, vx128mem, vy256mem>;
|
||||
defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32,
|
||||
VR128, vx64mem, vy128mem>;
|
||||
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64, mgatherv4i32,
|
||||
mgatherv4i32, VR256, vx128mem, vx256mem,
|
||||
v2i64, v4i64>, VEX_W;
|
||||
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64, mgatherv2i64,
|
||||
mgatherv4i64, VR256, vx128mem, vy256mem,
|
||||
v2i64, v4i64>, VEX_W;
|
||||
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64,
|
||||
VR256, vx128mem, vx256mem, v2i64, v4i64>, VEX_W;
|
||||
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64,
|
||||
VR256, vx128mem, vy256mem, v2i64, v4i64>, VEX_W;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32, mgatherv4i32,
|
||||
mgatherv8i32, VR256, vx128mem, vy256mem,
|
||||
v4i32, v8i32>;
|
||||
defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32, mgatherv2i64,
|
||||
mgatherv4i64, VR128, vx64mem, vy128mem,
|
||||
v4i32, v4i32>;
|
||||
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32,
|
||||
VR256, vx128mem, vy256mem, v4i32, v8i32>;
|
||||
defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32,
|
||||
VR128, vx64mem, vy128mem, v4i32, v4i32>;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue