forked from OSchip/llvm-project
Type legalizer for masked gather and scatter intrinsics.
Full type legalizer that works with all vector lengths from 2 to 16 (element types i32, i64, float, double). For example, the intrinsic void @llvm.masked.scatter.v2f32(<2 x float> %data, <2 x float*> %ptrs, i32 align, <2 x i1> %mask) requires type widening for the data and type promotion for the mask. Differential Revision: http://reviews.llvm.org/D13633 llvm-svn: 255629
This commit is contained in:
parent
b076446368
commit
6015f5c823
|
@ -2122,12 +2122,13 @@ public:
|
|||
: MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, Operands, VTs, MemVT,
|
||||
MMO) {
|
||||
assert(getValue().getValueType() == getValueType(0) &&
|
||||
"Incompatible type of the PathThru value in MaskedGatherSDNode");
|
||||
"Incompatible type of the PassThru value in MaskedGatherSDNode");
|
||||
assert(getMask().getValueType().getVectorNumElements() ==
|
||||
getValueType(0).getVectorNumElements() &&
|
||||
"Vector width mismatch between mask and data");
|
||||
assert(getMask().getValueType().getScalarType() == MVT::i1 &&
|
||||
getValueType(0).getVectorNumElements() &&
|
||||
"Vector width mismatch between mask and data");
|
||||
assert(getIndex().getValueType().getVectorNumElements() ==
|
||||
getValueType(0).getVectorNumElements() &&
|
||||
"Vector width mismatch between index and data");
|
||||
}
|
||||
|
||||
static bool classof(const SDNode *N) {
|
||||
|
@ -2143,13 +2144,14 @@ public:
|
|||
friend class SelectionDAG;
|
||||
MaskedScatterSDNode(unsigned Order, DebugLoc dl,ArrayRef<SDValue> Operands,
|
||||
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
|
||||
: MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, Operands, VTs,
|
||||
MemVT, MMO) {
|
||||
: MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, Operands, VTs, MemVT,
|
||||
MMO) {
|
||||
assert(getMask().getValueType().getVectorNumElements() ==
|
||||
getValue().getValueType().getVectorNumElements() &&
|
||||
"Vector width mismatch between mask and data");
|
||||
assert(getMask().getValueType().getScalarType() == MVT::i1 &&
|
||||
getValue().getValueType().getVectorNumElements() &&
|
||||
"Vector width mismatch between mask and data");
|
||||
assert(getIndex().getValueType().getVectorNumElements() ==
|
||||
getValue().getValueType().getVectorNumElements() &&
|
||||
"Vector width mismatch between index and data");
|
||||
}
|
||||
|
||||
static bool classof(const SDNode *N) {
|
||||
|
|
|
@ -66,8 +66,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
|
|||
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
|
||||
case ISD::EXTRACT_VECTOR_ELT:
|
||||
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
|
||||
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
|
||||
case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break;
|
||||
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
|
||||
case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
|
||||
break;
|
||||
case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
|
||||
break;
|
||||
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
|
||||
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
|
||||
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
|
||||
|
@ -181,7 +184,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
|
|||
N->getChain(), N->getBasePtr(),
|
||||
N->getMemOperand(), N->getOrdering(),
|
||||
N->getSynchScope());
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
return Res;
|
||||
|
@ -194,7 +197,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
|
|||
N->getChain(), N->getBasePtr(),
|
||||
Op2, N->getMemOperand(), N->getOrdering(),
|
||||
N->getSynchScope());
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
return Res;
|
||||
|
@ -479,7 +482,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
|
|||
SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
|
||||
N->getMemoryVT(), N->getMemOperand());
|
||||
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
return Res;
|
||||
|
@ -489,20 +492,34 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
|
|||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
||||
SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
|
||||
|
||||
SDValue Mask = N->getMask();
|
||||
EVT NewMaskVT = getSetCCResultType(NVT);
|
||||
if (NewMaskVT != N->getMask().getValueType())
|
||||
Mask = PromoteTargetBoolean(Mask, NewMaskVT);
|
||||
SDLoc dl(N);
|
||||
|
||||
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
|
||||
Mask, ExtSrc0, N->getMemoryVT(),
|
||||
N->getMask(), ExtSrc0, N->getMemoryVT(),
|
||||
N->getMemOperand(), ISD::SEXTLOAD);
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
return Res;
|
||||
}
|
||||
|
||||
/// Promote the integer result of a masked gather.
///
/// A new gather producing the promoted result type is built from the promoted
/// pass-through value and the unchanged chain, mask, base pointer and index
/// operands of \p N. Users of the old chain result are redirected to the chain
/// of the new node, and the new gather is returned.
SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
  EVT PromotedVT =
      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  SDValue PassThru = GetPromotedInteger(N->getValue());
  assert(PromotedVT == PassThru.getValueType() &&
      "Gather result type and the passThru agrument type should be the same");

  SDLoc Loc(N);
  // Operand order: chain, pass-through value, mask, base pointer, index.
  SDValue GatherOps[] = {N->getChain(), PassThru, N->getMask(),
                         N->getBasePtr(), N->getIndex()};
  SDValue Gather =
      DAG.getMaskedGather(DAG.getVTList(PromotedVT, MVT::Other),
                          N->getMemoryVT(), Loc, GatherOps,
                          N->getMemOperand());

  // Result #1 is the chain: anything that consumed the old chain must now
  // consume the chain of the replacement node.
  ReplaceValueWith(SDValue(N, 1), Gather.getValue(1));
  return Gather;
}
|
||||
|
||||
/// Promote the overflow flag of an overflowing arithmetic node.
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
|
||||
// Simply change the return type of the boolean result.
|
||||
|
@ -889,6 +906,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
|
|||
OpNo); break;
|
||||
case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
|
||||
OpNo); break;
|
||||
case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N),
|
||||
OpNo); break;
|
||||
case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
|
||||
OpNo); break;
|
||||
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
|
||||
case ISD::FP16_TO_FP:
|
||||
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
|
||||
|
@ -1157,56 +1178,49 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
|
|||
N->getMemoryVT(), N->getMemOperand());
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
|
||||
unsigned OpNo) {
|
||||
|
||||
SDValue DataOp = N->getValue();
|
||||
EVT DataVT = DataOp.getValueType();
|
||||
SDValue Mask = N->getMask();
|
||||
EVT MaskVT = Mask.getValueType();
|
||||
SDLoc dl(N);
|
||||
|
||||
bool TruncateStore = false;
|
||||
if (!TLI.isTypeLegal(DataVT)) {
|
||||
if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
|
||||
DataOp = GetPromotedInteger(DataOp);
|
||||
if (!TLI.isTypeLegal(MaskVT))
|
||||
Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
|
||||
TruncateStore = true;
|
||||
}
|
||||
if (OpNo == 2) {
|
||||
// Mask comes before the data operand. If the data operand is legal, we just
|
||||
// promote the mask.
|
||||
// When the data operand has illegal type, we should legalize the data
|
||||
// operand first. The mask will be promoted/split/widened according to
|
||||
// the data operand type.
|
||||
if (TLI.isTypeLegal(DataVT))
|
||||
Mask = PromoteTargetBoolean(Mask, DataVT);
|
||||
else {
|
||||
assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
|
||||
"Unexpected data legalization in MSTORE");
|
||||
DataOp = GetWidenedVector(DataOp);
|
||||
if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
|
||||
return PromoteIntOp_MSTORE(N, 3);
|
||||
|
||||
else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
|
||||
return WidenVecOp_MSTORE(N, 3);
|
||||
|
||||
if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
|
||||
Mask = GetWidenedVector(Mask);
|
||||
else {
|
||||
EVT BoolVT = getSetCCResultType(DataOp.getValueType());
|
||||
|
||||
// We can't use ModifyToType() because we should fill the mask with
|
||||
// zeroes
|
||||
unsigned WidenNumElts = BoolVT.getVectorNumElements();
|
||||
unsigned MaskNumElts = MaskVT.getVectorNumElements();
|
||||
|
||||
unsigned NumConcat = WidenNumElts / MaskNumElts;
|
||||
SmallVector<SDValue, 16> Ops(NumConcat);
|
||||
SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
|
||||
Ops[0] = Mask;
|
||||
for (unsigned i = 1; i != NumConcat; ++i)
|
||||
Ops[i] = ZeroVal;
|
||||
|
||||
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
|
||||
assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
|
||||
return SplitVecOp_MSTORE(N, 3);
|
||||
}
|
||||
}
|
||||
} else { // Data operand
|
||||
assert(OpNo == 3 && "Unexpected operand for promotion");
|
||||
DataOp = GetPromotedInteger(DataOp);
|
||||
Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
|
||||
TruncateStore = true;
|
||||
}
|
||||
else
|
||||
Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
|
||||
|
||||
return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
|
||||
N->getMemoryVT(), N->getMemOperand(),
|
||||
TruncateStore);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
|
||||
unsigned OpNo) {
|
||||
assert(OpNo == 2 && "Only know how to promote the mask!");
|
||||
EVT DataVT = N->getValueType(0);
|
||||
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
|
||||
|
@ -1215,6 +1229,31 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo)
|
|||
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
|
||||
}
|
||||
|
||||
/// Promote operand \p OpNo of a masked gather in place.
///
/// Operand 2 is the mask and is promoted to the target boolean type that
/// matches the gather's result type; any other operand is replaced by its
/// promoted integer value. The node is updated with the new operand list.
SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
                                               unsigned OpNo) {
  SmallVector<SDValue, 5> Operands(N->op_begin(), N->op_end());
  SDValue Original = N->getOperand(OpNo);
  const bool IsMask = (OpNo == 2);

  Operands[OpNo] = IsMask
                       ? PromoteTargetBoolean(Original, N->getValueType(0))
                       : GetPromotedInteger(Original);

  return SDValue(DAG.UpdateNodeOperands(N, Operands), 0);
}
|
||||
|
||||
/// Promote operand \p OpNo of a masked scatter in place.
///
/// Operand 2 is the mask and is promoted to the target boolean type that
/// matches the type of the stored data; any other operand is replaced by its
/// promoted integer value. The node is updated with the new operand list.
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
                                                unsigned OpNo) {
  SmallVector<SDValue, 5> Operands(N->op_begin(), N->op_end());
  SDValue Original = N->getOperand(OpNo);
  const bool IsMask = (OpNo == 2);

  Operands[OpNo] =
      IsMask ? PromoteTargetBoolean(Original, N->getValue().getValueType())
             : GetPromotedInteger(Original);

  return SDValue(DAG.UpdateNodeOperands(N, Operands), 0);
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
|
||||
SDValue Op = GetPromotedInteger(N->getOperand(0));
|
||||
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
|
||||
|
@ -2071,7 +2110,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
|
|||
}
|
||||
}
|
||||
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Ch);
|
||||
}
|
||||
|
|
|
@ -1127,6 +1127,23 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
|
|||
return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
|
||||
}
|
||||
|
||||
/// WidenTargetBoolean - Widen the given target boolean to a target boolean
|
||||
/// of the given type. The boolean vector is widened and then promoted to match
|
||||
/// the target boolean type of the given ValVT.
|
||||
SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
|
||||
bool WithZeroes) {
|
||||
SDLoc dl(Bool);
|
||||
EVT BoolVT = Bool.getValueType();
|
||||
|
||||
assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() &&
|
||||
TLI.isTypeLegal(ValVT) &&
|
||||
"Unexpected types in WidenTargetBoolean");
|
||||
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(),
|
||||
ValVT.getVectorNumElements());
|
||||
Bool = ModifyToType(Bool, WideVT, WithZeroes);
|
||||
return PromoteTargetBoolean(Bool, ValVT);
|
||||
}
|
||||
|
||||
/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
|
||||
/// bits in Hi.
|
||||
void DAGTypeLegalizer::SplitInteger(SDValue Op,
|
||||
|
|
|
@ -187,6 +187,11 @@ private:
|
|||
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
|
||||
|
||||
SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
|
||||
|
||||
/// Modify Bit Vector to match SetCC result type of ValVT.
|
||||
/// The bit vector is widened with zeroes when WithZeroes is true.
|
||||
SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false);
|
||||
|
||||
void ReplaceValueWith(SDValue From, SDValue To);
|
||||
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
|
||||
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
|
||||
|
@ -261,6 +266,7 @@ private:
|
|||
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
|
||||
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
|
||||
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
|
||||
SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
|
||||
SDValue PromoteIntRes_Overflow(SDNode *N);
|
||||
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
|
||||
SDValue PromoteIntRes_SDIV(SDNode *N);
|
||||
|
@ -307,6 +313,8 @@ private:
|
|||
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
|
||||
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
|
||||
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
|
||||
SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
|
||||
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
|
||||
|
||||
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
|
||||
|
||||
|
@ -710,6 +718,7 @@ private:
|
|||
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
|
||||
SDValue WidenVecRes_LOAD(SDNode* N);
|
||||
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
|
||||
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
|
||||
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
|
||||
SDValue WidenVecRes_SELECT(SDNode* N);
|
||||
SDValue WidenVecRes_SELECT_CC(SDNode* N);
|
||||
|
@ -737,6 +746,7 @@ private:
|
|||
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
|
||||
SDValue WidenVecOp_STORE(SDNode* N);
|
||||
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
|
||||
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
|
||||
SDValue WidenVecOp_SETCC(SDNode* N);
|
||||
|
||||
SDValue WidenVecOp_Convert(SDNode *N);
|
||||
|
@ -776,8 +786,10 @@ private:
|
|||
|
||||
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
|
||||
/// input vector must have the same element type as NVT.
|
||||
SDValue ModifyToType(SDValue InOp, EVT WidenVT);
|
||||
|
||||
/// When FillWithZeroes is "on" the vector will be widened with
|
||||
/// zeroes.
|
||||
/// By default, the vector will be widened with undefined values.
|
||||
SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Generic Splitting: LegalizeTypesGeneric.cpp
|
||||
|
|
|
@ -235,7 +235,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
|
|||
N->isInvariant(), N->getOriginalAlignment(),
|
||||
N->getAAInfo());
|
||||
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
|
||||
return Result;
|
||||
|
@ -1020,7 +1020,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
|
|||
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
|
||||
Hi.getValue(1));
|
||||
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(LD, 1), Ch);
|
||||
}
|
||||
|
@ -1034,6 +1034,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
|||
SDValue Ch = MLD->getChain();
|
||||
SDValue Ptr = MLD->getBasePtr();
|
||||
SDValue Mask = MLD->getMask();
|
||||
SDValue Src0 = MLD->getSrc0();
|
||||
unsigned Alignment = MLD->getOriginalAlignment();
|
||||
ISD::LoadExtType ExtType = MLD->getExtensionType();
|
||||
|
||||
|
@ -1043,16 +1044,22 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
|||
(Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
|
||||
Alignment/2 : Alignment;
|
||||
|
||||
// Split Mask operand
|
||||
SDValue MaskLo, MaskHi;
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
|
||||
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Mask, MaskLo, MaskHi);
|
||||
else
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
|
||||
|
||||
EVT MemoryVT = MLD->getMemoryVT();
|
||||
EVT LoMemVT, HiMemVT;
|
||||
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
|
||||
|
||||
SDValue Src0 = MLD->getSrc0();
|
||||
SDValue Src0Lo, Src0Hi;
|
||||
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
|
||||
if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Src0, Src0Lo, Src0Hi);
|
||||
else
|
||||
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
|
||||
|
||||
MachineMemOperand *MMO = DAG.getMachineFunction().
|
||||
getMachineMemOperand(MLD->getPointerInfo(),
|
||||
|
@ -1080,7 +1087,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
|||
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
|
||||
Hi.getValue(1));
|
||||
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(MLD, 1), Ch);
|
||||
|
||||
|
@ -1095,20 +1102,33 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
|
|||
SDValue Ch = MGT->getChain();
|
||||
SDValue Ptr = MGT->getBasePtr();
|
||||
SDValue Mask = MGT->getMask();
|
||||
SDValue Src0 = MGT->getValue();
|
||||
SDValue Index = MGT->getIndex();
|
||||
unsigned Alignment = MGT->getOriginalAlignment();
|
||||
|
||||
// Split Mask operand
|
||||
SDValue MaskLo, MaskHi;
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
|
||||
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Mask, MaskLo, MaskHi);
|
||||
else
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
|
||||
|
||||
EVT MemoryVT = MGT->getMemoryVT();
|
||||
EVT LoMemVT, HiMemVT;
|
||||
// Split MemoryVT
|
||||
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
|
||||
|
||||
SDValue Src0Lo, Src0Hi;
|
||||
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl);
|
||||
if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Src0, Src0Lo, Src0Hi);
|
||||
else
|
||||
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
|
||||
|
||||
SDValue IndexHi, IndexLo;
|
||||
std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl);
|
||||
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Index, IndexLo, IndexHi);
|
||||
else
|
||||
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
|
||||
|
||||
MachineMemOperand *MMO = DAG.getMachineFunction().
|
||||
getMachineMemOperand(MGT->getPointerInfo(),
|
||||
|
@ -1128,7 +1148,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
|
|||
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
|
||||
Hi.getValue(1));
|
||||
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(MGT, 1), Ch);
|
||||
}
|
||||
|
@ -1599,23 +1619,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
|
|||
SDValue Ptr = MGT->getBasePtr();
|
||||
SDValue Index = MGT->getIndex();
|
||||
SDValue Mask = MGT->getMask();
|
||||
SDValue Src0 = MGT->getValue();
|
||||
unsigned Alignment = MGT->getOriginalAlignment();
|
||||
|
||||
SDValue MaskLo, MaskHi;
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
|
||||
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
// Split Mask operand
|
||||
GetSplitVector(Mask, MaskLo, MaskHi);
|
||||
else
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
|
||||
|
||||
EVT MemoryVT = MGT->getMemoryVT();
|
||||
EVT LoMemVT, HiMemVT;
|
||||
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
|
||||
|
||||
SDValue Src0Lo, Src0Hi;
|
||||
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl);
|
||||
if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Src0, Src0Lo, Src0Hi);
|
||||
else
|
||||
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
|
||||
|
||||
SDValue IndexHi, IndexLo;
|
||||
if (Index.getNode())
|
||||
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
|
||||
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Index, IndexLo, IndexHi);
|
||||
else
|
||||
IndexLo = IndexHi = Index;
|
||||
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
|
||||
|
||||
MachineMemOperand *MMO = DAG.getMachineFunction().
|
||||
getMachineMemOperand(MGT->getPointerInfo(),
|
||||
|
@ -1641,7 +1669,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
|
|||
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
|
||||
Hi.getValue(1));
|
||||
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(MGT, 1), Ch);
|
||||
|
||||
|
@ -1665,9 +1693,21 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
|
|||
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
|
||||
|
||||
SDValue DataLo, DataHi;
|
||||
GetSplitVector(Data, DataLo, DataHi);
|
||||
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
// Split Data operand
|
||||
GetSplitVector(Data, DataLo, DataHi);
|
||||
else
|
||||
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
|
||||
|
||||
SDValue MaskLo, MaskHi;
|
||||
GetSplitVector(Mask, MaskLo, MaskHi);
|
||||
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
// Split Mask operand
|
||||
GetSplitVector(Mask, MaskLo, MaskHi);
|
||||
else
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
|
||||
|
||||
MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType());
|
||||
MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType());
|
||||
|
||||
// if Alignment is equal to the vector size,
|
||||
// take the half of it for the second part
|
||||
|
@ -1712,25 +1752,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
|
|||
unsigned Alignment = N->getOriginalAlignment();
|
||||
SDLoc DL(N);
|
||||
|
||||
// Split all operands
|
||||
EVT LoMemVT, HiMemVT;
|
||||
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
|
||||
|
||||
SDValue DataLo, DataHi;
|
||||
GetSplitVector(Data, DataLo, DataHi);
|
||||
SDValue MaskLo, MaskHi;
|
||||
GetSplitVector(Mask, MaskLo, MaskHi);
|
||||
|
||||
SDValue PtrLo, PtrHi;
|
||||
if (Ptr.getValueType().isVector()) // gather form vector of pointers
|
||||
std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL);
|
||||
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
// Split Data operand
|
||||
GetSplitVector(Data, DataLo, DataHi);
|
||||
else
|
||||
PtrLo = PtrHi = Ptr;
|
||||
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
|
||||
|
||||
SDValue MaskLo, MaskHi;
|
||||
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
// Split Mask operand
|
||||
GetSplitVector(Mask, MaskLo, MaskHi);
|
||||
else
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
|
||||
|
||||
SDValue IndexHi, IndexLo;
|
||||
if (Index.getNode())
|
||||
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
|
||||
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Index, IndexLo, IndexHi);
|
||||
else
|
||||
IndexLo = IndexHi = Index;
|
||||
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
|
||||
|
||||
SDValue Lo, Hi;
|
||||
MachineMemOperand *MMO = DAG.getMachineFunction().
|
||||
|
@ -1738,7 +1782,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
|
|||
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
|
||||
Alignment, N->getAAInfo(), N->getRanges());
|
||||
|
||||
SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo};
|
||||
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo};
|
||||
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
|
||||
DL, OpsLo, MMO);
|
||||
|
||||
|
@ -1747,7 +1791,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
|
|||
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
|
||||
Alignment, N->getAAInfo(), N->getRanges());
|
||||
|
||||
SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi};
|
||||
SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi};
|
||||
Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
|
||||
DL, OpsHi, MMO);
|
||||
|
||||
|
@ -1975,6 +2019,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
|
|||
case ISD::MLOAD:
|
||||
Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
|
||||
break;
|
||||
case ISD::MGATHER:
|
||||
Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
|
||||
break;
|
||||
|
||||
case ISD::ADD:
|
||||
case ISD::AND:
|
||||
|
@ -2728,7 +2775,35 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
|
|||
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
|
||||
Mask, Src0, N->getMemoryVT(),
|
||||
N->getMemOperand(), ExtType);
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
return Res;
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
|
||||
|
||||
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
||||
SDValue Mask = N->getMask();
|
||||
SDValue Src0 = GetWidenedVector(N->getValue());
|
||||
unsigned NumElts = WideVT.getVectorNumElements();
|
||||
SDLoc dl(N);
|
||||
|
||||
// The mask should be widened as well
|
||||
Mask = WidenTargetBoolean(Mask, WideVT, true);
|
||||
|
||||
// Widen the Index operand
|
||||
SDValue Index = N->getIndex();
|
||||
EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
|
||||
Index.getValueType().getScalarType(),
|
||||
NumElts);
|
||||
Index = ModifyToType(Index, WideIndexVT);
|
||||
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
|
||||
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
|
||||
N->getMemoryVT(), dl, Ops,
|
||||
N->getMemOperand());
|
||||
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
return Res;
|
||||
|
@ -2890,6 +2965,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
|
|||
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
|
||||
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
|
||||
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
|
||||
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
|
||||
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
|
||||
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
|
||||
|
||||
|
@ -3137,6 +3213,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
|
|||
false);
|
||||
}
|
||||
|
||||
/// Widen the data operand of a masked scatter.
///
/// Only operand 1 (the stored data) may be widened here. The mask is widened
/// with zeroes and the index vector is resized, both to the element count of
/// the widened data, and a new scatter is built from the widened operands
/// together with the original chain, base pointer, memory type and memory
/// operand.
SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
  assert(OpNo == 1 && "Can widen only data operand of mscatter");
  MaskedScatterSDNode *Scatter = cast<MaskedScatterSDNode>(N);
  SDLoc Loc(N);

  // Data: take the already widened value; its type drives everything else.
  SDValue WideData = GetWidenedVector(Scatter->getValue());
  EVT WideDataVT = WideData.getValueType();

  // Mask: widen with zeroes to the element count of the widened data.
  SDValue WideMask = WidenTargetBoolean(Scatter->getMask(), WideDataVT, true);

  // Index: same element type as before, element count of the widened data.
  SDValue OrigIndex = Scatter->getIndex();
  EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
                                     OrigIndex.getValueType().getScalarType(),
                                     WideDataVT.getVectorNumElements());
  SDValue WideIndex = ModifyToType(OrigIndex, WideIndexVT);

  SDValue ScatterOps[] = {Scatter->getChain(), WideData, WideMask,
                          Scatter->getBasePtr(), WideIndex};
  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
                              Scatter->getMemoryVT(), Loc, ScatterOps,
                              Scatter->getMemOperand());
}
|
||||
|
||||
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
|
||||
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
|
||||
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
|
||||
|
@ -3600,7 +3704,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
|
|||
|
||||
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
|
||||
/// input vector must have the same element type as NVT.
|
||||
SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
|
||||
/// FillWithZeroes specifies that the vector should be widened with zeroes.
|
||||
SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
|
||||
bool FillWithZeroes) {
|
||||
// Note that InOp might have been widened so it might already have
|
||||
// the right width or it might need be narrowed.
|
||||
EVT InVT = InOp.getValueType();
|
||||
|
@ -3617,10 +3723,11 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
|
|||
if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
|
||||
unsigned NumConcat = WidenNumElts / InNumElts;
|
||||
SmallVector<SDValue, 16> Ops(NumConcat);
|
||||
SDValue UndefVal = DAG.getUNDEF(InVT);
|
||||
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
|
||||
DAG.getUNDEF(InVT);
|
||||
Ops[0] = InOp;
|
||||
for (unsigned i = 1; i != NumConcat; ++i)
|
||||
Ops[i] = UndefVal;
|
||||
Ops[i] = FillVal;
|
||||
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
|
||||
}
|
||||
|
@ -3640,8 +3747,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
|
|||
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
|
||||
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
|
||||
|
||||
SDValue UndefVal = DAG.getUNDEF(EltVT);
|
||||
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
|
||||
DAG.getUNDEF(EltVT);
|
||||
for ( ; Idx < WidenNumElts; ++Idx)
|
||||
Ops[Idx] = UndefVal;
|
||||
Ops[Idx] = FillVal;
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
|
||||
}
|
||||
|
|
|
@ -1579,7 +1579,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::OR, VT, Legal);
|
||||
setOperationAction(ISD::XOR, VT, Legal);
|
||||
}
|
||||
if (EltSize >= 32 && VT.getSizeInBits() <= 512) {
|
||||
if ((VT.is128BitVector() || VT.is256BitVector()) && EltSize >= 32) {
|
||||
setOperationAction(ISD::MGATHER, VT, Custom);
|
||||
setOperationAction(ISD::MSCATTER, VT, Custom);
|
||||
}
|
||||
|
@ -1605,6 +1605,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
|
||||
setOperationAction(ISD::MLOAD, VT, Legal);
|
||||
setOperationAction(ISD::MSTORE, VT, Legal);
|
||||
setOperationAction(ISD::MGATHER, VT, Legal);
|
||||
setOperationAction(ISD::MSCATTER, VT, Custom);
|
||||
}
|
||||
}
|
||||
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
|
||||
|
@ -1813,6 +1815,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setTargetDAGCombine(ISD::BUILD_VECTOR);
|
||||
setTargetDAGCombine(ISD::MUL);
|
||||
setTargetDAGCombine(ISD::XOR);
|
||||
setTargetDAGCombine(ISD::MSCATTER);
|
||||
setTargetDAGCombine(ISD::MGATHER);
|
||||
|
||||
computeRegisterProperties(Subtarget->getRegisterInfo());
|
||||
|
||||
|
@ -19760,6 +19764,16 @@ static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
|
|||
EVT EltVT = NVT.getVectorElementType();
|
||||
|
||||
SDLoc dl(InOp);
|
||||
if (InOp.getOpcode() == ISD::CONCAT_VECTORS &&
|
||||
InOp.getNumOperands() == 2) {
|
||||
SDValue N1 = InOp.getOperand(1);
|
||||
if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) ||
|
||||
N1.isUndef()) {
|
||||
InOp = InOp.getOperand(0);
|
||||
InVT = InOp.getSimpleValueType();
|
||||
InNumElts = InVT.getVectorNumElements();
|
||||
}
|
||||
}
|
||||
if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) ||
|
||||
ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) {
|
||||
SmallVector<SDValue, 16> Ops;
|
||||
|
@ -19783,28 +19797,93 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget,
|
|||
assert(Subtarget->hasAVX512() &&
|
||||
"MGATHER/MSCATTER are supported on AVX-512 arch only");
|
||||
|
||||
// X86 scatter kills mask register, so its type should be added to
|
||||
// the list of return values.
|
||||
// If the "scatter" has 2 return values, it is already handled.
|
||||
if (Op.getNode()->getNumValues() == 2)
|
||||
return Op;
|
||||
|
||||
MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
|
||||
MVT VT = N->getValue().getSimpleValueType();
|
||||
SDValue Src = N->getValue();
|
||||
MVT VT = Src.getSimpleValueType();
|
||||
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
|
||||
SDLoc dl(Op);
|
||||
|
||||
// X86 scatter kills mask register, so its type should be added to
|
||||
// the list of return values
|
||||
if (N->getNumValues() == 1) {
|
||||
SDValue Index = N->getIndex();
|
||||
if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
|
||||
!Index.getSimpleValueType().is512BitVector())
|
||||
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
|
||||
SDValue NewScatter;
|
||||
SDValue Index = N->getIndex();
|
||||
SDValue Mask = N->getMask();
|
||||
SDValue Chain = N->getChain();
|
||||
SDValue BasePtr = N->getBasePtr();
|
||||
MVT MemVT = N->getMemoryVT().getSimpleVT();
|
||||
MVT IndexVT = Index.getSimpleValueType();
|
||||
MVT MaskVT = Mask.getSimpleValueType();
|
||||
|
||||
SDVTList VTs = DAG.getVTList(N->getMask().getValueType(), MVT::Other);
|
||||
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
|
||||
N->getOperand(3), Index };
|
||||
if (MemVT.getScalarSizeInBits() < VT.getScalarSizeInBits()) {
|
||||
// The v2i32 value was promoted to v2i64.
|
||||
// Now we "redo" the type legalizer's work and widen the original
|
||||
// v2i32 value to v4i32. The original v2i32 is retrieved from v2i64
|
||||
// with a shuffle.
|
||||
assert((MemVT == MVT::v2i32 && VT == MVT::v2i64) &&
|
||||
"Unexpected memory type");
|
||||
int ShuffleMask[] = {0, 2, -1, -1};
|
||||
Src = DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src),
|
||||
DAG.getUNDEF(MVT::v4i32), ShuffleMask);
|
||||
// Now we have 4 elements instead of 2.
|
||||
// Expand the index.
|
||||
MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), 4);
|
||||
Index = ExtendToType(Index, NewIndexVT, DAG);
|
||||
|
||||
SDValue NewScatter = DAG.getMaskedScatter(VTs, VT, dl, Ops, N->getMemOperand());
|
||||
DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
|
||||
return SDValue(NewScatter.getNode(), 0);
|
||||
// Expand the mask with zeroes
|
||||
// Mask may be <2 x i64> or <2 x i1> at this moment
|
||||
assert((MaskVT == MVT::v2i1 || MaskVT == MVT::v2i64) &&
|
||||
"Unexpected mask type");
|
||||
MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), 4);
|
||||
Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
|
||||
VT = MVT::v4i32;
|
||||
}
|
||||
return Op;
|
||||
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
|
||||
!Index.getSimpleValueType().is512BitVector()) {
|
||||
// AVX512F supports only 512-bit vectors. Either data or index should
|
||||
// be 512 bits wide. If both index and data are now 256-bit, but
|
||||
// the vector contains 8 elements, we just sign-extend the index
|
||||
if (IndexVT == MVT::v8i32)
|
||||
// Just extend index
|
||||
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
|
||||
else {
|
||||
// The minimal number of elts in scatter is 8
|
||||
NumElts = 8;
|
||||
// Index
|
||||
MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
|
||||
// Use original index here, do not modify the index twice
|
||||
Index = ExtendToType(N->getIndex(), NewIndexVT, DAG);
|
||||
if (IndexVT.getScalarType() == MVT::i32)
|
||||
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
|
||||
|
||||
// Mask
|
||||
// At this point we have promoted mask operand
|
||||
assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
|
||||
MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
|
||||
// Use the original mask here, do not modify the mask twice
|
||||
Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);
|
||||
|
||||
// The value that should be stored
|
||||
MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
|
||||
Src = ExtendToType(Src, NewVT, DAG);
|
||||
}
|
||||
}
|
||||
// If the mask is "wide" at this point - truncate it to i1 vector
|
||||
MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts);
|
||||
Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask);
|
||||
|
||||
// The mask is killed by scatter, add it to the values
|
||||
SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other);
|
||||
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index};
|
||||
NewScatter = DAG.getMaskedScatter(VTs, N->getMemoryVT(), dl, Ops,
|
||||
N->getMemOperand());
|
||||
DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
|
||||
return SDValue(NewScatter.getNode(), 0);
|
||||
}
|
||||
|
||||
static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget,
|
||||
|
@ -19869,17 +19948,59 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget,
|
|||
"MGATHER/MSCATTER are supported on AVX-512 arch only");
|
||||
|
||||
MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
|
||||
SDLoc dl(Op);
|
||||
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
SDValue Index = N->getIndex();
|
||||
SDValue Mask = N->getMask();
|
||||
SDValue Src0 = N->getValue();
|
||||
MVT IndexVT = Index.getSimpleValueType();
|
||||
MVT MaskVT = Mask.getSimpleValueType();
|
||||
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
|
||||
|
||||
if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
|
||||
!Index.getSimpleValueType().is512BitVector()) {
|
||||
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
|
||||
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
|
||||
N->getOperand(3), Index };
|
||||
DAG.UpdateNodeOperands(N, Ops);
|
||||
// AVX512F supports only 512-bit vectors. Either data or index should
|
||||
// be 512 bits wide. If both index and data are now 256-bit, but
|
||||
// the vector contains 8 elements, we just sign-extend the index
|
||||
if (NumElts == 8) {
|
||||
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
|
||||
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
|
||||
N->getOperand(3), Index };
|
||||
DAG.UpdateNodeOperands(N, Ops);
|
||||
return Op;
|
||||
}
|
||||
|
||||
// Minimal number of elements in Gather
|
||||
NumElts = 8;
|
||||
// Index
|
||||
MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
|
||||
Index = ExtendToType(Index, NewIndexVT, DAG);
|
||||
if (IndexVT.getScalarType() == MVT::i32)
|
||||
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
|
||||
|
||||
// Mask
|
||||
MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts);
|
||||
// At this point we have promoted mask operand
|
||||
assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
|
||||
MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
|
||||
Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
|
||||
Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask);
|
||||
|
||||
// The pass-thru value
|
||||
MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
|
||||
Src0 = ExtendToType(Src0, NewVT, DAG);
|
||||
|
||||
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
|
||||
SDValue NewGather = DAG.getMaskedGather(DAG.getVTList(NewVT, MVT::Other),
|
||||
N->getMemoryVT(), dl, Ops,
|
||||
N->getMemOperand());
|
||||
SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
|
||||
NewGather.getValue(0),
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
SDValue RetOps[] = {Exract, NewGather.getValue(1)};
|
||||
return DAG.getMergeValues(RetOps, dl);
|
||||
}
|
||||
return Op;
|
||||
}
|
||||
|
@ -26907,6 +27028,20 @@ static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// DAG combine for masked gather/scatter nodes: if the mask operand
/// (operand 2) is a SIGN_EXTEND_INREG, rewrite the node in place so that it
/// uses the un-extended mask source instead.
///
/// \param N   The gather or scatter node being combined.
/// \param DAG The selection DAG that owns \p N.
/// \returns Always an empty SDValue; any change is made by updating the
///          operands of \p N directly rather than by returning a new node.
static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
// NOTE(review): DL is not referenced again in this function.
SDLoc DL(N);
|
||||
// Gather and Scatter instructions use k-registers for masks. The type of
|
||||
// the masks is v*i1. So the mask will be truncated anyway.
|
||||
// The SIGN_EXTEND_INREG may be dropped.
|
||||
SDValue Mask = N->getOperand(2);
|
||||
if (Mask.getOpcode() == ISD::SIGN_EXTEND_INREG) {
|
||||
// Copy all operands and replace only the mask with the extension's input.
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
|
||||
NewOps[2] = Mask.getOperand(0);
|
||||
DAG.UpdateNodeOperands(N, NewOps);
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
|
||||
// as "sbb reg,reg", since it can be extended without zext and produces
|
||||
// an all-ones bit which is more useful than 0/1 in some cases.
|
||||
|
@ -27348,6 +27483,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
|
||||
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
|
||||
case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG);
|
||||
case ISD::MGATHER:
|
||||
case ISD::MSCATTER: return PerformGatherScatterCombine(N, DAG);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
|
|
@ -2176,17 +2176,19 @@ let Predicates = [HasAVX512] in {
|
|||
(EXTRACT_SUBREG
|
||||
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
|
||||
sub_16bit)>;
|
||||
def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK16)>;
|
||||
def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK8)>;
|
||||
}
|
||||
let Predicates = [HasBWI] in {
|
||||
def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK32)>;
|
||||
def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK64)>;
|
||||
}
|
||||
def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK16)>;
|
||||
def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK8)>;
|
||||
def : Pat<(v4i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK4)>;
|
||||
def : Pat<(v2i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK2)>;
|
||||
def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK32)>;
|
||||
def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK64)>;
|
||||
|
||||
|
||||
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
|
||||
|
@ -2489,6 +2491,9 @@ def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
|
|||
def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 0))),
|
||||
(v16i1 (COPY_TO_REGCLASS VK32:$src, VK16))>;
|
||||
|
||||
def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))),
|
||||
(v16i1 (COPY_TO_REGCLASS (KSHIFTRDri VK32:$src, (i8 16)), VK16))>;
|
||||
|
||||
def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))),
|
||||
(v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>;
|
||||
|
||||
|
@ -2497,6 +2502,7 @@ def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
|
|||
|
||||
def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
|
||||
(v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
|
||||
|
||||
def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
|
||||
(v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=knl < %s | FileCheck %s -check-prefix=AVX512
|
||||
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
|
||||
; RUN: opt -mtriple=x86_64-apple-darwin -codegenprepare -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=AVX_SCALAR
|
||||
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=skx < %s | FileCheck %s -check-prefix=SKX
|
||||
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=knl < %s | FileCheck %s --check-prefix=AVX512
|
||||
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core-avx2 < %s | FileCheck %s --check-prefix=AVX2
|
||||
; RUN: opt -mtriple=x86_64-apple-darwin -codegenprepare -mcpu=corei7-avx -S < %s | FileCheck %s --check-prefix=AVX_SCALAR
|
||||
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=skx < %s | FileCheck %s --check-prefix=SKX
|
||||
|
||||
; AVX512-LABEL: test1
|
||||
; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
|
||||
|
@ -274,6 +274,15 @@ define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
|
|||
; AVX2-NOT: blend
|
||||
; AVX2: ret
|
||||
define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
|
||||
; SKX-LABEL: test18:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $2, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $2, %k0, %k1
|
||||
; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
%res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>undef)
|
||||
ret <2 x float> %res
|
||||
|
@ -363,3 +372,77 @@ define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) {
|
|||
%res = call <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>* %addr, i32 4, <16 x i1>%mask, <16 x %mystruct*>zeroinitializer)
|
||||
ret <16 x %mystruct*> %res
|
||||
}
|
||||
|
||||
; Masked store of <16 x i64> under a <16 x i1> mask. The expected skx output
; below splits the operation into two zmm stores (offsets 0 and 64); the
; second store uses the mask shifted right by 8 bits.
define void @test_store_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
|
||||
; SKX-LABEL: test_store_16i64:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovdqu64 %zmm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: kshiftrw $8, %k1, %k1
|
||||
; SKX-NEXT: vmovdqu64 %zmm2, 64(%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
call void @llvm.masked.store.v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32 4, <16 x i1> %mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.store.v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32, <16 x i1> %mask)
|
||||
; Masked store of <16 x double> under a <16 x i1> mask. The expected skx
; output below splits the operation into two zmm stores (offsets 0 and 64);
; the second store uses the mask shifted right by 8 bits.
define void @test_store_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) {
|
||||
; SKX-LABEL: test_store_16f64:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovupd %zmm1, (%rdi) {%k1}
|
||||
; SKX-NEXT: kshiftrw $8, %k1, %k1
|
||||
; SKX-NEXT: vmovupd %zmm2, 64(%rdi) {%k1}
|
||||
; SKX-NEXT: retq
|
||||
call void @llvm.masked.store.v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32 4, <16 x i1> %mask)
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.masked.store.v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32, <16 x i1> %mask)
|
||||
; Masked load of <16 x i64> with a <16 x i1> mask and %src0 as pass-through.
; The expected skx output below performs two masked zmm loads (offsets 0 and
; 64), the second with the mask shifted right by 8 bits, then moves the two
; halves into zmm0/zmm1.
define <16 x i64> @test_load_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
|
||||
; SKX-LABEL: test_load_16i64:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1}
|
||||
; SKX-NEXT: kshiftrw $8, %k1, %k1
|
||||
; SKX-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
|
||||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: vmovaps %zmm2, %zmm1
|
||||
; SKX-NEXT: retq
|
||||
%res = call <16 x i64> @llvm.masked.load.v16i64(<16 x i64>* %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
|
||||
ret <16 x i64> %res
|
||||
}
|
||||
declare <16 x i64> @llvm.masked.load.v16i64(<16 x i64>* %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
|
||||
; Masked load of <16 x double> with a <16 x i1> mask and %src0 as
; pass-through. The expected skx output below performs two masked zmm loads
; (offsets 0 and 64), the second with the mask shifted right by 8 bits, then
; moves the two halves into zmm0/zmm1.
define <16 x double> @test_load_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) {
|
||||
; SKX-LABEL: test_load_16f64:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||
; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
|
||||
; SKX-NEXT: kshiftrw $8, %k1, %k1
|
||||
; SKX-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
|
||||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: vmovaps %zmm2, %zmm1
|
||||
; SKX-NEXT: retq
|
||||
%res = call <16 x double> @llvm.masked.load.v16f64(<16 x double>* %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
|
||||
ret <16 x double> %res
|
||||
}
|
||||
declare <16 x double> @llvm.masked.load.v16f64(<16 x double>* %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
|
||||
|
||||
; Masked load of <32 x double> with a <32 x i1> mask and %src0 as
; pass-through. The expected skx output below performs four masked zmm loads
; (offsets 0, 64, 128 and 192); the mask is split with a 16-bit shift
; (kshiftrd) and 8-bit shifts (kshiftrw), and the results are moved into
; zmm0-zmm3.
define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0) {
|
||||
; SKX-LABEL: test_load_32f64:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovb2m %ymm0, %k1
|
||||
; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
|
||||
; SKX-NEXT: kshiftrd $16, %k1, %k2
|
||||
; SKX-NEXT: vmovupd 128(%rdi), %zmm3 {%k2}
|
||||
; SKX-NEXT: kshiftrw $8, %k1, %k1
|
||||
; SKX-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
|
||||
; SKX-NEXT: kshiftrw $8, %k2, %k1
|
||||
; SKX-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
|
||||
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||
; SKX-NEXT: vmovaps %zmm2, %zmm1
|
||||
; SKX-NEXT: vmovaps %zmm3, %zmm2
|
||||
; SKX-NEXT: vmovaps %zmm4, %zmm3
|
||||
; SKX-NEXT: retq
|
||||
%res = call <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
|
||||
ret <32 x double> %res
|
||||
}
|
||||
declare <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
|
||||
|
|
Loading…
Reference in New Issue