forked from OSchip/llvm-project
[AVX-512] Simplify code for creating 512-bit SHUF128 operations.
We don't need two loops, and we can safely assume and hardcode the size of the widened mask. llvm-svn: 290942
This commit is contained in:
parent
6039656441
commit
83115a809f
|
@ -12616,33 +12616,26 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
|
||||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
|
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try to lower to vshuf64x2/vshuf32x4.
|
||||||
|
assert(WidenedMask.size() == 4);
|
||||||
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
|
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
|
||||||
|
unsigned PermMask = 0;
|
||||||
// Ensure elements came from the same Op.
|
// Ensure elements came from the same Op.
|
||||||
int MaxOp1Index = VT.getVectorNumElements()/2 - 1;
|
for (int i = 0; i < 4; ++i) {
|
||||||
for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
|
assert(WidenedMask[i] >= -1);
|
||||||
if (WidenedMask[i] == SM_SentinelZero)
|
if (WidenedMask[i] < 0)
|
||||||
return SDValue();
|
|
||||||
if (WidenedMask[i] == SM_SentinelUndef)
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1;
|
SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
|
||||||
unsigned OpIndex = (i < Size/2) ? 0 : 1;
|
unsigned OpIndex = i / 2;
|
||||||
if (Ops[OpIndex].isUndef())
|
if (Ops[OpIndex].isUndef())
|
||||||
Ops[OpIndex] = Op;
|
Ops[OpIndex] = Op;
|
||||||
else if (Ops[OpIndex] != Op)
|
else if (Ops[OpIndex] != Op)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
|
||||||
|
|
||||||
// Form a 128-bit permutation.
|
// Convert the 128-bit shuffle mask selection values into 128-bit selection
|
||||||
// Convert the 64-bit shuffle mask selection values into 128-bit selection
|
// bits defined by a vshuf64x2 instruction's immediate control byte.
|
||||||
// bits defined by a vshuf64x2 instruction's immediate control byte.
|
PermMask |= (WidenedMask[i] % 4) << (i * 2);
|
||||||
unsigned PermMask = 0, Imm = 0;
|
|
||||||
unsigned ControlBitsNum = WidenedMask.size() / 2;
|
|
||||||
|
|
||||||
for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
|
|
||||||
// Use first element in place of undef mask.
|
|
||||||
Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
|
|
||||||
PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
|
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
|
||||||
|
|
Loading…
Reference in New Issue