forked from OSchip/llvm-project
[Hexagon] Restore handling of expanding shuffles
Fixed bugs, added testcases. The byte-unpack is actually recognized by the DAG combiner, but the halfword-unpack is not.
This commit is contained in:
parent
79c09d5ee1
commit
002f5e158d
|
@ -852,10 +852,10 @@ namespace llvm {
|
||||||
PackMux,
|
PackMux,
|
||||||
};
|
};
|
||||||
OpRef concats(OpRef Va, OpRef Vb, ResultStack &Results);
|
OpRef concats(OpRef Va, OpRef Vb, ResultStack &Results);
|
||||||
OpRef packss(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
|
OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
|
||||||
MutableArrayRef<int> NewMask, unsigned Options = None);
|
MutableArrayRef<int> NewMask, unsigned Options = None);
|
||||||
OpRef packpp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
|
OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
|
||||||
MutableArrayRef<int> NewMask);
|
MutableArrayRef<int> NewMask);
|
||||||
OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
|
OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
|
||||||
ResultStack &Results);
|
ResultStack &Results);
|
||||||
OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
|
OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
|
||||||
|
@ -868,6 +868,7 @@ namespace llvm {
|
||||||
|
|
||||||
OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
|
OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
|
||||||
OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
|
OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
|
||||||
|
OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
|
||||||
OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
|
OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
|
||||||
|
|
||||||
bool selectVectorConstants(SDNode *N);
|
bool selectVectorConstants(SDNode *N);
|
||||||
|
@ -1114,10 +1115,12 @@ OpRef HvxSelector::concats(OpRef Lo, OpRef Hi, ResultStack &Results) {
|
||||||
return OpRef::res(Results.top());
|
return OpRef::res(Results.top());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Va, Vb are single vectors, SM is a single vector.
|
// Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
|
||||||
OpRef HvxSelector::packss(ShuffleMask SM, OpRef Va, OpRef Vb,
|
// pack these halves into a single vector, and remap SM into NewMask to use
|
||||||
ResultStack &Results, MutableArrayRef<int> NewMask,
|
// the new vector instead.
|
||||||
unsigned Options) {
|
OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
|
ResultStack &Results, MutableArrayRef<int> NewMask,
|
||||||
|
unsigned Options) {
|
||||||
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
|
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
|
||||||
if (!Va.isValid() || !Vb.isValid())
|
if (!Va.isValid() || !Vb.isValid())
|
||||||
return OpRef::fail();
|
return OpRef::fail();
|
||||||
|
@ -1125,6 +1128,7 @@ OpRef HvxSelector::packss(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
MVT Ty = getSingleVT(MVT::i8);
|
MVT Ty = getSingleVT(MVT::i8);
|
||||||
MVT PairTy = getPairVT(MVT::i8);
|
MVT PairTy = getPairVT(MVT::i8);
|
||||||
OpRef Inp[2] = {Va, Vb};
|
OpRef Inp[2] = {Va, Vb};
|
||||||
|
unsigned VecLen = SM.Mask.size();
|
||||||
|
|
||||||
auto valign = [this](OpRef Lo, OpRef Hi, unsigned Amt, MVT Ty,
|
auto valign = [this](OpRef Lo, OpRef Hi, unsigned Amt, MVT Ty,
|
||||||
ResultStack &Results) {
|
ResultStack &Results) {
|
||||||
|
@ -1144,18 +1148,51 @@ OpRef HvxSelector::packss(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
return OpRef::res(Results.top());
|
return OpRef::res(Results.top());
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Segment is a vector half.
|
||||||
|
unsigned SegLen = HwLen / 2;
|
||||||
|
|
||||||
// Check if we can shuffle vector halves around to get the used elements
|
// Check if we can shuffle vector halves around to get the used elements
|
||||||
// into a single vector.
|
// into a single vector.
|
||||||
SmallVector<int,128> MaskH(SM.Mask.begin(), SM.Mask.end());
|
SmallVector<int,128> MaskH(SM.Mask.begin(), SM.Mask.end());
|
||||||
SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, HwLen/2);
|
SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, SegLen);
|
||||||
unsigned SegCount = SegList.size();
|
unsigned SegCount = SegList.size();
|
||||||
|
SmallVector<unsigned, 4> SegMap = getOutputSegmentMap(SM.Mask, SegLen);
|
||||||
|
|
||||||
if (SegList.empty())
|
if (SegList.empty())
|
||||||
return OpRef::undef(Ty);
|
return OpRef::undef(Ty);
|
||||||
|
|
||||||
|
// NOTE:
|
||||||
|
// In the following part of the function, where the segments are rearranged,
|
||||||
|
// the shuffle mask SM can be of any length that is a multiple of a vector
|
||||||
|
// (i.e. a multiple of 2*SegLen), and non-zero.
|
||||||
|
// The output segment map is computed, and it may have any even number of
|
||||||
|
// entries, but the rearrangement of input segments will be done based only
|
||||||
|
// on the first two (non-undef) entries in the segment map.
|
||||||
|
// For example, if the output map is 3, 1, 1, 3 (it can have at most two
|
||||||
|
// distinct entries!), the segments 1 and 3 of Va/Vb will be packaged into
|
||||||
|
// a single vector V = 3:1. The output mask will then be updated to use
|
||||||
|
// seg(0,V), seg(1,V), seg(1,V), seg(0,V).
|
||||||
|
//
|
||||||
|
// Picking the segments based on the output map is an optimization. For
|
||||||
|
// correctness it is only necessary that Seg0 and Seg1 are the two input
|
||||||
|
// segments that are used in the output.
|
||||||
|
|
||||||
|
unsigned Seg0 = ~0u, Seg1 = ~0u;
|
||||||
|
for (int I = 0, E = SegMap.size(); I != E; ++I) {
|
||||||
|
unsigned X = SegMap[I];
|
||||||
|
if (X == ~0u)
|
||||||
|
continue;
|
||||||
|
if (Seg0 == ~0u)
|
||||||
|
Seg0 = X;
|
||||||
|
else if (Seg1 != ~0u)
|
||||||
|
break;
|
||||||
|
if (X == ~1u || X != Seg0)
|
||||||
|
Seg1 = X;
|
||||||
|
}
|
||||||
|
|
||||||
if (SegCount == 1) {
|
if (SegCount == 1) {
|
||||||
unsigned SrcOp = SegList[0] / 2;
|
unsigned SrcOp = SegList[0] / 2;
|
||||||
for (int I = 0, E = SM.Mask.size(); I != E; ++I) {
|
for (int I = 0; I != static_cast<int>(VecLen); ++I) {
|
||||||
int M = SM.Mask[I];
|
int M = SM.Mask[I];
|
||||||
if (M >= 0) {
|
if (M >= 0) {
|
||||||
M -= SrcOp * HwLen;
|
M -= SrcOp * HwLen;
|
||||||
|
@ -1167,58 +1204,69 @@ OpRef HvxSelector::packss(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (SegCount == 2) {
|
if (SegCount == 2) {
|
||||||
SmallVector<unsigned, 4> SegMap = getOutputSegmentMap(SM.Mask, HwLen/2);
|
// Seg0 should not be undef here: this would imply a SegList
|
||||||
unsigned Seg0 = SegMap[0], Seg1 = SegMap[1];
|
// with <= 1 elements, which was checked earlier.
|
||||||
|
assert(Seg0 != ~0u);
|
||||||
|
|
||||||
// Both output segments shouldn't be undef here: this would imply
|
// If Seg0 or Seg1 are "multi-defined", pick them from the input
|
||||||
// empty SegList, which was checked above.
|
// segment list instead.
|
||||||
assert(Seg0 != ~0u || Seg1 != ~0u);
|
if (Seg0 == ~1u || Seg1 == ~1u) {
|
||||||
|
if (Seg0 == Seg1) {
|
||||||
if (Seg0 != ~1u && Seg1 != ~1u) {
|
Seg0 = SegList[0];
|
||||||
const SDLoc &dl(Results.InpNode);
|
Seg1 = SegList[1];
|
||||||
Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(HwLen/2, dl)});
|
} else if (Seg0 == ~1u) {
|
||||||
OpRef HL = OpRef::res(Results.top());
|
Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
|
||||||
|
|
||||||
// Va = AB, Vb = CD
|
|
||||||
|
|
||||||
if (Seg0 / 2 == Seg1 / 2) {
|
|
||||||
// Same input vector.
|
|
||||||
Va = Inp[Seg0 / 2];
|
|
||||||
if (Seg0 > Seg1) {
|
|
||||||
// Swap halves.
|
|
||||||
Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
|
|
||||||
Va = OpRef::res(Results.top());
|
|
||||||
}
|
|
||||||
packSegmentMask(SM.Mask, SegMap, HwLen/2, MaskH);
|
|
||||||
} else if (Seg0 % 2 == Seg1 % 2) {
|
|
||||||
// Picking AC, BD, CA, or DB.
|
|
||||||
// vshuff(CD,AB,HL) -> BD:AC
|
|
||||||
// vshuff(AB,CD,HL) -> DB:CA
|
|
||||||
auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va) // AC or BD
|
|
||||||
: std::make_pair(Va, Vb); // CA or DB
|
|
||||||
Results.push(Hexagon::V6_vshuffvdd, PairTy, {Vs.first, Vs.second, HL});
|
|
||||||
OpRef P = OpRef::res(Results.top());
|
|
||||||
Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(P) : OpRef::hi(P);
|
|
||||||
packSegmentMask(SM.Mask, SegMap, HwLen/2, MaskH);
|
|
||||||
} else {
|
} else {
|
||||||
// Picking AD, BC, CB, or DA.
|
assert(Seg1 == ~1u); // Sanity
|
||||||
if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
|
Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
|
||||||
// AD or BC: this can be done using vmux.
|
}
|
||||||
// Q = V6_pred_scalar2 HwLen/2
|
}
|
||||||
// V = V6_vmux Q, (Va, Vb) or (Vb, Va)
|
assert(Seg0 != ~1u && Seg1 != ~1u);
|
||||||
Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
|
|
||||||
OpRef Qt = OpRef::res(Results.top());
|
assert(Seg0 != Seg1 && "Expecting different segments");
|
||||||
auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb) // AD
|
const SDLoc &dl(Results.InpNode);
|
||||||
: std::make_pair(Vb, Va); // CB
|
Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(SegLen, dl)});
|
||||||
Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
|
OpRef HL = OpRef::res(Results.top());
|
||||||
Va = OpRef::res(Results.top());
|
|
||||||
packSegmentMask(SM.Mask, SegMap, HwLen/2, MaskH);
|
// Va = AB, Vb = CD
|
||||||
} else {
|
|
||||||
// BC or DA: this could be done via valign by HwLen/2.
|
if (Seg0 / 2 == Seg1 / 2) {
|
||||||
// Do nothing here, because valign (if possible) will be generated
|
// Same input vector.
|
||||||
// later on (make sure the Seg0 values are as expected, for sanity).
|
Va = Inp[Seg0 / 2];
|
||||||
assert(Seg0 == 1 || Seg0 == 3);
|
if (Seg0 > Seg1) {
|
||||||
}
|
// Swap halves.
|
||||||
|
Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
|
||||||
|
Va = OpRef::res(Results.top());
|
||||||
|
}
|
||||||
|
packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
|
||||||
|
} else if (Seg0 % 2 == Seg1 % 2) {
|
||||||
|
// Picking AC, BD, CA, or DB.
|
||||||
|
// vshuff(CD,AB,HL) -> BD:AC
|
||||||
|
// vshuff(AB,CD,HL) -> DB:CA
|
||||||
|
auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va) // AC or BD
|
||||||
|
: std::make_pair(Va, Vb); // CA or DB
|
||||||
|
Results.push(Hexagon::V6_vshuffvdd, PairTy, {Vs.first, Vs.second, HL});
|
||||||
|
OpRef P = OpRef::res(Results.top());
|
||||||
|
Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(P) : OpRef::hi(P);
|
||||||
|
packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
|
||||||
|
} else {
|
||||||
|
// Picking AD, BC, CB, or DA.
|
||||||
|
if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
|
||||||
|
// AD or BC: this can be done using vmux.
|
||||||
|
// Q = V6_pred_scalar2 SegLen
|
||||||
|
// V = V6_vmux Q, (Va, Vb) or (Vb, Va)
|
||||||
|
Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
|
||||||
|
OpRef Qt = OpRef::res(Results.top());
|
||||||
|
auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb) // AD
|
||||||
|
: std::make_pair(Vb, Va); // CB
|
||||||
|
Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
|
||||||
|
Va = OpRef::res(Results.top());
|
||||||
|
packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
|
||||||
|
} else {
|
||||||
|
// BC or DA: this could be done via valign by SegLen.
|
||||||
|
// Do nothing here, because valign (if possible) will be generated
|
||||||
|
// later on (make sure the Seg0 values are as expected, for sanity).
|
||||||
|
assert(Seg0 == 1 || Seg0 == 3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1226,6 +1274,7 @@ OpRef HvxSelector::packss(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
// Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
|
// Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
|
||||||
|
|
||||||
ShuffleMask SMH(MaskH);
|
ShuffleMask SMH(MaskH);
|
||||||
|
assert(SMH.Mask.size() == VecLen);
|
||||||
SmallVector<int,128> MaskA(SMH.Mask.begin(), SMH.Mask.end());
|
SmallVector<int,128> MaskA(SMH.Mask.begin(), SMH.Mask.end());
|
||||||
|
|
||||||
if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
|
if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
|
||||||
|
@ -1239,6 +1288,7 @@ OpRef HvxSelector::packss(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ShuffleMask SMA(MaskA);
|
ShuffleMask SMA(MaskA);
|
||||||
|
assert(SMA.Mask.size() == VecLen);
|
||||||
|
|
||||||
if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
|
if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
|
||||||
int ShiftR = SMA.MinSrc;
|
int ShiftR = SMA.MinSrc;
|
||||||
|
@ -1249,7 +1299,7 @@ OpRef HvxSelector::packss(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
}
|
}
|
||||||
OpRef RetVal = valign(Va, Vb, ShiftR, Ty, Results);
|
OpRef RetVal = valign(Va, Vb, ShiftR, Ty, Results);
|
||||||
|
|
||||||
for (int I = 0, E = SMA.Mask.size(); I != E; ++I) {
|
for (int I = 0; I != static_cast<int>(VecLen); ++I) {
|
||||||
int M = SMA.Mask[I];
|
int M = SMA.Mask[I];
|
||||||
if (M != -1)
|
if (M != -1)
|
||||||
M -= SMA.MinSrc;
|
M -= SMA.MinSrc;
|
||||||
|
@ -1269,7 +1319,7 @@ OpRef HvxSelector::packss(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
BitVector Picked(HwLen);
|
BitVector Picked(HwLen);
|
||||||
SmallVector<uint8_t,128> MuxBytes(HwLen);
|
SmallVector<uint8_t,128> MuxBytes(HwLen);
|
||||||
bool CanMux = true;
|
bool CanMux = true;
|
||||||
for (int I = 0, E = SM.Mask.size(); I != E; ++I) {
|
for (int I = 0; I != static_cast<int>(VecLen); ++I) {
|
||||||
int M = SM.Mask[I];
|
int M = SM.Mask[I];
|
||||||
if (M == -1)
|
if (M == -1)
|
||||||
continue;
|
continue;
|
||||||
|
@ -1289,9 +1339,11 @@ OpRef HvxSelector::packss(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
return OpRef::fail();
|
return OpRef::fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Va, Vb are vector pairs, SM is a vector pair.
|
// Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
|
||||||
OpRef HvxSelector::packpp(ShuffleMask SM, OpRef Va, OpRef Vb,
|
// pack these vectors into a pair, and remap SM into NewMask to use the
|
||||||
ResultStack &Results, MutableArrayRef<int> NewMask) {
|
// new pair instead.
|
||||||
|
OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
|
ResultStack &Results, MutableArrayRef<int> NewMask) {
|
||||||
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
|
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
|
||||||
SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, HwLen);
|
SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, HwLen);
|
||||||
if (SegList.empty())
|
if (SegList.empty())
|
||||||
|
@ -1320,7 +1372,7 @@ OpRef HvxSelector::packpp(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
// NOTE: Using SegList as the packing map here (not SegMap). This works,
|
// NOTE: Using SegList as the packing map here (not SegMap). This works,
|
||||||
// because we're not concerned here about the order of the segments (i.e.
|
// because we're not concerned here about the order of the segments (i.e.
|
||||||
// single vectors) in the output pair. Changing the order of vectors is
|
// single vectors) in the output pair. Changing the order of vectors is
|
||||||
// free (as opposed to changing the order of vector halves as in packss),
|
// free (as opposed to changing the order of vector halves as in packs),
|
||||||
// and so there is no extra cost added in case the order needs to be
|
// and so there is no extra cost added in case the order needs to be
|
||||||
// changed later.
|
// changed later.
|
||||||
packSegmentMask(SM.Mask, SegList, HwLen, NewMask);
|
packSegmentMask(SM.Mask, SegList, HwLen, NewMask);
|
||||||
|
@ -1398,7 +1450,7 @@ OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
|
|
||||||
int VecLen = SM.Mask.size();
|
int VecLen = SM.Mask.size();
|
||||||
SmallVector<int,128> PackedMask(VecLen);
|
SmallVector<int,128> PackedMask(VecLen);
|
||||||
OpRef P = packss(SM, Va, Vb, Results, PackedMask);
|
OpRef P = packs(SM, Va, Vb, Results, PackedMask);
|
||||||
if (P.isValid())
|
if (P.isValid())
|
||||||
return shuffs1(ShuffleMask(PackedMask), P, Results);
|
return shuffs1(ShuffleMask(PackedMask), P, Results);
|
||||||
|
|
||||||
|
@ -1424,12 +1476,27 @@ OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
|
|
||||||
OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
|
OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
|
||||||
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
|
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
|
||||||
|
int VecLen = SM.Mask.size();
|
||||||
|
|
||||||
if (isIdentity(SM.Mask))
|
if (isIdentity(SM.Mask))
|
||||||
return Va;
|
return Va;
|
||||||
if (isUndef(SM.Mask))
|
if (isUndef(SM.Mask))
|
||||||
return OpRef::undef(getPairVT(MVT::i8));
|
return OpRef::undef(getPairVT(MVT::i8));
|
||||||
|
|
||||||
|
SmallVector<int,128> PackedMask(VecLen);
|
||||||
|
OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
|
||||||
|
if (P.isValid()) {
|
||||||
|
ShuffleMask PM(PackedMask);
|
||||||
|
OpRef E = expanding(PM, P, Results);
|
||||||
|
if (E.isValid())
|
||||||
|
return E;
|
||||||
|
|
||||||
|
OpRef L = shuffs1(PM.lo(), P, Results);
|
||||||
|
OpRef H = shuffs1(PM.hi(), P, Results);
|
||||||
|
if (L.isValid() && H.isValid())
|
||||||
|
return concats(L, H, Results);
|
||||||
|
}
|
||||||
|
|
||||||
OpRef R = perfect(SM, Va, Results);
|
OpRef R = perfect(SM, Va, Results);
|
||||||
if (R.isValid())
|
if (R.isValid())
|
||||||
return R;
|
return R;
|
||||||
|
@ -1451,7 +1518,7 @@ OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
|
|
||||||
int VecLen = SM.Mask.size();
|
int VecLen = SM.Mask.size();
|
||||||
SmallVector<int,256> PackedMask(VecLen);
|
SmallVector<int,256> PackedMask(VecLen);
|
||||||
OpRef P = packpp(SM, Va, Vb, Results, PackedMask);
|
OpRef P = packp(SM, Va, Vb, Results, PackedMask);
|
||||||
if (P.isValid())
|
if (P.isValid())
|
||||||
return shuffp1(ShuffleMask(PackedMask), P, Results);
|
return shuffp1(ShuffleMask(PackedMask), P, Results);
|
||||||
|
|
||||||
|
@ -1776,6 +1843,60 @@ OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
return OpRef::res(Results.top());
|
return OpRef::res(Results.top());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try to select an expanding shuffle: one that uses all elements of the
// single vector Va and inserts them into a larger vector (a vector pair).
// The only instructions considered are
//
//   V6_vunpacku{b,h}  [*]
//
// [*] The unpack fills the upper elements with 0s, so it only qualifies
//     when those positions are "don't care" (-1) in the mask.
//
// V6_vunpacko{b,h} are deliberately not handled: they or the high byte/half
// into the result, so they are not shuffles.
//
// Returns a reference to the pushed unpack node when the mask matches, and
// OpRef::fail() otherwise. Nothing is pushed onto Results on failure.
OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});

  const int MaskLen = SM.Mask.size();
  assert(2*HwLen == unsigned(MaskLen) && "Expecting vector-pair type");

  // Measure the strip that begins at mask position Pos, where consecutive
  // elements are expected to differ by Inc. As used below, .first is the
  // strip's starting value and .second is its length.
  auto StripAt = [&](int Pos, int Inc) {
    return findStrip(SM.Mask.drop_front(Pos), Inc, MaskLen - Pos);
  };

  // The mask pattern accepted for vunpacku, written as the starting offsets
  // of consecutive strips (each strip has the same length L):
  //
  //   0, -1, L, -1, 2L, -1 ...
  const std::pair<int,unsigned> Lead = findStrip(SM.Mask, 1, MaskLen);

  // The very first strip has to start at element 0 of the input.
  if (Lead.first != 0)
    return OpRef::fail();

  // Only byte (1) and half-word (2) strips have a matching vunpacku.
  const unsigned Width = Lead.second;
  if (!(Width == 1 || Width == 2))
    return OpRef::fail();

  // A data strip followed by an equally long run of -1s forms one period.
  const int Period = 2 * static_cast<int>(Width);

  // Pass 1: every remaining data strip must have the lead strip's length
  // and start at input offset Pos/2 (the lead strip was measured above).
  int Pos = Period;
  while (Pos < MaskLen) {
    const auto Run = StripAt(Pos, 1);
    if (Run.second != Width || 2*Run.first != Pos)
      return OpRef::fail();
    Pos += Period;
  }

  // Pass 2: the positions between data strips must be runs of exactly
  // Width "don't care" (-1) entries.
  for (int Hole = static_cast<int>(Width); Hole < MaskLen; Hole += Period) {
    const auto Gap = StripAt(Hole, 0);
    const bool IsDontCare = Gap.first == -1 && Gap.second == Width;
    if (!IsDontCare)
      return OpRef::fail();
  }

  // Byte strips map to the byte unpack, half-word strips to the half-word
  // unpack.
  unsigned Opc = Hexagon::V6_vunpackuh;
  if (Width == 1)
    Opc = Hexagon::V6_vunpackub;

  Results.push(Opc, getPairVT(MVT::i8), {Va});
  return OpRef::res(Results.top());
}
|
||||||
|
|
||||||
OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
|
OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
|
||||||
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
|
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
|
||||||
// V6_vdeal{b,h}
|
// V6_vdeal{b,h}
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
|
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||||
|
|
||||||
|
define <256 x i8> @f0(<128 x i8> %a0) #0 {
|
||||||
|
; CHECK-LABEL: f0:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: {
|
||||||
|
; CHECK-NEXT: v1:0.uh = vunpack(v0.ub)
|
||||||
|
; CHECK-NEXT: jumpr r31
|
||||||
|
; CHECK-NEXT: }
|
||||||
|
%v0 = shufflevector <128 x i8> %a0, <128 x i8> undef, <256 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15, i32 undef, i32 16, i32 undef, i32 17, i32 undef, i32 18, i32 undef, i32 19, i32 undef, i32 20, i32 undef, i32 21, i32 undef, i32 22, i32 undef, i32 23, i32 undef, i32 24, i32 undef, i32 25, i32 undef, i32 26, i32 undef, i32 27, i32 undef, i32 28, i32 undef, i32 29, i32 undef, i32 30, i32 undef, i32 31, i32 undef, i32 32, i32 undef, i32 33, i32 undef, i32 34, i32 undef, i32 35, i32 undef, i32 36, i32 undef, i32 37, i32 undef, i32 38, i32 undef, i32 39, i32 undef, i32 40, i32 undef, i32 41, i32 undef, i32 42, i32 undef, i32 43, i32 undef, i32 44, i32 undef, i32 45, i32 undef, i32 46, i32 undef, i32 47, i32 undef, i32 48, i32 undef, i32 49, i32 undef, i32 50, i32 undef, i32 51, i32 undef, i32 52, i32 undef, i32 53, i32 undef, i32 54, i32 undef, i32 55, i32 undef, i32 56, i32 undef, i32 57, i32 undef, i32 58, i32 undef, i32 59, i32 undef, i32 60, i32 undef, i32 61, i32 undef, i32 62, i32 undef, i32 63, i32 undef, i32 64, i32 undef, i32 65, i32 undef, i32 66, i32 undef, i32 67, i32 undef, i32 68, i32 undef, i32 69, i32 undef, i32 70, i32 undef, i32 71, i32 undef, i32 72, i32 undef, i32 73, i32 undef, i32 74, i32 undef, i32 75, i32 undef, i32 76, i32 undef, i32 77, i32 undef, i32 78, i32 undef, i32 79, i32 undef, i32 80, i32 undef, i32 81, i32 undef, i32 82, i32 undef, i32 83, i32 undef, i32 84, i32 undef, i32 85, i32 undef, i32 86, i32 undef, i32 87, i32 undef, i32 88, i32 undef, i32 89, i32 undef, i32 90, i32 undef, i32 91, i32 undef, i32 92, i32 undef, i32 93, i32 undef, i32 94, i32 undef, i32 95, i32 undef, i32 96, i32 undef, i32 97, i32 undef, i32 98, i32 undef, i32 99, i32 undef, i32 100, i32 undef, i32 101, i32 undef, 
i32 102, i32 undef, i32 103, i32 undef, i32 104, i32 undef, i32 105, i32 undef, i32 106, i32 undef, i32 107, i32 undef, i32 108, i32 undef, i32 109, i32 undef, i32 110, i32 undef, i32 111, i32 undef, i32 112, i32 undef, i32 113, i32 undef, i32 114, i32 undef, i32 115, i32 undef, i32 116, i32 undef, i32 117, i32 undef, i32 118, i32 undef, i32 119, i32 undef, i32 120, i32 undef, i32 121, i32 undef, i32 122, i32 undef, i32 123, i32 undef, i32 124, i32 undef, i32 125, i32 undef, i32 126, i32 undef, i32 127, i32 undef>
|
||||||
|
ret <256 x i8> %v0
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
define <256 x i8> @f1(<128 x i8> %a0) #0 {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: {
|
||||||
|
; CHECK-NEXT: v1:0.uw = vunpack(v0.uh)
|
||||||
|
; CHECK-NEXT: jumpr r31
|
||||||
|
; CHECK-NEXT: }
|
||||||
|
%v0 = shufflevector <128 x i8> %a0, <128 x i8> undef, <256 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 4, i32 5, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 10, i32 11, i32 undef, i32 undef, i32 12, i32 13, i32 undef, i32 undef, i32 14, i32 15, i32 undef, i32 undef, i32 16, i32 17, i32 undef, i32 undef, i32 18, i32 19, i32 undef, i32 undef, i32 20, i32 21, i32 undef, i32 undef, i32 22, i32 23, i32 undef, i32 undef, i32 24, i32 25, i32 undef, i32 undef, i32 26, i32 27, i32 undef, i32 undef, i32 28, i32 29, i32 undef, i32 undef, i32 30, i32 31, i32 undef, i32 undef, i32 32, i32 33, i32 undef, i32 undef, i32 34, i32 35, i32 undef, i32 undef, i32 36, i32 37, i32 undef, i32 undef, i32 38, i32 39, i32 undef, i32 undef, i32 40, i32 41, i32 undef, i32 undef, i32 42, i32 43, i32 undef, i32 undef, i32 44, i32 45, i32 undef, i32 undef, i32 46, i32 47, i32 undef, i32 undef, i32 48, i32 49, i32 undef, i32 undef, i32 50, i32 51, i32 undef, i32 undef, i32 52, i32 53, i32 undef, i32 undef, i32 54, i32 55, i32 undef, i32 undef, i32 56, i32 57, i32 undef, i32 undef, i32 58, i32 59, i32 undef, i32 undef, i32 60, i32 61, i32 undef, i32 undef, i32 62, i32 63, i32 undef, i32 undef, i32 64, i32 65, i32 undef, i32 undef, i32 66, i32 67, i32 undef, i32 undef, i32 68, i32 69, i32 undef, i32 undef, i32 70, i32 71, i32 undef, i32 undef, i32 72, i32 73, i32 undef, i32 undef, i32 74, i32 75, i32 undef, i32 undef, i32 76, i32 77, i32 undef, i32 undef, i32 78, i32 79, i32 undef, i32 undef, i32 80, i32 81, i32 undef, i32 undef, i32 82, i32 83, i32 undef, i32 undef, i32 84, i32 85, i32 undef, i32 undef, i32 86, i32 87, i32 undef, i32 undef, i32 88, i32 89, i32 undef, i32 undef, i32 90, i32 91, i32 undef, i32 undef, i32 92, i32 93, i32 undef, i32 undef, i32 94, i32 95, i32 undef, i32 undef, i32 96, i32 97, i32 undef, i32 undef, i32 98, i32 99, i32 undef, i32 undef, i32 100, i32 101, i32 undef, i32 undef, 
i32 102, i32 103, i32 undef, i32 undef, i32 104, i32 105, i32 undef, i32 undef, i32 106, i32 107, i32 undef, i32 undef, i32 108, i32 109, i32 undef, i32 undef, i32 110, i32 111, i32 undef, i32 undef, i32 112, i32 113, i32 undef, i32 undef, i32 114, i32 115, i32 undef, i32 undef, i32 116, i32 117, i32 undef, i32 undef, i32 118, i32 119, i32 undef, i32 undef, i32 120, i32 121, i32 undef, i32 undef, i32 122, i32 123, i32 undef, i32 undef, i32 124, i32 125, i32 undef, i32 undef, i32 126, i32 127, i32 undef, i32 undef>
|
||||||
|
ret <256 x i8> %v0
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind readnone "target-features"="+hvx,+hvx-length128b" }
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
|
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||||
|
|
||||||
|
define <128 x i8> @f0(<64 x i8> %a0) #0 {
|
||||||
|
; CHECK-LABEL: f0:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: {
|
||||||
|
; CHECK-NEXT: v1:0.uh = vunpack(v0.ub)
|
||||||
|
; CHECK-NEXT: jumpr r31
|
||||||
|
; CHECK-NEXT: }
|
||||||
|
%v0 = shufflevector <64 x i8> %a0, <64 x i8> undef, <128 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15, i32 undef, i32 16, i32 undef, i32 17, i32 undef, i32 18, i32 undef, i32 19, i32 undef, i32 20, i32 undef, i32 21, i32 undef, i32 22, i32 undef, i32 23, i32 undef, i32 24, i32 undef, i32 25, i32 undef, i32 26, i32 undef, i32 27, i32 undef, i32 28, i32 undef, i32 29, i32 undef, i32 30, i32 undef, i32 31, i32 undef, i32 32, i32 undef, i32 33, i32 undef, i32 34, i32 undef, i32 35, i32 undef, i32 36, i32 undef, i32 37, i32 undef, i32 38, i32 undef, i32 39, i32 undef, i32 40, i32 undef, i32 41, i32 undef, i32 42, i32 undef, i32 43, i32 undef, i32 44, i32 undef, i32 45, i32 undef, i32 46, i32 undef, i32 47, i32 undef, i32 48, i32 undef, i32 49, i32 undef, i32 50, i32 undef, i32 51, i32 undef, i32 52, i32 undef, i32 53, i32 undef, i32 54, i32 undef, i32 55, i32 undef, i32 56, i32 undef, i32 57, i32 undef, i32 58, i32 undef, i32 59, i32 undef, i32 60, i32 undef, i32 61, i32 undef, i32 62, i32 undef, i32 63, i32 undef>
|
||||||
|
ret <128 x i8> %v0
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
define <128 x i8> @f1(<64 x i8> %a0) #0 {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: {
|
||||||
|
; CHECK-NEXT: v1:0.uw = vunpack(v0.uh)
|
||||||
|
; CHECK-NEXT: jumpr r31
|
||||||
|
; CHECK-NEXT: }
|
||||||
|
%v0 = shufflevector <64 x i8> %a0, <64 x i8> undef, <128 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 4, i32 5, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 10, i32 11, i32 undef, i32 undef, i32 12, i32 13, i32 undef, i32 undef, i32 14, i32 15, i32 undef, i32 undef, i32 16, i32 17, i32 undef, i32 undef, i32 18, i32 19, i32 undef, i32 undef, i32 20, i32 21, i32 undef, i32 undef, i32 22, i32 23, i32 undef, i32 undef, i32 24, i32 25, i32 undef, i32 undef, i32 26, i32 27, i32 undef, i32 undef, i32 28, i32 29, i32 undef, i32 undef, i32 30, i32 31, i32 undef, i32 undef, i32 32, i32 33, i32 undef, i32 undef, i32 34, i32 35, i32 undef, i32 undef, i32 36, i32 37, i32 undef, i32 undef, i32 38, i32 39, i32 undef, i32 undef, i32 40, i32 41, i32 undef, i32 undef, i32 42, i32 43, i32 undef, i32 undef, i32 44, i32 45, i32 undef, i32 undef, i32 46, i32 47, i32 undef, i32 undef, i32 48, i32 49, i32 undef, i32 undef, i32 50, i32 51, i32 undef, i32 undef, i32 52, i32 53, i32 undef, i32 undef, i32 54, i32 55, i32 undef, i32 undef, i32 56, i32 57, i32 undef, i32 undef, i32 58, i32 59, i32 undef, i32 undef, i32 60, i32 61, i32 undef, i32 undef, i32 62, i32 63, i32 undef, i32 undef>
|
||||||
|
ret <128 x i8> %v0
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind readnone "target-features"="+hvx,+hvx-length64b" }
|
||||||
|
|
Loading…
Reference in New Issue