Refactor the stride-4 code in X86InterleavedAccess (NFC)

llvm-svn: 313166
This commit is contained in:
Michael Zuckerman 2017-09-13 18:28:09 +00:00
parent 3dcd122151
commit 80d3649f23
1 changed files with 32 additions and 34 deletions

View File

@ -210,6 +210,14 @@ static void createConcatShuffleMask(int NumElements,
Mask.push_back(i + Offset + NumElements); Mask.push_back(i + Offset + NumElements);
} }
// Rescales a vector type: the result has half as many elements as VT (integer
// division), and each element is an integer twice as wide as VT's elements.
// For an even element count the overall vector width is therefore unchanged,
// e.g. 32 x i8 becomes 16 x i16.
static MVT scaleVectorType(MVT VT) {
  const unsigned HalvedNumElts = VT.getVectorNumElements() / 2;
  const unsigned DoubledEltBits =
      VT.getVectorElementType().getScalarSizeInBits() * 2;
  // The widened element is always an integer type, whatever VT's element was.
  MVT WideEltVT = MVT::getIntegerVT(DoubledEltBits);
  return MVT::getVectorVT(WideEltVT, HalvedNumElts);
}
void X86InterleavedAccessGroup::interleave8bitStride4( void X86InterleavedAccessGroup::interleave8bitStride4(
ArrayRef<Instruction *> Matrix, SmallVectorImpl<Value *> &TransposedMatrix, ArrayRef<Instruction *> Matrix, SmallVectorImpl<Value *> &TransposedMatrix,
unsigned numberOfElement) { unsigned numberOfElement) {
@ -220,49 +228,32 @@ void X86InterleavedAccessGroup::interleave8bitStride4(
// Matrix[2]= y0 y1 y2 y3 y4 ... y31 // Matrix[2]= y0 y1 y2 y3 y4 ... y31
// Matrix[3]= k0 k1 k2 k3 k4 ... k31 // Matrix[3]= k0 k1 k2 k3 k4 ... k31
Type *VecTyepVt = VectorType::get(Type::getInt8Ty(Shuffles[0]->getContext()), MVT VT = MVT::getVectorVT(MVT::i8, numberOfElement);
numberOfElement); MVT HalfVT = scaleVectorType(VT);
Type *VecTyepVtHalf = VectorType::get(
Type::getInt16Ty(Shuffles[0]->getContext()), numberOfElement / 2);
MVT VT = MVT::getVT(VecTyepVt);
MVT HalfVT = MVT::getVT(VecTyepVtHalf);
TransposedMatrix.resize(4); TransposedMatrix.resize(4);
SmallVector<uint32_t, 32> MaskHigh;
SmallVector<uint32_t, 32> MaskHighTemp; SmallVector<uint32_t, 32> MaskLow;
SmallVector<uint32_t, 32> MaskLowTemp;
SmallVector<uint32_t, 32> MaskHighTemp1; SmallVector<uint32_t, 32> MaskHighTemp1;
SmallVector<uint32_t, 32> MaskLowTemp1; SmallVector<uint32_t, 32> MaskLowTemp1;
SmallVector<uint32_t, 32> MaskHighTemp2; SmallVector<uint32_t, 32> MaskHighWord;
SmallVector<uint32_t, 32> MaskLowTemp2; SmallVector<uint32_t, 32> MaskLowWord;
SmallVector<uint32_t, 32> ConcatLow; SmallVector<uint32_t, 32> ConcatLow;
SmallVector<uint32_t, 32> ConcatHigh; SmallVector<uint32_t, 32> ConcatHigh;
// MaskHighTemp and MaskLowTemp built in the vpunpckhbw and vpunpcklbw X86 // MaskHighTemp and MaskLowTemp built in the vpunpckhbw and vpunpcklbw X86
// shuffle pattern. // shuffle pattern.
createUnpackShuffleMask<uint32_t>(VT, MaskHighTemp, false, false); createUnpackShuffleMask<uint32_t>(VT, MaskHigh, false, false);
createUnpackShuffleMask<uint32_t>(VT, MaskLowTemp, true, false); createUnpackShuffleMask<uint32_t>(VT, MaskLow, true, false);
ArrayRef<uint32_t> MaskHigh = makeArrayRef(MaskHighTemp);
ArrayRef<uint32_t> MaskLow = makeArrayRef(MaskLowTemp);
// ConcatHigh and ConcatLow built in the vperm2i128 and vinserti128 X86
// shuffle pattern.
createConcatShuffleMask(32, ConcatLow, true);
createConcatShuffleMask(32, ConcatHigh, false);
ArrayRef<uint32_t> MaskConcatLow = makeArrayRef(ConcatLow);
ArrayRef<uint32_t> MaskConcatHigh = makeArrayRef(ConcatHigh);
// MaskHighTemp1 and MaskLowTemp1 built in the vpunpckhdw and vpunpckldw X86 // MaskHighTemp1 and MaskLowTemp1 built in the vpunpckhdw and vpunpckldw X86
// shuffle pattern. // shuffle pattern.
createUnpackShuffleMask<uint32_t>(HalfVT, MaskLowTemp1, true, false); createUnpackShuffleMask<uint32_t>(HalfVT, MaskLowTemp1, true, false);
createUnpackShuffleMask<uint32_t>(HalfVT, MaskHighTemp1, false, false); createUnpackShuffleMask<uint32_t>(HalfVT, MaskHighTemp1, false, false);
scaleShuffleMask<uint32_t>(2, makeArrayRef(MaskHighTemp1), MaskHighTemp2); scaleShuffleMask<uint32_t>(2, MaskHighTemp1, MaskHighWord);
scaleShuffleMask<uint32_t>(2, makeArrayRef(MaskLowTemp1), MaskLowTemp2); scaleShuffleMask<uint32_t>(2, MaskLowTemp1, MaskLowWord);
ArrayRef<uint32_t> MaskHighWord = makeArrayRef(MaskHighTemp2);
ArrayRef<uint32_t> MaskLowWord = makeArrayRef(MaskLowTemp2);
// IntrVec1Low = c0 m0 c1 m1 ... c7 m7 | c16 m16 c17 m17 ... c23 m23 // IntrVec1Low = c0 m0 c1 m1 ... c7 m7 | c16 m16 c17 m17 ... c23 m23
// IntrVec1High = c8 m8 c9 m9 ... c15 m15 | c24 m24 c25 m25 ... c31 m31 // IntrVec1High = c8 m8 c9 m9 ... c15 m15 | c24 m24 c25 m25 ... c31 m31
@ -299,16 +290,22 @@ void X86InterleavedAccessGroup::interleave8bitStride4(
TransposedMatrix[3] = High1; TransposedMatrix[3] = High1;
return; return;
} }
// cmyk0 cmyk1 cmyk2 cmyk3 | cmyk4 cmyk5 cmyk6 cmyk7
// cmyk8 cmyk9 cmyk10 cmyk11 | cmyk12 cmyk13 cmyk14 cmyk15 // cmyk0 cmyk1 cmyk2 cmyk3 | cmyk4 cmyk5 cmyk6 cmyk7
// cmyk8 cmyk9 cmyk10 cmyk11 | cmyk12 cmyk13 cmyk14 cmyk15
// cmyk16 cmyk17 cmyk18 cmyk19 | cmyk20 cmyk21 cmyk22 cmyk23 // cmyk16 cmyk17 cmyk18 cmyk19 | cmyk20 cmyk21 cmyk22 cmyk23
// cmyk24 cmyk25 cmyk26 cmyk27 | cmyk28 cmyk29 cmyk30 cmyk31 // cmyk24 cmyk25 cmyk26 cmyk27 | cmyk28 cmyk29 cmyk30 cmyk31
TransposedMatrix[0] = Builder.CreateShuffleVector(Low, High, MaskConcatLow); // ConcatHigh and ConcatLow built in the vperm2i128 and vinserti128 X86
TransposedMatrix[1] = Builder.CreateShuffleVector(Low1, High1, MaskConcatLow); // shuffle pattern.
TransposedMatrix[2] = Builder.CreateShuffleVector(Low, High, MaskConcatHigh); SmallVector<uint32_t, 32> ConcatHigh12, ConcatHigh13;
TransposedMatrix[3] = createConcatShuffleMask(numberOfElement, ConcatLow, true);
Builder.CreateShuffleVector(Low1, High1, MaskConcatHigh); createConcatShuffleMask(numberOfElement, ConcatHigh, false);
TransposedMatrix[0] = Builder.CreateShuffleVector(Low, High, ConcatLow);
TransposedMatrix[1] = Builder.CreateShuffleVector(Low1, High1, ConcatLow);
TransposedMatrix[2] = Builder.CreateShuffleVector(Low, High, ConcatHigh);
TransposedMatrix[3] = Builder.CreateShuffleVector(Low1, High1, ConcatHigh);
} }
// createShuffleStride returns shuffle mask of size N. // createShuffleStride returns shuffle mask of size N.
@ -606,3 +603,4 @@ bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI,
return Grp.isSupported() && Grp.lowerIntoOptimizedSequence(); return Grp.isSupported() && Grp.lowerIntoOptimizedSequence();
} }