Upgrade calls to CreateShuffleVector to use the preferred form of passing an array of ints

No functionality change intended.
This commit is contained in:
Benjamin Kramer 2020-04-15 12:41:54 +02:00
parent 5b4b3e0b6e
commit 6f64daca8f
14 changed files with 117 additions and 164 deletions

View File

@@ -5711,7 +5711,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vext_v: case NEON::BI__builtin_neon_vext_v:
case NEON::BI__builtin_neon_vextq_v: { case NEON::BI__builtin_neon_vextq_v: {
int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
SmallVector<uint32_t, 16> Indices; SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
Indices.push_back(i+CV); Indices.push_back(i+CV);
@@ -5983,7 +5983,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr; Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) { for (unsigned vi = 0; vi != 2; ++vi) {
SmallVector<uint32_t, 16> Indices; SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
Indices.push_back(i+vi); Indices.push_back(i+vi);
Indices.push_back(i+e+vi); Indices.push_back(i+e+vi);
@@ -6011,7 +6011,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr; Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) { for (unsigned vi = 0; vi != 2; ++vi) {
SmallVector<uint32_t, 16> Indices; SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
Indices.push_back(2*i+vi); Indices.push_back(2*i+vi);
@@ -6029,7 +6029,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr; Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) { for (unsigned vi = 0; vi != 2; ++vi) {
SmallVector<uint32_t, 16> Indices; SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
Indices.push_back((i + vi*e) >> 1); Indices.push_back((i + vi*e) >> 1);
Indices.push_back(((i + vi*e) >> 1)+e); Indices.push_back(((i + vi*e) >> 1)+e);
@@ -6120,7 +6120,7 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
TblOps.push_back(ExtOp); TblOps.push_back(ExtOp);
// Build a vector containing sequential number like (0, 1, 2, ..., 15) // Build a vector containing sequential number like (0, 1, 2, ..., 15)
SmallVector<uint32_t, 16> Indices; SmallVector<int, 16> Indices;
llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
Indices.push_back(2*i); Indices.push_back(2*i);
@@ -6957,7 +6957,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
if (VTy->getElementType()->isIntegerTy(64)) { if (VTy->getElementType()->isIntegerTy(64)) {
// Extract the other lane. // Extract the other lane.
Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
// Load the value as a one-element vector. // Load the value as a one-element vector.
@@ -6967,9 +6967,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
Value *Align = getAlignmentValue32(PtrOp0); Value *Align = getAlignmentValue32(PtrOp0);
Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
// Combine them. // Combine them.
uint32_t Indices[] = {1 - Lane, Lane}; int Indices[] = {1 - Lane, Lane};
SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
} }
LLVM_FALLTHROUGH; LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vld1_lane_v: { case NEON::BI__builtin_neon_vld1_lane_v: {
@@ -7144,7 +7143,7 @@ static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) { static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
// Make a shufflevector that extracts every other element of a vector (evens // Make a shufflevector that extracts every other element of a vector (evens
// or odds, as desired). // or odds, as desired).
SmallVector<uint32_t, 16> Indices; SmallVector<int, 16> Indices;
unsigned InputElements = unsigned InputElements =
cast<llvm::VectorType>(V->getType())->getNumElements(); cast<llvm::VectorType>(V->getType())->getNumElements();
for (unsigned i = 0; i < InputElements; i += 2) for (unsigned i = 0; i < InputElements; i += 2)
@@ -7157,7 +7156,7 @@ static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
llvm::Value *V1) { llvm::Value *V1) {
// Make a shufflevector that interleaves two vectors element by element. // Make a shufflevector that interleaves two vectors element by element.
assert(V0->getType() == V1->getType() && "Can't zip different vector types"); assert(V0->getType() == V1->getType() && "Can't zip different vector types");
SmallVector<uint32_t, 16> Indices; SmallVector<int, 16> Indices;
unsigned InputElements = unsigned InputElements =
cast<llvm::VectorType>(V0->getType())->getNumElements(); cast<llvm::VectorType>(V0->getType())->getNumElements();
for (unsigned i = 0; i < InputElements; i++) { for (unsigned i = 0; i < InputElements; i++) {
@@ -7185,7 +7184,7 @@ static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
unsigned ReverseWidth) { unsigned ReverseWidth) {
// MVE-specific helper function which reverses the elements of a // MVE-specific helper function which reverses the elements of a
// vector within every (ReverseWidth)-bit collection of lanes. // vector within every (ReverseWidth)-bit collection of lanes.
SmallVector<uint32_t, 16> Indices; SmallVector<int, 16> Indices;
unsigned LaneSize = V->getType()->getScalarSizeInBits(); unsigned LaneSize = V->getType()->getScalarSizeInBits();
unsigned Elements = 128 / LaneSize; unsigned Elements = 128 / LaneSize;
unsigned Mask = ReverseWidth / LaneSize - 1; unsigned Mask = ReverseWidth / LaneSize - 1;
@@ -9971,7 +9970,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr; Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) { for (unsigned vi = 0; vi != 2; ++vi) {
SmallVector<uint32_t, 16> Indices; SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
Indices.push_back(i+vi); Indices.push_back(i+vi);
Indices.push_back(i+e+vi); Indices.push_back(i+e+vi);
@@ -9990,7 +9989,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr; Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) { for (unsigned vi = 0; vi != 2; ++vi) {
SmallVector<uint32_t, 16> Indices; SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
Indices.push_back(2*i+vi); Indices.push_back(2*i+vi);
@@ -10008,7 +10007,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr; Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) { for (unsigned vi = 0; vi != 2; ++vi) {
SmallVector<uint32_t, 16> Indices; SmallVector<int, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
Indices.push_back((i + vi*e) >> 1); Indices.push_back((i + vi*e) >> 1);
Indices.push_back(((i + vi*e) >> 1)+e); Indices.push_back(((i + vi*e) >> 1)+e);
@@ -10132,7 +10131,7 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
// If we have less than 8 elements, then the starting mask was an i8 and // If we have less than 8 elements, then the starting mask was an i8 and
// we need to extract down to the right number of elements. // we need to extract down to the right number of elements.
if (NumElts < 8) { if (NumElts < 8) {
uint32_t Indices[4]; int Indices[4];
for (unsigned i = 0; i != NumElts; ++i) for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i; Indices[i] = i;
MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
@@ -10321,7 +10320,7 @@ static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
} }
if (NumElts < 8) { if (NumElts < 8) {
uint32_t Indices[8]; int Indices[8];
for (unsigned i = 0; i != NumElts; ++i) for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i; Indices[i] = i;
for (unsigned i = NumElts; i != 8; ++i) for (unsigned i = NumElts; i != 8; ++i)
@@ -10661,9 +10660,8 @@ static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
// Extract the subvector. // Extract the subvector.
if (NumDstElts != cast<llvm::VectorType>(Src->getType())->getNumElements()) { if (NumDstElts != cast<llvm::VectorType>(Src->getType())->getNumElements()) {
assert(NumDstElts == 4 && "Unexpected vector size"); assert(NumDstElts == 4 && "Unexpected vector size");
uint32_t ShuffleMask[4] = {0, 1, 2, 3};
Src = CGF.Builder.CreateShuffleVector(Src, UndefValue::get(Src->getType()), Src = CGF.Builder.CreateShuffleVector(Src, UndefValue::get(Src->getType()),
ShuffleMask); ArrayRef<int>{0, 1, 2, 3});
} }
// Bitcast from vXi16 to vXf16. // Bitcast from vXi16 to vXf16.
@@ -11545,7 +11543,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Index &= SubVectors - 1; // Remove any extra bits. Index &= SubVectors - 1; // Remove any extra bits.
Index *= NumElts; Index *= NumElts;
uint32_t Indices[16]; int Indices[16];
for (unsigned i = 0; i != NumElts; ++i) for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i + Index; Indices[i] = i + Index;
@@ -11585,7 +11583,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Index &= SubVectors - 1; // Remove any extra bits. Index &= SubVectors - 1; // Remove any extra bits.
Index *= SrcNumElts; Index *= SrcNumElts;
uint32_t Indices[16]; int Indices[16];
for (unsigned i = 0; i != DstNumElts; ++i) for (unsigned i = 0; i != DstNumElts; ++i)
Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i; Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
@@ -11646,7 +11644,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
uint32_t Indices[16]; int Indices[16];
// If there are more than 8 elements, the immediate is used twice so make // If there are more than 8 elements, the immediate is used twice so make
// sure we handle that. // sure we handle that.
for (unsigned i = 0; i != NumElts; ++i) for (unsigned i = 0; i != NumElts; ++i)
@@ -11666,7 +11664,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Splat the 8-bits of immediate 4 times to help the loop wrap around. // Splat the 8-bits of immediate 4 times to help the loop wrap around.
Imm = (Imm & 0xff) * 0x01010101; Imm = (Imm & 0xff) * 0x01010101;
uint32_t Indices[32]; int Indices[32];
for (unsigned l = 0; l != NumElts; l += 8) { for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i) { for (unsigned i = 0; i != 4; ++i) {
Indices[l + i] = l + (Imm & 3); Indices[l + i] = l + (Imm & 3);
@@ -11690,7 +11688,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Splat the 8-bits of immediate 4 times to help the loop wrap around. // Splat the 8-bits of immediate 4 times to help the loop wrap around.
Imm = (Imm & 0xff) * 0x01010101; Imm = (Imm & 0xff) * 0x01010101;
uint32_t Indices[32]; int Indices[32];
for (unsigned l = 0; l != NumElts; l += 8) { for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i) for (unsigned i = 0; i != 4; ++i)
Indices[l + i] = l + i; Indices[l + i] = l + i;
@@ -11722,7 +11720,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Splat the 8-bits of immediate 4 times to help the loop wrap around. // Splat the 8-bits of immediate 4 times to help the loop wrap around.
Imm = (Imm & 0xff) * 0x01010101; Imm = (Imm & 0xff) * 0x01010101;
uint32_t Indices[16]; int Indices[16];
for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) { for (unsigned i = 0; i != NumLaneElts; ++i) {
Indices[i + l] = (Imm % NumLaneElts) + l; Indices[i + l] = (Imm % NumLaneElts) + l;
@@ -11773,7 +11771,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
unsigned NumElts = Ty->getNumElements(); unsigned NumElts = Ty->getNumElements();
// These intrinsics operate on 256-bit lanes of four 64-bit elements. // These intrinsics operate on 256-bit lanes of four 64-bit elements.
uint32_t Indices[8]; int Indices[8];
for (unsigned l = 0; l != NumElts; l += 4) for (unsigned l = 0; l != NumElts; l += 4)
for (unsigned i = 0; i != 4; ++i) for (unsigned i = 0; i != 4; ++i)
Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3); Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
@@ -11804,7 +11802,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
} }
uint32_t Indices[64]; int Indices[64];
// 256-bit palignr operates on 128-bit lanes so we need to handle that // 256-bit palignr operates on 128-bit lanes so we need to handle that
for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned l = 0; l != NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) { for (unsigned i = 0; i != 16; ++i) {
@@ -11832,7 +11830,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Mask the shift amount to width of two vectors. // Mask the shift amount to width of two vectors.
ShiftVal &= (2 * NumElts) - 1; ShiftVal &= (2 * NumElts) - 1;
uint32_t Indices[16]; int Indices[16];
for (unsigned i = 0; i != NumElts; ++i) for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i + ShiftVal; Indices[i] = i + ShiftVal;
@@ -11854,7 +11852,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2; unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
unsigned NumLaneElts = NumElts / NumLanes; unsigned NumLaneElts = NumElts / NumLanes;
uint32_t Indices[16]; int Indices[16];
for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
unsigned Index = (Imm % NumLanes) * NumLaneElts; unsigned Index = (Imm % NumLanes) * NumLaneElts;
Imm /= NumLanes; // Discard the bits we just used. Imm /= NumLanes; // Discard the bits we just used.
@@ -11884,7 +11882,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// duplicate sources, but this can be dealt with in the backend. // duplicate sources, but this can be dealt with in the backend.
Value *OutOps[2]; Value *OutOps[2];
uint32_t Indices[8]; int Indices[8];
for (unsigned l = 0; l != 2; ++l) { for (unsigned l = 0; l != 2; ++l) {
// Determine the source for this lane. // Determine the source for this lane.
if (Imm & (1 << ((l * 4) + 3))) if (Imm & (1 << ((l * 4) + 3)))
@@ -11922,7 +11920,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
if (ShiftVal >= 16) if (ShiftVal >= 16)
return llvm::Constant::getNullValue(ResultType); return llvm::Constant::getNullValue(ResultType);
uint32_t Indices[64]; int Indices[64];
// 256/512-bit pslldq operates on 128-bit lanes so we need to handle that // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned l = 0; l != NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) { for (unsigned i = 0; i != 16; ++i) {
@@ -11952,7 +11950,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
if (ShiftVal >= 16) if (ShiftVal >= 16)
return llvm::Constant::getNullValue(ResultType); return llvm::Constant::getNullValue(ResultType);
uint32_t Indices[64]; int Indices[64];
// 256/512-bit psrldq operates on 128-bit lanes so we need to handle that // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned l = 0; l != NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) { for (unsigned i = 0; i != 16; ++i) {
@@ -11982,7 +11980,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Value *In = getMaskVecValue(*this, Ops[0], NumElts); Value *In = getMaskVecValue(*this, Ops[0], NumElts);
uint32_t Indices[64]; int Indices[64];
for (unsigned i = 0; i != NumElts; ++i) for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = NumElts + i - ShiftVal; Indices[i] = NumElts + i - ShiftVal;
@@ -12004,7 +12002,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Value *In = getMaskVecValue(*this, Ops[0], NumElts); Value *In = getMaskVecValue(*this, Ops[0], NumElts);
uint32_t Indices[64]; int Indices[64];
for (unsigned i = 0; i != NumElts; ++i) for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i + ShiftVal; Indices[i] = i + ShiftVal;
@@ -12284,7 +12282,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
uint32_t Indices[64]; int Indices[64];
for (unsigned i = 0; i != NumElts; ++i) for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i; Indices[i] = i;
@@ -13443,15 +13441,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
if (getTarget().isLittleEndian()) { if (getTarget().isLittleEndian()) {
// Create a shuffle mask of (1, 0)
Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
ConstantInt::get(Int32Ty, 0)
};
Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
// Reverse the double words in the vector we will extract from. // Reverse the double words in the vector we will extract from.
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{1, 0});
// Reverse the index. // Reverse the index.
Index = MaxIndex - Index; Index = MaxIndex - Index;
@@ -13485,13 +13477,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
// Emit the call, then reverse the double words of the results vector. // Emit the call, then reverse the double words of the results vector.
Value *Call = Builder.CreateCall(F, Ops); Value *Call = Builder.CreateCall(F, Ops);
// Create a shuffle mask of (1, 0) Value *ShuffleCall =
Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
ConstantInt::get(Int32Ty, 0)
};
Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
return ShuffleCall; return ShuffleCall;
} else { } else {
Ops[1] = ConstantInt::getSigned(Int32Ty, Index); Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
@@ -13510,15 +13497,12 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
// Account for endianness by treating this as just a shuffle. So we use the // Account for endianness by treating this as just a shuffle. So we use the
// same indices for both LE and BE in order to produce expected results in // same indices for both LE and BE in order to produce expected results in
// both cases. // both cases.
unsigned ElemIdx0 = (Index & 2) >> 1; int ElemIdx0 = (Index & 2) >> 1;
unsigned ElemIdx1 = 2 + (Index & 1); int ElemIdx1 = 2 + (Index & 1);
Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
ConstantInt::get(Int32Ty, ElemIdx1)};
Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
Value *ShuffleCall = Value *ShuffleCall =
Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts);
QualType BIRetType = E->getType(); QualType BIRetType = E->getType();
auto RetTy = ConvertType(BIRetType); auto RetTy = ConvertType(BIRetType);
return Builder.CreateBitCast(ShuffleCall, RetTy); return Builder.CreateBitCast(ShuffleCall, RetTy);
@@ -13532,10 +13516,10 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
// Create a shuffle mask // Create a shuffle mask
unsigned ElemIdx0; int ElemIdx0;
unsigned ElemIdx1; int ElemIdx1;
unsigned ElemIdx2; int ElemIdx2;
unsigned ElemIdx3; int ElemIdx3;
if (getTarget().isLittleEndian()) { if (getTarget().isLittleEndian()) {
// Little endian element N comes from element 8+N-Index of the // Little endian element N comes from element 8+N-Index of the
// concatenated wide vector (of course, using modulo arithmetic on // concatenated wide vector (of course, using modulo arithmetic on
@@ -13552,14 +13536,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
ElemIdx3 = Index + 3; ElemIdx3 = Index + 3;
} }
Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
ConstantInt::get(Int32Ty, ElemIdx1),
ConstantInt::get(Int32Ty, ElemIdx2),
ConstantInt::get(Int32Ty, ElemIdx3)};
Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
Value *ShuffleCall = Value *ShuffleCall =
Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts);
QualType BIRetType = E->getType(); QualType BIRetType = E->getType();
auto RetTy = ConvertType(BIRetType); auto RetTy = ConvertType(BIRetType);
return Builder.CreateBitCast(ShuffleCall, RetTy); return Builder.CreateBitCast(ShuffleCall, RetTy);

View File

@@ -1745,12 +1745,9 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
// Handle vec3 special. // Handle vec3 special.
if (VecTy && VecTy->getNumElements() == 3) { if (VecTy && VecTy->getNumElements() == 3) {
// Our source is a vec3, do a shuffle vector to make it a vec4. // Our source is a vec3, do a shuffle vector to make it a vec4.
llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
Builder.getInt32(2),
llvm::UndefValue::get(Builder.getInt32Ty())};
llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy), Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
MaskV, "extractVec"); ArrayRef<int>{0, 1, 2, -1},
"extractVec");
SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
} }
if (Addr.getElementType() != SrcTy) { if (Addr.getElementType() != SrcTy) {
@@ -1886,13 +1883,12 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
// Always use shuffle vector to try to retain the original program structure // Always use shuffle vector to try to retain the original program structure
unsigned NumResultElts = ExprVT->getNumElements(); unsigned NumResultElts = ExprVT->getNumElements();
SmallVector<llvm::Constant*, 4> Mask; SmallVector<int, 4> Mask;
for (unsigned i = 0; i != NumResultElts; ++i) for (unsigned i = 0; i != NumResultElts; ++i)
Mask.push_back(Builder.getInt32(getAccessedFieldNo(i, Elts))); Mask.push_back(getAccessedFieldNo(i, Elts));
llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
Vec = Builder.CreateShuffleVector(Vec, llvm::UndefValue::get(Vec->getType()), Vec = Builder.CreateShuffleVector(Vec, llvm::UndefValue::get(Vec->getType()),
MaskV); Mask);
return RValue::get(Vec); return RValue::get(Vec);
} }
@@ -2133,32 +2129,27 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
// Use shuffle vector is the src and destination are the same number of // Use shuffle vector is the src and destination are the same number of
// elements and restore the vector mask since it is on the side it will be // elements and restore the vector mask since it is on the side it will be
// stored. // stored.
SmallVector<llvm::Constant*, 4> Mask(NumDstElts); SmallVector<int, 4> Mask(NumDstElts);
for (unsigned i = 0; i != NumSrcElts; ++i) for (unsigned i = 0; i != NumSrcElts; ++i)
Mask[getAccessedFieldNo(i, Elts)] = Builder.getInt32(i); Mask[getAccessedFieldNo(i, Elts)] = i;
llvm::Value *MaskV = llvm::ConstantVector::get(Mask); Vec = Builder.CreateShuffleVector(
Vec = Builder.CreateShuffleVector(SrcVal, SrcVal, llvm::UndefValue::get(Vec->getType()), Mask);
llvm::UndefValue::get(Vec->getType()),
MaskV);
} else if (NumDstElts > NumSrcElts) { } else if (NumDstElts > NumSrcElts) {
// Extended the source vector to the same length and then shuffle it // Extended the source vector to the same length and then shuffle it
// into the destination. // into the destination.
// FIXME: since we're shuffling with undef, can we just use the indices // FIXME: since we're shuffling with undef, can we just use the indices
// into that? This could be simpler. // into that? This could be simpler.
SmallVector<llvm::Constant*, 4> ExtMask; SmallVector<int, 4> ExtMask;
for (unsigned i = 0; i != NumSrcElts; ++i) for (unsigned i = 0; i != NumSrcElts; ++i)
ExtMask.push_back(Builder.getInt32(i)); ExtMask.push_back(i);
ExtMask.resize(NumDstElts, llvm::UndefValue::get(Int32Ty)); ExtMask.resize(NumDstElts, -1);
llvm::Value *ExtMaskV = llvm::ConstantVector::get(ExtMask); llvm::Value *ExtSrcVal = Builder.CreateShuffleVector(
llvm::Value *ExtSrcVal = SrcVal, llvm::UndefValue::get(SrcVal->getType()), ExtMask);
Builder.CreateShuffleVector(SrcVal,
llvm::UndefValue::get(SrcVal->getType()),
ExtMaskV);
// build identity // build identity
SmallVector<llvm::Constant*, 4> Mask; SmallVector<int, 4> Mask;
for (unsigned i = 0; i != NumDstElts; ++i) for (unsigned i = 0; i != NumDstElts; ++i)
Mask.push_back(Builder.getInt32(i)); Mask.push_back(i);
// When the vector size is odd and .odd or .hi is used, the last element // When the vector size is odd and .odd or .hi is used, the last element
// of the Elts constant array will be one past the size of the vector. // of the Elts constant array will be one past the size of the vector.
@@ -2168,9 +2159,8 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
// modify when what gets shuffled in // modify when what gets shuffled in
for (unsigned i = 0; i != NumSrcElts; ++i) for (unsigned i = 0; i != NumSrcElts; ++i)
Mask[getAccessedFieldNo(i, Elts)] = Builder.getInt32(i+NumDstElts); Mask[getAccessedFieldNo(i, Elts)] = i + NumDstElts;
llvm::Value *MaskV = llvm::ConstantVector::get(Mask); Vec = Builder.CreateShuffleVector(Vec, ExtSrcVal, Mask);
Vec = Builder.CreateShuffleVector(Vec, ExtSrcVal, MaskV);
} else { } else {
// We should never shorten the vector // We should never shorten the vector
llvm_unreachable("unexpected shorten vector length"); llvm_unreachable("unexpected shorten vector length");

View File

@@ -1650,18 +1650,17 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
Value* V1 = CGF.EmitScalarExpr(E->getExpr(0)); Value* V1 = CGF.EmitScalarExpr(E->getExpr(0));
Value* V2 = CGF.EmitScalarExpr(E->getExpr(1)); Value* V2 = CGF.EmitScalarExpr(E->getExpr(1));
SmallVector<llvm::Constant*, 32> indices; SmallVector<int, 32> Indices;
for (unsigned i = 2; i < E->getNumSubExprs(); ++i) { for (unsigned i = 2; i < E->getNumSubExprs(); ++i) {
llvm::APSInt Idx = E->getShuffleMaskIdx(CGF.getContext(), i-2); llvm::APSInt Idx = E->getShuffleMaskIdx(CGF.getContext(), i-2);
// Check for -1 and output it as undef in the IR. // Check for -1 and output it as undef in the IR.
if (Idx.isSigned() && Idx.isAllOnesValue()) if (Idx.isSigned() && Idx.isAllOnesValue())
indices.push_back(llvm::UndefValue::get(CGF.Int32Ty)); Indices.push_back(-1);
else else
indices.push_back(Builder.getInt32(Idx.getZExtValue())); Indices.push_back(Idx.getZExtValue());
} }
Value *SV = llvm::ConstantVector::get(indices); return Builder.CreateShuffleVector(V1, V2, Indices, "shuffle");
return Builder.CreateShuffleVector(V1, V2, SV, "shuffle");
} }
Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) {
@@ -4532,14 +4531,9 @@ Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) {
static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF, static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF,
Value *Src, unsigned NumElementsDst) { Value *Src, unsigned NumElementsDst) {
llvm::Value *UnV = llvm::UndefValue::get(Src->getType()); llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
SmallVector<llvm::Constant*, 4> Args; static constexpr int Mask[] = {0, 1, 2, -1};
Args.push_back(Builder.getInt32(0)); return Builder.CreateShuffleVector(Src, UnV,
Args.push_back(Builder.getInt32(1)); llvm::makeArrayRef(Mask, NumElementsDst));
Args.push_back(Builder.getInt32(2));
if (NumElementsDst == 4)
Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
llvm::Constant *Mask = llvm::ConstantVector::get(Args);
return Builder.CreateShuffleVector(Src, UnV, Mask);
} }
// Create cast instructions for converting LLVM value \p Src to LLVM type \p // Create cast instructions for converting LLVM value \p Src to LLVM type \p

View File

@@ -1230,7 +1230,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
// Create the final SVIs and replace all uses. // Create the final SVIs and replace all uses.
int i = 0; int i = 0;
for (auto &VI : InterleavedLoad) { for (auto &VI : InterleavedLoad) {
SmallVector<uint32_t, 4> Mask; SmallVector<int, 4> Mask;
for (unsigned j = 0; j < ElementsPerSVI; j++) for (unsigned j = 0; j < ElementsPerSVI; j++)
Mask.push_back(i + j * Factor); Mask.push_back(i + j * Factor);

View File

@@ -242,8 +242,8 @@ bool X86PartialReduction::tryMAddReplacement(Value *Op, BinaryOperator *Add) {
// Extract even elements and odd elements and add them together. This will // Extract even elements and odd elements and add them together. This will
// be pattern matched by SelectionDAG to pmaddwd. This instruction will be // be pattern matched by SelectionDAG to pmaddwd. This instruction will be
// half the original width. // half the original width.
SmallVector<uint32_t, 16> EvenMask(NumElts / 2); SmallVector<int, 16> EvenMask(NumElts / 2);
SmallVector<uint32_t, 16> OddMask(NumElts / 2); SmallVector<int, 16> OddMask(NumElts / 2);
for (int i = 0, e = NumElts / 2; i != e; ++i) { for (int i = 0, e = NumElts / 2; i != e; ++i) {
EvenMask[i] = i * 2; EvenMask[i] = i * 2;
OddMask[i] = i * 2 + 1; OddMask[i] = i * 2 + 1;
@@ -253,7 +253,7 @@ bool X86PartialReduction::tryMAddReplacement(Value *Op, BinaryOperator *Add) {
Value *MAdd = Builder.CreateAdd(EvenElts, OddElts); Value *MAdd = Builder.CreateAdd(EvenElts, OddElts);
// Concatenate zeroes to extend back to the original type. // Concatenate zeroes to extend back to the original type.
SmallVector<uint32_t, 32> ConcatMask(NumElts); SmallVector<int, 32> ConcatMask(NumElts);
std::iota(ConcatMask.begin(), ConcatMask.end(), 0); std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
Value *Zero = Constant::getNullValue(MAdd->getType()); Value *Zero = Constant::getNullValue(MAdd->getType());
Value *Concat = Builder.CreateShuffleVector(MAdd, Zero, ConcatMask); Value *Concat = Builder.CreateShuffleVector(MAdd, Zero, ConcatMask);
@ -339,7 +339,7 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) {
if (NumElts < 16) { if (NumElts < 16) {
// Pad input with zeroes. // Pad input with zeroes.
SmallVector<uint32_t, 32> ConcatMask(16); SmallVector<int, 32> ConcatMask(16);
for (unsigned i = 0; i != NumElts; ++i) for (unsigned i = 0; i != NumElts; ++i)
ConcatMask[i] = i; ConcatMask[i] = i;
for (unsigned i = NumElts; i != 16; ++i) for (unsigned i = NumElts; i != 16; ++i)
@ -360,7 +360,7 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) {
// First collect the pieces we need. // First collect the pieces we need.
SmallVector<Value *, 4> Ops(NumSplits); SmallVector<Value *, 4> Ops(NumSplits);
for (unsigned i = 0; i != NumSplits; ++i) { for (unsigned i = 0; i != NumSplits; ++i) {
SmallVector<uint32_t, 64> ExtractMask(IntrinsicNumElts); SmallVector<int, 64> ExtractMask(IntrinsicNumElts);
std::iota(ExtractMask.begin(), ExtractMask.end(), i * IntrinsicNumElts); std::iota(ExtractMask.begin(), ExtractMask.end(), i * IntrinsicNumElts);
Value *ExtractOp0 = Builder.CreateShuffleVector(Op0, Op0, ExtractMask); Value *ExtractOp0 = Builder.CreateShuffleVector(Op0, Op0, ExtractMask);
Value *ExtractOp1 = Builder.CreateShuffleVector(Op1, Op0, ExtractMask); Value *ExtractOp1 = Builder.CreateShuffleVector(Op1, Op0, ExtractMask);
@ -373,7 +373,7 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) {
for (unsigned s = Stages; s > 0; --s) { for (unsigned s = Stages; s > 0; --s) {
unsigned NumConcatElts = Ops[0]->getType()->getVectorNumElements() * 2; unsigned NumConcatElts = Ops[0]->getType()->getVectorNumElements() * 2;
for (unsigned i = 0; i != 1U << (s - 1); ++i) { for (unsigned i = 0; i != 1U << (s - 1); ++i) {
SmallVector<uint32_t, 64> ConcatMask(NumConcatElts); SmallVector<int, 64> ConcatMask(NumConcatElts);
std::iota(ConcatMask.begin(), ConcatMask.end(), 0); std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
Ops[i] = Builder.CreateShuffleVector(Ops[i*2], Ops[i*2+1], ConcatMask); Ops[i] = Builder.CreateShuffleVector(Ops[i*2], Ops[i*2+1], ConcatMask);
} }
@ -386,7 +386,7 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) {
// Extract down to 2 elements. // Extract down to 2 elements.
Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{0, 1}); Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{0, 1});
} else if (NumElts >= 8) { } else if (NumElts >= 8) {
SmallVector<uint32_t, 32> ConcatMask(NumElts); SmallVector<int, 32> ConcatMask(NumElts);
unsigned SubElts = Ops[0]->getType()->getVectorNumElements(); unsigned SubElts = Ops[0]->getType()->getVectorNumElements();
for (unsigned i = 0; i != SubElts; ++i) for (unsigned i = 0; i != SubElts; ++i)
ConcatMask[i] = i; ConcatMask[i] = i;

View File

@ -416,7 +416,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
Amt, DemandedUpper, II.getModule()->getDataLayout()); Amt, DemandedUpper, II.getModule()->getDataLayout());
if (KnownLowerBits.getMaxValue().ult(BitWidth) && if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
(DemandedUpper.isNullValue() || KnownUpperBits.isZero())) { (DemandedUpper.isNullValue() || KnownUpperBits.isZero())) {
SmallVector<uint32_t, 16> ZeroSplat(VWidth, 0); SmallVector<int, 16> ZeroSplat(VWidth, 0);
Amt = Builder.CreateShuffleVector(Amt, Amt, ZeroSplat); Amt = Builder.CreateShuffleVector(Amt, Amt, ZeroSplat);
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt) return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
: Builder.CreateLShr(Vec, Amt)) : Builder.CreateLShr(Vec, Amt))
@ -663,7 +663,7 @@ static Value *simplifyX86pack(IntrinsicInst &II,
Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1); Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
// Shuffle clamped args together at the lane level. // Shuffle clamped args together at the lane level.
SmallVector<unsigned, 32> PackMask; SmallVector<int, 32> PackMask;
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt) for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane)); PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
@ -760,7 +760,7 @@ static Value *simplifyX86insertps(const IntrinsicInst &II,
return ZeroVector; return ZeroVector;
// Initialize by passing all of the first source bits through. // Initialize by passing all of the first source bits through.
uint32_t ShuffleMask[4] = { 0, 1, 2, 3 }; int ShuffleMask[4] = {0, 1, 2, 3};
// We may replace the second operand with the zero vector. // We may replace the second operand with the zero vector.
Value *V1 = II.getArgOperand(1); Value *V1 = II.getArgOperand(1);

View File

@ -1158,7 +1158,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
DemandedElts.countTrailingZeros()); DemandedElts.countTrailingZeros());
} }
SmallVector<uint32_t, 8> EltMask; SmallVector<int, 8> EltMask;
unsigned NewLoadIdx = 0; unsigned NewLoadIdx = 0;
for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) { for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
if (!!DemandedElts[OrigLoadIdx]) if (!!DemandedElts[OrigLoadIdx])

View File

@ -2099,12 +2099,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (!BegIsAligned) { if (!BegIsAligned) {
// Shuffle the input so [0,NumElements) contains the output, and // Shuffle the input so [0,NumElements) contains the output, and
// [NumElems,SrcNumElems) is undef. // [NumElems,SrcNumElems) is undef.
SmallVector<Constant *, 16> ShuffleMask(SrcNumElems, SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
UndefValue::get(Int32Ty));
for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx); ShuffleMask[I] = Idx;
V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
ConstantVector::get(ShuffleMask), ShuffleMask,
SVI.getName() + ".extract"); SVI.getName() + ".extract");
BegIdx = 0; BegIdx = 0;
} }

View File

@ -916,21 +916,19 @@ public:
// If Col is 7 long and I is 2 and BlockNumElts is 2 the mask is: 0, 1, 7, // If Col is 7 long and I is 2 and BlockNumElts is 2 the mask is: 0, 1, 7,
// 8, 4, 5, 6 // 8, 4, 5, 6
SmallVector<Constant *, 16> Mask; SmallVector<int, 16> Mask;
unsigned i; unsigned i;
for (i = 0; i < I; i++) for (i = 0; i < I; i++)
Mask.push_back(Builder.getInt32(i)); Mask.push_back(i);
unsigned VecNumElts = cast<VectorType>(Col->getType())->getNumElements(); unsigned VecNumElts = cast<VectorType>(Col->getType())->getNumElements();
for (; i < I + BlockNumElts; i++) for (; i < I + BlockNumElts; i++)
Mask.push_back(Builder.getInt32(i - I + VecNumElts)); Mask.push_back(i - I + VecNumElts);
for (; i < VecNumElts; i++) for (; i < VecNumElts; i++)
Mask.push_back(Builder.getInt32(i)); Mask.push_back(i);
Value *MaskVal = ConstantVector::get(Mask); return Builder.CreateShuffleVector(Col, Block, Mask);
return Builder.CreateShuffleVector(Col, Block, MaskVal);
} }
Value *createMulAdd(Value *Sum, Value *A, Value *B, bool UseFPOp, Value *createMulAdd(Value *Sum, Value *A, Value *B, bool UseFPOp,

View File

@ -2207,12 +2207,12 @@ static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
return V; return V;
} }
SmallVector<Constant *, 8> Mask; SmallVector<int, 8> Mask;
Mask.reserve(NumElements); Mask.reserve(NumElements);
for (unsigned i = BeginIndex; i != EndIndex; ++i) for (unsigned i = BeginIndex; i != EndIndex; ++i)
Mask.push_back(IRB.getInt32(i)); Mask.push_back(i);
V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), Mask,
ConstantVector::get(Mask), Name + ".extract"); Name + ".extract");
LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n"); LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
return V; return V;
} }

View File

@ -917,19 +917,17 @@ llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
assert(isPowerOf2_32(VF) && assert(isPowerOf2_32(VF) &&
"Reduction emission only supported for pow2 vectors!"); "Reduction emission only supported for pow2 vectors!");
Value *TmpVec = Src; Value *TmpVec = Src;
SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); SmallVector<int, 32> ShuffleMask(VF);
for (unsigned i = VF; i != 1; i >>= 1) { for (unsigned i = VF; i != 1; i >>= 1) {
// Move the upper half of the vector to the lower half. // Move the upper half of the vector to the lower half.
for (unsigned j = 0; j != i / 2; ++j) for (unsigned j = 0; j != i / 2; ++j)
ShuffleMask[j] = Builder.getInt32(i / 2 + j); ShuffleMask[j] = i / 2 + j;
// Fill the rest of the mask with undef. // Fill the rest of the mask with undef.
std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
UndefValue::get(Builder.getInt32Ty()));
Value *Shuf = Builder.CreateShuffleVector( Value *Shuf = Builder.CreateShuffleVector(
TmpVec, UndefValue::get(TmpVec->getType()), TmpVec, UndefValue::get(TmpVec->getType()), ShuffleMask, "rdx.shuf");
ConstantVector::get(ShuffleMask), "rdx.shuf");
if (Op != Instruction::ICmp && Op != Instruction::FCmp) { if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
// The builder propagates its fast-math-flags setting. // The builder propagates its fast-math-flags setting.

View File

@ -2128,13 +2128,12 @@ void InnerLoopVectorizer::packScalarIntoVectorValue(
Value *InnerLoopVectorizer::reverseVector(Value *Vec) { Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
assert(Vec->getType()->isVectorTy() && "Invalid type"); assert(Vec->getType()->isVectorTy() && "Invalid type");
SmallVector<Constant *, 8> ShuffleMask; SmallVector<int, 8> ShuffleMask;
for (unsigned i = 0; i < VF; ++i) for (unsigned i = 0; i < VF; ++i)
ShuffleMask.push_back(Builder.getInt32(VF - i - 1)); ShuffleMask.push_back(VF - i - 1);
return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()), return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()),
ConstantVector::get(ShuffleMask), ShuffleMask, "reverse");
"reverse");
} }
// Return whether we allow using masked interleave-groups (for dealing with // Return whether we allow using masked interleave-groups (for dealing with
@ -3628,10 +3627,10 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// We will construct a vector for the recurrence by combining the values for // We will construct a vector for the recurrence by combining the values for
// the current and previous iterations. This is the required shuffle mask. // the current and previous iterations. This is the required shuffle mask.
SmallVector<Constant *, 8> ShuffleMask(VF); SmallVector<int, 8> ShuffleMask(VF);
ShuffleMask[0] = Builder.getInt32(VF - 1); ShuffleMask[0] = VF - 1;
for (unsigned I = 1; I < VF; ++I) for (unsigned I = 1; I < VF; ++I)
ShuffleMask[I] = Builder.getInt32(I + VF - 1); ShuffleMask[I] = I + VF - 1;
// The vector from which to take the initial value for the current iteration // The vector from which to take the initial value for the current iteration
// (actual or unrolled). Initially, this is the vector phi node. // (actual or unrolled). Initially, this is the vector phi node.
@ -3641,10 +3640,9 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
for (unsigned Part = 0; Part < UF; ++Part) { for (unsigned Part = 0; Part < UF; ++Part) {
Value *PreviousPart = getOrCreateVectorValue(Previous, Part); Value *PreviousPart = getOrCreateVectorValue(Previous, Part);
Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part); Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part);
auto *Shuffle = auto *Shuffle = VF > 1 ? Builder.CreateShuffleVector(Incoming, PreviousPart,
VF > 1 ? Builder.CreateShuffleVector(Incoming, PreviousPart, ShuffleMask)
ConstantVector::get(ShuffleMask)) : Incoming;
: Incoming;
PhiPart->replaceAllUsesWith(Shuffle); PhiPart->replaceAllUsesWith(Shuffle);
cast<Instruction>(PhiPart)->eraseFromParent(); cast<Instruction>(PhiPart)->eraseFromParent();
VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle); VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle);

View File

@ -1437,7 +1437,7 @@ private:
return VL.size() == ReuseShuffleIndices.size() && return VL.size() == ReuseShuffleIndices.size() &&
std::equal( std::equal(
VL.begin(), VL.end(), ReuseShuffleIndices.begin(), VL.begin(), VL.end(), ReuseShuffleIndices.begin(),
[this](Value *V, unsigned Idx) { return V == Scalars[Idx]; }); [this](Value *V, int Idx) { return V == Scalars[Idx]; });
} }
/// A vector of scalars. /// A vector of scalars.
@ -1451,7 +1451,7 @@ private:
EntryState State; EntryState State;
/// Does this sequence require some shuffling? /// Does this sequence require some shuffling?
SmallVector<unsigned, 4> ReuseShuffleIndices; SmallVector<int, 4> ReuseShuffleIndices;
/// Does this entry require reordering? /// Does this entry require reordering?
ArrayRef<unsigned> ReorderIndices; ArrayRef<unsigned> ReorderIndices;
@ -4027,9 +4027,9 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
V = SV->getOperand(0); V = SV->getOperand(0);
} else { } else {
// Reshuffle to get only unique values. // Reshuffle to get only unique values.
SmallVector<unsigned, 4> UniqueIdxs; SmallVector<int, 4> UniqueIdxs;
SmallSet<unsigned, 4> UsedIdxs; SmallSet<int, 4> UsedIdxs;
for(unsigned Idx : E->ReuseShuffleIndices) for (int Idx : E->ReuseShuffleIndices)
if (UsedIdxs.insert(Idx).second) if (UsedIdxs.insert(Idx).second)
UniqueIdxs.emplace_back(Idx); UniqueIdxs.emplace_back(Idx);
V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
@ -4046,7 +4046,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
ScalarTy = SI->getValueOperand()->getType(); ScalarTy = SI->getValueOperand()->getType();
// Check that every instruction appears once in this bundle. // Check that every instruction appears once in this bundle.
SmallVector<unsigned, 4> ReuseShuffleIndicies; SmallVector<int, 4> ReuseShuffleIndicies;
SmallVector<Value *, 4> UniqueValues; SmallVector<Value *, 4> UniqueValues;
if (VL.size() > 2) { if (VL.size() > 2) {
DenseMap<Value *, unsigned> UniquePositions; DenseMap<Value *, unsigned> UniquePositions;

View File

@ -237,16 +237,13 @@ static bool foldExtractExtract(Instruction &I, const TargetTransformInfo &TTI) {
uint64_t SplatIndex = ConvertToShuffle == Ext0 ? C0 : C1; uint64_t SplatIndex = ConvertToShuffle == Ext0 ? C0 : C1;
uint64_t CheapExtIndex = ConvertToShuffle == Ext0 ? C1 : C0; uint64_t CheapExtIndex = ConvertToShuffle == Ext0 ? C1 : C0;
auto *VecTy = cast<VectorType>(V0->getType()); auto *VecTy = cast<VectorType>(V0->getType());
Type *I32Ty = IntegerType::getInt32Ty(I.getContext()); SmallVector<int, 32> ShufMask(VecTy->getNumElements(), -1);
UndefValue *Undef = UndefValue::get(I32Ty); ShufMask[CheapExtIndex] = SplatIndex;
SmallVector<Constant *, 32> ShufMask(VecTy->getNumElements(), Undef);
ShufMask[CheapExtIndex] = ConstantInt::get(I32Ty, SplatIndex);
IRBuilder<> Builder(ConvertToShuffle); IRBuilder<> Builder(ConvertToShuffle);
// extelt X, C --> extelt (splat X), C' // extelt X, C --> extelt (splat X), C'
Value *Shuf = Builder.CreateShuffleVector(ConvertToShuffle->getOperand(0), Value *Shuf = Builder.CreateShuffleVector(ConvertToShuffle->getOperand(0),
UndefValue::get(VecTy), UndefValue::get(VecTy), ShufMask);
ConstantVector::get(ShufMask));
Value *NewExt = Builder.CreateExtractElement(Shuf, CheapExtIndex); Value *NewExt = Builder.CreateExtractElement(Shuf, CheapExtIndex);
if (ConvertToShuffle == Ext0) if (ConvertToShuffle == Ext0)
Ext0 = cast<Instruction>(NewExt); Ext0 = cast<Instruction>(NewExt);