forked from OSchip/llvm-project
In the common case where we are shuffling a vector, emit an
LLVM vector shuffle instead of a bunch of insert/extract operations.

For:
    vec4 = vec4.yyyy; // splat

Emit:
    %tmp1 = shufflevector <4 x float> %tmp, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >

instead of:
    %tmp1 = extractelement <4 x float> %tmp, i32 1
    %tmp2 = insertelement <4 x float> undef, float %tmp1, i32 0
    %tmp3 = extractelement <4 x float> %tmp, i32 1
    %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
    %tmp5 = extractelement <4 x float> %tmp, i32 1
    %tmp6 = insertelement <4 x float> %tmp4, float %tmp5, i32 2
    %tmp7 = extractelement <4 x float> %tmp, i32 1
    %tmp8 = insertelement <4 x float> %tmp6, float %tmp7, i32 3

llvm-svn: 40779
This commit is contained in:
parent
177bd450e0
commit
fb837dccac
|
@ -298,14 +298,31 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, QualType ExprType) {
|
||||||
return RValue::get(Builder.CreateExtractElement(Vec, Elt, "tmp"));
|
return RValue::get(Builder.CreateExtractElement(Vec, Elt, "tmp"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the source and destination have the same number of elements, use a
|
||||||
|
// vector shuffle instead of insert/extracts.
|
||||||
|
unsigned NumResultElts = cast<VectorType>(ExprType)->getNumElements();
|
||||||
|
unsigned NumSourceElts =
|
||||||
|
cast<llvm::VectorType>(Vec->getType())->getNumElements();
|
||||||
|
|
||||||
unsigned NumElts = cast<VectorType>(ExprType)->getNumElements();
|
if (NumResultElts == NumSourceElts) {
|
||||||
|
llvm::SmallVector<llvm::Constant*, 4> Mask;
|
||||||
|
for (unsigned i = 0; i != NumResultElts; ++i) {
|
||||||
|
unsigned InIdx = OCUVectorComponent::getAccessedFieldNo(i, EncFields);
|
||||||
|
Mask.push_back(llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx));
|
||||||
|
}
|
||||||
|
|
||||||
|
llvm::Value *MaskV = llvm::ConstantVector::get(&Mask[0], Mask.size());
|
||||||
|
Vec = Builder.CreateShuffleVector(Vec,
|
||||||
|
llvm::UndefValue::get(Vec->getType()),
|
||||||
|
MaskV, "tmp");
|
||||||
|
return RValue::get(Vec);
|
||||||
|
}
|
||||||
|
|
||||||
// Start out with an undef of the result type.
|
// Start out with an undef of the result type.
|
||||||
llvm::Value *Result = llvm::UndefValue::get(ConvertType(ExprType));
|
llvm::Value *Result = llvm::UndefValue::get(ConvertType(ExprType));
|
||||||
|
|
||||||
// Extract/Insert each element of the result.
|
// Extract/Insert each element of the result.
|
||||||
for (unsigned i = 0; i != NumElts; ++i) {
|
for (unsigned i = 0; i != NumResultElts; ++i) {
|
||||||
unsigned InIdx = OCUVectorComponent::getAccessedFieldNo(i, EncFields);
|
unsigned InIdx = OCUVectorComponent::getAccessedFieldNo(i, EncFields);
|
||||||
llvm::Value *Elt = llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx);
|
llvm::Value *Elt = llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx);
|
||||||
Elt = Builder.CreateExtractElement(Vec, Elt, "tmp");
|
Elt = Builder.CreateExtractElement(Vec, Elt, "tmp");
|
||||||
|
|
Loading…
Reference in New Issue