forked from OSchip/llvm-project
Also handle ConstantAggregateZero when optimizing vpermilvar*.
llvm-svn: 207582
This commit is contained in:
parent
35907d8e23
commit
85f3610222
|
@ -724,11 +724,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
case Intrinsic::x86_avx_vpermilvar_pd_256: {
|
||||
// Convert vpermil* to shufflevector if the mask is constant.
|
||||
Value *V = II->getArgOperand(1);
|
||||
unsigned Size = cast<VectorType>(V->getType())->getNumElements();
|
||||
assert(Size == 8 || Size == 4 || Size == 2);
|
||||
uint32_t Indexes[8];
|
||||
if (auto C = dyn_cast<ConstantDataVector>(V)) {
|
||||
unsigned Size = C->getNumElements();
|
||||
assert(Size == 8 || Size == 4 || Size == 2);
|
||||
uint32_t Indexes[8];
|
||||
|
||||
// The intrinsics only read one or two bits, clear the rest.
|
||||
for (unsigned I = 0; I < Size; ++I) {
|
||||
uint32_t Index = C->getElementAsInteger(I) & 0x3;
|
||||
|
@ -737,23 +736,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
Index >>= 1;
|
||||
Indexes[I] = Index;
|
||||
}
|
||||
|
||||
// The _256 variants are a bit trickier since the mask bits always index
|
||||
// into the corresponding 128 half. In order to convert to a generic
|
||||
// shuffle, we have to make that explicit.
|
||||
if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
|
||||
II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
|
||||
for (unsigned I = Size / 2; I < Size; ++I)
|
||||
Indexes[I] += Size / 2;
|
||||
}
|
||||
auto NewC =
|
||||
ConstantDataVector::get(C->getContext(), makeArrayRef(Indexes, Size));
|
||||
auto V1 = II->getArgOperand(0);
|
||||
auto V2 = UndefValue::get(V1->getType());
|
||||
auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
|
||||
return ReplaceInstUsesWith(CI, Shuffle);
|
||||
} else if (isa<ConstantAggregateZero>(V)) {
|
||||
for (unsigned I = 0; I < Size; ++I)
|
||||
Indexes[I] = 0;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
break;
|
||||
// The _256 variants are a bit trickier since the mask bits always index
|
||||
// into the corresponding 128 half. In order to convert to a generic
|
||||
// shuffle, we have to make that explicit.
|
||||
if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
|
||||
II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
|
||||
for (unsigned I = Size / 2; I < Size; ++I)
|
||||
Indexes[I] += Size / 2;
|
||||
}
|
||||
auto NewC =
|
||||
ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size));
|
||||
auto V1 = II->getArgOperand(0);
|
||||
auto V2 = UndefValue::get(V1->getType());
|
||||
auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
|
||||
return ReplaceInstUsesWith(CI, Shuffle);
|
||||
}
|
||||
|
||||
case Intrinsic::ppc_altivec_vperm:
|
||||
|
|
|
@ -339,6 +339,34 @@ define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
|
|||
ret <4 x double> %a
|
||||
}
|
||||
|
||||
; Calls the 128-bit vpermilvar.ps intrinsic with an all-zero (zeroinitializer)
; mask. The FileCheck expectation below is a shufflevector of %v with undef
; whose mask is likewise zeroinitializer, i.e. every result lane selects
; element 0 of %v.
define <4 x float> @test_vpermilvar_ps_zero(<4 x float> %v) {
|
||||
; CHECK-LABEL: @test_vpermilvar_ps_zero(
|
||||
; CHECK: shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer)
|
||||
ret <4 x float> %a
|
||||
}
|
||||
|
||||
; Calls the 256-bit vpermilvar.ps intrinsic with an all-zero (zeroinitializer)
; mask. The expected shufflevector mask is NOT all zeros: lanes 0-3 select
; element 0 and lanes 4-7 select element 4, so each half of the result reads
; the first element of its own half of %v.
define <8 x float> @test_vpermilvar_ps_256_zero(<8 x float> %v) {
|
||||
; CHECK-LABEL: @test_vpermilvar_ps_256_zero(
|
||||
; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
|
||||
%a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer)
|
||||
ret <8 x float> %a
|
||||
}
|
||||
|
||||
; Calls the 128-bit vpermilvar.pd intrinsic with an all-zero (zeroinitializer)
; mask. The FileCheck expectation below is a shufflevector of %v with undef
; whose mask is zeroinitializer, i.e. both result lanes select element 0.
; NOTE(review): the mask operand is typed <2 x i32> here; the intrinsic's
; declaration is not visible in this view -- confirm the mask type matches it.
define <2 x double> @test_vpermilvar_pd_zero(<2 x double> %v) {
|
||||
; CHECK-LABEL: @test_vpermilvar_pd_zero(
|
||||
; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
|
||||
%a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> zeroinitializer)
|
||||
ret <2 x double> %a
|
||||
}
|
||||
|
||||
; Calls the 256-bit vpermilvar.pd intrinsic with an all-zero (zeroinitializer)
; mask. The expected shufflevector mask is <0, 0, 2, 2>: lanes 0-1 select
; element 0 and lanes 2-3 select element 2, so each half of the result reads
; the first element of its own half of %v.
; NOTE(review): the mask operand is typed <4 x i32> here; the intrinsic's
; declaration is not visible in this view -- confirm the mask type matches it.
define <4 x double> @test_vpermilvar_pd_256_zero(<4 x double> %v) {
|
||||
; CHECK-LABEL: @test_vpermilvar_pd_256_zero(
|
||||
; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
|
||||
%a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> zeroinitializer)
|
||||
ret <4 x double> %a
|
||||
}
|
||||
|
||||
define <2 x i64> @test_sse2_1() nounwind readnone uwtable {
|
||||
%S = bitcast i32 1 to i32
|
||||
%1 = zext i32 %S to i64
|
||||
|
|
Loading…
Reference in New Issue