forked from OSchip/llvm-project
[X86][AutoUpgrade] Make some tweaks to reduce the number of nested if/else in the intrinsic upgrade code to avoid an MSVC compiler limit.
MSVC has a nesting limit of around 110-130. Each `else if` in an if/else if/else if chain counts as another nesting level against this limit. The autoupgrade code consists of a long chain of these, checking matches against strings. This commit moves a large if/else chain that was inside one of the blocks out into a separate helper function. There are more of these we could move, or we could change some to lookup tables. I've also merged together a few similar blocks in the outer chain. This should buy us some margin for a little while. llvm-svn: 350564
This commit is contained in:
parent
33c9088783
commit
81fe1fbf4a
|
@ -933,6 +933,67 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
|
|||
return EmitX86Select(Builder, Mask, Align, Passthru);
|
||||
}
|
||||
|
||||
/// Rewrite a legacy masked vpermi2var/vpermt2var call as a call to the
/// matching x86_avx512_vpermi2var_* intrinsic followed by an explicit select.
///
/// \param Builder   IRBuilder used to emit the replacement instructions.
/// \param CI        The call being upgraded. Its result type selects the
///                  target intrinsic, operands 0-2 feed the new call, and
///                  operand 3 is passed to EmitX86Select as the mask.
/// \param ZeroMask  If true, the select's pass-through value is an all-zero
///                  constant of the result type; otherwise it is operand 1
///                  bitcast to the result type.
/// \param IndexForm If false, operands 0 and 1 are exchanged before being
///                  passed to the new intrinsic.
/// \returns the value produced by EmitX86Select.
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *ResultTy = CI.getType();
  const unsigned TotalBits = ResultTy->getPrimitiveSizeInBits();
  const unsigned LaneBits = ResultTy->getScalarSizeInBits();
  const bool IsFP = ResultTy->isFPOrFPVectorTy();

  // Dispatch on the element width first, then on the full vector width.
  // Only the 32- and 64-bit element cases distinguish floating point from
  // integer; the 16- and 8-bit cases ignore IsFP.
  Intrinsic::ID IID;
  switch (LaneBits) {
  case 32:
    switch (TotalBits) {
    case 128:
      IID = IsFP ? Intrinsic::x86_avx512_vpermi2var_ps_128
                 : Intrinsic::x86_avx512_vpermi2var_d_128;
      break;
    case 256:
      IID = IsFP ? Intrinsic::x86_avx512_vpermi2var_ps_256
                 : Intrinsic::x86_avx512_vpermi2var_d_256;
      break;
    case 512:
      IID = IsFP ? Intrinsic::x86_avx512_vpermi2var_ps_512
                 : Intrinsic::x86_avx512_vpermi2var_d_512;
      break;
    default:
      llvm_unreachable("Unexpected intrinsic");
    }
    break;
  case 64:
    switch (TotalBits) {
    case 128:
      IID = IsFP ? Intrinsic::x86_avx512_vpermi2var_pd_128
                 : Intrinsic::x86_avx512_vpermi2var_q_128;
      break;
    case 256:
      IID = IsFP ? Intrinsic::x86_avx512_vpermi2var_pd_256
                 : Intrinsic::x86_avx512_vpermi2var_q_256;
      break;
    case 512:
      IID = IsFP ? Intrinsic::x86_avx512_vpermi2var_pd_512
                 : Intrinsic::x86_avx512_vpermi2var_q_512;
      break;
    default:
      llvm_unreachable("Unexpected intrinsic");
    }
    break;
  case 16:
    switch (TotalBits) {
    case 128:
      IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
      break;
    case 256:
      IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
      break;
    case 512:
      IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
      break;
    default:
      llvm_unreachable("Unexpected intrinsic");
    }
    break;
  case 8:
    switch (TotalBits) {
    case 128:
      IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
      break;
    case 256:
      IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
      break;
    case 512:
      IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
      break;
    default:
      llvm_unreachable("Unexpected intrinsic");
    }
    break;
  default:
    llvm_unreachable("Unexpected intrinsic");
  }

  // In index form the operands are forwarded in their original order;
  // otherwise operands 0 and 1 trade places.
  const unsigned LeadIdx = IndexForm ? 0 : 1;
  Value *CallArgs[] = { CI.getArgOperand(LeadIdx),
                        CI.getArgOperand(1 - LeadIdx),
                        CI.getArgOperand(2) };

  Function *Decl = Intrinsic::getDeclaration(CI.getModule(), IID);
  Value *Permuted = Builder.CreateCall(Decl, CallArgs);

  // The pass-through is built after the call so instructions are emitted in
  // the same order as before.
  Value *Fallback;
  if (ZeroMask)
    Fallback = ConstantAggregateZero::get(ResultTy);
  else
    Fallback = Builder.CreateBitCast(CI.getArgOperand(1), ResultTy);

  return EmitX86Select(Builder, CI.getArgOperand(3), Permuted, Fallback);
}
|
||||
|
||||
static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
|
||||
bool IsSigned, bool IsAddition) {
|
||||
Type *Ty = CI.getType();
|
||||
|
@ -2406,24 +2467,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
|
||||
Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
|
||||
Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
|
||||
CI->getArgOperand(1));
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.por.")) {
|
||||
Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
|
||||
Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.and.")) {
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
|
||||
Name.startswith("avx512.mask.pand."))) {
|
||||
VectorType *FTy = cast<VectorType>(CI->getType());
|
||||
VectorType *ITy = VectorType::getInteger(FTy);
|
||||
Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
|
||||
|
@ -2431,7 +2476,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
Rep = Builder.CreateBitCast(Rep, FTy);
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
|
||||
Name.startswith("avx512.mask.pandn."))) {
|
||||
VectorType *FTy = cast<VectorType>(CI->getType());
|
||||
VectorType *ITy = VectorType::getInteger(FTy);
|
||||
Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
|
||||
|
@ -2440,7 +2486,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
Rep = Builder.CreateBitCast(Rep, FTy);
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.or.")) {
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
|
||||
Name.startswith("avx512.mask.por."))) {
|
||||
VectorType *FTy = cast<VectorType>(CI->getType());
|
||||
VectorType *ITy = VectorType::getInteger(FTy);
|
||||
Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
|
||||
|
@ -2448,7 +2495,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
Rep = Builder.CreateBitCast(Rep, FTy);
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
|
||||
Name.startswith("avx512.mask.pxor."))) {
|
||||
VectorType *FTy = cast<VectorType>(CI->getType());
|
||||
VectorType *ITy = VectorType::getInteger(FTy);
|
||||
Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
|
||||
|
@ -2532,26 +2580,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
}
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
|
||||
Name.startswith("avx512.mask.min.p")) &&
|
||||
Name.drop_front(18) == ".512") {
|
||||
Intrinsic::ID IID;
|
||||
if (Name[17] == 's')
|
||||
IID = Intrinsic::x86_avx512_max_ps_512;
|
||||
else
|
||||
IID = Intrinsic::x86_avx512_max_pd_512;
|
||||
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
|
||||
{ CI->getArgOperand(0), CI->getArgOperand(1),
|
||||
CI->getArgOperand(4) });
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
|
||||
Name.drop_front(18) == ".512") {
|
||||
Intrinsic::ID IID;
|
||||
if (Name[17] == 's')
|
||||
IID = Intrinsic::x86_avx512_min_ps_512;
|
||||
else
|
||||
IID = Intrinsic::x86_avx512_min_pd_512;
|
||||
bool IsDouble = Name[17] == 'd';
|
||||
bool IsMin = Name[13] == 'i';
|
||||
static const Intrinsic::ID MinMaxTbl[2][2] = {
|
||||
{ Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
|
||||
{ Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
|
||||
};
|
||||
Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
|
||||
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
|
||||
{ CI->getArgOperand(0), CI->getArgOperand(1),
|
||||
|
@ -3095,62 +3133,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
Name.startswith("avx512.maskz.vpermt2var."))) {
|
||||
bool ZeroMask = Name[11] == 'z';
|
||||
bool IndexForm = Name[17] == 'i';
|
||||
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
|
||||
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
|
||||
bool IsFloat = CI->getType()->isFPOrFPVectorTy();
|
||||
Intrinsic::ID IID;
|
||||
if (VecWidth == 128 && EltWidth == 32 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
|
||||
else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_d_128;
|
||||
else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
|
||||
else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_q_128;
|
||||
else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
|
||||
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_d_256;
|
||||
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
|
||||
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_q_256;
|
||||
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
|
||||
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_d_512;
|
||||
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
|
||||
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_q_512;
|
||||
else if (VecWidth == 128 && EltWidth == 16)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
|
||||
else if (VecWidth == 256 && EltWidth == 16)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
|
||||
else if (VecWidth == 512 && EltWidth == 16)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
|
||||
else if (VecWidth == 128 && EltWidth == 8)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
|
||||
else if (VecWidth == 256 && EltWidth == 8)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
|
||||
else if (VecWidth == 512 && EltWidth == 8)
|
||||
IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
|
||||
else
|
||||
llvm_unreachable("Unexpected intrinsic");
|
||||
|
||||
Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
|
||||
CI->getArgOperand(2) };
|
||||
|
||||
// If this isn't index form we need to swap operand 0 and 1.
|
||||
if (!IndexForm)
|
||||
std::swap(Args[0], Args[1]);
|
||||
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
|
||||
Args);
|
||||
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
|
||||
: Builder.CreateBitCast(CI->getArgOperand(1),
|
||||
CI->getType());
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
|
||||
Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
|
||||
Name.startswith("avx512.maskz.vpdpbusd.") ||
|
||||
Name.startswith("avx512.mask.vpdpbusds.") ||
|
||||
|
|
Loading…
Reference in New Issue