[X86][AutoUpgrade] Make some tweaks to reduce the number of nested if/else in the intrinsic upgrade code to avoid an MSVC compiler limit.

MSVC has a nesting limit of around 110-130. Each branch of an if/else if/else if chain counts as another nesting level against this limit. The autoupgrade code consists of a long chain of these, checking for matches against strings.

This commit moves a large if/else chain that was inside one of the blocks out into a separate helper function. There are more of these we could move, or we could change some to lookup tables.

I've also merged together a few similar blocks in the outer chain. This should buy us some margin for a little bit.

llvm-svn: 350564
This commit is contained in:
Craig Topper 2019-01-07 20:13:45 +00:00
parent 33c9088783
commit 81fe1fbf4a
1 changed files with 79 additions and 96 deletions

View File

@ -933,6 +933,67 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
return EmitX86Select(Builder, Mask, Align, Passthru);
}
/// Replace a legacy masked vpermt2var/vpermi2var call with a call to the
/// matching x86_avx512_vpermi2var_* intrinsic followed by a select.
///
/// The replacement intrinsic is chosen from the result type of \p CI: its
/// total width (128, 256 or 512 bits), its scalar width (8, 16, 32 or 64
/// bits) and, for 32- and 64-bit scalars only, whether it is floating point.
/// Any other combination hits llvm_unreachable.
///
/// \param Builder   Builder used to emit the replacement instructions.
/// \param CI        Call being upgraded. Operands 0-2 feed the new call and
///                  operand 3 is handed to EmitX86Select as the mask.
/// \param ZeroMask  When true the select's pass-through value is an all-zero
///                  constant; otherwise it is operand 1 bitcast to the result
///                  type.
/// \param IndexForm When false, operands 0 and 1 are exchanged before being
///                  passed to the new intrinsic.
/// \returns the value produced by EmitX86Select.
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *ResTy = CI.getType();
  unsigned TotalBits = ResTy->getPrimitiveSizeInBits();
  unsigned ScalarBits = ResTy->getScalarSizeInBits();
  bool IsFP = ResTy->isFPOrFPVectorTy();

  // Index into the per-suffix tables below: 0 -> 128, 1 -> 256, 2 -> 512.
  unsigned WidthIdx;
  switch (TotalBits) {
  case 128:
    WidthIdx = 0;
    break;
  case 256:
    WidthIdx = 1;
    break;
  case 512:
    WidthIdx = 2;
    break;
  default:
    llvm_unreachable("Unexpected intrinsic");
  }

  // One table per intrinsic name suffix, ordered by vector width.
  static const Intrinsic::ID PSTbl[3] = {
    Intrinsic::x86_avx512_vpermi2var_ps_128,
    Intrinsic::x86_avx512_vpermi2var_ps_256,
    Intrinsic::x86_avx512_vpermi2var_ps_512
  };
  static const Intrinsic::ID DTbl[3] = {
    Intrinsic::x86_avx512_vpermi2var_d_128,
    Intrinsic::x86_avx512_vpermi2var_d_256,
    Intrinsic::x86_avx512_vpermi2var_d_512
  };
  static const Intrinsic::ID PDTbl[3] = {
    Intrinsic::x86_avx512_vpermi2var_pd_128,
    Intrinsic::x86_avx512_vpermi2var_pd_256,
    Intrinsic::x86_avx512_vpermi2var_pd_512
  };
  static const Intrinsic::ID QTbl[3] = {
    Intrinsic::x86_avx512_vpermi2var_q_128,
    Intrinsic::x86_avx512_vpermi2var_q_256,
    Intrinsic::x86_avx512_vpermi2var_q_512
  };
  static const Intrinsic::ID HITbl[3] = {
    Intrinsic::x86_avx512_vpermi2var_hi_128,
    Intrinsic::x86_avx512_vpermi2var_hi_256,
    Intrinsic::x86_avx512_vpermi2var_hi_512
  };
  static const Intrinsic::ID QITbl[3] = {
    Intrinsic::x86_avx512_vpermi2var_qi_128,
    Intrinsic::x86_avx512_vpermi2var_qi_256,
    Intrinsic::x86_avx512_vpermi2var_qi_512
  };

  Intrinsic::ID NewID;
  switch (ScalarBits) {
  case 8:
    NewID = QITbl[WidthIdx];
    break;
  case 16:
    NewID = HITbl[WidthIdx];
    break;
  case 32:
    // Only the 32- and 64-bit element sizes distinguish FP from integer.
    NewID = IsFP ? PSTbl[WidthIdx] : DTbl[WidthIdx];
    break;
  case 64:
    NewID = IsFP ? PDTbl[WidthIdx] : QTbl[WidthIdx];
    break;
  default:
    llvm_unreachable("Unexpected intrinsic");
  }

  // The non-index form passes its first two operands in the opposite order,
  // so exchange them when building the argument list.
  Value *First = CI.getArgOperand(0);
  Value *Second = CI.getArgOperand(1);
  Value *CallArgs[] = { IndexForm ? First : Second,
                        IndexForm ? Second : First,
                        CI.getArgOperand(2) };

  Function *Decl = Intrinsic::getDeclaration(CI.getModule(), NewID);
  Value *Permuted = Builder.CreateCall(Decl, CallArgs);

  // Emitted after the call so the instruction order matches: call, optional
  // bitcast, then the select.
  Value *Fallback;
  if (ZeroMask)
    Fallback = ConstantAggregateZero::get(ResTy);
  else
    Fallback = Builder.CreateBitCast(CI.getArgOperand(1), ResTy);

  return EmitX86Select(Builder, CI.getArgOperand(3), Permuted, Fallback);
}
static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
bool IsSigned, bool IsAddition) {
Type *Ty = CI.getType();
@ -2406,24 +2467,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.por.")) {
Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.and.")) {
} else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
Name.startswith("avx512.mask.pand."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@ -2431,7 +2476,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
} else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
Name.startswith("avx512.mask.pandn."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
@ -2440,7 +2486,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.or.")) {
} else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
Name.startswith("avx512.mask.por."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@ -2448,7 +2495,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
} else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
Name.startswith("avx512.mask.pxor."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@ -2532,26 +2580,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
} else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
Name.startswith("avx512.mask.min.p")) &&
Name.drop_front(18) == ".512") {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_max_ps_512;
else
IID = Intrinsic::x86_avx512_max_pd_512;
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
Name.drop_front(18) == ".512") {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_min_ps_512;
else
IID = Intrinsic::x86_avx512_min_pd_512;
bool IsDouble = Name[17] == 'd';
bool IsMin = Name[13] == 'i';
static const Intrinsic::ID MinMaxTbl[2][2] = {
{ Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
{ Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
};
Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
@ -3095,62 +3133,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.maskz.vpermt2var."))) {
bool ZeroMask = Name[11] == 'z';
bool IndexForm = Name[17] == 'i';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
bool IsFloat = CI->getType()->isFPOrFPVectorTy();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_128;
else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_128;
else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_256;
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_256;
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_512;
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_512;
else if (VecWidth == 128 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
else if (VecWidth == 256 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
else if (VecWidth == 512 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
else if (VecWidth == 128 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
else if (VecWidth == 256 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
else if (VecWidth == 512 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
CI->getArgOperand(2) };
// If this isn't index form we need to swap operand 0 and 1.
if (!IndexForm)
std::swap(Args[0], Args[1]);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: Builder.CreateBitCast(CI->getArgOperand(1),
CI->getType());
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
} else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
Name.startswith("avx512.maskz.vpdpbusd.") ||
Name.startswith("avx512.mask.vpdpbusds.") ||