From 81fe1fbf4abd00b24e078a80cb15a2d2c1343f11 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Jan 2019 20:13:45 +0000 Subject: [PATCH] [X86][AutoUpgrade] Make some tweaks to reduce the number of nested if/else in the intrinsic upgrade code to avoid an MSVC compiler limit. MSVC has a nesting limit of around 110-130. An if/else if/else if counts against this nesting level. The autoupgrade code consists of a long chain of these checking matches against strings. This commit moves some code to a helper function to move out a large if/else chain that was inside of one of the blocks into a separate function. There are more of these we could move or we could change some to lookup tables. I've also merged together a few similar blocks in the outer chain. This should buy us some margin for a little bit. llvm-svn: 350564 --- llvm/lib/IR/AutoUpgrade.cpp | 175 ++++++++++++++++-------------------- 1 file changed, 79 insertions(+), 96 deletions(-) diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 51fef27999b4..be7cf858c86e 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -933,6 +933,67 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, return EmitX86Select(Builder, Mask, Align, Passthru); } +static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI, + bool ZeroMask, bool IndexForm) { + Type *Ty = CI.getType(); + unsigned VecWidth = Ty->getPrimitiveSizeInBits(); + unsigned EltWidth = Ty->getScalarSizeInBits(); + bool IsFloat = Ty->isFPOrFPVectorTy(); + Intrinsic::ID IID; + if (VecWidth == 128 && EltWidth == 32 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_ps_128; + else if (VecWidth == 128 && EltWidth == 32 && !IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_d_128; + else if (VecWidth == 128 && EltWidth == 64 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_pd_128; + else if (VecWidth == 128 && EltWidth == 64 && !IsFloat) + IID = 
Intrinsic::x86_avx512_vpermi2var_q_128; + else if (VecWidth == 256 && EltWidth == 32 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_ps_256; + else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_d_256; + else if (VecWidth == 256 && EltWidth == 64 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_pd_256; + else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_q_256; + else if (VecWidth == 512 && EltWidth == 32 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_ps_512; + else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_d_512; + else if (VecWidth == 512 && EltWidth == 64 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_pd_512; + else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_q_512; + else if (VecWidth == 128 && EltWidth == 16) + IID = Intrinsic::x86_avx512_vpermi2var_hi_128; + else if (VecWidth == 256 && EltWidth == 16) + IID = Intrinsic::x86_avx512_vpermi2var_hi_256; + else if (VecWidth == 512 && EltWidth == 16) + IID = Intrinsic::x86_avx512_vpermi2var_hi_512; + else if (VecWidth == 128 && EltWidth == 8) + IID = Intrinsic::x86_avx512_vpermi2var_qi_128; + else if (VecWidth == 256 && EltWidth == 8) + IID = Intrinsic::x86_avx512_vpermi2var_qi_256; + else if (VecWidth == 512 && EltWidth == 8) + IID = Intrinsic::x86_avx512_vpermi2var_qi_512; + else + llvm_unreachable("Unexpected intrinsic"); + + Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1), + CI.getArgOperand(2) }; + + // If this isn't index form we need to swap operand 0 and 1. + if (!IndexForm) + std::swap(Args[0], Args[1]); + + Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID), + Args); + Value *PassThru = ZeroMask ? 
ConstantAggregateZero::get(Ty) + : Builder.CreateBitCast(CI.getArgOperand(1), + Ty); + return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru); +} + static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI, bool IsSigned, bool IsAddition) { Type *Ty = CI.getType(); @@ -2406,24 +2467,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.pand.")) { - Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1)); - Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, - CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) { - Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)), - CI->getArgOperand(1)); - Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, - CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.por.")) { - Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1)); - Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, - CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) { - Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1)); - Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, - CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.and.")) { + } else if (IsX86 && (Name.startswith("avx512.mask.and.") || + Name.startswith("avx512.mask.pand."))) { VectorType *FTy = cast<VectorType>(CI->getType()); VectorType *ITy = VectorType::getInteger(FTy); Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy), @@ -2431,7 +2476,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateBitCast(Rep, FTy); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.andn.")) { + } else if (IsX86 && (Name.startswith("avx512.mask.andn.") 
|| + Name.startswith("avx512.mask.pandn."))) { VectorType *FTy = cast<VectorType>(CI->getType()); VectorType *ITy = VectorType::getInteger(FTy); Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy)); @@ -2440,7 +2486,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateBitCast(Rep, FTy); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.or.")) { + } else if (IsX86 && (Name.startswith("avx512.mask.or.") || + Name.startswith("avx512.mask.por."))) { VectorType *FTy = cast<VectorType>(CI->getType()); VectorType *ITy = VectorType::getInteger(FTy); Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy), @@ -2448,7 +2495,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateBitCast(Rep, FTy); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.xor.")) { + } else if (IsX86 && (Name.startswith("avx512.mask.xor.") || + Name.startswith("avx512.mask.pxor."))) { VectorType *FTy = cast<VectorType>(CI->getType()); VectorType *ITy = VectorType::getInteger(FTy); Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy), @@ -2532,26 +2580,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.max.p") && + } else if (IsX86 && (Name.startswith("avx512.mask.max.p") || + Name.startswith("avx512.mask.min.p")) && Name.drop_front(18) == ".512") { - Intrinsic::ID IID; - if (Name[17] == 's') - IID = Intrinsic::x86_avx512_max_ps_512; - else - IID = Intrinsic::x86_avx512_max_pd_512; - - Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), - { CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(4) }); - Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, - 
CI->getArgOperand(2)); - } else if (IsX86 && Name.startswith("avx512.mask.min.p") && - Name.drop_front(18) == ".512") { - Intrinsic::ID IID; - if (Name[17] == 's') - IID = Intrinsic::x86_avx512_min_ps_512; - else - IID = Intrinsic::x86_avx512_min_pd_512; + bool IsDouble = Name[17] == 'd'; + bool IsMin = Name[13] == 'i'; + static const Intrinsic::ID MinMaxTbl[2][2] = { + { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 }, + { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 } + }; + Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble]; Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), { CI->getArgOperand(0), CI->getArgOperand(1), @@ -3095,62 +3133,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Name.startswith("avx512.maskz.vpermt2var."))) { bool ZeroMask = Name[11] == 'z'; bool IndexForm = Name[17] == 'i'; - unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); - unsigned EltWidth = CI->getType()->getScalarSizeInBits(); - bool IsFloat = CI->getType()->isFPOrFPVectorTy(); - Intrinsic::ID IID; - if (VecWidth == 128 && EltWidth == 32 && IsFloat) - IID = Intrinsic::x86_avx512_vpermi2var_ps_128; - else if (VecWidth == 128 && EltWidth == 32 && !IsFloat) - IID = Intrinsic::x86_avx512_vpermi2var_d_128; - else if (VecWidth == 128 && EltWidth == 64 && IsFloat) - IID = Intrinsic::x86_avx512_vpermi2var_pd_128; - else if (VecWidth == 128 && EltWidth == 64 && !IsFloat) - IID = Intrinsic::x86_avx512_vpermi2var_q_128; - else if (VecWidth == 256 && EltWidth == 32 && IsFloat) - IID = Intrinsic::x86_avx512_vpermi2var_ps_256; - else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) - IID = Intrinsic::x86_avx512_vpermi2var_d_256; - else if (VecWidth == 256 && EltWidth == 64 && IsFloat) - IID = Intrinsic::x86_avx512_vpermi2var_pd_256; - else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) - IID = Intrinsic::x86_avx512_vpermi2var_q_256; - else if (VecWidth == 512 && EltWidth == 32 && IsFloat) - IID = 
Intrinsic::x86_avx512_vpermi2var_ps_512; - else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) - IID = Intrinsic::x86_avx512_vpermi2var_d_512; - else if (VecWidth == 512 && EltWidth == 64 && IsFloat) - IID = Intrinsic::x86_avx512_vpermi2var_pd_512; - else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) - IID = Intrinsic::x86_avx512_vpermi2var_q_512; - else if (VecWidth == 128 && EltWidth == 16) - IID = Intrinsic::x86_avx512_vpermi2var_hi_128; - else if (VecWidth == 256 && EltWidth == 16) - IID = Intrinsic::x86_avx512_vpermi2var_hi_256; - else if (VecWidth == 512 && EltWidth == 16) - IID = Intrinsic::x86_avx512_vpermi2var_hi_512; - else if (VecWidth == 128 && EltWidth == 8) - IID = Intrinsic::x86_avx512_vpermi2var_qi_128; - else if (VecWidth == 256 && EltWidth == 8) - IID = Intrinsic::x86_avx512_vpermi2var_qi_256; - else if (VecWidth == 512 && EltWidth == 8) - IID = Intrinsic::x86_avx512_vpermi2var_qi_512; - else - llvm_unreachable("Unexpected intrinsic"); - - Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), - CI->getArgOperand(2) }; - - // If this isn't index form we need to swap operand 0 and 1. - if (!IndexForm) - std::swap(Args[0], Args[1]); - - Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), - Args); - Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) - : Builder.CreateBitCast(CI->getArgOperand(1), - CI->getType()); - Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); + Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm); } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") || Name.startswith("avx512.maskz.vpdpbusd.") || Name.startswith("avx512.mask.vpdpbusds.") ||