[SVE] Eliminate calls to default-false VectorType::get() from AMDGPU
Reviewers: efriedma, david-arm, fpetrogalli, arsenm

Reviewed By: david-arm

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, tschuett, hiraditya, rkruppe, psnobl, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80328
This commit is contained in:
parent 3f0841f6d0
commit aad9365482
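The change mechanically replaces the two-argument VectorType::get(ElementType, NumElements) overload, whose trailing Scalable flag defaulted to false, with the explicit FixedVectorType::get spelling, so fixed-width vector types are requested by name rather than through a default argument. Below is a minimal standalone sketch of the before/after spelling, written against LLVM headers from around this revision; the helper function buildPair is illustrative and not part of the patch.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Illustrative helper, not from the patch: builds the same <2 x i32> type
// both ways to show what the hunks below change.
static void buildPair(LLVMContext &Ctx) {
  IRBuilder<> B(Ctx);

  // Old spelling: relies on VectorType::get's Scalable parameter
  // defaulting to false, so the fixed-width intent is implicit.
  Type *OldVecTy = VectorType::get(B.getInt32Ty(), 2);

  // New spelling: the fixed-width intent is carried by the type name.
  auto *NewVecTy = FixedVectorType::get(B.getInt32Ty(), 2);

  (void)OldVecTy;
  (void)NewVecTy;
}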
@@ -438,7 +438,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   Type *const Ty = I.getType();
   const unsigned TyBitWidth = DL->getTypeSizeInBits(Ty);
-  Type *const VecTy = VectorType::get(B.getInt32Ty(), 2);
+  auto *const VecTy = FixedVectorType::get(B.getInt32Ty(), 2);
 
   // This is the value in the atomic operation we need to combine in order to
   // reduce the number of atomic operations.
@@ -598,7 +598,7 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
   if (Size <= 8)
     PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
   else
-    PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8);
+    PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
   unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
   auto PtrArg = CI->getArgOperand(PtrArgLoc);
   unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
@@ -902,7 +902,7 @@ static Type* getIntrinsicParamType(
     return nullptr;
   }
   if (P.VectorSize > 1)
-    T = VectorType::get(T, P.VectorSize);
+    T = FixedVectorType::get(T, P.VectorSize);
   if (P.PtrKind != AMDGPULibFunc::BYVALUE)
     T = useAddrSpace ? T->getPointerTo((P.PtrKind & AMDGPULibFunc::ADDR_SPACE)
                                          - 1)
@@ -167,7 +167,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
       }
 
       if (IsV3 && Size >= 32) {
-        V4Ty = VectorType::get(VT->getElementType(), 4);
+        V4Ty = FixedVectorType::get(VT->getElementType(), 4);
         // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
         AdjustedArgTy = V4Ty;
       }
@@ -516,7 +516,7 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
           break;
         }
         if (EleCount > 1) {
-          IType = dyn_cast<Type>(VectorType::get(IType, EleCount));
+          IType = FixedVectorType::get(IType, EleCount);
         }
         Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch);
         WhatToStore.push_back(Arg);
@@ -334,12 +334,12 @@ Type *GCNTTIImpl::getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
       SrcAddrSpace == AMDGPUAS::REGION_ADDRESS ||
       DestAddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
       DestAddrSpace == AMDGPUAS::REGION_ADDRESS) {
-    return VectorType::get(Type::getInt32Ty(Context), 2);
+    return FixedVectorType::get(Type::getInt32Ty(Context), 2);
   }
 
   // Global memory works best with 16-byte accesses. Private memory will also
   // hit this, although they'll be decomposed.
-  return VectorType::get(Type::getInt32Ty(Context), 4);
+  return FixedVectorType::get(Type::getInt32Ty(Context), 4);
 }
 
 void GCNTTIImpl::getMemcpyLoopResidualLoweringType(