AMDGPU: SimplifyDemandedElts for image intrinsics

Causes some VGPR usage improvements in shaderdb, but
introduces some SGPR spilling regressions due to random
scheduling changes later.

llvm-svn: 300453
This commit is contained in:
Matt Arsenault 2017-04-17 15:12:44 +00:00
parent ce161a7812
commit 7205f3c2e4
2 changed files with 1270 additions and 9 deletions

View File

@ -1641,7 +1641,52 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts.setHighBits(VWidth / 2);
break;
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format: {
case Intrinsic::amdgcn_buffer_load_format:
case Intrinsic::amdgcn_image_sample:
case Intrinsic::amdgcn_image_sample_cl:
case Intrinsic::amdgcn_image_sample_d:
case Intrinsic::amdgcn_image_sample_d_cl:
case Intrinsic::amdgcn_image_sample_l:
case Intrinsic::amdgcn_image_sample_b:
case Intrinsic::amdgcn_image_sample_b_cl:
case Intrinsic::amdgcn_image_sample_lz:
case Intrinsic::amdgcn_image_sample_cd:
case Intrinsic::amdgcn_image_sample_cd_cl:
case Intrinsic::amdgcn_image_sample_c:
case Intrinsic::amdgcn_image_sample_c_cl:
case Intrinsic::amdgcn_image_sample_c_d:
case Intrinsic::amdgcn_image_sample_c_d_cl:
case Intrinsic::amdgcn_image_sample_c_l:
case Intrinsic::amdgcn_image_sample_c_b:
case Intrinsic::amdgcn_image_sample_c_b_cl:
case Intrinsic::amdgcn_image_sample_c_lz:
case Intrinsic::amdgcn_image_sample_c_cd:
case Intrinsic::amdgcn_image_sample_c_cd_cl:
case Intrinsic::amdgcn_image_sample_o:
case Intrinsic::amdgcn_image_sample_cl_o:
case Intrinsic::amdgcn_image_sample_d_o:
case Intrinsic::amdgcn_image_sample_d_cl_o:
case Intrinsic::amdgcn_image_sample_l_o:
case Intrinsic::amdgcn_image_sample_b_o:
case Intrinsic::amdgcn_image_sample_b_cl_o:
case Intrinsic::amdgcn_image_sample_lz_o:
case Intrinsic::amdgcn_image_sample_cd_o:
case Intrinsic::amdgcn_image_sample_cd_cl_o:
case Intrinsic::amdgcn_image_sample_c_o:
case Intrinsic::amdgcn_image_sample_c_cl_o:
case Intrinsic::amdgcn_image_sample_c_d_o:
case Intrinsic::amdgcn_image_sample_c_d_cl_o:
case Intrinsic::amdgcn_image_sample_c_l_o:
case Intrinsic::amdgcn_image_sample_c_b_o:
case Intrinsic::amdgcn_image_sample_c_b_cl_o:
case Intrinsic::amdgcn_image_sample_c_lz_o:
case Intrinsic::amdgcn_image_sample_c_cd_o:
case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
case Intrinsic::amdgcn_image_getlod: {
if (VWidth == 1 || !DemandedElts.isMask())
return nullptr;
@ -1656,8 +1701,17 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Type *NewTy = (NewNumElts == 1) ? EltTy :
VectorType::get(EltTy, NewNumElts);
Function *NewIntrin = Intrinsic::getDeclaration(M, II->getIntrinsicID(),
NewTy);
auto IID = II->getIntrinsicID();
bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load ||
IID == Intrinsic::amdgcn_buffer_load_format;
Function *NewIntrin = IsBuffer ?
Intrinsic::getDeclaration(M, IID, NewTy) :
// Samplers have 3 mangled types.
Intrinsic::getDeclaration(M, IID,
{ NewTy, II->getArgOperand(0)->getType(),
II->getArgOperand(1)->getType()});
SmallVector<Value *, 5> Args;
for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
@ -1669,6 +1723,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
CallInst *NewCall = Builder->CreateCall(NewIntrin, Args);
NewCall->takeName(II);
NewCall->copyMetadata(*II);
if (!IsBuffer) {
ConstantInt *DMask = dyn_cast<ConstantInt>(NewCall->getArgOperand(3));
if (DMask) {
unsigned DMaskVal = DMask->getZExtValue() & 0xf;
unsigned PopCnt = 0;
unsigned NewDMask = 0;
for (unsigned I = 0; I < 4; ++I) {
const unsigned Bit = 1 << I;
if (!!(DMaskVal & Bit)) {
if (++PopCnt > NewNumElts)
break;
NewDMask |= Bit;
}
}
NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask));
}
}
if (NewNumElts == 1) {
return Builder->CreateInsertElement(UndefValue::get(V->getType()),
NewCall, static_cast<uint64_t>(0));

File diff suppressed because it is too large Load Diff