forked from OSchip/llvm-project
AMDGPU: Correct memory size for image intrinsics
This was incorrectly rounding the element count up to the next power of 2: v4f32 was rounding up to v8f32, which was just wrong. There are also v3i16/v3f16 available in MVT, so we don't even need to round the f16 cases anymore. Additionally, the memVT field is really an EVT, so we don't even need to consider whether a simple MVT exists for the type. Also switch some asserts to return an invalid EVT instead. We should have an IR verifier for these intrinsic return types, but for now it's better to not assert on IR that passes the verifier. This should also probably be fixed to consider that dmask is really eliminating some of the loaded components.
This commit is contained in:
parent
ec50e10db4
commit
9260d01faa
|
@ -878,45 +878,20 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
|
|||
Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
|
||||
}
|
||||
|
||||
static MVT memVTFromAggregate(Type *Ty) {
|
||||
// Peek through TFE struct returns to only use the data size.
|
||||
static EVT memVTFromImageReturn(Type *Ty) {
|
||||
auto *ST = dyn_cast<StructType>(Ty);
|
||||
if (!ST)
|
||||
return EVT::getEVT(Ty, true);
|
||||
|
||||
// Some intrinsics return an aggregate type - special case to work out the
|
||||
// correct memVT.
|
||||
//
|
||||
// Only limited forms of aggregate type currently expected.
|
||||
assert(Ty->isStructTy() && "Expected struct type");
|
||||
|
||||
|
||||
Type *ElementType = nullptr;
|
||||
unsigned NumElts;
|
||||
if (Ty->getContainedType(0)->isVectorTy()) {
|
||||
VectorType *VecComponent = cast<VectorType>(Ty->getContainedType(0));
|
||||
ElementType = VecComponent->getElementType();
|
||||
NumElts = VecComponent->getNumElements();
|
||||
} else {
|
||||
ElementType = Ty->getContainedType(0);
|
||||
NumElts = 1;
|
||||
}
|
||||
|
||||
assert((Ty->getContainedType(1) && Ty->getContainedType(1)->isIntegerTy(32)) && "Expected int32 type");
|
||||
|
||||
// Calculate the size of the memVT type from the aggregate
|
||||
unsigned Pow2Elts = 0;
|
||||
unsigned ElementSize;
|
||||
switch (ElementType->getTypeID()) {
|
||||
default:
|
||||
llvm_unreachable("Unknown type!");
|
||||
case Type::IntegerTyID:
|
||||
ElementSize = cast<IntegerType>(ElementType)->getBitWidth();
|
||||
break;
|
||||
case Type::HalfTyID:
|
||||
ElementSize = 16;
|
||||
break;
|
||||
case Type::FloatTyID:
|
||||
ElementSize = 32;
|
||||
break;
|
||||
}
|
||||
unsigned AdditionalElts = ElementSize == 16 ? 2 : 1;
|
||||
Pow2Elts = 1 << Log2_32_Ceil(NumElts + AdditionalElts);
|
||||
|
||||
return MVT::getVectorVT(MVT::getVT(ElementType, false),
|
||||
Pow2Elts);
|
||||
if (ST->getNumContainedTypes() != 2 ||
|
||||
!ST->getContainedType(1)->isIntegerTy(32))
|
||||
return EVT();
|
||||
return EVT::getEVT(ST->getContainedType(0));
|
||||
}
|
||||
|
||||
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||
|
@ -946,12 +921,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
Info.flags = MachineMemOperand::MODereferenceable;
|
||||
if (Attr.hasFnAttribute(Attribute::ReadOnly)) {
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
Info.memVT = MVT::getVT(CI.getType(), true);
|
||||
if (Info.memVT == MVT::Other) {
|
||||
// Some intrinsics return an aggregate type - special case to work out
|
||||
// the correct memVT
|
||||
Info.memVT = memVTFromAggregate(CI.getType());
|
||||
}
|
||||
// TODO: Account for dmask reducing loaded size.
|
||||
Info.memVT = memVTFromImageReturn(CI.getType());
|
||||
Info.flags |= MachineMemOperand::MOLoad;
|
||||
} else if (Attr.hasFnAttribute(Attribute::WriteOnly)) {
|
||||
Info.opc = ISD::INTRINSIC_VOID;
|
||||
|
|
Loading…
Reference in New Issue