GlobalISel: Handle EVT argument lowering correctly

handleAssignments was assuming every argument type is an MVT, and
assignArg would always fail. This fixes one of the hacks in the
current AMDGPU calling convention code that pre-processes the
arguments.
This commit is contained in:
Matt Arsenault 2020-07-07 15:21:13 -04:00
parent 42bb481442
commit 23157f3bdb
5 changed files with 33 additions and 44 deletions

View File

@ -194,11 +194,11 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT CurVT = MVT::getVT(Args[i].Ty);
if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i],
Args[i].Flags[0], CCInfo)) {
if (!CurVT.isValid())
return false;
EVT CurVT = EVT::getEVT(Args[i].Ty);
if (!CurVT.isSimple() ||
Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(),
CCValAssign::Full, Args[i], Args[i].Flags[0],
CCInfo)) {
MVT NewVT = TLI->getRegisterTypeForCallingConv(
F.getContext(), F.getCallingConv(), EVT(CurVT));
@ -309,8 +309,10 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
// FIXME: Pack registers if we have more than one.
Register ArgReg = Args[i].Regs[0];
MVT OrigVT = MVT::getVT(Args[i].Ty);
MVT VAVT = VA.getValVT();
EVT OrigVT = EVT::getEVT(Args[i].Ty);
EVT VAVT = VA.getValVT();
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
if (VA.isRegLoc()) {
if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) {
if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) {
@ -332,7 +334,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
continue;
}
const LLT VATy(VAVT);
const LLT VATy(VAVT.getSimpleVT());
Register NewReg =
MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
@ -340,7 +342,6 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
// or do an unmerge to get the lower block of elements.
if (VATy.isVector() &&
VATy.getNumElements() > OrigVT.getVectorNumElements()) {
const LLT OrigTy(OrigVT);
// Just handle the case where the VA type is 2 * original type.
if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
LLVM_DEBUG(dbgs()

View File

@ -226,19 +226,6 @@ void AMDGPUCallLowering::splitToValueTypes(
MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
if (NumParts == 1) {
// Fixup EVTs to an MVT.
//
// FIXME: This is pretty hacky. Why do we have to split the type
// legalization logic between here and handleAssignments?
if (OrigArgIdx != AttributeList::ReturnIndex && VT != RegVT) {
assert(VT.getSizeInBits() < 32 &&
"unexpected illegal type");
Ty = Type::getInt32Ty(Ctx);
Register OrigReg = Reg;
Reg = B.getMRI()->createGenericVirtualRegister(LLT::scalar(32));
B.buildTrunc(OrigReg, Reg);
}
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);

View File

@ -123,10 +123,11 @@ define void @void_func_i8(i8 %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[TRUNC]](s8), [[DEF]](p1) :: (store 1 into `i8 addrspace(1)* undef`, addrspace 1)
; CHECK: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store 1 into `i8 addrspace(1)* undef`, addrspace 1)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]]
store i8 %arg0, i8 addrspace(1)* undef
@ -138,8 +139,8 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
@ -158,8 +159,8 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
@ -233,8 +234,8 @@ define void @void_func_i24(i24 %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store 3 into `i24 addrspace(1)* undef`, align 4, addrspace 1)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
@ -248,8 +249,8 @@ define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store 3 into `i24 addrspace(1)* undef`, align 4, addrspace 1)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
@ -263,8 +264,8 @@ define void @void_func_i24_signext(i24 signext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store 3 into `i24 addrspace(1)* undef`, align 4, addrspace 1)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
@ -1569,11 +1570,12 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[TRUNC]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
; CHECK: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
; CHECK: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
@ -1766,14 +1768,13 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1
; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.3, align 16, addrspace 5)
; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5)
; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 1 from %fixed-stack.2, align 4, addrspace 5)
; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.1, align 8, addrspace 5)
; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 2 from %fixed-stack.0, align 4, addrspace 5)
; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32)
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
@ -1781,7 +1782,7 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1
; CHECK: [[COPY36:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
; CHECK: G_STORE [[LOAD]](s1), [[COPY33]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1)
; CHECK: G_STORE [[TRUNC]](s8), [[COPY34]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1)
; CHECK: G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1)
; CHECK: G_STORE [[LOAD2]](s16), [[COPY35]](p1) :: (volatile store 2 into `i16 addrspace(1)* undef`, addrspace 1)
; CHECK: G_STORE [[LOAD3]](s16), [[COPY36]](p1) :: (volatile store 2 into `half addrspace(1)* undef`, addrspace 1)
; CHECK: [[COPY37:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]

View File

@ -48,9 +48,9 @@ define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) {
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16)
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
; CHECK: $vgpr0 = COPY [[UV]](s32)
@ -119,8 +119,8 @@ define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) {
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16)
; CHECK: $vgpr0 = COPY [[PTRMASK]](p3)
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]

View File

@ -6,10 +6,10 @@ define i16 @uaddsat_i16(i16 %lhs, i16 %rhs) {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]]
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
@ -85,10 +85,10 @@ define i16 @saddsat_i16(i16 %lhs, i16 %rhs) {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]]
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
@ -164,10 +164,10 @@ define i16 @usubsat_i16(i16 %lhs, i16 %rhs) {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]]
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
@ -243,10 +243,10 @@ define i16 @ssubsat_i16(i16 %lhs, i16 %rhs) {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]]
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)