GlobalISel: Add default implementation of assignValueToReg

Refactor insertion of the asserting ops. This enables using them for
AMDGPU.

This code should essentially be the same for every target. Mips, X86
and ARM all have different code there now, but this seems to be an
accident. The assignment functions are called with different types
than they would be in the DAG, so this is all likely an assortment of
hacks to get around that.
This commit is contained in:
Matt Arsenault 2021-03-02 17:40:50 -05:00
parent 8da090381d
commit 78dcff4841
6 changed files with 115 additions and 76 deletions

View File

@ -209,6 +209,14 @@ public:
IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn) CCAssignFn *AssignFn)
: ValueHandler(true, MIRBuilder, MRI, AssignFn) {} : ValueHandler(true, MIRBuilder, MRI, AssignFn) {}
/// Insert G_ASSERT_ZEXT/G_ASSERT_SEXT or other hint instruction based on \p
/// VA, returning the new register if a hint was inserted.
Register buildExtensionHint(CCValAssign &VA, Register SrcReg, LLT NarrowTy);
/// Provides a default implementation for argument handling.
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override;
}; };
struct OutgoingValueHandler : public ValueHandler { struct OutgoingValueHandler : public ValueHandler {

View File

@ -985,3 +985,41 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg,
} }
void CallLowering::ValueHandler::anchor() {} void CallLowering::ValueHandler::anchor() {}
/// Emit an extension hint for an incoming value, if \p VA calls for one.
///
/// When the location info of \p VA is ZExt or SExt, a G_ASSERT_ZEXT or
/// G_ASSERT_SEXT is built on a fresh virtual register cloned from \p SrcReg,
/// asserting that \p SrcReg was extended from the scalar width of
/// \p NarrowTy.
///
/// \param VA       Assignment whose location info selects the hint kind.
/// \param SrcReg   Register holding the value as it was received.
/// \param NarrowTy Type whose scalar size in bits is recorded in the hint.
/// \returns the register defined by the hint instruction, or \p SrcReg
/// unchanged when the location info is neither ZExt nor SExt.
Register CallLowering::IncomingValueHandler::buildExtensionHint(CCValAssign &VA,
                                                                Register SrcReg,
                                                                LLT NarrowTy) {
  switch (VA.getLocInfo()) {
  case CCValAssign::LocInfo::ZExt:
    return MIRBuilder
        .buildAssertZExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
                         NarrowTy.getScalarSizeInBits())
        .getReg(0);
  case CCValAssign::LocInfo::SExt:
    return MIRBuilder
        .buildAssertSExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
                         NarrowTy.getScalarSizeInBits())
        .getReg(0);
  default:
    // No hint applies for any other location info; pass the source through.
    return SrcReg;
  }
}
/// Default lowering of an incoming value that arrives in \p PhysReg.
///
/// If the type of \p ValVReg and the location type of \p VA have the same
/// size in bits, the physical register is copied straight into \p ValVReg.
/// Otherwise the physical register is copied into a new register of the
/// location type, an extension hint is attached via buildExtensionHint, and
/// the result is truncated into \p ValVReg.
void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
                                                          Register PhysReg,
                                                          CCValAssign &VA) {
  const LLT ValueTy = MRI.getType(ValVReg);
  const LLT LocationTy(VA.getLocVT());

  if (ValueTy.getSizeInBits() != LocationTy.getSizeInBits()) {
    // Sizes differ: read the full location-typed register first, then
    // narrow it down to the type the value register expects.
    Register WideReg = MIRBuilder.buildCopy(LocationTy, PhysReg).getReg(0);
    Register HintedReg = buildExtensionHint(VA, WideReg, ValueTy);
    MIRBuilder.buildTrunc(ValVReg, HintedReg);
    return;
  }

  // Same size: a plain copy out of the physical register is enough.
  MIRBuilder.buildCopy(ValVReg, PhysReg);
}

View File

@ -70,34 +70,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
void assignValueToReg(Register ValVReg, Register PhysReg, void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override { CCValAssign &VA) override {
markPhysRegUsed(PhysReg); markPhysRegUsed(PhysReg);
switch (VA.getLocInfo()) { IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
default:
MIRBuilder.buildCopy(ValVReg, PhysReg);
break;
case CCValAssign::LocInfo::ZExt: {
auto WideTy = LLT{VA.getLocVT()};
auto NarrowTy = MRI.getType(ValVReg);
MIRBuilder.buildTrunc(ValVReg,
MIRBuilder.buildAssertZExt(
WideTy, MIRBuilder.buildCopy(WideTy, PhysReg),
NarrowTy.getSizeInBits()));
break;
}
case CCValAssign::LocInfo::SExt: {
auto WideTy = LLT{VA.getLocVT()};
auto NarrowTy = MRI.getType(ValVReg);
MIRBuilder.buildTrunc(ValVReg,
MIRBuilder.buildAssertSExt(
WideTy, MIRBuilder.buildCopy(WideTy, PhysReg),
NarrowTy.getSizeInBits()));
break;
}
case CCValAssign::LocInfo::AExt: {
auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
MIRBuilder.buildTrunc(ValVReg, Copy);
break;
}
}
} }
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize, void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,

View File

@ -29,28 +29,22 @@ using namespace llvm;
namespace { namespace {
struct AMDGPUValueHandler : public CallLowering::ValueHandler { /// Wrapper around extendRegister to ensure we extend to a full 32-bit register.
AMDGPUValueHandler(bool IsIncoming, MachineIRBuilder &B, static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
MachineRegisterInfo &MRI, CCAssignFn *AssignFn) Register ValVReg, CCValAssign &VA) {
: ValueHandler(IsIncoming, B, MRI, AssignFn) {} if (VA.getLocVT().getSizeInBits() < 32) {
// 16-bit types are reported as legal for 32-bit registers. We need to
/// Wrapper around extendRegister to ensure we extend to a full 32-bit // extend and do a 32-bit copy to avoid the verifier complaining about it.
/// register. return Handler.MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
Register extendRegisterMin32(Register ValVReg, CCValAssign &VA) {
if (VA.getLocVT().getSizeInBits() < 32) {
// 16-bit types are reported as legal for 32-bit registers. We need to
// extend and do a 32-bit copy to avoid the verifier complaining about it.
return MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
}
return extendRegister(ValVReg, VA);
} }
};
struct AMDGPUOutgoingValueHandler : public AMDGPUValueHandler { return Handler.extendRegister(ValVReg, VA);
}
struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI, AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn) MachineInstrBuilder MIB, CCAssignFn *AssignFn)
: AMDGPUValueHandler(false, B, MRI, AssignFn), MIB(MIB) {} : OutgoingValueHandler(B, MRI, AssignFn), MIB(MIB) {}
MachineInstrBuilder MIB; MachineInstrBuilder MIB;
@ -66,7 +60,7 @@ struct AMDGPUOutgoingValueHandler : public AMDGPUValueHandler {
void assignValueToReg(Register ValVReg, Register PhysReg, void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override { CCValAssign &VA) override {
Register ExtReg = extendRegisterMin32(ValVReg, VA); Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
// If this is a scalar return, insert a readfirstlane just in case the value // If this is a scalar return, insert a readfirstlane just in case the value
// ends up in a VGPR. // ends up in a VGPR.
@ -93,12 +87,12 @@ struct AMDGPUOutgoingValueHandler : public AMDGPUValueHandler {
} }
}; };
struct AMDGPUIncomingArgHandler : public AMDGPUValueHandler { struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
uint64_t StackUsed = 0; uint64_t StackUsed = 0;
AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI, AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn) CCAssignFn *AssignFn)
: AMDGPUValueHandler(true, B, MRI, AssignFn) {} : IncomingValueHandler(B, MRI, AssignFn) {}
Register getStackAddress(uint64_t Size, int64_t Offset, Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override { MachinePointerInfo &MPO) override {
@ -119,22 +113,16 @@ struct AMDGPUIncomingArgHandler : public AMDGPUValueHandler {
// 16-bit types are reported as legal for 32-bit registers. We need to do // 16-bit types are reported as legal for 32-bit registers. We need to do
// a 32-bit copy, and truncate to avoid the verifier complaining about it. // a 32-bit copy, and truncate to avoid the verifier complaining about it.
auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg); auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
MIRBuilder.buildTrunc(ValVReg, Copy);
// If we have signext/zeroext, it applies to the whole 32-bit register
// before truncation.
auto Extended =
buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
MIRBuilder.buildTrunc(ValVReg, Extended);
return; return;
} }
switch (VA.getLocInfo()) { IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
case CCValAssign::LocInfo::SExt:
case CCValAssign::LocInfo::ZExt:
case CCValAssign::LocInfo::AExt: {
auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
MIRBuilder.buildTrunc(ValVReg, Copy);
break;
}
default:
MIRBuilder.buildCopy(ValVReg, PhysReg);
break;
}
} }
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize, void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
@ -180,8 +168,7 @@ struct CallReturnHandler : public AMDGPUIncomingArgHandler {
MachineInstrBuilder MIB; MachineInstrBuilder MIB;
}; };
struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler { struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
MachineInstrBuilder MIB;
CCAssignFn *AssignFnVarArg; CCAssignFn *AssignFnVarArg;
/// For tail calls, the byte offset of the call's argument area from the /// For tail calls, the byte offset of the call's argument area from the
@ -197,7 +184,7 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler {
MachineRegisterInfo &MRI, MachineInstrBuilder MIB, MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
CCAssignFn *AssignFn, CCAssignFn *AssignFnVarArg, CCAssignFn *AssignFn, CCAssignFn *AssignFnVarArg,
bool IsTailCall = false, int FPDiff = 0) bool IsTailCall = false, int FPDiff = 0)
: AMDGPUValueHandler(false, MIRBuilder, MRI, AssignFn), MIB(MIB), : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB, AssignFn),
AssignFnVarArg(AssignFnVarArg), FPDiff(FPDiff), IsTailCall(IsTailCall) { AssignFnVarArg(AssignFnVarArg), FPDiff(FPDiff), IsTailCall(IsTailCall) {
} }
@ -226,7 +213,7 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler {
void assignValueToReg(Register ValVReg, Register PhysReg, void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override { CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit); MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegisterMin32(ValVReg, VA); Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg); MIRBuilder.buildCopy(PhysReg, ExtReg);
} }

View File

@ -50,7 +50,8 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0): ; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@ -70,7 +71,8 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0): ; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@ -139,7 +141,8 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0): ; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@ -159,7 +162,8 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0): ; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@ -194,7 +198,8 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0): ; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@ -214,7 +219,8 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0): ; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@ -288,6 +294,36 @@ define void @void_func_i32(i32 %arg0) #0 {
ret void ret void
} }
; The signext is a no-op
define void @void_func_i32_signext(i32 signext %arg0) #0 {
; CHECK-LABEL: name: void_func_i32_signext
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]]
store i32 %arg0, i32 addrspace(1)* undef
ret void
}
; The zeroext is a no-op
define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 {
; CHECK-LABEL: name: void_func_i32_zeroext
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]]
store i32 %arg0, i32 addrspace(1)* undef
ret void
}
define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 { define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 {
; CHECK-LABEL: name: void_func_p3i8 ; CHECK-LABEL: name: void_func_p3i8
; CHECK: bb.1 (%ir-block.0): ; CHECK: bb.1 (%ir-block.0):

View File

@ -85,11 +85,8 @@ define zeroext i16 @v_mul_i16_zeroext(i16 zeroext %num, i16 zeroext %den) {
; GFX7-LABEL: v_mul_i16_zeroext: ; GFX7-LABEL: v_mul_i16_zeroext:
; GFX7: ; %bb.0: ; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s4, 0xffff
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
; GFX7-NEXT: v_mul_u32_u24_e32 v0, v0, v1 ; GFX7-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31] ; GFX7-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX8-LABEL: v_mul_i16_zeroext: ; GFX8-LABEL: v_mul_i16_zeroext: