AMDGPU/GlobalISel: Select VOP3P instructions

This only handles the basic cases. More work is needed to make better
use of op_sel.
This commit is contained in:
Matt Arsenault 2019-08-31 22:39:00 -04:00 committed by Matt Arsenault
parent 72eef820d5
commit dfce5fd50a
13 changed files with 792 additions and 108 deletions

View File

@ -47,6 +47,10 @@ def gi_vop3opselmods0 :
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods0">,
GIComplexPatternEquiv<VOP3OpSelMods0>;
def gi_vop3pmods :
GIComplexOperandMatcher<s32, "selectVOP3PMods">,
GIComplexPatternEquiv<VOP3PMods>;
def gi_vop3opselmods :
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
GIComplexPatternEquiv<VOP3OpSelMods>;

View File

@ -2463,6 +2463,58 @@ AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
}};
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PModsImpl(
Register Src, const MachineRegisterInfo &MRI) const {
unsigned Mods = 0;
MachineInstr *MI = MRI.getVRegDef(Src);
if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
Src = MI->getOperand(1).getReg();
MI = MRI.getVRegDef(Src);
}
// TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.
// Packed instructions do not have abs modifiers.
Mods |= SISrcMods::OP_SEL_1;
return std::make_pair(Src, Mods);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
MachineRegisterInfo &MRI
= Root.getParent()->getParent()->getParent()->getRegInfo();
Register Src;
unsigned Mods;
std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMods0(MachineOperand &Root) const {
MachineRegisterInfo &MRI
= Root.getParent()->getParent()->getParent()->getRegInfo();
Register Src;
unsigned Mods;
std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } , // src_mods
// FIXME: Handle clamp and op_sel
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
Register Src;

View File

@ -148,6 +148,15 @@ private:
InstructionSelector::ComplexRendererFns
selectVOP3Mods_nnan(MachineOperand &Root) const;
std::pair<Register, unsigned>
selectVOP3PModsImpl(Register Src, const MachineRegisterInfo &MRI) const;
InstructionSelector::ComplexRendererFns
selectVOP3PMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3PMods0(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3OpSelMods0(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns

View File

@ -0,0 +1,542 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
define <2 x half> @v_fmul_v2f16(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_fmul_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v1, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <2 x half> %a, %b
ret <2 x half> %mul
}
define <2 x half> @v_fmul_v2f16_fneg_lhs(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_fmul_v2f16_fneg_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v2f16_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v1, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
%mul = fmul <2 x half> %neg.a, %b
ret <2 x half> %mul
}
define <2 x half> @v_fmul_v2f16_fneg_rhs(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_fmul_v2f16_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v2f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v1, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg <2 x half> %b
%mul = fmul <2 x half> %a, %neg.b
ret <2 x half> %mul
}
define <2 x half> @v_fmul_v2f16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v1, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
%neg.b = fneg <2 x half> %b
%mul = fmul <2 x half> %neg.a, %neg.b
ret <2 x half> %mul
}
; FIXME
; define <3 x half> @v_fmul_v3f16(<3 x half> %a, <3 x half> %b) {
; %mul = fmul <3 x half> %a, %b
; ret <3 x half> %mul
; }
; define <3 x half> @v_fmul_v3f16_fneg_lhs(<3 x half> %a, <3 x half> %b) {
; %neg.a = fneg <3 x half> %a
; %mul = fmul <3 x half> %neg.a, %b
; ret <3 x half> %mul
; }
; define <3 x half> @v_fmul_v3f16_fneg_rhs(<3 x half> %a, <3 x half> %b) {
; %neg.b = fneg <3 x half> %b
; %mul = fmul <3 x half> %a, %neg.b
; ret <3 x half> %mul
; }
; define <3 x half> @v_fmul_v3f16_fneg_lhs_fneg_rhs(<3 x half> %a, <3 x half> %b) {
; %neg.a = fneg <3 x half> %a
; %neg.b = fneg <3 x half> %b
; %mul = fmul <3 x half> %neg.a, %neg.b
; ret <3 x half> %mul
; }
define <4 x half> @v_fmul_v4f16(<4 x half> %a, <4 x half> %b) {
; GFX9-LABEL: v_fmul_v4f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v3, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <4 x half> %a, %b
ret <4 x half> %mul
}
define <4 x half> @v_fmul_v4f16_fneg_lhs(<4 x half> %a, <4 x half> %b) {
; GFX9-LABEL: v_fmul_v4f16_fneg_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v4f16_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v3, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <4 x half> %a
%mul = fmul <4 x half> %neg.a, %b
ret <4 x half> %mul
}
define <4 x half> @v_fmul_v4f16_fneg_rhs(<4 x half> %a, <4 x half> %b) {
; GFX9-LABEL: v_fmul_v4f16_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v4f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v3, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg <4 x half> %b
%mul = fmul <4 x half> %a, %neg.b
ret <4 x half> %mul
}
define <4 x half> @v_fmul_v4f16_fneg_lhs_fneg_rhs(<4 x half> %a, <4 x half> %b) {
; GFX9-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v3, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <4 x half> %a
%neg.b = fneg <4 x half> %b
%mul = fmul <4 x half> %neg.a, %neg.b
ret <4 x half> %mul
}
define <6 x half> @v_fmul_v6f16(<6 x half> %a, <6 x half> %b) {
; GFX9-LABEL: v_fmul_v6f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v6f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v5, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v3, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <6 x half> %a, %b
ret <6 x half> %mul
}
define <6 x half> @v_fmul_v6f16_fneg_lhs(<6 x half> %a, <6 x half> %b) {
; GFX9-LABEL: v_fmul_v6f16_fneg_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v6f16_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v5, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v3, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <6 x half> %a
%mul = fmul <6 x half> %neg.a, %b
ret <6 x half> %mul
}
define <6 x half> @v_fmul_v6f16_fneg_rhs(<6 x half> %a, <6 x half> %b) {
; GFX9-LABEL: v_fmul_v6f16_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v6f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
; GFX8-NEXT: v_xor_b32_e32 v5, s4, v5
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v5, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v3, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg <6 x half> %b
%mul = fmul <6 x half> %a, %neg.b
ret <6 x half> %mul
}
define <6 x half> @v_fmul_v6f16_fneg_lhs_fneg_rhs(<6 x half> %a, <6 x half> %b) {
; GFX9-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
; GFX8-NEXT: v_xor_b32_e32 v5, s4, v5
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v5, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v3, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <6 x half> %a
%neg.b = fneg <6 x half> %b
%mul = fmul <6 x half> %neg.a, %neg.b
ret <6 x half> %mul
}
define <8 x half> @v_fmul_v8f16(<8 x half> %a, <8 x half> %b) {
; GFX9-LABEL: v_fmul_v8f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6
; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v8f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v7, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_mov_b32_e32 v7, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <8 x half> %a, %b
ret <8 x half> %mul
}
define <8 x half> @v_fmul_v8f16_fneg_lhs(<8 x half> %a, <8 x half> %b) {
; GFX9-LABEL: v_fmul_v8f16_fneg_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v8f16_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v7, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_mov_b32_e32 v7, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <8 x half> %a
%mul = fmul <8 x half> %neg.a, %b
ret <8 x half> %mul
}
define <8 x half> @v_fmul_v8f16_fneg_rhs(<8 x half> %a, <8 x half> %b) {
; GFX9-LABEL: v_fmul_v8f16_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v8f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
; GFX8-NEXT: v_xor_b32_e32 v5, s4, v5
; GFX8-NEXT: v_xor_b32_e32 v6, s4, v6
; GFX8-NEXT: v_xor_b32_e32 v7, s4, v7
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v7, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_mov_b32_e32 v7, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg <8 x half> %b
%mul = fmul <8 x half> %a, %neg.b
ret <8 x half> %mul
}
define <8 x half> @v_fmul_v8f16_fneg_lhs_fneg_rhs(<8 x half> %a, <8 x half> %b) {
; GFX9-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
; GFX8-NEXT: v_xor_b32_e32 v5, s4, v5
; GFX8-NEXT: v_xor_b32_e32 v6, s4, v6
; GFX8-NEXT: v_xor_b32_e32 v7, s4, v7
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v7, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_mov_b32_e32 v7, 16
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <8 x half> %a
%neg.b = fneg <8 x half> %b
%mul = fmul <8 x half> %neg.a, %neg.b
ret <8 x half> %mul
}

View File

@ -7,9 +7,6 @@
# ERR-NOT: remark
# ERR-GFX910: remark: <unknown>:0:0: cannot select: %2:sgpr(<2 x s16>) = G_ASHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: ashr_v2s16_ss)
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:sgpr, %1:vgpr(<2 x s16>) (in function: ashr_v2s16_sv)
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:vgpr, %1:sgpr(<2 x s16>) (in function: ashr_v2s16_vs)
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:vgpr, %1:vgpr(<2 x s16>) (in function: ashr_v2s16_vv)
# ERR-NOT: remark
---
@ -75,15 +72,16 @@ body: |
; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
; GFX9-LABEL: name: ashr_v2s16_sv
; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
; GFX10-LABEL: name: ashr_v2s16_sv
; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
%0:sgpr(<2 x s16>) = COPY $sgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr0
%2:vgpr(<2 x s16>) = G_ASHR %0, %1
@ -114,15 +112,16 @@ body: |
; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
; GFX9-LABEL: name: ashr_v2s16_vs
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX9: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
; GFX10-LABEL: name: ashr_v2s16_vs
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX10: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:sgpr(<2 x s16>) = COPY $sgpr0
%2:vgpr(<2 x s16>) = G_ASHR %0, %1
@ -153,15 +152,16 @@ body: |
; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
; GFX9-LABEL: name: ashr_v2s16_vv
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
; GFX10-LABEL: name: ashr_v2s16_vv
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr1
%2:vgpr(<2 x s16>) = G_ASHR %0, %1

View File

@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
---
@ -108,9 +108,9 @@ body: |
liveins: $vgpr0
; GFX9-LABEL: name: fcanonicalize_v2f16_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FCANONICALIZE [[COPY]]
; GFX9: S_ENDPGM 0, implicit [[FCANONICALIZE]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
@ -131,9 +131,9 @@ body: |
liveins: $vgpr0
; GFX9-LABEL: name: fcanonicalize_v2f16_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FCANONICALIZE [[COPY]]
; GFX9: S_ENDPGM 0, implicit [[FCANONICALIZE]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 0, 15360, 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1

View File

@ -1,4 +1,5 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
---
name: fmaxnum_ieee_v2f16_vv
@ -10,10 +11,10 @@ body: |
liveins: $sgpr0, $sgpr1
; GFX9-LABEL: name: fmaxnum_ieee_v2f16_vv
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(<2 x s16>) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]]
; GFX9: S_ENDPGM 0, implicit [[FMAXNUM_IEEE]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr1
%2:vgpr(<2 x s16>) = G_FMAXNUM_IEEE %0, %1

View File

@ -1,4 +1,5 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
# FIXME: Ideally this would fail to select with ieee mode enabled.
---
@ -11,10 +12,10 @@ body: |
liveins: $sgpr0, $sgpr1
; GFX9-LABEL: name: fmaxnum_v2f16_vv
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; GFX9: [[FMAXNUM:%[0-9]+]]:vgpr(<2 x s16>) = G_FMAXNUM [[COPY]], [[COPY1]]
; GFX9: S_ENDPGM 0, implicit [[FMAXNUM]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr1
%2:vgpr(<2 x s16>) = G_FMAXNUM %0, %1

View File

@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
---
name: fminnum_ieee_v2f16_vv
@ -11,10 +11,10 @@ body: |
liveins: $sgpr0, $sgpr1
; GFX9-LABEL: name: fminnum_ieee_v2f16_vv
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(<2 x s16>) = G_FMINNUM_IEEE [[COPY]], [[COPY1]]
; GFX9: S_ENDPGM 0, implicit [[FMINNUM_IEEE]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr1
%2:vgpr(<2 x s16>) = G_FMINNUM_IEEE %0, %1

View File

@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
---
name: fminnum_v2f16_vv
@ -11,10 +11,10 @@ body: |
liveins: $sgpr0, $sgpr1
; GFX9-LABEL: name: fminnum_v2f16_vv
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; GFX9: [[FMINNUM:%[0-9]+]]:vgpr(<2 x s16>) = G_FMINNUM [[COPY]], [[COPY1]]
; GFX9: S_ENDPGM 0, implicit [[FMINNUM]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr1
%2:vgpr(<2 x s16>) = G_FMINNUM %0, %1

View File

@ -0,0 +1,74 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
---
name: fmul_v2f16_vv
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX9-LABEL: name: fmul_v2f16_vv
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr1
%2:vgpr(<2 x s16>) = G_FMUL %0, %1
S_ENDPGM 0, implicit %2
...
---
name: fmul_v2f16_fneg_v_fneg_v
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX9-LABEL: name: fmul_v2f16_fneg_v_fneg_v
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 11, [[COPY]], 11, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr1
%2:vgpr(<2 x s16>) = G_FNEG %0
%3:vgpr(<2 x s16>) = G_FNEG %1
%4:vgpr(<2 x s16>) = G_FMUL %2, %3
S_ENDPGM 0, implicit %4
...
---
name: fmul_v2f16_fneg_lo_v_v
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-LABEL: name: fmul_v2f16_fneg_lo_v_v
; GFX9: [[COPY:%[0-9]+]]:vgpr_32(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
; GFX9: [[FNEG:%[0-9]+]]:vgpr(s16) = G_FNEG [[TRUNC]]
; GFX9: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[FNEG]](s16)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:vgpr_32(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY2]](s32)
; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_PK_MUL_F16 8, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 8, [[COPY]](<2 x s16>), 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]](<2 x s16>)
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s16) = G_TRUNC %1
%4:vgpr(s16) = G_FNEG %3
%5:vgpr(s32) = G_ANYEXT %4
%6:vgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %5, %2
%7:vgpr(<2 x s16>) = G_FMUL %6, %0
S_ENDPGM 0, implicit %7
...

View File

@ -7,9 +7,6 @@
# ERR-NOT: remark
# ERR-GFX910: remark: <unknown>:0:0: cannot select: %2:sgpr(<2 x s16>) = G_LSHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: lshr_v2s16_ss)
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:sgpr, %1:vgpr(<2 x s16>) (in function: lshr_v2s16_sv)
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:vgpr, %1:sgpr(<2 x s16>) (in function: lshr_v2s16_vs)
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:vgpr, %1:vgpr(<2 x s16>) (in function: lshr_v2s16_vv)
# ERR-NOT: remark
---
@ -75,15 +72,16 @@ body: |
; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
; GFX9-LABEL: name: lshr_v2s16_sv
; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
; GFX10-LABEL: name: lshr_v2s16_sv
; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
%0:sgpr(<2 x s16>) = COPY $sgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr0
%2:vgpr(<2 x s16>) = G_LSHR %0, %1
@ -114,15 +112,16 @@ body: |
; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
; GFX9-LABEL: name: lshr_v2s16_vs
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX9: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
; GFX10-LABEL: name: lshr_v2s16_vs
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX10: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:sgpr(<2 x s16>) = COPY $sgpr0
%2:vgpr(<2 x s16>) = G_LSHR %0, %1
@ -153,15 +152,16 @@ body: |
; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
; GFX9-LABEL: name: lshr_v2s16_vv
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
; GFX10-LABEL: name: lshr_v2s16_vv
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr1
%2:vgpr(<2 x s16>) = G_LSHR %0, %1

View File

@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s
# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
@ -6,9 +7,6 @@
# ERR-NOT: remark
# ERR-GFX910: remark: <unknown>:0:0: cannot select: %2:sgpr(<2 x s16>) = G_SHL %0:sgpr, %1:sgpr(<2 x s16>) (in function: shl_v2s16_ss)
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:sgpr, %1:vgpr(<2 x s16>) (in function: shl_v2s16_sv)
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:vgpr, %1:sgpr(<2 x s16>) (in function: shl_v2s16_vs)
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:vgpr, %1:vgpr(<2 x s16>) (in function: shl_v2s16_vv)
# ERR-NOT: remark
---
@ -74,15 +72,16 @@ body: |
; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
; GFX9-LABEL: name: shl_v2s16_sv
; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
; GFX10-LABEL: name: shl_v2s16_sv
; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
%0:sgpr(<2 x s16>) = COPY $sgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr0
%2:vgpr(<2 x s16>) = G_SHL %0, %1
@ -113,15 +112,16 @@ body: |
; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
; GFX9-LABEL: name: shl_v2s16_vs
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX9: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
; GFX10-LABEL: name: shl_v2s16_vs
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX10: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:sgpr(<2 x s16>) = COPY $sgpr0
%2:vgpr(<2 x s16>) = G_SHL %0, %1
@ -152,15 +152,16 @@ body: |
; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
; GFX9-LABEL: name: shl_v2s16_vv
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
; GFX10-LABEL: name: shl_v2s16_vv
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = COPY $vgpr1
%2:vgpr(<2 x s16>) = G_SHL %0, %1