forked from OSchip/llvm-project
AMDGPU/GlobalISel: Select VOP3P instructions
This only handles the basic cases. More work is needed to make better use of op_sel.
This commit is contained in:
parent
72eef820d5
commit
dfce5fd50a
|
@ -47,6 +47,10 @@ def gi_vop3opselmods0 :
|
|||
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods0">,
|
||||
GIComplexPatternEquiv<VOP3OpSelMods0>;
|
||||
|
||||
// GlobalISel counterpart of the VOP3PMods complex pattern. Operands using it
// are matched by the C++ selector routine named "selectVOP3PMods".
def gi_vop3pmods :
|
||||
GIComplexOperandMatcher<s32, "selectVOP3PMods">,
|
||||
GIComplexPatternEquiv<VOP3PMods>;
|
||||
|
||||
// GlobalISel counterpart of the VOP3OpSelMods complex pattern. Operands using
// it are matched by the C++ selector routine named "selectVOP3OpSelMods".
def gi_vop3opselmods :
|
||||
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
|
||||
GIComplexPatternEquiv<VOP3OpSelMods>;
|
||||
|
|
|
@ -2463,6 +2463,58 @@ AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
|
|||
}};
|
||||
}
|
||||
|
||||
std::pair<Register, unsigned>
|
||||
AMDGPUInstructionSelector::selectVOP3PModsImpl(
|
||||
Register Src, const MachineRegisterInfo &MRI) const {
|
||||
unsigned Mods = 0;
|
||||
MachineInstr *MI = MRI.getVRegDef(Src);
|
||||
|
||||
if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
|
||||
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
|
||||
Src = MI->getOperand(1).getReg();
|
||||
MI = MRI.getVRegDef(Src);
|
||||
}
|
||||
|
||||
// TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.
|
||||
|
||||
// Packed instructions do not have abs modifiers.
|
||||
Mods |= SISrcMods::OP_SEL_1;
|
||||
|
||||
return std::make_pair(Src, Mods);
|
||||
}
|
||||
|
||||
/// Complex-operand matcher for a packed (VOP3P) source operand.
///
/// \param Root  The operand being matched; its register is handed to
///              selectVOP3PModsImpl so a feeding G_FNEG can be folded.
/// \returns     Two renderers, in order: one adding the (possibly
///              looked-through) source register, one adding the source
///              modifier immediate.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  // Operand -> instruction -> basic block -> function, then its register info.
  const MachineRegisterInfo &RegInfo =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  std::pair<Register, unsigned> Folded =
      selectVOP3PModsImpl(Root.getReg(), RegInfo);
  Register FoldedSrc = Folded.first;
  unsigned SrcMods = Folded.second;

  // The renderers run after this function returns, so capture by value.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(FoldedSrc); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(SrcMods); } // src_mods
  }};
}
|
||||
|
||||
/// Complex-operand matcher for a packed (VOP3P) source operand that also
/// renders one trailing immediate operand.
///
/// \param Root  The operand being matched; its register is handed to
///              selectVOP3PModsImpl so a feeding G_FNEG can be folded.
/// \returns     Three renderers, in order: the source register, the source
///              modifier immediate, and a constant 0 immediate (clamp/op_sel
///              are not handled yet, see the FIXME).
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMods0(MachineOperand &Root) const {
  // Operand -> instruction -> basic block -> function, then its register info.
  const MachineRegisterInfo &RegInfo =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  std::pair<Register, unsigned> Folded =
      selectVOP3PModsImpl(Root.getReg(), RegInfo);
  Register FoldedSrc = Folded.first;
  unsigned SrcMods = Folded.second;

  // The renderers run after this function returns, so capture by value.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(FoldedSrc); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(SrcMods); }, // src_mods
      // FIXME: Handle clamp and op_sel
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
  }};
}
|
||||
|
||||
InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
|
||||
Register Src;
|
||||
|
|
|
@ -148,6 +148,15 @@ private:
|
|||
InstructionSelector::ComplexRendererFns
|
||||
selectVOP3Mods_nnan(MachineOperand &Root) const;
|
||||
|
||||
// Shared helper for the VOP3P matchers below: returns the register to use for
// a packed source operand together with its source-modifier bitmask, looking
// through a defining G_FNEG of Src when there is one.
std::pair<Register, unsigned>
|
||||
selectVOP3PModsImpl(Register Src, const MachineRegisterInfo &MRI) const;
|
||||
|
||||
// Renders a packed source operand as (register, src_mods immediate).
InstructionSelector::ComplexRendererFns
|
||||
selectVOP3PMods(MachineOperand &Root) const;
|
||||
|
||||
// Same as selectVOP3PMods, plus a trailing immediate that is currently
// always 0.
InstructionSelector::ComplexRendererFns
|
||||
selectVOP3PMods0(MachineOperand &Root) const;
|
||||
|
||||
InstructionSelector::ComplexRendererFns
|
||||
selectVOP3OpSelMods0(MachineOperand &Root) const;
|
||||
InstructionSelector::ComplexRendererFns
|
||||
|
|
|
@ -0,0 +1,542 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
|
||||
|
||||
define <2 x half> @v_fmul_v2f16(<2 x half> %a, <2 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v2f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v2f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%mul = fmul <2 x half> %a, %b
|
||||
ret <2 x half> %mul
|
||||
}
|
||||
|
||||
define <2 x half> @v_fmul_v2f16_fneg_lhs(<2 x half> %a, <2 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v2f16_fneg_lhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v2f16_fneg_lhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
|
||||
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.a = fneg <2 x half> %a
|
||||
%mul = fmul <2 x half> %neg.a, %b
|
||||
ret <2 x half> %mul
|
||||
}
|
||||
|
||||
define <2 x half> @v_fmul_v2f16_fneg_rhs(<2 x half> %a, <2 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v2f16_fneg_rhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v2f16_fneg_rhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.b = fneg <2 x half> %b
|
||||
%mul = fmul <2 x half> %a, %neg.b
|
||||
ret <2 x half> %mul
|
||||
}
|
||||
|
||||
define <2 x half> @v_fmul_v2f16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
|
||||
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
|
||||
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.a = fneg <2 x half> %a
|
||||
%neg.b = fneg <2 x half> %b
|
||||
%mul = fmul <2 x half> %neg.a, %neg.b
|
||||
ret <2 x half> %mul
|
||||
}
|
||||
|
||||
; FIXME
|
||||
; define <3 x half> @v_fmul_v3f16(<3 x half> %a, <3 x half> %b) {
|
||||
; %mul = fmul <3 x half> %a, %b
|
||||
; ret <3 x half> %mul
|
||||
; }
|
||||
|
||||
; define <3 x half> @v_fmul_v3f16_fneg_lhs(<3 x half> %a, <3 x half> %b) {
|
||||
; %neg.a = fneg <3 x half> %a
|
||||
; %mul = fmul <3 x half> %neg.a, %b
|
||||
; ret <3 x half> %mul
|
||||
; }
|
||||
|
||||
; define <3 x half> @v_fmul_v3f16_fneg_rhs(<3 x half> %a, <3 x half> %b) {
|
||||
; %neg.b = fneg <3 x half> %b
|
||||
; %mul = fmul <3 x half> %a, %neg.b
|
||||
; ret <3 x half> %mul
|
||||
; }
|
||||
|
||||
; define <3 x half> @v_fmul_v3f16_fneg_lhs_fneg_rhs(<3 x half> %a, <3 x half> %b) {
|
||||
; %neg.a = fneg <3 x half> %a
|
||||
; %neg.b = fneg <3 x half> %b
|
||||
; %mul = fmul <3 x half> %neg.a, %neg.b
|
||||
; ret <3 x half> %mul
|
||||
; }
|
||||
|
||||
define <4 x half> @v_fmul_v4f16(<4 x half> %a, <4 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v4f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v4f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%mul = fmul <4 x half> %a, %b
|
||||
ret <4 x half> %mul
|
||||
}
|
||||
|
||||
define <4 x half> @v_fmul_v4f16_fneg_lhs(<4 x half> %a, <4 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v4f16_fneg_lhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v4f16_fneg_lhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
|
||||
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
|
||||
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.a = fneg <4 x half> %a
|
||||
%mul = fmul <4 x half> %neg.a, %b
|
||||
ret <4 x half> %mul
|
||||
}
|
||||
|
||||
define <4 x half> @v_fmul_v4f16_fneg_rhs(<4 x half> %a, <4 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v4f16_fneg_rhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v4f16_fneg_rhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
|
||||
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
|
||||
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.b = fneg <4 x half> %b
|
||||
%mul = fmul <4 x half> %a, %neg.b
|
||||
ret <4 x half> %mul
|
||||
}
|
||||
|
||||
define <4 x half> @v_fmul_v4f16_fneg_lhs_fneg_rhs(<4 x half> %a, <4 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
|
||||
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
|
||||
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
|
||||
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
|
||||
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.a = fneg <4 x half> %a
|
||||
%neg.b = fneg <4 x half> %b
|
||||
%mul = fmul <4 x half> %neg.a, %neg.b
|
||||
ret <4 x half> %mul
|
||||
}
|
||||
|
||||
define <6 x half> @v_fmul_v6f16(<6 x half> %a, <6 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v6f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4
|
||||
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v6f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v5, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%mul = fmul <6 x half> %a, %b
|
||||
ret <6 x half> %mul
|
||||
}
|
||||
|
||||
define <6 x half> @v_fmul_v6f16_fneg_lhs(<6 x half> %a, <6 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v6f16_fneg_lhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v6f16_fneg_lhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
|
||||
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
|
||||
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
|
||||
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
|
||||
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v5, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.a = fneg <6 x half> %a
|
||||
%mul = fmul <6 x half> %neg.a, %b
|
||||
ret <6 x half> %mul
|
||||
}
|
||||
|
||||
define <6 x half> @v_fmul_v6f16_fneg_rhs(<6 x half> %a, <6 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v6f16_fneg_rhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v6f16_fneg_rhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
|
||||
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
|
||||
; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
|
||||
; GFX8-NEXT: v_xor_b32_e32 v5, s4, v5
|
||||
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v5, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.b = fneg <6 x half> %b
|
||||
%mul = fmul <6 x half> %a, %neg.b
|
||||
ret <6 x half> %mul
|
||||
}
|
||||
|
||||
define <6 x half> @v_fmul_v6f16_fneg_lhs_fneg_rhs(<6 x half> %a, <6 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 neg_lo:[1,1] neg_hi:[1,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v4 neg_lo:[1,1] neg_hi:[1,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v5 neg_lo:[1,1] neg_hi:[1,1]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
|
||||
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
|
||||
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
|
||||
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
|
||||
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
|
||||
; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
|
||||
; GFX8-NEXT: v_xor_b32_e32 v5, s4, v5
|
||||
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v5, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.a = fneg <6 x half> %a
|
||||
%neg.b = fneg <6 x half> %b
|
||||
%mul = fmul <6 x half> %neg.a, %neg.b
|
||||
ret <6 x half> %mul
|
||||
}
|
||||
|
||||
define <8 x half> @v_fmul_v8f16(<8 x half> %a, <8 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v8f16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5
|
||||
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6
|
||||
; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v8f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v7, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v7, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%mul = fmul <8 x half> %a, %b
|
||||
ret <8 x half> %mul
|
||||
}
|
||||
|
||||
define <8 x half> @v_fmul_v8f16_fneg_lhs(<8 x half> %a, <8 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v8f16_fneg_lhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v8f16_fneg_lhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
|
||||
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
|
||||
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
|
||||
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
|
||||
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
|
||||
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v7, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v7, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.a = fneg <8 x half> %a
|
||||
%mul = fmul <8 x half> %neg.a, %b
|
||||
ret <8 x half> %mul
|
||||
}
|
||||
|
||||
define <8 x half> @v_fmul_v8f16_fneg_rhs(<8 x half> %a, <8 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v8f16_fneg_rhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v8f16_fneg_rhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
|
||||
; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
|
||||
; GFX8-NEXT: v_xor_b32_e32 v5, s4, v5
|
||||
; GFX8-NEXT: v_xor_b32_e32 v6, s4, v6
|
||||
; GFX8-NEXT: v_xor_b32_e32 v7, s4, v7
|
||||
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v7, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v7, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.b = fneg <8 x half> %b
|
||||
%mul = fmul <8 x half> %a, %neg.b
|
||||
ret <8 x half> %mul
|
||||
}
|
||||
|
||||
define <8 x half> @v_fmul_v8f16_fneg_lhs_fneg_rhs(<8 x half> %a, <8 x half> %b) {
|
||||
; GFX9-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[1,1] neg_hi:[1,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[1,1] neg_hi:[1,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[1,1] neg_hi:[1,1]
|
||||
; GFX9-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[1,1] neg_hi:[1,1]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0x80008000
|
||||
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
|
||||
; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
|
||||
; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
|
||||
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
|
||||
; GFX8-NEXT: v_xor_b32_e32 v3, s4, v3
|
||||
; GFX8-NEXT: v_xor_b32_e32 v5, s4, v5
|
||||
; GFX8-NEXT: v_xor_b32_e32 v6, s4, v6
|
||||
; GFX8-NEXT: v_xor_b32_e32 v7, s4, v7
|
||||
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
|
||||
; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v7, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v7, 16
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%neg.a = fneg <8 x half> %a
|
||||
%neg.b = fneg <8 x half> %b
|
||||
%mul = fmul <8 x half> %neg.a, %neg.b
|
||||
ret <8 x half> %mul
|
||||
}
|
|
@ -7,9 +7,6 @@
|
|||
|
||||
# ERR-NOT: remark
|
||||
# ERR-GFX910: remark: <unknown>:0:0: cannot select: %2:sgpr(<2 x s16>) = G_ASHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: ashr_v2s16_ss)
|
||||
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:sgpr, %1:vgpr(<2 x s16>) (in function: ashr_v2s16_sv)
|
||||
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:vgpr, %1:sgpr(<2 x s16>) (in function: ashr_v2s16_vs)
|
||||
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:vgpr, %1:vgpr(<2 x s16>) (in function: ashr_v2s16_vv)
|
||||
# ERR-NOT: remark
|
||||
|
||||
---
|
||||
|
@ -75,15 +72,16 @@ body: |
|
|||
; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
|
||||
; GFX9-LABEL: name: ashr_v2s16_sv
|
||||
; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
|
||||
; GFX10-LABEL: name: ashr_v2s16_sv
|
||||
; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
|
||||
%0:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%2:vgpr(<2 x s16>) = G_ASHR %0, %1
|
||||
|
@ -114,15 +112,16 @@ body: |
|
|||
; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
|
||||
; GFX9-LABEL: name: ashr_v2s16_vs
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
|
||||
; GFX10-LABEL: name: ashr_v2s16_vs
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX10: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
%2:vgpr(<2 x s16>) = G_ASHR %0, %1
|
||||
|
@ -153,15 +152,16 @@ body: |
|
|||
; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
|
||||
; GFX9-LABEL: name: ashr_v2s16_vv
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
|
||||
; GFX10-LABEL: name: ashr_v2s16_vv
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX10: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
%2:vgpr(<2 x s16>) = G_ASHR %0, %1
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
|
@ -108,9 +108,9 @@ body: |
|
|||
liveins: $vgpr0
|
||||
|
||||
; GFX9-LABEL: name: fcanonicalize_v2f16_denorm
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FCANONICALIZE [[COPY]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[FCANONICALIZE]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = G_FCANONICALIZE %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
@ -131,9 +131,9 @@ body: |
|
|||
liveins: $vgpr0
|
||||
|
||||
; GFX9-LABEL: name: fcanonicalize_v2f16_flush
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FCANONICALIZE [[COPY]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[FCANONICALIZE]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 0, 15360, 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = G_FCANONICALIZE %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: fmaxnum_ieee_v2f16_vv
|
||||
|
@ -10,10 +11,10 @@ body: |
|
|||
liveins: $sgpr0, $sgpr1
|
||||
|
||||
; GFX9-LABEL: name: fmaxnum_ieee_v2f16_vv
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(<2 x s16>) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[FMAXNUM_IEEE]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
%2:vgpr(<2 x s16>) = G_FMAXNUM_IEEE %0, %1
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# FIXME: Ideally this would fail to select with ieee mode enabled.
|
||||
|
||||
---
|
||||
|
@ -11,10 +12,10 @@ body: |
|
|||
liveins: $sgpr0, $sgpr1
|
||||
|
||||
; GFX9-LABEL: name: fmaxnum_v2f16_vv
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
; GFX9: [[FMAXNUM:%[0-9]+]]:vgpr(<2 x s16>) = G_FMAXNUM [[COPY]], [[COPY1]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[FMAXNUM]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
%2:vgpr(<2 x s16>) = G_FMAXNUM %0, %1
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: fminnum_ieee_v2f16_vv
|
||||
|
@ -11,10 +11,10 @@ body: |
|
|||
liveins: $sgpr0, $sgpr1
|
||||
|
||||
; GFX9-LABEL: name: fminnum_ieee_v2f16_vv
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(<2 x s16>) = G_FMINNUM_IEEE [[COPY]], [[COPY1]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[FMINNUM_IEEE]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
%2:vgpr(<2 x s16>) = G_FMINNUM_IEEE %0, %1
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: fminnum_v2f16_vv
|
||||
|
@ -11,10 +11,10 @@ body: |
|
|||
liveins: $sgpr0, $sgpr1
|
||||
|
||||
; GFX9-LABEL: name: fminnum_v2f16_vv
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
; GFX9: [[FMINNUM:%[0-9]+]]:vgpr(<2 x s16>) = G_FMINNUM [[COPY]], [[COPY1]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[FMINNUM]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
%2:vgpr(<2 x s16>) = G_FMINNUM %0, %1
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: fmul_v2f16_vv
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX9-LABEL: name: fmul_v2f16_vv
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
%2:vgpr(<2 x s16>) = G_FMUL %0, %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
name: fmul_v2f16_fneg_v_fneg_v
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX9-LABEL: name: fmul_v2f16_fneg_v_fneg_v
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 11, [[COPY]], 11, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
%2:vgpr(<2 x s16>) = G_FNEG %0
|
||||
%3:vgpr(<2 x s16>) = G_FNEG %1
|
||||
%4:vgpr(<2 x s16>) = G_FMUL %2, %3
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
---
|
||||
name: fmul_v2f16_fneg_lo_v_v
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2
|
||||
|
||||
; GFX9-LABEL: name: fmul_v2f16_fneg_lo_v_v
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
|
||||
; GFX9: [[FNEG:%[0-9]+]]:vgpr(s16) = G_FNEG [[TRUNC]]
|
||||
; GFX9: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[FNEG]](s16)
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:vgpr_32(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY2]](s32)
|
||||
; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_PK_MUL_F16 8, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 8, [[COPY]](<2 x s16>), 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]](<2 x s16>)
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s32) = COPY $vgpr2
|
||||
%3:vgpr(s16) = G_TRUNC %1
|
||||
%4:vgpr(s16) = G_FNEG %3
|
||||
%5:vgpr(s32) = G_ANYEXT %4
|
||||
%6:vgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %5, %2
|
||||
%7:vgpr(<2 x s16>) = G_FMUL %6, %0
|
||||
S_ENDPGM 0, implicit %7
|
||||
...
|
|
@ -7,9 +7,6 @@
|
|||
|
||||
# ERR-NOT: remark
|
||||
# ERR-GFX910: remark: <unknown>:0:0: cannot select: %2:sgpr(<2 x s16>) = G_LSHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: lshr_v2s16_ss)
|
||||
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:sgpr, %1:vgpr(<2 x s16>) (in function: lshr_v2s16_sv)
|
||||
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:vgpr, %1:sgpr(<2 x s16>) (in function: lshr_v2s16_vs)
|
||||
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:vgpr, %1:vgpr(<2 x s16>) (in function: lshr_v2s16_vv)
|
||||
# ERR-NOT: remark
|
||||
|
||||
---
|
||||
|
@ -75,15 +72,16 @@ body: |
|
|||
; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
|
||||
; GFX9-LABEL: name: lshr_v2s16_sv
|
||||
; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
|
||||
; GFX10-LABEL: name: lshr_v2s16_sv
|
||||
; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
|
||||
%0:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%2:vgpr(<2 x s16>) = G_LSHR %0, %1
|
||||
|
@ -114,15 +112,16 @@ body: |
|
|||
; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
|
||||
; GFX9-LABEL: name: lshr_v2s16_vs
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
|
||||
; GFX10-LABEL: name: lshr_v2s16_vs
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX10: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
%2:vgpr(<2 x s16>) = G_LSHR %0, %1
|
||||
|
@ -153,15 +152,16 @@ body: |
|
|||
; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
|
||||
; GFX9-LABEL: name: lshr_v2s16_vv
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
|
||||
; GFX10-LABEL: name: lshr_v2s16_vv
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX10: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
%2:vgpr(<2 x s16>) = G_LSHR %0, %1
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
|
||||
|
||||
|
@ -6,9 +7,6 @@
|
|||
|
||||
# ERR-NOT: remark
|
||||
# ERR-GFX910: remark: <unknown>:0:0: cannot select: %2:sgpr(<2 x s16>) = G_SHL %0:sgpr, %1:sgpr(<2 x s16>) (in function: shl_v2s16_ss)
|
||||
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:sgpr, %1:vgpr(<2 x s16>) (in function: shl_v2s16_sv)
|
||||
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:vgpr, %1:sgpr(<2 x s16>) (in function: shl_v2s16_vs)
|
||||
# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:vgpr, %1:vgpr(<2 x s16>) (in function: shl_v2s16_vv)
|
||||
# ERR-NOT: remark
|
||||
|
||||
---
|
||||
|
@ -74,15 +72,16 @@ body: |
|
|||
; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
|
||||
; GFX9-LABEL: name: shl_v2s16_sv
|
||||
; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
|
||||
; GFX10-LABEL: name: shl_v2s16_sv
|
||||
; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
|
||||
%0:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%2:vgpr(<2 x s16>) = G_SHL %0, %1
|
||||
|
@ -113,15 +112,16 @@ body: |
|
|||
; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
|
||||
; GFX9-LABEL: name: shl_v2s16_vs
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
|
||||
; GFX10-LABEL: name: shl_v2s16_vs
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX10: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:sgpr(<2 x s16>) = COPY $sgpr0
|
||||
%2:vgpr(<2 x s16>) = G_SHL %0, %1
|
||||
|
@ -152,15 +152,16 @@ body: |
|
|||
; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
|
||||
; GFX9-LABEL: name: shl_v2s16_vv
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
|
||||
; GFX10-LABEL: name: shl_v2s16_vv
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
|
||||
; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX10: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]]
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = COPY $vgpr1
|
||||
%2:vgpr(<2 x s16>) = G_SHL %0, %1
|
||||
|
|
Loading…
Reference in New Issue