AMDGPU/GlobalISel: Select llvm.amdgcn.class

Also fixes missing SubtargetPredicate on f16 class instructions.

llvm-svn: 371436
This commit is contained in:
Matt Arsenault 2019-09-09 18:29:45 +00:00
parent d6c1f5bb15
commit 77e3e9cafd
7 changed files with 303 additions and 1 deletions

View File

@ -34,6 +34,10 @@ def gi_vop3omods :
GIComplexOperandMatcher<s32, "selectVOP3OMods">,
GIComplexPatternEquiv<VOP3OMods>;
// GlobalISel counterpart of the VOP3Mods0Clamp0OMod complex pattern: imported
// patterns that use it are matched by
// AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod.
def gi_vop3omods0clamp0omod :
GIComplexOperandMatcher<s32, "selectVOP3Mods0Clamp0OMod">,
GIComplexPatternEquiv<VOP3Mods0Clamp0OMod>;
// GlobalISel counterpart of the VOP3OpSelMods0 complex pattern, handled by the
// matcher function named selectVOP3OpSelMods0.
def gi_vop3opselmods0 :
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods0">,
GIComplexPatternEquiv<VOP3OpSelMods0>;

View File

@ -141,7 +141,7 @@ def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>;
def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>;
def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;
// Raw AMDGPUISD::FP_CLASS node. Selection patterns should normally go through
// the AMDGPUfp_class PatFrags, which also accepts the llvm.amdgcn.class
// intrinsic form.
def AMDGPUfp_class_impl : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;
// out = max(a, b) a and b are floats, where a nan comparison fails.
// This is not commutative because this gives the second operand:
@ -438,6 +438,10 @@ def AMDGPUldexp : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_ldexp node:$src0, node:$src1),
(AMDGPUldexp_impl node:$src0, node:$src1)]>;
// Matches either the llvm.amdgcn.class intrinsic directly or the
// AMDGPUISD::FP_CLASS node (AMDGPUfp_class_impl), so a single instruction
// pattern covers both forms.
def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_class node:$src0, node:$src1),
(AMDGPUfp_class_impl node:$src0, node:$src1)]>;
// Matches either the llvm.amdgcn.fmed3 intrinsic directly or the
// AMDGPUfmed3_impl node, so a single instruction pattern covers both forms.
def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2),
(AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)]>;

View File

@ -1479,6 +1479,24 @@ AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
}};
}
/// Complex operand matcher for a VOP3 source operand on an instruction that
/// also carries clamp and omod operands.
///
/// The source register and its modifier bits come from selectVOP3ModsImpl
/// applied to \p Root's register; clamp and omod are always rendered as 0.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const {
  // Reach the register info through the operand's parent chain.
  auto *MF = Root.getParent()->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register SrcReg;
  unsigned SrcMods;
  std::tie(SrcReg, SrcMods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  auto RenderSrc = [=](MachineInstrBuilder &MIB) { MIB.addReg(SrcReg); };
  auto RenderSrcMods = [=](MachineInstrBuilder &MIB) { MIB.addImm(SrcMods); };
  auto RenderZero = [=](MachineInstrBuilder &MIB) { MIB.addImm(0); };

  // Renderers run in operand order: src0, src0_mods, clamp, omod.
  return {{RenderSrc, RenderSrcMods, RenderZero, RenderZero}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
return {{

View File

@ -110,6 +110,8 @@ private:
InstructionSelector::ComplexRendererFns
selectVOP3Mods0(MachineOperand &Root) const;
// Complex operand matcher that renders the source register and its source
// modifiers for Root, followed by clamp and omod operands that are always 0.
InstructionSelector::ComplexRendererFns
selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3OMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand &Root) const;

View File

@ -738,8 +738,11 @@ defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">;
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">;
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">;
defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">;
// The f16 class compares are only selectable on subtargets that satisfy
// Has16BitInsts, unlike the f32/f64 variants defined just before this block.
let SubtargetPredicate = Has16BitInsts in {
defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">;
defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
}
//===----------------------------------------------------------------------===//
// V_ICMPIntrinsic Pattern.

View File

@ -0,0 +1,173 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
---
# llvm.amdgcn.class with an s32 first operand in an SGPR and an s32 second
# operand in a VGPR; both runs expect V_CMP_CLASS_F32_e64.
name: class_s32_vcc_sv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0
; WAVE64-LABEL: name: class_s32_vcc_sv
; WAVE64: liveins: $sgpr0, $vgpr0
; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
; WAVE32-LABEL: name: class_s32_vcc_sv
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32: $vcc_hi = IMPLICIT_DEF
; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1
S_ENDPGM 0, implicit %2
...
---
# llvm.amdgcn.class with an s32 first operand in a VGPR and an s32 second
# operand in an SGPR; both runs expect V_CMP_CLASS_F32_e64.
name: class_s32_vcc_vs
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0
; WAVE64-LABEL: name: class_s32_vcc_vs
; WAVE64: liveins: $sgpr0, $vgpr0
; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; WAVE64: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
; WAVE32-LABEL: name: class_s32_vcc_vs
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32: $vcc_hi = IMPLICIT_DEF
; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; WAVE32: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1
S_ENDPGM 0, implicit %2
...
---
# llvm.amdgcn.class with both s32 operands in VGPRs; both runs expect
# V_CMP_CLASS_F32_e64.
name: class_s32_vcc_vv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; WAVE64-LABEL: name: class_s32_vcc_vv
; WAVE64: liveins: $vgpr0, $vgpr1
; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
; WAVE32-LABEL: name: class_s32_vcc_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32: $vcc_hi = IMPLICIT_DEF
; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1
S_ENDPGM 0, implicit %2
...
---
# llvm.amdgcn.class with an s64 first operand in an SGPR pair and an s32 second
# operand in a VGPR; both runs expect V_CMP_CLASS_F64_e64.
name: class_s64_vcc_sv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1, $vgpr0
; WAVE64-LABEL: name: class_s64_vcc_sv
; WAVE64: liveins: $sgpr0_sgpr1, $vgpr0
; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
; WAVE32-LABEL: name: class_s64_vcc_sv
; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0
; WAVE32: $vcc_hi = IMPLICIT_DEF
; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
%2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1
S_ENDPGM 0, implicit %2
...
---
# llvm.amdgcn.class with an s64 first operand in a VGPR pair and an s32 second
# operand in an SGPR; both runs expect V_CMP_CLASS_F64_e64.
# The live-in list names exactly the physical registers the body copies from
# ($sgpr0 and $vgpr0_vgpr1).
name: class_s64_vcc_vs
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0, $vgpr0_vgpr1
    ; WAVE64-LABEL: name: class_s64_vcc_vs
    ; WAVE64: liveins: $sgpr0, $vgpr0_vgpr1
    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
    ; WAVE64: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
    ; WAVE32-LABEL: name: class_s64_vcc_vs
    ; WAVE32: liveins: $sgpr0, $vgpr0_vgpr1
    ; WAVE32: $vcc_hi = IMPLICIT_DEF
    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
    ; WAVE32: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
    %0:vgpr(s64) = COPY $vgpr0_vgpr1
    %1:sgpr(s32) = COPY $sgpr0
    %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1
    S_ENDPGM 0, implicit %2
...
---
# llvm.amdgcn.class with an s64 first operand in a VGPR pair and an s32 second
# operand in a VGPR; both runs expect V_CMP_CLASS_F64_e64.
name: class_s64_vcc_vv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
; WAVE64-LABEL: name: class_s64_vcc_vv
; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2
; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; WAVE64: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
; WAVE32-LABEL: name: class_s64_vcc_vv
; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2
; WAVE32: $vcc_hi = IMPLICIT_DEF
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; WAVE32: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1
S_ENDPGM 0, implicit %2
...

View File

@ -0,0 +1,98 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=SI-ERR %s

# Selection of llvm.amdgcn.class on s16 values.
#
# Prefix names follow the width of the compare result seen in the checks: the
# fiji run produces an sreg_64 result (wave64 prefix), while the gfx1010 run
# defines $vcc_hi and produces an sreg_32_xm0 result (wave32 prefix).
#
# The tahiti run is expected to fail selection for every function, since the
# f16 class instructions are guarded by the Has16BitInsts predicate.

# SI-ERR-NOT: remark
# SI-ERR: remark: <unknown>:0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:sgpr(s16), %1:vgpr(s32) (in function: class_s16_vcc_sv)
# SI-ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:vgpr(s16), %1:sgpr(s32) (in function: class_s16_vcc_vs)
# SI-ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:vgpr(s16), %1:vgpr(s32) (in function: class_s16_vcc_vv)
# SI-ERR-NOT: remark

---
name: class_s16_vcc_sv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0, $vgpr0
    ; WAVE64-LABEL: name: class_s16_vcc_sv
    ; WAVE64: liveins: $sgpr0, $vgpr0
    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
    ; WAVE32-LABEL: name: class_s16_vcc_sv
    ; WAVE32: liveins: $sgpr0, $vgpr0
    ; WAVE32: $vcc_hi = IMPLICIT_DEF
    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
    %0:sgpr(s32) = COPY $sgpr0
    %1:vgpr(s32) = COPY $vgpr0
    %2:sgpr(s16) = G_TRUNC %0
    %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1
    S_ENDPGM 0, implicit %4
...
---
name: class_s16_vcc_vs
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0, $vgpr0
    ; WAVE64-LABEL: name: class_s16_vcc_vs
    ; WAVE64: liveins: $sgpr0, $vgpr0
    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
    ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
    ; WAVE32-LABEL: name: class_s16_vcc_vs
    ; WAVE32: liveins: $sgpr0, $vgpr0
    ; WAVE32: $vcc_hi = IMPLICIT_DEF
    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
    ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:sgpr(s32) = COPY $sgpr0
    %2:vgpr(s16) = G_TRUNC %0
    %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1
    S_ENDPGM 0, implicit %4
...
---
name: class_s16_vcc_vv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    ; WAVE64-LABEL: name: class_s16_vcc_vv
    ; WAVE64: liveins: $vgpr0, $vgpr1
    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
    ; WAVE32-LABEL: name: class_s16_vcc_vv
    ; WAVE32: liveins: $vgpr0, $vgpr1
    ; WAVE32: $vcc_hi = IMPLICIT_DEF
    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
    %0:vgpr(s32) = COPY $vgpr0
    %1:vgpr(s32) = COPY $vgpr1
    %2:vgpr(s16) = G_TRUNC %0
    %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1
    S_ENDPGM 0, implicit %4
...