AMDGPU/GlobalISel: Select mul24 intrinsics

This commit is contained in:
Matt Arsenault 2019-09-08 18:11:51 -04:00 committed by Matt Arsenault
parent 94d08feaef
commit 7fa0bfe7d5
3 changed files with 77 additions and 4 deletions

View File

@ -290,10 +290,10 @@ def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;
// Signed and unsigned 24-bit multiply. The highest 8-bits are ignore
// when performing the mulitply. The result is a 32-bit value.
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
@ -465,3 +465,11 @@ def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1),
def AMDGPUfmad_ftz : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_fmad_ftz node:$src0, node:$src1, node:$src2),
(AMDGPUfmad_ftz_impl node:$src0, node:$src1, node:$src2)]>;
def AMDGPUmul_u24 : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_mul_u24 node:$src0, node:$src1),
(AMDGPUmul_u24_impl node:$src0, node:$src1)]>;
def AMDGPUmul_i24 : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_mul_i24 node:$src0, node:$src1),
(AMDGPUmul_i24_impl node:$src0, node:$src1)]>;

View File

@ -467,9 +467,9 @@ defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_i24>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_u24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;

View File

@ -0,0 +1,65 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
---
name: mul_u24_vsv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0
; GCN-LABEL: name: mul_u24_vsv
; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1
S_ENDPGM 0, implicit %2
...
---
name: mul_u24_vvs
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0
; GCN-LABEL: name: mul_u24_vvs
; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1
S_ENDPGM 0, implicit %2
...
---
name: mul_u24_vvv
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GCN-LABEL: name: mul_u24_vvv
; GCN: liveins: $vgpr0, $vgpr1
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1
S_ENDPGM 0, implicit %2
...