forked from OSchip/llvm-project
AMDGPU/GlobalISel: Select mul24 intrinsics
This commit is contained in:
parent
94d08feaef
commit
7fa0bfe7d5
|
@ -290,10 +290,10 @@ def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;
|
|||
|
||||
// Signed and unsigned 24-bit multiply. The highest 8-bits are ignore
|
||||
// when performing the mulitply. The result is a 32-bit value.
|
||||
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
|
||||
def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
|
||||
def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
|
@ -465,3 +465,11 @@ def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1),
|
|||
def AMDGPUfmad_ftz : PatFrags<(ops node:$src0, node:$src1, node:$src2),
|
||||
[(int_amdgcn_fmad_ftz node:$src0, node:$src1, node:$src2),
|
||||
(AMDGPUfmad_ftz_impl node:$src0, node:$src1, node:$src2)]>;
|
||||
|
||||
def AMDGPUmul_u24 : PatFrags<(ops node:$src0, node:$src1),
|
||||
[(int_amdgcn_mul_u24 node:$src0, node:$src1),
|
||||
(AMDGPUmul_u24_impl node:$src0, node:$src1)]>;
|
||||
|
||||
def AMDGPUmul_i24 : PatFrags<(ops node:$src0, node:$src1),
|
||||
[(int_amdgcn_mul_i24 node:$src0, node:$src1),
|
||||
(AMDGPUmul_i24_impl node:$src0, node:$src1)]>;
|
||||
|
|
|
@ -467,9 +467,9 @@ defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
|
|||
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
|
||||
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
|
||||
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
|
||||
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_i24>;
|
||||
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
|
||||
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
|
||||
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_u24>;
|
||||
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
|
||||
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
|
||||
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
|
||||
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
name: mul_u24_vsv
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0
|
||||
; GCN-LABEL: name: mul_u24_vsv
|
||||
; GCN: liveins: $sgpr0, $vgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr0
|
||||
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
name: mul_u24_vvs
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0
|
||||
; GCN-LABEL: name: mul_u24_vvs
|
||||
; GCN: liveins: $sgpr0, $vgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr(s32) = COPY $sgpr0
|
||||
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
name: mul_u24_vvv
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
; GCN-LABEL: name: mul_u24_vvv
|
||||
; GCN: liveins: $vgpr0, $vgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
Loading…
Reference in New Issue