forked from OSchip/llvm-project
[AMDGPU] Implement llvm.amdgcn.mulhi.[i,u]24 intrinsics.
These intrinsics map to the 24-bit v_mul_hi instructions. This change also fixes an incorrect assumption about the associativity of 24-bit mulhi in its SDNode record in tblgen. Differential Revision: https://reviews.llvm.org/D112394
This commit is contained in:
parent
1189d97d12
commit
9bd5cfeb1f
|
@ -1535,6 +1535,16 @@ def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
|
|||
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
|
||||
>;
|
||||
|
||||
def int_amdgcn_mulhi_i24 : Intrinsic<[llvm_i32_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
|
||||
>;
|
||||
|
||||
def int_amdgcn_mulhi_u24 : Intrinsic<[llvm_i32_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
|
||||
>;
|
||||
|
||||
// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
|
||||
//
|
||||
// bar_val is the total number of waves that will wait on this
|
||||
|
|
|
@ -279,11 +279,18 @@ def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
|
|||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
def AMDGPUmulhi_u24 : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
// mulhi24 yields the high-order 16 bits of the 48-bit result. Here's an example
|
||||
// that shows mulhi24 is not associative:
|
||||
//
|
||||
// Given a = 0x10002, b = c = 0xffffff:
|
||||
// mulhi24(mulhi24(a, b), c) = mulhi24(0x100, 0xffffff) = 0
|
||||
// Which is not equal to:
|
||||
// mulhi24(a, mulhi24(b, c)) = mulhi24(0x10002, 0xffff) = 1
|
||||
def AMDGPUmulhi_u24_impl : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp,
|
||||
[SDNPCommutative]
|
||||
>;
|
||||
def AMDGPUmulhi_i24 : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
def AMDGPUmulhi_i24_impl : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp,
|
||||
[SDNPCommutative]
|
||||
>;
|
||||
|
||||
def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
|
||||
|
@ -435,6 +442,14 @@ def AMDGPUmul_i24 : PatFrags<(ops node:$src0, node:$src1),
|
|||
[(int_amdgcn_mul_i24 node:$src0, node:$src1),
|
||||
(AMDGPUmul_i24_impl node:$src0, node:$src1)]>;
|
||||
|
||||
def AMDGPUmulhi_u24 : PatFrags<(ops node:$src0, node:$src1),
|
||||
[(int_amdgcn_mulhi_u24 node:$src0, node:$src1),
|
||||
(AMDGPUmulhi_u24_impl node:$src0, node:$src1)]>;
|
||||
|
||||
def AMDGPUmulhi_i24 : PatFrags<(ops node:$src0, node:$src1),
|
||||
[(int_amdgcn_mulhi_i24 node:$src0, node:$src1),
|
||||
(AMDGPUmulhi_i24_impl node:$src0, node:$src1)]>;
|
||||
|
||||
def AMDGPUbfe_i32 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
|
||||
[(int_amdgcn_sbfe node:$src0, node:$src1, node:$src2),
|
||||
(AMDGPUbfe_i32_impl node:$src0, node:$src1, node:$src2)]>;
|
||||
|
|
|
@ -4081,6 +4081,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
case Intrinsic::amdgcn_mbcnt_hi:
|
||||
case Intrinsic::amdgcn_mul_u24:
|
||||
case Intrinsic::amdgcn_mul_i24:
|
||||
case Intrinsic::amdgcn_mulhi_u24:
|
||||
case Intrinsic::amdgcn_mulhi_i24:
|
||||
case Intrinsic::amdgcn_lerp:
|
||||
case Intrinsic::amdgcn_sad_u8:
|
||||
case Intrinsic::amdgcn_msad_u8:
|
||||
|
|
|
@ -492,9 +492,9 @@ defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub
|
|||
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
|
||||
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
|
||||
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
|
||||
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
|
||||
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
|
||||
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
|
||||
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
|
||||
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
|
||||
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
|
||||
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
|
||||
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: mulhi_i24_vsv
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0
|
||||
; CHECK-LABEL: name: mulhi_i24_vsv
|
||||
; CHECK: liveins: $sgpr0, $vgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr0
|
||||
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0, %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
name: mulhi_i24_vvs
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0
|
||||
; CHECK-LABEL: name: mulhi_i24_vvs
|
||||
; CHECK: liveins: $sgpr0, $vgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr(s32) = COPY $sgpr0
|
||||
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0, %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
name: mulhi_i24_vvv
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
; CHECK-LABEL: name: mulhi_i24_vvv
|
||||
; CHECK: liveins: $vgpr0, $vgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0, %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
|
@ -0,0 +1,68 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: mulhi_u24_vsv
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0
|
||||
; CHECK-LABEL: name: mulhi_u24_vsv
|
||||
; CHECK: liveins: $sgpr0, $vgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr0
|
||||
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0, %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
name: mulhi_u24_vvs
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0
|
||||
; CHECK-LABEL: name: mulhi_u24_vvs
|
||||
; CHECK: liveins: $sgpr0, $vgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr(s32) = COPY $sgpr0
|
||||
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0, %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
name: mulhi_u24_vvv
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
; CHECK-LABEL: name: mulhi_u24_vvv
|
||||
; CHECK: liveins: $vgpr0, $vgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0, %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
|
@ -0,0 +1,15 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
define i32 @basic(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: basic:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mul_hi_i32_i24_e32 v0, v0, v1
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%mul = call i32 @llvm.amdgcn.mulhi.i24(i32 %a, i32 %b)
|
||||
ret i32 %mul
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.mulhi.i24(i32, i32)
|
|
@ -0,0 +1,15 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
define i32 @basic(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: basic:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mul_hi_u32_u24_e32 v0, v0, v1
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%mul = call i32 @llvm.amdgcn.mulhi.u24(i32 %a, i32 %b)
|
||||
ret i32 %mul
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.mulhi.u24(i32, i32)
|
Loading…
Reference in New Issue