forked from OSchip/llvm-project
GlobalISel: Legalize G_FPOWI
This commit is contained in:
parent
7941dc5041
commit
7cd8a0256d
|
@ -302,6 +302,7 @@ public:
|
|||
|
||||
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
|
||||
LegalizeResult lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
/// Lower G_FPOWI into a G_SITOFP of the exponent operand followed by G_FPOW.
LegalizeResult lowerFPOWI(MachineInstr &MI);
|
||||
|
||||
LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
|
|
|
@ -1583,6 +1583,13 @@ public:
|
|||
return buildInstr(TargetOpcode::G_FEXP2, {Dst}, {Src}, Flags);
|
||||
}
|
||||
|
||||
/// Build and insert \p Dst = G_FPOW \p Src0, \p Src1
///
/// \p Src0 and \p Src1 are passed as the two source operands in that order.
/// \p Flags is forwarded unchanged to buildInstr and defaults to None.
///
/// \return the MachineInstrBuilder returned by buildInstr.
|
||||
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0,
|
||||
const SrcOp &Src1,
|
||||
Optional<unsigned> Flags = None) {
|
||||
return buildInstr(TargetOpcode::G_FPOW, {Dst}, {Src0, Src1}, Flags);
|
||||
}
|
||||
|
||||
/// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
|
||||
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
|
||||
const SrcOp &Src1) {
|
||||
|
|
|
@ -2170,6 +2170,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
|
|||
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
|
||||
Observer.changedInstr(MI);
|
||||
return Legalized;
|
||||
// G_FPOWI: only type index 0 can be widened here; any other type index is
// reported as UnableToLegalize.
case TargetOpcode::G_FPOWI: {
|
||||
if (TypeIdx != 0)
|
||||
return UnableToLegalize;
|
||||
Observer.changingInstr(MI);
|
||||
// Operand 1 is widened through G_FPEXT and operand 0 (the result) through
// G_FPTRUNC. Operand 2 is not passed to either helper, so it is unchanged.
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
|
||||
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
|
||||
Observer.changedInstr(MI);
|
||||
return Legalized;
|
||||
}
|
||||
case TargetOpcode::G_INTTOPTR:
|
||||
if (TypeIdx != 1)
|
||||
return UnableToLegalize;
|
||||
|
@ -2673,6 +2682,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
|
|||
return lowerFPTOSI(MI);
|
||||
case G_FPTRUNC:
|
||||
return lowerFPTRUNC(MI, TypeIdx, Ty);
|
||||
case G_FPOWI:
|
||||
return lowerFPOWI(MI);
|
||||
case G_SMIN:
|
||||
case G_SMAX:
|
||||
case G_UMIN:
|
||||
|
@ -4821,6 +4832,20 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
|
|||
return UnableToLegalize;
|
||||
}
|
||||
|
||||
// Lower G_FPOWI to G_FPOW: operand 2 (the exponent) is converted to the
// result type with G_SITOFP, G_FPOW is then built directly into the original
// destination register, and the original instruction is erased. Always
// returns Legalized.
//
// TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
|
||||
// multiplication tree.
|
||||
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
|
||||
Register Dst = MI.getOperand(0).getReg();
|
||||
Register Src0 = MI.getOperand(1).getReg();
|
||||
Register Src1 = MI.getOperand(2).getReg();
|
||||
// The conversion below targets the type of the destination register.
LLT Ty = MRI.getType(Dst);
|
||||
|
||||
// Note: MI's flags are passed only to the G_FPOW, not to the G_SITOFP.
// NOTE(review): Ty is used as-is for G_SITOFP; confirm this is valid when
// Dst is a vector and Src1 is a scalar.
auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
|
||||
MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
|
||||
// Dst is now defined by the new G_FPOW, so the original G_FPOWI is removed.
MI.eraseFromParent();
|
||||
return Legalized;
|
||||
}
|
||||
|
||||
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case TargetOpcode::G_SMIN:
|
||||
|
|
|
@ -747,6 +747,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|||
ExpOps.clampScalar(0, MinScalarFPTy, S32)
|
||||
.scalarize(0);
|
||||
|
||||
// G_FPOWI: clamp type index 0 to the range [MinScalarFPTy, S32], then lower.
getActionDefinitionsBuilder(G_FPOWI)
|
||||
.clampScalar(0, MinScalarFPTy, S32)
|
||||
.lower();
|
||||
|
||||
// The 64-bit versions produce 32-bit results, but only on the SALU.
|
||||
getActionDefinitionsBuilder(G_CTPOP)
|
||||
.legalFor({{S32, S32}, {S32, S64}})
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: test_fpowi_s16_s32_flags
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX6-LABEL: name: test_fpowi_s16_s32_flags
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
|
||||
; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY1]](s32)
|
||||
; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = nnan G_FLOG2 [[FPEXT]]
|
||||
; GFX6: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[SITOFP]](s32)
|
||||
; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[INT]]
|
||||
; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
|
||||
; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
|
||||
; GFX6: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
; GFX9-LABEL: name: test_fpowi_s16_s32_flags
|
||||
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; GFX9: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[COPY1]](s32)
|
||||
; GFX9: [[FLOG2_:%[0-9]+]]:_(s16) = nnan G_FLOG2 [[TRUNC]]
|
||||
; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = nnan G_FPEXT [[FLOG2_]](s16)
|
||||
; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = nnan G_FPEXT [[SITOFP]](s16)
|
||||
; GFX9: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32)
|
||||
; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
|
||||
; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = nnan G_FEXP2 [[FPTRUNC]]
|
||||
; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16)
|
||||
; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s16) = G_TRUNC %0
|
||||
%3:_(s16) = nnan G_FPOWI %2, %1
|
||||
%4:_(s32) = G_ANYEXT %3
|
||||
$vgpr0 = COPY %4
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fpowi_s32_s32_flags
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX6-LABEL: name: test_fpowi_s32_s32_flags
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY1]](s32)
|
||||
; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = nnan G_FLOG2 [[COPY]]
|
||||
; GFX6: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[SITOFP]](s32)
|
||||
; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[INT]]
|
||||
; GFX6: $vgpr0 = COPY [[FEXP2_]](s32)
|
||||
; GFX9-LABEL: name: test_fpowi_s32_s32_flags
|
||||
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX9: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY1]](s32)
|
||||
; GFX9: [[FLOG2_:%[0-9]+]]:_(s32) = nnan G_FLOG2 [[COPY]]
|
||||
; GFX9: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[SITOFP]](s32)
|
||||
; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[INT]]
|
||||
; GFX9: $vgpr0 = COPY [[FEXP2_]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = nnan G_FPOWI %0, %1
|
||||
$vgpr0 = COPY %2
|
||||
...
|
|
@ -0,0 +1,181 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
|
||||
|
||||
define i16 @v_powi_f16(i16 %l, i32 %r) {
|
||||
; GFX7-LABEL: v_powi_f16:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1
|
||||
; GFX7-NEXT: v_log_f32_e32 v0, v0
|
||||
; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GFX7-NEXT: v_exp_f32_e32 v0, v0
|
||||
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_powi_f16:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1
|
||||
; GFX8-NEXT: v_log_f16_e32 v0, v0
|
||||
; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v1
|
||||
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
|
||||
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; GFX8-NEXT: v_exp_f16_e32 v0, v0
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%l.cast = bitcast i16 %l to half
|
||||
%res = call half @llvm.powi.f16(half %l.cast, i32 %r)
|
||||
%res.cast = bitcast half %res to i16
|
||||
ret i16 %res.cast
|
||||
}
|
||||
|
||||
define float @v_powi_f32(float %l, i32 %r) {
|
||||
; GCN-LABEL: v_powi_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1
|
||||
; GCN-NEXT: v_log_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%res = call float @llvm.powi.f32(float %l, i32 %r)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @v_powi_0_f32(float %l) {
|
||||
; GCN-LABEL: v_powi_0_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 1.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%res = call float @llvm.powi.f32(float %l, i32 0)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @v_powi_1_f32(float %l) {
|
||||
; GCN-LABEL: v_powi_1_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%res = call float @llvm.powi.f32(float %l, i32 1)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @v_powi_neg1_f32(float %l) {
|
||||
; GCN-LABEL: v_powi_neg1_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_cvt_f32_i32_e32 v1, -1
|
||||
; GCN-NEXT: v_log_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%res = call float @llvm.powi.f32(float %l, i32 -1)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @v_powi_2_f32(float %l) {
|
||||
; GCN-LABEL: v_powi_2_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_log_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 2
|
||||
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%res = call float @llvm.powi.f32(float %l, i32 2)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @v_powi_neg2_f32(float %l) {
|
||||
; GCN-LABEL: v_powi_neg2_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_cvt_f32_i32_e32 v1, -2
|
||||
; GCN-NEXT: v_log_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%res = call float @llvm.powi.f32(float %l, i32 -2)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @v_powi_4_f32(float %l) {
|
||||
; GCN-LABEL: v_powi_4_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_log_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 4
|
||||
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%res = call float @llvm.powi.f32(float %l, i32 4)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @v_powi_8_f32(float %l) {
|
||||
; GCN-LABEL: v_powi_8_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_log_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 8
|
||||
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%res = call float @llvm.powi.f32(float %l, i32 8)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @v_powi_16_f32(float %l) {
|
||||
; GCN-LABEL: v_powi_16_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_log_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 16
|
||||
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%res = call float @llvm.powi.f32(float %l, i32 16)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @v_powi_128_f32(float %l) {
|
||||
; GCN-LABEL: v_powi_128_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_log_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 0x80
|
||||
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%res = call float @llvm.powi.f32(float %l, i32 128)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @v_powi_neg128_f32(float %l) {
|
||||
; GCN-LABEL: v_powi_neg128_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_cvt_f32_i32_e32 v1, 0xffffff80
|
||||
; GCN-NEXT: v_log_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%res = call float @llvm.powi.f32(float %l, i32 -128)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
; FIXME: f64 is broken; the test below stays commented out until it is fixed.
|
||||
; define double @v_powi_f64(double %l, i32 %r) {
|
||||
; %res = call double @llvm.powi.f64(double %l, i32 %r)
|
||||
; ret double %res
|
||||
; }
|
||||
|
||||
declare half @llvm.powi.f16(half, i32) #0
|
||||
declare float @llvm.powi.f32(float, i32) #0
|
||||
declare double @llvm.powi.f64(double, i32) #0
|
||||
|
||||
attributes #0 = { nounwind readnone speculatable willreturn }
|
Loading…
Reference in New Issue