GlobalISel: Legalize G_FPOWI

This commit is contained in:
Matt Arsenault 2020-07-17 11:01:15 -04:00
parent 7941dc5041
commit 7cd8a0256d
6 changed files with 288 additions and 0 deletions

View File

@ -302,6 +302,7 @@ public:
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
LegalizeResult lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
/// Lower G_FPOWI into G_FPOW, converting the integer exponent operand with
/// G_SITOFP to the type of the result.
LegalizeResult lowerFPOWI(MachineInstr &MI);
LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty);

View File

@ -1583,6 +1583,13 @@ public:
return buildInstr(TargetOpcode::G_FEXP2, {Dst}, {Src}, Flags);
}
/// Build and insert \p Dst = G_FPOW \p Src0, \p Src1
///
/// \p Flags is forwarded unchanged to buildInstr; it defaults to None.
///
/// \return the MachineInstrBuilder returned by buildInstr.
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1,
Optional<unsigned> Flags = None) {
return buildInstr(TargetOpcode::G_FPOW, {Dst}, {Src0, Src1}, Flags);
}
/// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1) {

View File

@ -2170,6 +2170,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FPOWI: {
// Only type index 0 is handled for G_FPOWI; any other index is rejected.
if (TypeIdx != 0)
return UnableToLegalize;
// Notify the observer around the in-place mutation of MI.
Observer.changingInstr(MI);
// Operand 1 (source) is widened to WideTy using G_FPEXT and operand 0
// (destination) is widened using G_FPTRUNC. Operand 2 is not modified here.
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_INTTOPTR:
if (TypeIdx != 1)
return UnableToLegalize;
@ -2673,6 +2682,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerFPTOSI(MI);
case G_FPTRUNC:
return lowerFPTRUNC(MI, TypeIdx, Ty);
case G_FPOWI:
return lowerFPOWI(MI);
case G_SMIN:
case G_SMAX:
case G_UMIN:
@ -4821,6 +4832,20 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
/// Lower G_FPOWI by rewriting it as G_FPOW.
///
/// The exponent (operand 2 of \p MI) is converted with G_SITOFP to the type
/// of the result register, and a G_FPOW taking the original base (operand 1)
/// and the converted exponent is built into the original destination
/// (operand 0). The flags of \p MI are passed on to the new G_FPOW, and
/// \p MI is erased afterwards.
///
/// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
/// multiplication tree.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
  const Register ResultReg = MI.getOperand(0).getReg();
  const Register BaseReg = MI.getOperand(1).getReg();
  const Register ExponentReg = MI.getOperand(2).getReg();

  // G_FPOW takes both operands in the result type, so convert the exponent.
  const LLT ResultTy = MRI.getType(ResultReg);
  auto FPExponent = MIRBuilder.buildSITOFP(ResultTy, ExponentReg);

  MIRBuilder.buildFPow(ResultReg, BaseReg, FPExponent, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
switch (Opc) {
case TargetOpcode::G_SMIN:

View File

@ -747,6 +747,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
ExpOps.clampScalar(0, MinScalarFPTy, S32)
.scalarize(0);
// G_FPOWI: clamp the scalar at type index 0 to the range
// [MinScalarFPTy, S32], then lower it.
getActionDefinitionsBuilder(G_FPOWI)
.clampScalar(0, MinScalarFPTy, S32)
.lower();
// The 64-bit versions produce 32-bit results, but only on the SALU.
getActionDefinitionsBuilder(G_CTPOP)
.legalFor({{S32, S32}, {S32, S64}})

View File

@ -0,0 +1,70 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
---
name: test_fpowi_s16_s32_flags
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX6-LABEL: name: test_fpowi_s16_s32_flags
; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY1]](s32)
; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = nnan G_FLOG2 [[FPEXT]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[SITOFP]](s32)
; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[INT]]
; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; GFX6: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-LABEL: name: test_fpowi_s16_s32_flags
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX9: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[COPY1]](s32)
; GFX9: [[FLOG2_:%[0-9]+]]:_(s16) = nnan G_FLOG2 [[TRUNC]]
; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = nnan G_FPEXT [[FLOG2_]](s16)
; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = nnan G_FPEXT [[SITOFP]](s16)
; GFX9: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32)
; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = nnan G_FEXP2 [[FPTRUNC]]
; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16)
; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %0
%3:_(s16) = nnan G_FPOWI %2, %1
%4:_(s32) = G_ANYEXT %3
$vgpr0 = COPY %4
...
---
name: test_fpowi_s32_s32_flags
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX6-LABEL: name: test_fpowi_s32_s32_flags
; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY1]](s32)
; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = nnan G_FLOG2 [[COPY]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[SITOFP]](s32)
; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[INT]]
; GFX6: $vgpr0 = COPY [[FEXP2_]](s32)
; GFX9-LABEL: name: test_fpowi_s32_s32_flags
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY1]](s32)
; GFX9: [[FLOG2_:%[0-9]+]]:_(s32) = nnan G_FLOG2 [[COPY]]
; GFX9: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[SITOFP]](s32)
; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[INT]]
; GFX9: $vgpr0 = COPY [[FEXP2_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = nnan G_FPOWI %0, %1
$vgpr0 = COPY %2
...

View File

@ -0,0 +1,181 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
define i16 @v_powi_f16(i16 %l, i32 %r) {
; GFX7-LABEL: v_powi_f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX7-NEXT: v_log_f32_e32 v0, v0
; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX7-NEXT: v_exp_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX8-NEXT: v_log_f16_e32 v0, v0
; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_exp_f16_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
%l.cast = bitcast i16 %l to half
%res = call half @llvm.powi.f16(half %l.cast, i32 %r)
%res.cast = bitcast half %res to i16
ret i16 %res.cast
}
define float @v_powi_f32(float %l, i32 %r) {
; GCN-LABEL: v_powi_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_i32_e32 v1, v1
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32(float %l, i32 %r)
ret float %res
}
define float @v_powi_0_f32(float %l) {
; GCN-LABEL: v_powi_0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 1.0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32(float %l, i32 0)
ret float %res
}
define float @v_powi_1_f32(float %l) {
; GCN-LABEL: v_powi_1_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32(float %l, i32 1)
ret float %res
}
define float @v_powi_neg1_f32(float %l) {
; GCN-LABEL: v_powi_neg1_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_i32_e32 v1, -1
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32(float %l, i32 -1)
ret float %res
}
define float @v_powi_2_f32(float %l) {
; GCN-LABEL: v_powi_2_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 2
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32(float %l, i32 2)
ret float %res
}
define float @v_powi_neg2_f32(float %l) {
; GCN-LABEL: v_powi_neg2_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_i32_e32 v1, -2
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32(float %l, i32 -2)
ret float %res
}
define float @v_powi_4_f32(float %l) {
; GCN-LABEL: v_powi_4_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 4
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32(float %l, i32 4)
ret float %res
}
define float @v_powi_8_f32(float %l) {
; GCN-LABEL: v_powi_8_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 8
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32(float %l, i32 8)
ret float %res
}
define float @v_powi_16_f32(float %l) {
; GCN-LABEL: v_powi_16_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 16
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32(float %l, i32 16)
ret float %res
}
define float @v_powi_128_f32(float %l) {
; GCN-LABEL: v_powi_128_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v1, 0x80
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32(float %l, i32 128)
ret float %res
}
define float @v_powi_neg128_f32(float %l) {
; GCN-LABEL: v_powi_neg128_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_i32_e32 v1, 0xffffff80
; GCN-NEXT: v_log_f32_e32 v0, v0
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_exp_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32(float %l, i32 -128)
ret float %res
}
; FIXME: f64 is broken, so the f64 test below is left commented out.
; define double @v_powi_f64(double %l, i32 %r) {
; %res = call double @llvm.powi.f64(double %l, i32 %r)
; ret double %res
; }
declare half @llvm.powi.f16(half, i32) #0
declare float @llvm.powi.f32(float, i32) #0
declare double @llvm.powi.f64(double, i32) #0
attributes #0 = { nounwind readnone speculatable willreturn }