AMDGPU: Add intrinsic for s_setreg

This will be more useful with fenv access implemented.
This commit is contained in:
Matt Arsenault 2020-05-06 16:43:33 -04:00
parent 504d8d9d8a
commit 97f3f0bab0
11 changed files with 164 additions and 24 deletions

View File

@ -44,6 +44,7 @@ BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc")
// Instruction builtins.
//===----------------------------------------------------------------------===//
BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n")
// s_setreg: void(hwreg, value). The 'I' in the type string makes the first
// argument (the hardware register selector) require an integer constant
// expression; the second argument is the unsigned value to write.
BUILTIN(__builtin_amdgcn_s_setreg, "vIiUi", "n")
BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n")
BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n")

View File

@ -715,6 +715,12 @@ kernel void test_mqsad_u32_u8(global uint4* out, ulong src0, uint src1, uint4 sr
*out = __builtin_amdgcn_mqsad_u32_u8(src0, src1, src2);
}
// __builtin_amdgcn_s_setreg must lower to a direct call of the
// llvm.amdgcn.s.setreg intrinsic: the constant register selector is passed
// through unchanged as the first operand and the runtime value as the second.
// CHECK-LABEL: test_s_setreg(
// CHECK: call void @llvm.amdgcn.s.setreg(i32 8193, i32 %val)
kernel void test_s_setreg(uint val) {
__builtin_amdgcn_s_setreg(8193, val);
}
// CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
// CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }

View File

@ -139,3 +139,8 @@ void test_fence() {
const char ptr[] = "workgroup";
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
}
// The first argument of __builtin_amdgcn_s_setreg (the hardware register
// selector) must be an integer constant expression. A non-constant first
// argument is diagnosed regardless of whether the second argument (the value
// to write) is a constant or a variable.
void test_s_setreg(int x, int y) {
__builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
__builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
}

View File

@ -1207,6 +1207,16 @@ def int_amdgcn_s_getreg :
[IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<ArgIndex<0>>]
>;
// llvm.amdgcn.s.setreg(hwreg, value)
//
// Writes `value` to the hardware register field selected by `hwreg`.
//   hwreg: immediate (ImmArg) selector; it is used directly as the simm16
//          hwreg operand of s_setreg_b32, which packs the register ID, the
//          bit offset and the field width minus one.
//   value: i32 value to write into the selected field.
//
// Note this can be used to set FP environment properties that are
// unsafe to change in non-strictfp functions. The register properties
// available (and value required to access them) may differ per
// subtarget.
//
// The intrinsic does not access memory (IntrNoMem) but is marked
// IntrHasSideEffects so the register write is not removed or reordered as if
// it were a pure computation.
def int_amdgcn_s_setreg :
GCCBuiltin<"__builtin_amdgcn_s_setreg">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]
>;
// int_amdgcn_s_getpc is provided to allow a specific style of position
// independent code to determine the high part of its address when it is
// known (through convention) that the code and any data of interest does

View File

@ -202,13 +202,6 @@ def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
def AMDGPUSetRegOp : SDTypeProfile<0, 2, [
SDTCisInt<0>, SDTCisInt<1>
]>;
def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

View File

@ -2783,6 +2783,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
return;
}
case Intrinsic::amdgcn_s_setreg: {
// Operand 2 is the value being written. s_setreg_b32 takes it in an SGPR, so
// if the value was assigned to the VGPR bank it is moved into an SGPR with a
// readfirstlane before the instruction.
constrainOpWithReadfirstlane(MI, MRI, 2);
return;
}
default: {
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
@ -3924,6 +3928,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
case Intrinsic::amdgcn_s_setreg: {
// This must be an SGPR, but accept a VGPR.
// Report the bank the value operand (operand 2) currently has, passing
// SGPRRegBankID as the fallback, instead of forcing SGPR here. A VGPR value
// is handled later in applyMappingImpl, which inserts a readfirstlane.
unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
AMDGPU::SGPRRegBankID);
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
case Intrinsic::amdgcn_end_cf:
case Intrinsic::amdgcn_init_exec: {
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);

View File

@ -7977,32 +7977,32 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
if (!HasFP32Denormals) {
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue EnableDenorm;
SDNode *EnableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue EnableDenormValue =
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
DAG.getEntryNode(), EnableDenormValue);
DAG.getEntryNode(), EnableDenormValue).getNode();
} else {
const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
SL, MVT::i32);
EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
DAG.getEntryNode(), EnableDenormValue,
BitField);
EnableDenorm =
DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
{EnableDenormValue, BitField, DAG.getEntryNode()});
}
SDValue Ops[3] = {
NegDivScale0,
EnableDenorm.getValue(0),
EnableDenorm.getValue(1)
SDValue(EnableDenorm, 0),
SDValue(EnableDenorm, 1)
};
NegDivScale0 = DAG.getMergeValues(Ops, SL);
@ -8026,25 +8026,25 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
NumeratorScaled, Fma3);
if (!HasFP32Denormals) {
SDValue DisableDenorm;
SDNode *DisableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue DisableDenormValue =
getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
Fma4.getValue(1), DisableDenormValue,
Fma4.getValue(2));
Fma4.getValue(2)).getNode();
} else {
const SDValue DisableDenormValue =
DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
Fma4.getValue(1), DisableDenormValue,
BitField, Fma4.getValue(2));
DisableDenorm = DAG.getMachineNode(
AMDGPU::S_SETREG_B32, SL, MVT::Other,
{DisableDenormValue, BitField, Fma4.getValue(1), Fma4.getValue(2)});
}
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
DisableDenorm, DAG.getRoot());
SDValue(DisableDenorm, 0), DAG.getRoot());
DAG.setRoot(OutputChain);
}

View File

@ -1131,7 +1131,7 @@ def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {

View File

@ -801,13 +801,13 @@ def S_GETREG_B32 : SOPK_Pseudo <
>;
}
let hasSideEffects = 1 in {
let hasSideEffects = 1, mayLoad = 0, mayStore =0 in {
def S_SETREG_B32 : SOPK_Pseudo <
"s_setreg_b32",
(outs), (ins SReg_32:$sdst, hwreg:$simm16),
"$simm16, $sdst",
[(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))]> {
[(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
let Defs = [MODE];
let Uses = [MODE];
}

View File

@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; Set FP32 fp_round to round to zero.
; The immarg packs the hwreg selector as id | (offset << 6) | ((width - 1) << 11).
; FP32 fp_round is the 2-bit field at MODE bits [1:0], so the selector is
; 1 | (0 << 6) | (1 << 11) = 2049. A width of 3 would also overwrite the low
; bit of the FP64/FP16 rounding field.
define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() {
; GCN-LABEL: test_setreg_f32_round_mode_rtz:
; GCN: ; %bb.0:
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GCN-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
ret void
}
; Set FP64/FP16 fp_round to round to zero.
; FP64/FP16 fp_round is the 2-bit field at MODE bits [3:2], so the selector is
; 1 | (2 << 6) | (1 << 11) = 2177. A width of 3 would also overwrite the low
; bit of the FP32 denormal field that follows it.
define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() {
; GCN-LABEL: test_setreg_f64_round_mode_rtz:
; GCN: ; %bb.0:
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GCN-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 2177, i32 3)
ret void
}
; Set all fp_round to round to zero.
; Both rounding fields together occupy MODE bits [3:0], so the selector is
; 1 | (0 << 6) | (3 << 11) = 6145 (width 4). Round-to-zero is 3 in each 2-bit
; field, so the value written is 0b1111 = 15.
define amdgpu_kernel void @test_setreg_all_round_mode_rtz() {
; GCN-LABEL: test_setreg_all_round_mode_rtz:
; GCN: ; %bb.0:
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GCN-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 6145, i32 15)
ret void
}
; Set FP32 fp_round to a dynamic mode supplied in an SGPR (inreg argument).
; The selector 2049 = 1 | (0 << 6) | (1 << 11) addresses only the 2-bit FP32
; rounding field at MODE bits [1:0].
define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) {
; GCN-LABEL: test_setreg_roundingmode_var:
; GCN: ; %bb.0:
; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0
; GCN-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 2049, i32 %var.mode)
ret void
}
; Set FP32 fp_round to a dynamic mode that arrives in a VGPR. The value must be
; moved to an SGPR with v_readfirstlane_b32 before s_setreg_b32 can use it.
; The selector 2049 = 1 | (0 << 6) | (1 << 11) addresses only the 2-bit FP32
; rounding field at MODE bits [1:0].
define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
; GCN-LABEL: test_setreg_roundingmode_var_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_readfirstlane_b32 s4, v0
; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s4
; GCN-NEXT: s_setpc_b64 s[30:31]
call void @llvm.amdgcn.s.setreg(i32 2049, i32 %var.mode)
ret void
}
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
attributes #0 = { nounwind }

View File

@ -0,0 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; FIXME: This copy of the test is a subset of the -global-isel version, since the VGPR case doesn't work.
; Set FP32 fp_round to round to zero.
; The immarg packs the hwreg selector as id | (offset << 6) | ((width - 1) << 11).
; FP32 fp_round is the 2-bit field at MODE bits [1:0], so the selector is
; 1 | (0 << 6) | (1 << 11) = 2049. A width of 3 would also overwrite the low
; bit of the FP64/FP16 rounding field.
define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() {
; GCN-LABEL: test_setreg_f32_round_mode_rtz:
; GCN: ; %bb.0:
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GCN-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
ret void
}
; Set FP64/FP16 fp_round to round to zero.
; FP64/FP16 fp_round is the 2-bit field at MODE bits [3:2], so the selector is
; 1 | (2 << 6) | (1 << 11) = 2177. A width of 3 would also overwrite the low
; bit of the FP32 denormal field that follows it.
define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() {
; GCN-LABEL: test_setreg_f64_round_mode_rtz:
; GCN: ; %bb.0:
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GCN-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 2177, i32 3)
ret void
}
; Set all fp_round to round to zero.
; Both rounding fields together occupy MODE bits [3:0], so the selector is
; 1 | (0 << 6) | (3 << 11) = 6145 (width 4). Round-to-zero is 3 in each 2-bit
; field, so the value written is 0b1111 = 15.
define amdgpu_kernel void @test_setreg_all_round_mode_rtz() {
; GCN-LABEL: test_setreg_all_round_mode_rtz:
; GCN: ; %bb.0:
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GCN-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 6145, i32 15)
ret void
}
; Set FP32 fp_round to a dynamic mode supplied in an SGPR (inreg argument).
; The selector 2049 = 1 | (0 << 6) | (1 << 11) addresses only the 2-bit FP32
; rounding field at MODE bits [1:0].
define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) {
; GCN-LABEL: test_setreg_roundingmode_var:
; GCN: ; %bb.0:
; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s0
; GCN-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 2049, i32 %var.mode)
ret void
}
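; FIXME: Broken for DAG: the value operand arrives in a VGPR here, and that case is only handled by the -global-isel copy of this test.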
; define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
; call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
; ret void
; }
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
attributes #0 = { nounwind }