forked from OSchip/llvm-project
AMDGPU: Add intrinsic for s_setreg
This will be more useful once fenv access is implemented.
This commit is contained in:
parent
504d8d9d8a
commit
97f3f0bab0
|
@ -44,6 +44,7 @@ BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc")
|
|||
// Instruction builtins.
|
||||
//===----------------------------------------------------------------------===//
|
||||
BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_setreg, "vIiUi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n")
|
||||
|
|
|
@ -715,6 +715,12 @@ kernel void test_mqsad_u32_u8(global uint4* out, ulong src0, uint src1, uint4 sr
|
|||
*out = __builtin_amdgcn_mqsad_u32_u8(src0, src1, src2);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_s_setreg(
|
||||
// CHECK: call void @llvm.amdgcn.s.setreg(i32 8193, i32 %val)
|
||||
kernel void test_s_setreg(uint val) {
|
||||
__builtin_amdgcn_s_setreg(8193, val);
|
||||
}
|
||||
|
||||
// CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
|
||||
// CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
|
||||
// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
|
||||
|
|
|
@ -139,3 +139,8 @@ void test_fence() {
|
|||
const char ptr[] = "workgroup";
|
||||
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
|
||||
}
|
||||
|
||||
void test_s_setreg(int x, int y) {
|
||||
__builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
|
||||
__builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
|
||||
}
|
||||
|
|
|
@ -1207,6 +1207,16 @@ def int_amdgcn_s_getreg :
|
|||
[IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<ArgIndex<0>>]
|
||||
>;
|
||||
|
||||
// Note this can be used to set FP environment properties that are
|
||||
// unsafe to change in non-strictfp functions. The register properties
|
||||
// available (and value required to access them) may differ per
|
||||
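// subtarget. Signature: llvm.amdgcn.s.setreg(hwreg, value), where hwreg is
// the immediate hardware-register selector and value is the data to write.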
|
||||
def int_amdgcn_s_setreg :
|
||||
GCCBuiltin<"__builtin_amdgcn_s_setreg">,
|
||||
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]
|
||||
>;
|
||||
|
||||
// int_amdgcn_s_getpc is provided to allow a specific style of position
|
||||
// independent code to determine the high part of its address when it is
|
||||
// known (through convention) that the code and any data of interest does
|
||||
|
|
|
@ -202,13 +202,6 @@ def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
|
|||
|
||||
def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
|
||||
|
||||
def AMDGPUSetRegOp : SDTypeProfile<0, 2, [
|
||||
SDTCisInt<0>, SDTCisInt<1>
|
||||
]>;
|
||||
|
||||
def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
|
||||
SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
|
||||
def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
|
||||
SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
|
||||
|
|
|
@ -2783,6 +2783,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
|||
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
|
||||
return;
|
||||
}
|
||||
case Intrinsic::amdgcn_s_setreg: {
|
||||
constrainOpWithReadfirstlane(MI, MRI, 2);
|
||||
return;
|
||||
}
|
||||
default: {
|
||||
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
|
||||
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
|
||||
|
@ -3924,6 +3928,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_s_setreg: {
|
||||
// This must be an SGPR, but accept a VGPR.
|
||||
unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
|
||||
AMDGPU::SGPRRegBankID);
|
||||
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_end_cf:
|
||||
case Intrinsic::amdgcn_init_exec: {
|
||||
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
|
||||
|
|
|
@ -7977,32 +7977,32 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
|||
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
|
||||
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
|
||||
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
|
||||
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
|
||||
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
|
||||
|
||||
const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
|
||||
|
||||
if (!HasFP32Denormals) {
|
||||
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||
|
||||
SDValue EnableDenorm;
|
||||
SDNode *EnableDenorm;
|
||||
if (Subtarget->hasDenormModeInst()) {
|
||||
const SDValue EnableDenormValue =
|
||||
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
|
||||
|
||||
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
|
||||
DAG.getEntryNode(), EnableDenormValue);
|
||||
DAG.getEntryNode(), EnableDenormValue).getNode();
|
||||
} else {
|
||||
const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
|
||||
SL, MVT::i32);
|
||||
EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
|
||||
DAG.getEntryNode(), EnableDenormValue,
|
||||
BitField);
|
||||
EnableDenorm =
|
||||
DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
|
||||
{EnableDenormValue, BitField, DAG.getEntryNode()});
|
||||
}
|
||||
|
||||
SDValue Ops[3] = {
|
||||
NegDivScale0,
|
||||
EnableDenorm.getValue(0),
|
||||
EnableDenorm.getValue(1)
|
||||
SDValue(EnableDenorm, 0),
|
||||
SDValue(EnableDenorm, 1)
|
||||
};
|
||||
|
||||
NegDivScale0 = DAG.getMergeValues(Ops, SL);
|
||||
|
@ -8026,25 +8026,25 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
|||
NumeratorScaled, Fma3);
|
||||
|
||||
if (!HasFP32Denormals) {
|
||||
SDValue DisableDenorm;
|
||||
SDNode *DisableDenorm;
|
||||
if (Subtarget->hasDenormModeInst()) {
|
||||
const SDValue DisableDenormValue =
|
||||
getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
|
||||
|
||||
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
|
||||
Fma4.getValue(1), DisableDenormValue,
|
||||
Fma4.getValue(2));
|
||||
Fma4.getValue(2)).getNode();
|
||||
} else {
|
||||
const SDValue DisableDenormValue =
|
||||
DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
|
||||
|
||||
DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
|
||||
Fma4.getValue(1), DisableDenormValue,
|
||||
BitField, Fma4.getValue(2));
|
||||
DisableDenorm = DAG.getMachineNode(
|
||||
AMDGPU::S_SETREG_B32, SL, MVT::Other,
|
||||
{DisableDenormValue, BitField, Fma4.getValue(1), Fma4.getValue(2)});
|
||||
}
|
||||
|
||||
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
|
||||
DisableDenorm, DAG.getRoot());
|
||||
SDValue(DisableDenorm, 0), DAG.getRoot());
|
||||
DAG.setRoot(OutputChain);
|
||||
}
|
||||
|
||||
|
|
|
@ -1131,7 +1131,7 @@ def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
|
|||
def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
|
||||
def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
|
||||
|
||||
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
|
||||
def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
|
||||
|
||||
def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
|
||||
|
||||
|
|
|
@ -801,13 +801,13 @@ def S_GETREG_B32 : SOPK_Pseudo <
|
|||
>;
|
||||
}
|
||||
|
||||
let hasSideEffects = 1 in {
|
||||
let hasSideEffects = 1, mayLoad = 0, mayStore =0 in {
|
||||
|
||||
def S_SETREG_B32 : SOPK_Pseudo <
|
||||
"s_setreg_b32",
|
||||
(outs), (ins SReg_32:$sdst, hwreg:$simm16),
|
||||
"$simm16, $sdst",
|
||||
[(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))]> {
|
||||
[(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
|
||||
let Defs = [MODE];
|
||||
let Uses = [MODE];
|
||||
}
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
|
||||
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
|
||||
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
; Set FP32 fp_round to round to zero
|
||||
define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() {
|
||||
; GCN-LABEL: test_setreg_f32_round_mode_rtz:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 3), 3
|
||||
; GCN-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.s.setreg(i32 4097, i32 3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Set FP64/FP16 fp_round to round to zero
|
||||
define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() {
|
||||
; GCN-LABEL: test_setreg_f64_round_mode_rtz:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 3), 3
|
||||
; GCN-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.s.setreg(i32 4225, i32 3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Set all fp_round to round to zero
|
||||
define amdgpu_kernel void @test_setreg_all_round_mode_rtz() {
|
||||
; GCN-LABEL: test_setreg_all_round_mode_rtz:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 5), 7
|
||||
; GCN-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.s.setreg(i32 8193, i32 7)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Set FP32 fp_round to dynamic mode
|
||||
define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) {
|
||||
; GCN-LABEL: test_setreg_roundingmode_var:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s0
|
||||
; GCN-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Set FP32 fp_round from a value that arrives in a VGPR; the checks below expect
; it to be copied into an SGPR with v_readfirstlane before the s_setreg_b32.
define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
|
||||
; GCN-LABEL: test_setreg_roundingmode_var_vgpr:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -0,0 +1,55 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; FIXME: This copy of the test is a subset of the -global-isel version, since the VGPR case doesn't work.
|
||||
|
||||
; Set FP32 fp_round to round to zero
|
||||
define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() {
|
||||
; GCN-LABEL: test_setreg_f32_round_mode_rtz:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 3), 3
|
||||
; GCN-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.s.setreg(i32 4097, i32 3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Set FP64/FP16 fp_round to round to zero
|
||||
define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() {
|
||||
; GCN-LABEL: test_setreg_f64_round_mode_rtz:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 3), 3
|
||||
; GCN-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.s.setreg(i32 4225, i32 3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Set all fp_round to round to zero
|
||||
define amdgpu_kernel void @test_setreg_all_round_mode_rtz() {
|
||||
; GCN-LABEL: test_setreg_all_round_mode_rtz:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 5), 7
|
||||
; GCN-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.s.setreg(i32 8193, i32 7)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Set FP32 fp_round to dynamic mode
|
||||
define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) {
|
||||
; GCN-LABEL: test_setreg_roundingmode_var:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s0
|
||||
; GCN-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: The VGPR-operand case below is broken for SelectionDAG, so it is disabled here.
|
||||
; define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
|
||||
; call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
|
||||
; ret void
|
||||
; }
|
||||
|
||||
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
Loading…
Reference in New Issue