AMDGPU: Optimize s_setreg_b32 to s_denorm_mode/s_round_mode

This is a custom inserter because it was less work than teaching
tablegen a way to indicate that it is sometimes OK to have a no side
effect instruction in the output of a side effecting pattern.

The inline asm in the tests is needed to look like a read of the mode
register, which prevents the mode register defs from being deleted.
However, there seems to be a bug where the mode register def
instructions are moved across the asm sideeffect by the post-RA scheduler.

Another oddity is that the immediate is formatted differently between
s_denorm_mode (decimal, e.g. 10) and s_round_mode (hex, e.g. 0xa).
This commit is contained in:
Matt Arsenault 2020-05-28 20:55:45 -04:00
parent 4f300d4996
commit 0892a96a05
5 changed files with 171 additions and 52 deletions

View File

@ -366,6 +366,28 @@ enum Width : unsigned {
WIDTH_DEFAULT_ = WIDTH_M1_DEFAULT_ + 1,
};
// Bit masks for the individual fields of the MODE hardware register, as they
// are laid out in the 32-bit register value.
enum ModeRegisterMasks : uint32_t {
  // Multi-bit floating-point mode fields.
  FP_ROUND_MASK = 0xfu << 0,  // Bits 0..3
  FP_DENORM_MASK = 0xfu << 4, // Bits 4..7

  // Single-bit fields.
  DX10_CLAMP_MASK = 1u << 8, // Bit 8
  IEEE_MODE_MASK = 1u << 9,  // Bit 9
  LOD_CLAMP_MASK = 1u << 10, // Bit 10
  DEBUG_MASK = 1u << 11,     // Bit 11

  // EXCP_EN fields (bits 12..18, one bit each).
  EXCP_EN_INVALID_MASK = 1u << 12,
  EXCP_EN_INPUT_DENORMAL_MASK = 1u << 13,
  EXCP_EN_FLOAT_DIV0_MASK = 1u << 14,
  EXCP_EN_OVERFLOW_MASK = 1u << 15,
  EXCP_EN_UNDERFLOW_MASK = 1u << 16,
  EXCP_EN_INEXACT_MASK = 1u << 17,
  EXCP_EN_INT_DIV0_MASK = 1u << 18,

  // High fields.
  GPR_IDX_EN_MASK = 1u << 27, // Bit 27
  VSKIP_MASK = 1u << 28,      // Bit 28
  CSP_MASK = 0x7u << 29       // Bits 29..31
};
} // namespace Hwreg
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.

View File

@ -4119,6 +4119,75 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
return emitGWSMemViolTestLoop(MI, BB);
case AMDGPU::S_SETREG_B32: {
  // Rewrite s_setreg_b32 into s_round_mode and/or s_denorm_mode when it
  // writes a constant to exactly the whole rounding-mode field, the whole
  // denormal-mode field, or both of them, in the MODE register. In every
  // other case the instruction is left as it is.
  //
  // FIXME: This could be predicates on the immediate, but tablegen doesn't
  // allow you to have a no side effect instruction in the output of a
  // sideeffecting pattern.
  // TODO: Should also emit a no side effects pseudo if only FP bits are
  // touched, even if not all of them or to a variable.

  // Nothing to do on subtargets without the dedicated mode instructions.
  if (!getSubtarget()->hasDenormModeInst())
    return BB;

  unsigned HwRegId, Offset, FieldWidth;
  AMDGPU::Hwreg::decodeHwreg(MI.getOperand(1).getImm(), HwRegId, Offset,
                             FieldWidth);
  if (HwRegId != AMDGPU::Hwreg::ID_MODE)
    return BB;

  // Bits of the MODE register that this s_setreg_b32 overwrites.
  const unsigned WrittenBits = maskTrailingOnes<unsigned>(FieldWidth) << Offset;
  const unsigned RoundBits = AMDGPU::Hwreg::FP_ROUND_MASK;
  const unsigned DenormBits = AMDGPU::Hwreg::FP_DENORM_MASK;

  // Opcode to emit for each field; zero means "do not emit". The dedicated
  // instructions can only set a whole field at once, never a subset of its
  // bits, hence the exact width requirements below.
  unsigned RoundOpc = 0;
  unsigned DenormOpc = 0;
  if (FieldWidth == 8) {
    // An 8-bit write confined to the two fields covers both of them fully.
    if ((WrittenBits & ~(RoundBits | DenormBits)) == 0) {
      assert(Offset == 0);
      RoundOpc = AMDGPU::S_ROUND_MODE;
      DenormOpc = AMDGPU::S_DENORM_MODE;
    }
  } else if (FieldWidth == 4) {
    if ((WrittenBits & ~RoundBits) == 0) {
      RoundOpc = AMDGPU::S_ROUND_MODE;
      assert(Offset == 0);
    } else if ((WrittenBits & ~DenormBits) == 0) {
      DenormOpc = AMDGPU::S_DENORM_MODE;
      assert(Offset == 4);
    }
  }

  if (!RoundOpc && !DenormOpc)
    return BB;

  // Only a value defined by a move of an immediate can be folded into the
  // immediate operand of the replacement instructions.
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  MachineInstr *Def = MRI.getVRegDef(MI.getOperand(0).getReg());
  if (!Def || !Def->isMoveImmediate() || !Def->getOperand(1).isImm())
    return BB;

  // Inserts one mode instruction before MI with the low four bits of Field.
  auto EmitModeInst = [&](unsigned Opc, unsigned Field) {
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)).addImm(Field & 0xf);
  };

  const unsigned ImmVal = Def->getOperand(1).getImm();
  if (RoundOpc)
    EmitModeInst(RoundOpc, ImmVal);
  // When the round mode was taken from the low nibble, the denorm mode bits
  // are the next nibble up; otherwise they are the low nibble itself.
  if (DenormOpc)
    EmitModeInst(DenormOpc, RoundOpc ? ImmVal >> 4 : ImmVal);

  MI.eraseFromParent();
  return BB;
}
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}

View File

@ -808,6 +808,10 @@ def S_SETREG_B32 : SOPK_Pseudo <
(outs), (ins SReg_32:$sdst, hwreg:$simm16),
"$simm16, $sdst",
[(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
// Use custom inserter to optimize some cases to
// S_DENORM_MODE/S_ROUND_MODE.
let usesCustomInserter = 1;
let Defs = [MODE];
let Uses = [MODE];
}

View File

@ -309,7 +309,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_0() {
; GFX10-LABEL: test_setreg_full_round_mode_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -329,7 +329,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_1() {
; GFX10-LABEL: test_setreg_full_round_mode_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
; GFX10-NEXT: s_round_mode 0x1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -349,7 +349,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_2() {
; GFX10-LABEL: test_setreg_full_round_mode_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
; GFX10-NEXT: s_round_mode 0x2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -369,7 +369,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_4() {
; GFX10-LABEL: test_setreg_full_round_mode_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
; GFX10-NEXT: s_round_mode 0x4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -389,7 +389,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_8() {
; GFX10-LABEL: test_setreg_full_round_mode_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
; GFX10-NEXT: s_round_mode 0x8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -409,7 +409,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_15() {
; GFX10-LABEL: test_setreg_full_round_mode_15:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -430,7 +430,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_42() {
; GFX10-LABEL: test_setreg_full_round_mode_42:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42
; GFX10-NEXT: s_round_mode 0xa
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -450,7 +450,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_0() {
; GFX10-LABEL: test_setreg_full_denorm_mode_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -470,7 +470,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_1() {
; GFX10-LABEL: test_setreg_full_denorm_mode_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1
; GFX10-NEXT: s_denorm_mode 1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -491,7 +491,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_2() {
; GFX10-LABEL: test_setreg_full_denorm_mode_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2
; GFX10-NEXT: s_denorm_mode 2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -511,7 +511,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_4() {
; GFX10-LABEL: test_setreg_full_denorm_mode_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4
; GFX10-NEXT: s_denorm_mode 4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -531,7 +531,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_8() {
; GFX10-LABEL: test_setreg_full_denorm_mode_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8
; GFX10-NEXT: s_denorm_mode 8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -551,7 +551,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_15() {
; GFX10-LABEL: test_setreg_full_denorm_mode_15:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15
; GFX10-NEXT: s_denorm_mode 15
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -571,7 +571,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_42() {
; GFX10-LABEL: test_setreg_full_denorm_mode_42:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42
; GFX10-NEXT: s_denorm_mode 10
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -591,10 +591,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_0()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 0)
call void asm sideeffect "", ""()
@ -611,10 +612,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_1()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 1)
call void asm sideeffect "", ""()
@ -631,10 +633,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_2()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 2)
call void asm sideeffect "", ""()
@ -651,10 +654,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_4()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x4
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 4)
call void asm sideeffect "", ""()
@ -671,10 +675,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_8()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x8
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 8)
call void asm sideeffect "", ""()
@ -691,10 +696,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_16()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 1
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 16)
call void asm sideeffect "", ""()
@ -711,10 +717,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_32()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 2
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 32)
call void asm sideeffect "", ""()
@ -731,10 +738,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_64()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 4
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 64)
call void asm sideeffect "", ""()
@ -751,10 +759,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_128(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 8
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 128)
call void asm sideeffect "", ""()
@ -771,10 +780,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_15()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 15)
call void asm sideeffect "", ""()
@ -791,10 +801,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_255(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 15
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 255)
call void asm sideeffect "", ""()
@ -812,10 +823,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_597(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x5
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 5
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 597)
call void asm sideeffect "", ""()

View File

@ -309,7 +309,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_0() {
; GFX10-LABEL: test_setreg_full_round_mode_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -329,7 +329,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_1() {
; GFX10-LABEL: test_setreg_full_round_mode_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
; GFX10-NEXT: s_round_mode 0x1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -349,7 +349,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_2() {
; GFX10-LABEL: test_setreg_full_round_mode_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
; GFX10-NEXT: s_round_mode 0x2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -369,7 +369,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_4() {
; GFX10-LABEL: test_setreg_full_round_mode_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
; GFX10-NEXT: s_round_mode 0x4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -389,7 +389,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_8() {
; GFX10-LABEL: test_setreg_full_round_mode_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
; GFX10-NEXT: s_round_mode 0x8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -409,7 +409,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_15() {
; GFX10-LABEL: test_setreg_full_round_mode_15:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -430,7 +430,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_42() {
; GFX10-LABEL: test_setreg_full_round_mode_42:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42
; GFX10-NEXT: s_round_mode 0xa
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -450,7 +450,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_0() {
; GFX10-LABEL: test_setreg_full_denorm_mode_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -470,7 +470,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_1() {
; GFX10-LABEL: test_setreg_full_denorm_mode_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1
; GFX10-NEXT: s_denorm_mode 1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -491,7 +491,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_2() {
; GFX10-LABEL: test_setreg_full_denorm_mode_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2
; GFX10-NEXT: s_denorm_mode 2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -511,7 +511,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_4() {
; GFX10-LABEL: test_setreg_full_denorm_mode_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4
; GFX10-NEXT: s_denorm_mode 4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -531,7 +531,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_8() {
; GFX10-LABEL: test_setreg_full_denorm_mode_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8
; GFX10-NEXT: s_denorm_mode 8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -551,7 +551,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_15() {
; GFX10-LABEL: test_setreg_full_denorm_mode_15:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15
; GFX10-NEXT: s_denorm_mode 15
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -571,7 +571,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_42() {
; GFX10-LABEL: test_setreg_full_denorm_mode_42:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42
; GFX10-NEXT: s_denorm_mode 10
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@ -591,10 +591,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_0()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 0)
call void asm sideeffect "", ""()
@ -611,10 +612,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_1()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 1)
call void asm sideeffect "", ""()
@ -631,10 +633,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_2()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 2)
call void asm sideeffect "", ""()
@ -651,10 +654,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_4()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x4
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 4)
call void asm sideeffect "", ""()
@ -671,10 +675,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_8()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x8
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 8)
call void asm sideeffect "", ""()
@ -691,10 +696,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_16()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 1
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 16)
call void asm sideeffect "", ""()
@ -711,10 +717,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_32()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 2
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 32)
call void asm sideeffect "", ""()
@ -731,10 +738,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_64()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 4
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 64)
call void asm sideeffect "", ""()
@ -751,10 +759,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_128(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 8
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 128)
call void asm sideeffect "", ""()
@ -771,10 +780,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_15()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 15)
call void asm sideeffect "", ""()
@ -791,10 +801,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_255(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 15
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 255)
call void asm sideeffect "", ""()
@ -812,10 +823,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_597(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x5
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_denorm_mode 5
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 597)
call void asm sideeffect "", ""()