forked from OSchip/llvm-project
[AMDGPU] Invert the handling of skip insertion.
The current implementation of skip insertion (SIInsertSkips) makes it a mandatory pass required for correctness. Initially, the idea was to have an optional pass. This patch inserts the s_cbranch_execz upfront during SILowerControlFlow to skip over the sections of code when no lanes are active. Later, SIRemoveShortExecBranches removes the skips for short branches, unless there is a side effect and the skip branch is really necessary. This new pass will replace the handling of skip insertion in the existing SIInsertSkips pass. Differential revision: https://reviews.llvm.org/D68092
This commit is contained in:
parent
064859bde7
commit
0dc6c249bf
|
@ -156,6 +156,9 @@ extern char &SIWholeQuadModeID;
|
|||
void initializeSILowerControlFlowPass(PassRegistry &);
|
||||
extern char &SILowerControlFlowID;
|
||||
|
||||
void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
|
||||
extern char &SIRemoveShortExecBranchesID;
|
||||
|
||||
void initializeSIInsertSkipsPass(PassRegistry &);
|
||||
extern char &SIInsertSkipsPassID;
|
||||
|
||||
|
|
|
@ -228,6 +228,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
|
|||
initializeSIModeRegisterPass(*PR);
|
||||
initializeSIWholeQuadModePass(*PR);
|
||||
initializeSILowerControlFlowPass(*PR);
|
||||
initializeSIRemoveShortExecBranchesPass(*PR);
|
||||
initializeSIInsertSkipsPass(*PR);
|
||||
initializeSIMemoryLegalizerPass(*PR);
|
||||
initializeSIOptimizeExecMaskingPass(*PR);
|
||||
|
@ -993,6 +994,7 @@ void GCNPassConfig::addPreEmitPass() {
|
|||
// be better for it to emit S_NOP <N> when possible.
|
||||
addPass(&PostRAHazardRecognizerID);
|
||||
|
||||
addPass(&SIRemoveShortExecBranchesID);
|
||||
addPass(&SIInsertSkipsPassID);
|
||||
addPass(&BranchRelaxationPassID);
|
||||
}
|
||||
|
|
|
@ -116,6 +116,7 @@ add_llvm_target(AMDGPUCodeGen
|
|||
SIOptimizeExecMaskingPreRA.cpp
|
||||
SIPeepholeSDWA.cpp
|
||||
SIRegisterInfo.cpp
|
||||
SIRemoveShortExecBranches.cpp
|
||||
SIShrinkInstructions.cpp
|
||||
SIWholeQuadMode.cpp
|
||||
GCNILPSched.cpp
|
||||
|
|
|
@ -41,7 +41,7 @@ using namespace llvm;
|
|||
#define DEBUG_TYPE "si-insert-skips"
|
||||
|
||||
static cl::opt<unsigned> SkipThresholdFlag(
|
||||
"amdgpu-skip-threshold",
|
||||
"amdgpu-skip-threshold-legacy",
|
||||
cl::desc("Number of instructions before jumping over divergent control flow"),
|
||||
cl::init(12), cl::Hidden);
|
||||
|
||||
|
@ -466,6 +466,9 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
|
|||
MachineInstr &MI = *I;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::S_CBRANCH_EXECZ:
|
||||
ExecBranchStack.push_back(MI.getOperand(0).getMBB());
|
||||
break;
|
||||
case AMDGPU::SI_MASK_BRANCH:
|
||||
ExecBranchStack.push_back(MI.getOperand(0).getMBB());
|
||||
MadeChange |= skipMaskBranch(MI, MBB);
|
||||
|
|
|
@ -244,9 +244,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
|
|||
BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
|
||||
.addReg(Tmp, RegState::Kill);
|
||||
|
||||
// Insert a pseudo terminator to help keep the verifier happy. This will also
|
||||
// be used later when inserting skips.
|
||||
MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
|
||||
// Insert the S_CBRANCH_EXECZ instruction which will be optimized later
|
||||
// during SIRemoveShortExecBranches.
|
||||
MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
|
||||
.add(MI.getOperand(2));
|
||||
|
||||
if (!LIS) {
|
||||
|
@ -323,8 +323,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
|
|||
.addReg(DstReg);
|
||||
|
||||
MachineInstr *Branch =
|
||||
BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
|
||||
.addMBB(DestBB);
|
||||
BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
|
||||
.addMBB(DestBB);
|
||||
|
||||
if (!LIS) {
|
||||
MI.eraseFromParent();
|
||||
|
|
|
@ -0,0 +1,158 @@
|
|||
//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// This pass optimizes the s_cbranch_execz instructions.
|
||||
/// The pass removes this skip instruction for short branches,
|
||||
/// if there is no unwanted side effect in the fallthrough code sequence.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "si-remove-short-exec-branches"
|
||||
|
||||
static unsigned SkipThreshold;
|
||||
|
||||
static cl::opt<unsigned, true> SkipThresholdFlag(
|
||||
"amdgpu-skip-threshold", cl::Hidden,
|
||||
cl::desc(
|
||||
"Number of instructions before jumping over divergent control flow"),
|
||||
cl::location(SkipThreshold), cl::init(12));
|
||||
|
||||
namespace {
|
||||
|
||||
class SIRemoveShortExecBranches : public MachineFunctionPass {
|
||||
private:
|
||||
const SIInstrInfo *TII = nullptr;
|
||||
bool getBlockDestinations(MachineBasicBlock &SrcMBB,
|
||||
MachineBasicBlock *&TrueMBB,
|
||||
MachineBasicBlock *&FalseMBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond);
|
||||
bool mustRetainExeczBranch(const MachineBasicBlock &From,
|
||||
const MachineBasicBlock &To) const;
|
||||
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
|
||||
initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
};
|
||||
|
||||
} // End anonymous namespace.
|
||||
|
||||
INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
|
||||
"SI remove short exec branches", false, false)
|
||||
|
||||
char SIRemoveShortExecBranches::ID = 0;
|
||||
|
||||
char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;
|
||||
|
||||
/// Determines the branch destinations of \p SrcMBB via
/// SIInstrInfo::analyzeBranch.
///
/// \param SrcMBB   Block whose terminators are analyzed.
/// \param TrueMBB  Receives the taken destination reported by analyzeBranch.
/// \param FalseMBB Receives the not-taken destination; when analyzeBranch
///                 leaves it null, it is set to the block that follows
///                 \p SrcMBB in layout.
/// \param Cond     Receives the branch condition operands.
/// \returns false if analyzeBranch reported failure (a true return value),
///          true otherwise.
bool SIRemoveShortExecBranches::getBlockDestinations(
    MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
    MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
  const bool AnalysisFailed =
      TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond);
  if (AnalysisFailed)
    return false;

  // No explicit not-taken target: use the next block in layout order.
  if (FalseMBB == nullptr)
    FalseMBB = SrcMBB.getNextNode();

  return true;
}
|
||||
|
||||
bool SIRemoveShortExecBranches::mustRetainExeczBranch(
|
||||
const MachineBasicBlock &From, const MachineBasicBlock &To) const {
|
||||
unsigned NumInstr = 0;
|
||||
const MachineFunction *MF = From.getParent();
|
||||
|
||||
for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
|
||||
MBBI != End && MBBI != ToI; ++MBBI) {
|
||||
const MachineBasicBlock &MBB = *MBBI;
|
||||
|
||||
for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
// When a uniform loop is inside non-uniform control flow, the branch
|
||||
// leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
|
||||
// when EXEC = 0. We should skip the loop lest it becomes infinite.
|
||||
if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
|
||||
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
|
||||
return true;
|
||||
|
||||
if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
|
||||
return true;
|
||||
|
||||
// These instructions are potentially expensive even if EXEC = 0.
|
||||
if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
|
||||
I->getOpcode() == AMDGPU::S_WAITCNT)
|
||||
return true;
|
||||
|
||||
++NumInstr;
|
||||
if (NumInstr >= SkipThreshold)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if the skip branch instruction is removed.
|
||||
bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
|
||||
MachineBasicBlock &SrcMBB) {
|
||||
MachineBasicBlock *TrueMBB = nullptr;
|
||||
MachineBasicBlock *FalseMBB = nullptr;
|
||||
SmallVector<MachineOperand, 1> Cond;
|
||||
|
||||
if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
|
||||
return false;
|
||||
|
||||
// Consider only the forward branches.
|
||||
if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
|
||||
mustRetainExeczBranch(*FalseMBB, *TrueMBB))
|
||||
return false;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
|
||||
MI.eraseFromParent();
|
||||
SrcMBB.removeSuccessor(TrueMBB);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Pass entry point: visits every basic block of \p MF and, when the block's
/// first terminator is an S_CBRANCH_EXECZ, tries to delete that branch through
/// removeExeczBranch().
///
/// \returns true if at least one branch was removed from the function, false
///          if the function was left untouched.
bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  // removeExeczBranch() compares block numbers to pick out forward branches;
  // renumber up front so those numbers are fresh.
  MF.RenumberBlocks();
  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
    if (MBBI == MBB.end())
      continue;

    MachineInstr &MI = *MBBI;
    switch (MI.getOpcode()) {
    case AMDGPU::S_CBRANCH_EXECZ:
      // Accumulate rather than assign: a later block whose branch is kept
      // must not reset the flag after an earlier branch was already removed,
      // otherwise the pass would report "no change" for a modified function.
      Changed |= removeExeczBranch(MI, MBB);
      break;
    default:
      break;
    }
  }

  return Changed;
}
|
|
@ -10,9 +10,8 @@ define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
|
|||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: ; mask branch BB0_2
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_2
|
||||
; CHECK-NEXT: BB0_1: ; %if.true
|
||||
; CHECK-NEXT: ; %bb.1: ; %if.true
|
||||
; CHECK-NEXT: global_load_dword v0, v[0:1], off
|
||||
; CHECK-NEXT: BB0_2: ; %endif
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
|
@ -38,12 +37,10 @@ define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
|
|||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: ; mask branch BB1_2
|
||||
; CHECK-NEXT: BB1_1: ; %endif
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
; CHECK-NEXT: BB1_2: ; %if.true
|
||||
; CHECK-NEXT: s_cbranch_execnz BB1_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %if.true
|
||||
; CHECK-NEXT: global_load_dword v0, v[0:1], off
|
||||
; CHECK-NEXT: BB1_2: ; %endif
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -11,7 +11,7 @@ declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1)
|
|||
; Show that what the atomic optimization pass will do for raw buffers.
|
||||
|
||||
; GCN-LABEL: add_i32_constant:
|
||||
; GCN-LABEL: BB0_1:
|
||||
; %bb.{{[0-9]+}}:
|
||||
; GCN32: v_cmp_ne_u32_e64 s[[exec_lo:[0-9]+]], 1, 0
|
||||
; GCN64: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
|
||||
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
|
||||
|
|
|
@ -14,12 +14,11 @@
|
|||
; GCN-DAG: v_cmp_lt_f32_e32 vcc,
|
||||
; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[OTHERCC]]
|
||||
; GCN: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[AND]]
|
||||
; GCN: ; mask branch [[BB5:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %bb4
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %bb4
|
||||
; GCN: ds_write_b32
|
||||
|
||||
; GCN: [[BB5]]
|
||||
; GCN: ; %bb.{{[0-9]+}}:
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GCN-NEXT: .Lfunc_end
|
||||
define amdgpu_ps void @ham(float %arg, float %arg1) #0 {
|
||||
|
|
|
@ -389,7 +389,6 @@ bb3:
|
|||
; GCN-LABEL: {{^}}uniform_inside_divergent:
|
||||
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
|
||||
; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_execnz [[IF:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %entry
|
||||
|
@ -401,7 +400,7 @@ bb3:
|
|||
; GCN-NEXT: [[IF]]: ; %if
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: s_cmp_lg_u32
|
||||
; GCN: s_cbranch_scc1 [[ENDIF]]
|
||||
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: ; %bb.2: ; %if_uniform
|
||||
; GCN: buffer_store_dword
|
||||
|
@ -438,12 +437,10 @@ endif:
|
|||
; GCN: v_cmp_nlt_f32_e32 vcc
|
||||
; GCN-NEXT: s_and_saveexec_b64 [[TEMP_MASK:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN-NEXT: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[TEMP_MASK]]
|
||||
; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: [[FLOW]]: ; %Flow
|
||||
; GCN: BB{{[0-9]+_[0-9]+}}: ; %Flow
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[TEMP_MASK1:s\[[0-9]+:[0-9]+\]]], [[MASK]]
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, [[TEMP_MASK1]]
|
||||
; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop{{$}}
|
||||
; GCN: ;;#ASMSTART
|
||||
|
@ -454,7 +451,7 @@ endif:
|
|||
; GCN: v_nop_e64
|
||||
; GCN: v_nop_e64
|
||||
; GCN: ;;#ASMEND
|
||||
; GCN: s_cbranch_vccz [[RET]]
|
||||
; GCN: s_cbranch_vccz [[RET:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop
|
||||
; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
|
||||
|
|
|
@ -8,8 +8,7 @@ define hidden void @func() #1 {
|
|||
|
||||
; GCN-LABEL: {{^}}if_call:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[END]]
|
||||
; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_swappc_b64
|
||||
; GCN: [[END]]:
|
||||
define void @if_call(i32 %flag) #0 {
|
||||
|
@ -26,8 +25,7 @@ end:
|
|||
|
||||
; GCN-LABEL: {{^}}if_asm:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[END]]
|
||||
; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
|
||||
; GCN: ; sample asm
|
||||
; GCN: [[END]]:
|
||||
define void @if_asm(i32 %flag) #0 {
|
||||
|
@ -44,8 +42,7 @@ end:
|
|||
|
||||
; GCN-LABEL: {{^}}if_call_kernel:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[END]]
|
||||
; GCN-NEXT: s_cbranch_execz BB3_2
|
||||
; GCN: s_swappc_b64
|
||||
define amdgpu_kernel void @if_call_kernel() #0 {
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
|
|
@ -3,12 +3,10 @@
|
|||
|
||||
; ALL-LABEL: {{^}}simple_nested_if:
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
|
||||
; GCN: s_and_b64 exec, exec, vcc
|
||||
; GCN-NEXT: ; mask branch [[ENDIF]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
|
||||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[ENDIF]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
|
||||
|
@ -43,12 +41,10 @@ bb.outer.end: ; preds = %bb.outer.then, %bb.
|
|||
|
||||
; ALL-LABEL: {{^}}uncollapsable_nested_if:
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: ; mask branch [[ENDIF_INNER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER]]
|
||||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:BB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
|
||||
|
@ -88,18 +84,16 @@ bb.outer.end: ; preds = %bb.inner.then, %bb
|
|||
|
||||
; ALL-LABEL: {{^}}nested_if_if_else:
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
|
||||
; GCN-NEXT: ; mask branch [[THEN_INNER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_INNER]]
|
||||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_INNER:BB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[THEN_INNER]]:
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
|
||||
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
|
||||
|
@ -137,28 +131,24 @@ bb.outer.end: ; preds = %bb, %bb.then, %b
|
|||
; ALL-LABEL: {{^}}nested_if_else_if:
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
|
||||
; GCN-NEXT: ; mask branch [[THEN_OUTER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER]]
|
||||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:BB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: ; mask branch [[THEN_OUTER_FLOW:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW]]
|
||||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:BB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
|
||||
; GCN-NEXT: {{^}}[[THEN_OUTER]]:
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
|
||||
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
|
||||
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
|
||||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: ; mask branch [[FLOW1:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[FLOW1]]
|
||||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN-NEXT: s_cbranch_execz [[FLOW1:BB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: [[FLOW1]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_THEN]]
|
||||
|
@ -203,9 +193,8 @@ bb.outer.end:
|
|||
|
||||
; ALL-LABEL: {{^}}s_endpgm_unsafe_barrier:
|
||||
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
|
||||
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
|
||||
; GCN-NEXT: {{^BB[0-9_]+}}:
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; GCN: store_dword
|
||||
; GCN-NEXT: {{^}}[[ENDIF]]:
|
||||
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
|
||||
|
|
|
@ -35,9 +35,9 @@
|
|||
|
||||
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
|
||||
|
||||
; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: {{^}}BB{{[0-9]+}}_1: ; %if
|
||||
; GCN: ; %bb.{{[0-9]+}}: ; %if
|
||||
; GCN: s_mov_b32 m0, -1
|
||||
; GCN: ds_read_b32 [[LOAD1:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
|
||||
|
@ -116,8 +116,7 @@ endif:
|
|||
|
||||
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
|
||||
|
||||
; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[END]]
|
||||
; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
|
||||
|
||||
|
||||
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
|
@ -194,8 +193,7 @@ end:
|
|||
; GCN: s_mov_b64 exec, [[CMP0]]
|
||||
|
||||
; FIXME: It makes no sense to put this skip here
|
||||
; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_execz [[FLOW]]
|
||||
; GCN: s_cbranch_execz [[FLOW:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_branch [[ELSE:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: [[FLOW]]: ; %Flow
|
||||
|
@ -229,11 +227,10 @@ end:
|
|||
|
||||
; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill
|
||||
; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}
|
||||
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
|
||||
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
|
||||
|
||||
; GCN: BB{{[0-9]+}}_2: ; %if
|
||||
; GCN: ; %bb.{{[0-9]+}}: ; %if
|
||||
; GCN: ds_read_b32
|
||||
; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
|
||||
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
|
||||
|
|
|
@ -4,8 +4,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
|
|||
; GCN-LABEL: {{^}}convergent_inlineasm:
|
||||
; GCN: %bb.0:
|
||||
; GCN: v_cmp_ne_u32_e64
|
||||
; GCN: ; mask branch
|
||||
; GCN: BB{{[0-9]+_[0-9]+}}:
|
||||
; GCN: s_cbranch_execz
|
||||
; GCN: ; %bb.{{[0-9]+}}:
|
||||
define amdgpu_kernel void @convergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -23,9 +23,9 @@ bb5: ; preds = %bb3, %bb
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}nonconvergent_inlineasm:
|
||||
; GCN: ; mask branch
|
||||
; GCN: s_cbranch_execz
|
||||
|
||||
; GCN: BB{{[0-9]+_[0-9]+}}:
|
||||
; GCN: ; %bb.{{[0-9]+}}:
|
||||
; GCN: v_cmp_ne_u32_e64
|
||||
|
||||
; GCN: BB{{[0-9]+_[0-9]+}}:
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
; CHECK: s_mov_b32 [[SREG:s[0-9]+]], 1.0
|
||||
; CHECK: %bb.1:
|
||||
; CHECK-NOT: v_mov_b32_e32 {{v[0-9]+}}, 1.0
|
||||
; CHECK: BB0_4:
|
||||
; CHECK: BB0_3:
|
||||
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, [[SREG]]
|
||||
|
||||
define amdgpu_ps void @mov_opt(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
|
||||
|
|
|
@ -32,7 +32,6 @@ define amdgpu_ps void @main(i32, float) {
|
|||
; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
||||
; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_6
|
||||
; CHECK-NEXT: BB0_3: ; %loop
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
|
||||
|
@ -44,21 +43,19 @@ define amdgpu_ps void @main(i32, float) {
|
|||
; CHECK-NEXT: s_mov_b64 s[6:7], -1
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
|
||||
; CHECK-NEXT: ; mask branch BB0_1
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_1
|
||||
; CHECK-NEXT: BB0_5: ; %endif2
|
||||
; CHECK-NEXT: ; %bb.5: ; %endif2
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: s_add_i32 s0, s0, 1
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
|
||||
; CHECK-NEXT: s_branch BB0_1
|
||||
; CHECK-NEXT: BB0_6: ; %Flow2
|
||||
; CHECK-NEXT: ; %bb.6: ; %Flow2
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]
|
||||
; CHECK-NEXT: ; mask branch BB0_8
|
||||
; CHECK-NEXT: BB0_7: ; %if1
|
||||
; CHECK-NEXT: ; %bb.7: ; %if1
|
||||
; CHECK-NEXT: v_sqrt_f32_e32 v1, v0
|
||||
; CHECK-NEXT: BB0_8: ; %endloop
|
||||
; CHECK-NEXT: ; %bb.8: ; %endloop
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm
|
||||
; CHECK-NEXT: s_endpgm
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
; CHECK: ; %Flow
|
||||
; CHECK-NEXT: s_or_saveexec_b64 [[DST:s\[[0-9]+:[0-9]+\]]],
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, [[DST]]
|
||||
; CHECK-NEXT: ; mask branch
|
||||
define amdgpu_ps float @else_no_execfix(i32 %z, float %v) #0 {
|
||||
main_body:
|
||||
%cc = icmp sgt i32 %z, 5
|
||||
|
@ -32,7 +31,7 @@ end:
|
|||
; CHECK-NEXT: s_and_b64 exec, exec, [[INIT_EXEC]]
|
||||
; CHECK-NEXT: s_and_b64 [[AND_INIT:s\[[0-9]+:[0-9]+\]]], exec, [[DST]]
|
||||
; CHECK-NEXT: s_xor_b64 exec, exec, [[AND_INIT]]
|
||||
; CHECK-NEXT: ; mask branch
|
||||
; CHECK-NEXT: s_cbranch_execz
|
||||
define amdgpu_ps void @else_execfix_leave_wqm(i32 %z, float %v) #0 {
|
||||
main_body:
|
||||
%cc = icmp sgt i32 %z, 5
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
; CHECK-NOT: v_cmp
|
||||
; CHECK_NOT: v_cndmask
|
||||
; CHECK: s_and_saveexec_b64 s[{{[[0-9]+:[0-9]+}}], [[COND]]
|
||||
; CHECK: BB0_2:
|
||||
; CHECK: ; %bb.2:
|
||||
|
||||
define amdgpu_kernel void @hoist_cond(float addrspace(1)* nocapture %arg, float addrspace(1)* noalias nocapture readonly %arg1, i32 %arg3, i32 %arg4) {
|
||||
bb:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-skips -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# Make sure mandatory skips are inserted to ensure GWS ops aren't run with exec = 0
|
||||
|
||||
---
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-skips -amdgpu-skip-threshold=2 %s -o - | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=2 %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold=1 %s -o - | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 %s -o - | FileCheck %s
|
||||
# https://bugs.freedesktop.org/show_bug.cgi?id=99019
|
||||
--- |
|
||||
define amdgpu_ps void @kill_uncond_branch() {
|
||||
|
|
|
@ -158,7 +158,7 @@ entry:
|
|||
; W64: s_mov_b64 exec, [[SAVEEXEC]]
|
||||
; W64: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; W64: BB{{[0-9]+_[0-9]+}}:
|
||||
; W64: ; %bb.{{[0-9]+}}:
|
||||
; W64-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s4
|
||||
; W64-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
|
||||
|
||||
|
@ -204,7 +204,7 @@ entry:
|
|||
; W32: s_mov_b32 exec_lo, [[SAVEEXEC]]
|
||||
; W32: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; W32: BB{{[0-9]+_[0-9]+}}:
|
||||
; W32: ; %bb.{{[0-9]+}}:
|
||||
; W32-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s4
|
||||
; W32-DAG: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo
|
||||
|
||||
|
@ -270,7 +270,7 @@ entry:
|
|||
; W64-O0: buffer_store_dword [[RES]], off, s[0:3], s32 offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill
|
||||
; W64-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; W64-O0: BB{{[0-9]+_[0-9]+}}:
|
||||
; W64-O0: ; %bb.{{[0-9]+}}:
|
||||
; W64-O0-DAG: s_mov_b64 s{{\[}}[[SAVEEXEC0:[0-9]+]]:[[SAVEEXEC1:[0-9]+]]{{\]}}, exec
|
||||
; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
|
||||
; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC0]], [[SAVEEXEC_IDX0:[0-9]+]]
|
||||
|
|
|
@ -58,9 +58,8 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
|
|||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5
|
||||
; GFX9-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[10:11], s[4:5]
|
||||
; GFX9-NEXT: ; mask branch BB1_4
|
||||
; GFX9-NEXT: s_cbranch_execz BB1_4
|
||||
; GFX9-NEXT: BB1_1: ; %bb19
|
||||
; GFX9-NEXT: ; %bb.1: ; %bb19
|
||||
; GFX9-NEXT: v_cvt_f32_u32_e32 v7, v6
|
||||
; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v6
|
||||
; GFX9-NEXT: v_add_u32_e32 v6, v4, v0
|
||||
|
|
|
@ -11,12 +11,11 @@
|
|||
; GCN-NEXT: ; %else
|
||||
|
||||
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: BB{{[0-9]+_[0-9]+}}: ; %unreachable.bb
|
||||
; GCN: ; %bb.{{[0-9]+}}: ; %unreachable.bb
|
||||
; GCN-NEXT: ; divergent unreachable
|
||||
|
||||
; GCN-NEXT: {{^}}[[FLOW]]: ; %Flow
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %Flow
|
||||
; GCN-NEXT: s_or_b64 exec, exec
|
||||
|
||||
; GCN-NEXT: [[RET_BB]]:
|
||||
|
@ -55,11 +54,17 @@ ret.bb: ; preds = %else, %main_body
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable:
|
||||
; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_vccz
|
||||
|
||||
; GCN: ; %bb.{{[0-9]+}}: ; %else
|
||||
; GCN: ; %bb.{{[0-9]+}}: ; %Flow
|
||||
; GCN: s_cbranch_execnz [[RETURN:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: ; %UnifiedReturnBlock
|
||||
; GCN-NEXT: s_or_b64 exec, exec
|
||||
; GCN-NEXT: s_waitcnt
|
||||
|
||||
; GCN: BB{{[0-9]+_[0-9]+}}: ; %else
|
||||
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN-NEXT: ; mask branch [[FLOW1:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: ; %unreachable.bb
|
||||
; GCN: ds_write_b32
|
||||
|
@ -67,12 +72,6 @@ ret.bb: ; preds = %else, %main_body
|
|||
|
||||
; GCN: ; %ret.bb
|
||||
; GCN: store_dword
|
||||
|
||||
; GCN: ; %UnifiedReturnBlock
|
||||
; GCN-NEXT: s_or_b64 exec, exec
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: ; return
|
||||
; GCN-NEXT: .Lfunc_end
|
||||
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <8 x i32>] addrspace(4)* inreg %arg2, i32 addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
|
||||
main_body:
|
||||
%i.i = extractelement <2 x i32> %arg7, i32 0
|
||||
|
|
|
@ -40,8 +40,6 @@ bb5: ; preds = %bb3, %bb1
|
|||
; GCN: load_dwordx4
|
||||
; GCN: v_cmp_nlt_f32
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: ; mask branch [[UNIFIED_RET:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: [[UNIFIED_RET]]:
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GCN: .Lfunc_end
|
||||
define amdgpu_kernel void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
|
||||
|
|
|
@ -3,13 +3,12 @@
|
|||
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator:
|
||||
; GCN: v_cmp_eq_u32
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: ; mask branch [[RET:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %unreachable
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
|
||||
; GCN: ds_write_b32
|
||||
; GCN: ; divergent unreachable
|
||||
|
||||
; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %UnifiedReturnBlock
|
||||
; GCN: s_endpgm
|
||||
|
||||
define amdgpu_kernel void @lower_control_flow_unreachable_terminator() #0 {
|
||||
|
@ -29,13 +28,12 @@ ret:
|
|||
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator_swap_block_order:
|
||||
; GCN: v_cmp_ne_u32
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: ; mask branch [[RETURN:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %unreachable
|
||||
; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
|
||||
; GCN: ds_write_b32
|
||||
; GCN: ; divergent unreachable
|
||||
|
||||
; GCN: [[RETURN]]:
|
||||
; GCN: ; %bb.{{[0-9]+}}:
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_kernel void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
|
||||
bb:
|
||||
|
|
|
@ -32,7 +32,7 @@ body: |
|
|||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc
|
||||
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
|
||||
; GCN: SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; GCN: S_BRANCH %bb.1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.2(0x80000000)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold=1000000 -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold-legacy=1000000 -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
name: skip_branch_taildup_endpgm
|
||||
|
|
|
@ -5,9 +5,8 @@
|
|||
; An s_cbranch_execnz is required to avoid trapping if all lanes are 0
|
||||
; GCN-LABEL: {{^}}trap_divergent_branch:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: s_cbranch_execz [[ENDPGM:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_branch [[TRAP:BB[0-9]+_[0-9]+]]
|
||||
; GCN: [[ENDPGM]]:
|
||||
; GCN: s_cbranch_execnz [[TRAP:BB[0-9]+_[0-9]+]]
|
||||
; GCN: ; %bb.{{[0-9]+}}:
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GCN: [[TRAP]]:
|
||||
; GCN: s_trap 2
|
||||
|
@ -30,7 +29,7 @@ end:
|
|||
; GCN-LABEL: {{^}}debugtrap_divergent_branch:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: s_cbranch_execz [[ENDPGM:BB[0-9]+_[0-9]+]]
|
||||
; GCN: BB{{[0-9]+}}_{{[0-9]+}}:
|
||||
; GCN: ; %bb.{{[0-9]+}}:
|
||||
; GCN: s_trap 3
|
||||
; GCN-NEXT: [[ENDPGM]]:
|
||||
; GCN-NEXT: s_endpgm
|
||||
|
|
|
@ -220,10 +220,9 @@ exit:
|
|||
; CHECK: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
|
||||
; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: s_cbranch_execz [[EXIT]]
|
||||
; CHECK-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: {{BB[0-9]+_[0-9]+}}: ; %bb.preheader
|
||||
; CHECK: ; %bb.{{[0-9]+}}: ; %bb.preheader
|
||||
; CHECK: s_mov_b32
|
||||
|
||||
; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:
|
||||
|
@ -357,20 +356,18 @@ bb7: ; preds = %bb4
|
|||
; CHECK: ; %bb.0:
|
||||
; CHECK: s_and_saveexec_b64
|
||||
; CHECK: s_xor_b64
|
||||
; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: v_cmpx_gt_f32_e32 vcc, 0,
|
||||
; CHECK: [[BB4]]:
|
||||
; CHECK: BB{{[0-9]+_[0-9]+}}:
|
||||
; CHECK: s_or_b64 exec, exec
|
||||
; CHECK: image_sample_c
|
||||
|
||||
; CHECK: v_cmp_neq_f32_e32 vcc, 0,
|
||||
; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
|
||||
; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: s_cbranch_execz [[END]]
|
||||
; CHECK-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
|
||||
; CHECK-NOT: branch
|
||||
|
||||
; CHECK: BB{{[0-9]+_[0-9]+}}: ; %bb8
|
||||
; CHECK: ; %bb.{{[0-9]+}}: ; %bb8
|
||||
; CHECK: buffer_store_dword
|
||||
|
||||
; CHECK: [[END]]:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
|
||||
|
||||
; GCN-LABEL: BB0_1
|
||||
; GCN-LABEL: ; %bb.0:
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s{{\[}}[[ADDR_LO:[0-9]+]]{{\:}}[[ADDR_HI:[0-9]+]]{{\]}}, 0x0
|
||||
; GCN: s_waitcnt lgkmcnt(0)
|
||||
; GCN: global_store_dword v{{\[}}[[ADDR_LO]]{{\:}}[[ADDR_HI]]{{\]}}, v{{[0-9]+}}, off
|
||||
|
|
|
@ -28,9 +28,8 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <
|
|||
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
|
||||
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GCN-NEXT: ; mask branch BB0_2
|
||||
; GCN-NEXT: s_cbranch_execz BB0_2
|
||||
; GCN-NEXT: BB0_1: ; %if.then4.i
|
||||
; GCN-NEXT: ; %bb.1: ; %if.then4.i
|
||||
; GCN-NEXT: buffer_load_dword v0, v32, s[36:39], s32 offen
|
||||
; GCN-NEXT: buffer_load_dword v1, v32, s[36:39], s32 offen offset:4
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
|
|
|
@ -18,14 +18,13 @@ define amdgpu_kernel void @foobar(float %a0, float %a1, float addrspace(1)* %out
|
|||
; CHECK-NEXT: v_mov_b32_e32 v2, s6
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s7
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; CHECK-NEXT: ; mask branch BB0_2
|
||||
; CHECK-NEXT: BB0_1: ; %ift
|
||||
; CHECK-NEXT: ; %bb.1: ; %ift
|
||||
; CHECK-NEXT: s_mov_b32 s4, s5
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s5
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, s6
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s7
|
||||
; CHECK-NEXT: BB0_2: ; %ife
|
||||
; CHECK-NEXT: ; %bb.2: ; %ife
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CHECK-NEXT: s_mov_b32 s3, 0xf000
|
||||
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0
|
||||
|
|
|
@ -335,7 +335,7 @@ endif:
|
|||
; GCN: [[IF_LABEL]]:
|
||||
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
|
||||
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN: ; mask branch [[ENDIF_LABEL]]
|
||||
; GCN: s_cbranch_execz [[ENDIF_LABEL]]
|
||||
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: buffer_store_dword [[ONE]]
|
||||
; GCN: s_endpgm
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
; CHECK-LABEL: {{^}}test1:
|
||||
; CHECK: v_cmp_ne_u32_e32 vcc, 0
|
||||
; CHECK: s_and_saveexec_b64
|
||||
; CHECK-NEXT: ; mask branch
|
||||
; CHECK-NEXT: s_cbranch_execz BB{{[0-9]+_[0-9]+}}
|
||||
|
||||
; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]: ; %loop_body
|
||||
|
@ -33,7 +32,6 @@ out:
|
|||
|
||||
; CHECK-LABEL: {{^}}test2:
|
||||
; CHECK: s_and_saveexec_b64
|
||||
; CHECK-NEXT: ; mask branch
|
||||
; CHECK-NEXT: s_cbranch_execz
|
||||
define amdgpu_kernel void @test2(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
main_body:
|
||||
|
|
|
@ -13,19 +13,16 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
|||
; SI-NEXT: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0
|
||||
; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]]
|
||||
; SI-NEXT: ; mask branch [[FLOW_BB:BB[0-9]+_[0-9]+]]
|
||||
; SI-NEXT: s_cbranch_execz [[FLOW_BB]]
|
||||
|
||||
; SI-NEXT: BB{{[0-9]+}}_1: ; %LeafBlock3
|
||||
; SI-NEXT: ; %bb.{{[0-9]+}}: ; %LeafBlock3
|
||||
; SI: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
|
||||
; SI: s_and_saveexec_b64
|
||||
; SI-NEXT: ; mask branch
|
||||
; SI-NEXT: s_cbranch_execnz
|
||||
|
||||
; v_mov should be after exec modification
|
||||
; SI: [[FLOW_BB]]:
|
||||
; SI: ; %bb.{{[0-9]+}}:
|
||||
; SI-NEXT: s_or_saveexec_b64 [[SAVE3:s\[[0-9]+:[0-9]+\]]], [[SAVE2]]
|
||||
; SI-NEXT: s_xor_b64 exec, exec, [[SAVE3]]
|
||||
; SI-NEXT: ; mask branch
|
||||
;
|
||||
define amdgpu_kernel void @test_if(i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
|
||||
entry:
|
||||
|
@ -65,10 +62,9 @@ end:
|
|||
; SI-LABEL: {{^}}simple_test_v_if:
|
||||
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
|
||||
; SI-NEXT: s_cbranch_execz [[EXIT]]
|
||||
; SI-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI-NEXT: BB{{[0-9]+_[0-9]+}}:
|
||||
; SI-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; SI: buffer_store_dword
|
||||
|
||||
; SI-NEXT: {{^}}[[EXIT]]:
|
||||
|
@ -92,10 +88,9 @@ exit:
|
|||
; SI-LABEL: {{^}}simple_test_v_if_ret_else_ret:
|
||||
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
|
||||
; SI-NEXT: s_cbranch_execz [[EXIT]]
|
||||
; SI-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI-NEXT: BB{{[0-9]+_[0-9]+}}:
|
||||
; SI-NEXT: ; %bb.{{[0-9]+}}:
|
||||
; SI: buffer_store_dword
|
||||
|
||||
; SI-NEXT: {{^}}[[EXIT]]:
|
||||
|
@ -122,23 +117,22 @@ exit:
|
|||
; SI: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
|
||||
; SI: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
|
||||
; SI: s_cbranch_execnz [[EXIT:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %exit
|
||||
; SI: ds_write_b32
|
||||
|
||||
; SI-NEXT: {{^}}[[FLOW]]:
|
||||
; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %Flow
|
||||
; SI-NEXT: s_or_saveexec_b64
|
||||
; SI-NEXT: s_xor_b64 exec, exec
|
||||
; SI-NEXT: ; mask branch [[UNIFIED_RETURN:BB[0-9]+_[0-9]+]]
|
||||
; SI-NEXT: s_cbranch_execz [[UNIFIED_RETURN]]
|
||||
; SI-NEXT: s_cbranch_execz [[UNIFIED_RETURN:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %then
|
||||
; SI-NEXT: ; %bb.{{[0-9]+}}: ; %then
|
||||
; SI: s_waitcnt
|
||||
; SI-NEXT: buffer_store_dword
|
||||
|
||||
; SI-NEXT: {{^}}[[UNIFIED_RETURN]]: ; %UnifiedReturnBlock
|
||||
; SI: s_endpgm
|
||||
|
||||
; SI-NEXT: {{^}}[[EXIT]]:
|
||||
; SI: ds_write_b32
|
||||
define amdgpu_kernel void @simple_test_v_if_ret_else_code_ret(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%is.0 = icmp ne i32 %tid, 0
|
||||
|
@ -157,7 +151,6 @@ exit:
|
|||
; SI-LABEL: {{^}}simple_test_v_loop:
|
||||
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI-NEXT: ; mask branch
|
||||
; SI-NEXT: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
|
@ -199,11 +192,10 @@ exit:
|
|||
; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
|
||||
; SI: v_cmp_lt_i32_e32 vcc
|
||||
; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; SI-NEXT: ; mask branch
|
||||
; SI-NEXT: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; Initialize inner condition to false
|
||||
; SI: BB{{[0-9]+_[0-9]+}}: ; %bb10.preheader
|
||||
; SI: ; %bb.{{[0-9]+}}: ; %bb10.preheader
|
||||
; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], 0{{$}}
|
||||
|
||||
; Clear exec bits for workitems that load -1s
|
||||
|
@ -214,9 +206,9 @@ exit:
|
|||
; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
|
||||
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
|
||||
; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
|
||||
; SI: ; mask branch [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
|
||||
; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
|
||||
; SI: ; %bb.{{[0-9]+}}: ; %bb20
|
||||
; SI: buffer_store_dword
|
||||
|
||||
; SI: [[LABEL_FLOW]]:
|
||||
|
|
|
@ -151,7 +151,7 @@ define amdgpu_kernel void @test_vop3_cmp_u32_sop_or(i32 addrspace(1)* %arg) {
|
|||
; GCN-LABEL: {{^}}test_mask_if:
|
||||
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo
|
||||
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
|
||||
; GCN: ; mask branch
|
||||
; GCN: s_cbranch_execz
|
||||
define amdgpu_kernel void @test_mask_if(i32 addrspace(1)* %arg) #0 {
|
||||
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%cmp = icmp ugt i32 %lid, 10
|
||||
|
@ -175,19 +175,18 @@ endif:
|
|||
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo
|
||||
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
|
||||
; GCN: s_cbranch_execz
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: ; %bb.{{[0-9]+}}:
|
||||
; GCN: BB{{.*}}:
|
||||
; GFX1032: s_xor_b32 s{{[0-9]+}}, exec_lo, s{{[0-9]+}}
|
||||
; GFX1064: s_xor_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
|
||||
; GCN: ; mask branch BB
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: ; %bb.{{[0-9]+}}:
|
||||
; GCN: ; %bb.{{[0-9]+}}:
|
||||
; GFX1032: s_or_b32 exec_lo, exec_lo, s{{[0-9]+}}
|
||||
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX1064: s_or_b64 exec, exec, s[{{[0-9:]+}}]
|
||||
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
||||
; GCN: ; mask branch BB
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: s_cbranch_execz BB
|
||||
; GCN: ; %bb.{{[0-9]+}}:
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @test_loop_with_if(i32 addrspace(1)* %arg) #0 {
|
||||
|
@ -228,9 +227,8 @@ bb13:
|
|||
; GCN-LABEL: {{^}}test_loop_with_if_else_break:
|
||||
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo
|
||||
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
|
||||
; GCN: ; mask branch
|
||||
; GCN: s_cbranch_execz
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: ; %bb.{{[0-9]+}}: ; %.preheader
|
||||
; GCN: BB{{.*}}:
|
||||
|
||||
; GFX1032: s_or_b32 [[MASK0:s[0-9]+]], [[MASK0]], vcc_lo
|
||||
|
|
|
@ -425,9 +425,8 @@ END:
|
|||
;CHECK-NEXT: s_and_b64 exec, exec, [[ORIG]]
|
||||
;CHECK-NEXT: s_and_b64 [[SAVED]], exec, [[SAVED]]
|
||||
;CHECK-NEXT: s_xor_b64 exec, exec, [[SAVED]]
|
||||
;CHECK-NEXT: mask branch [[END_BB:BB[0-9]+_[0-9]+]]
|
||||
;CHECK-NEXT: s_cbranch_execz [[END_BB]]
|
||||
;CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %ELSE
|
||||
;CHECK-NEXT: s_cbranch_execz [[END_BB:BB[0-9]+_[0-9]+]]
|
||||
;CHECK-NEXT: ; %bb.{{[0-9]+}}: ; %ELSE
|
||||
;CHECK: store_dword
|
||||
;CHECK: [[END_BB]]: ; %END
|
||||
;CHECK: s_or_b64 exec, exec,
|
||||
|
|
Loading…
Reference in New Issue