forked from OSchip/llvm-project
[AMDGPU] Add insert nops pass based on subtarget features instead of cl::opt
Also:
- Skip the pass if the machine module does not have debug info
- Minor comment changes
- Added test

Differential Revision: http://reviews.llvm.org/D19079
llvm-svn: 266626
This commit is contained in:
parent
b061313c5e
commit
8c273ad719
|
@ -317,6 +317,17 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
|||
]
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Debugger related subtarget features.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Subtarget feature named "amdgpu-debugger-insert-nops" (the test in this
// change enables it with -mattr=+amdgpu-debugger-insert-nops). When selected
// it sets the subtarget's DebuggerInsertNops field to "true".
def FeatureDebuggerInsertNops : SubtargetFeature<
|
||||
"amdgpu-debugger-insert-nops",
|
||||
"DebuggerInsertNops",
|
||||
"true",
|
||||
"Insert two nop instructions for each high level source statement"
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def AMDGPUInstrInfo : InstrInfo {
|
||||
|
|
|
@ -97,7 +97,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
|||
HasSMemRealTime(false), Has16BitInsts(false),
|
||||
LDSBankCount(0),
|
||||
IsaVersion(ISAVersion0_0_0),
|
||||
EnableSIScheduler(false), FrameLowering(nullptr),
|
||||
EnableSIScheduler(false),
|
||||
DebuggerInsertNops(false),
|
||||
FrameLowering(nullptr),
|
||||
GISel(),
|
||||
InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
|
||||
|
||||
|
|
|
@ -95,6 +95,7 @@ private:
|
|||
int LDSBankCount;
|
||||
unsigned IsaVersion;
|
||||
bool EnableSIScheduler;
|
||||
bool DebuggerInsertNops;
|
||||
|
||||
std::unique_ptr<AMDGPUFrameLowering> FrameLowering;
|
||||
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
|
||||
|
@ -304,6 +305,10 @@ public:
|
|||
return EnableSIScheduler;
|
||||
}
|
||||
|
||||
/// \returns the value of the DebuggerInsertNops subtarget flag; the pass
/// configuration checks this before adding the SIInsertNops pass.
bool debuggerInsertNops() const {
|
||||
return DebuggerInsertNops;
|
||||
}
|
||||
|
||||
bool dumpCode() const {
|
||||
return DumpCode;
|
||||
}
|
||||
|
|
|
@ -31,7 +31,6 @@
|
|||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Support/raw_os_ostream.h"
|
||||
#include "llvm/Transforms/IPO.h"
|
||||
|
@ -149,11 +148,6 @@ GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
|
|||
|
||||
namespace {
|
||||
|
||||
cl::opt<bool> InsertNops(
|
||||
"amdgpu-insert-nops",
|
||||
cl::desc("Insert two nop instructions for each high level source statement"),
|
||||
cl::init(false));
|
||||
|
||||
class AMDGPUPassConfig : public TargetPassConfig {
|
||||
public:
|
||||
AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
|
||||
|
@ -397,7 +391,9 @@ void GCNPassConfig::addPreSched2() {
|
|||
void GCNPassConfig::addPreEmitPass() {
|
||||
addPass(createSIInsertWaitsPass(), false);
|
||||
addPass(createSILowerControlFlowPass(), false);
|
||||
if (InsertNops) {
|
||||
|
||||
const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
|
||||
if (ST.debuggerInsertNops()) {
|
||||
addPass(createSIInsertNopsPass(), false);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,14 +8,14 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// \brief Insert two S_NOP instructions for every high level source statement.
|
||||
/// \brief Insert two nop instructions for each high level source statement.
|
||||
///
|
||||
/// Tools, such as debugger, need to pause execution based on user input (i.e.
|
||||
/// breakpoint). In order to do this, two S_NOP instructions are inserted for
|
||||
/// each high level source statement: one before first isa instruction of high
|
||||
/// level source statement, and one after last isa instruction of high level
|
||||
/// source statement. Further, debugger may replace S_NOP instructions with
|
||||
/// S_TRAP instructions based on user input.
|
||||
/// breakpoint). In order to do this, two nop instructions are inserted for each
|
||||
/// high level source statement: one before first isa instruction of high level
|
||||
/// source statement, and one after last isa instruction of high level source
|
||||
/// statement. Further, debugger may replace nop instructions with trap
|
||||
/// instructions based on user input.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
|||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "si-insert-nops"
|
||||
|
@ -53,10 +54,21 @@ FunctionPass *llvm::createSIInsertNopsPass() {
|
|||
}
|
||||
|
||||
bool SIInsertNops::runOnMachineFunction(MachineFunction &MF) {
|
||||
// Skip this function when the machine module has no debug info.
|
||||
if (!MF.getMMI().hasDebugInfo()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Target instruction info.
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
|
||||
|
||||
// Mapping from high level source statement line number to last corresponding
|
||||
// isa instruction.
|
||||
DenseMap<unsigned, MachineBasicBlock::iterator> LineToInst;
|
||||
// Insert nop instruction before first isa instruction of each high level
|
||||
// source statement and collect last isa instruction for each high level
|
||||
// source statement.
|
||||
for (auto MBB = MF.begin(); MBB != MF.end(); ++MBB) {
|
||||
for (auto MI = MBB->begin(); MI != MBB->end(); ++MI) {
|
||||
if (MI->isDebugValue() || !MI->getDebugLoc()) {
|
||||
|
@ -74,6 +86,8 @@ bool SIInsertNops::runOnMachineFunction(MachineFunction &MF) {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Insert nop instruction after last isa instruction of each high level source
|
||||
// statement.
|
||||
for (auto LineToInstEntry = LineToInst.begin();
|
||||
LineToInstEntry != LineToInst.end(); ++LineToInstEntry) {
|
||||
auto MBB = LineToInstEntry->second->getParent();
|
||||
|
@ -85,6 +99,7 @@ bool SIInsertNops::runOnMachineFunction(MachineFunction &MF) {
|
|||
.addImm(0);
|
||||
}
|
||||
}
|
||||
// Insert nop instruction before prologue.
|
||||
MachineBasicBlock &MBB = MF.front();
|
||||
MachineInstr &MI = MBB.front();
|
||||
BuildMI(MBB, MI, DebugLoc(), TII->get(AMDGPU::S_NOP))
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; CHECK: debugger_insert_nops.cl:2:3
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK: debugger_insert_nops.cl:3:3
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK: debugger_insert_nops.cl:4:3
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK: debugger_insert_nops.cl:5:3
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK: debugger_insert_nops.cl:6:1
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_endpgm
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @debugger_insert_nops(i32 addrspace(1)* %A) #0 !dbg !12 {
|
||||
entry:
|
||||
%A.addr = alloca i32 addrspace(1)*, align 4
|
||||
store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
|
||||
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !17, metadata !18), !dbg !19
|
||||
%0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !20
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20
|
||||
store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21
|
||||
%1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !22
|
||||
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22
|
||||
store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23
|
||||
%2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !24
|
||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24
|
||||
store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25
|
||||
%3 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !26
|
||||
%arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 4, !dbg !26
|
||||
store i32 4, i32 addrspace(1)* %arrayidx3, align 4, !dbg !27
|
||||
ret void, !dbg !28
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
|
||||
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!opencl.kernels = !{!3}
|
||||
!llvm.module.flags = !{!9, !10}
|
||||
!llvm.ident = !{!11}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 266620)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
||||
!1 = !DIFile(filename: "debugger_insert_nops.cl", directory: "/home/kzhuravl/Sandbox")
|
||||
!2 = !{}
|
||||
!3 = !{void (i32 addrspace(1)*)* @debugger_insert_nops, !4, !5, !6, !7, !8}
|
||||
!4 = !{!"kernel_arg_addr_space", i32 1}
|
||||
!5 = !{!"kernel_arg_access_qual", !"none"}
|
||||
!6 = !{!"kernel_arg_type", !"int*"}
|
||||
!7 = !{!"kernel_arg_base_type", !"int*"}
|
||||
!8 = !{!"kernel_arg_type_qual", !""}
|
||||
!9 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!10 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!11 = !{!"clang version 3.9.0 (trunk 266620)"}
|
||||
!12 = distinct !DISubprogram(name: "debugger_insert_nops", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
|
||||
!13 = !DISubroutineType(types: !14)
|
||||
!14 = !{null, !15}
|
||||
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32)
|
||||
!16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
|
||||
!17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15)
|
||||
!18 = !DIExpression()
|
||||
!19 = !DILocation(line: 1, column: 46, scope: !12)
|
||||
!20 = !DILocation(line: 2, column: 3, scope: !12)
|
||||
!21 = !DILocation(line: 2, column: 8, scope: !12)
|
||||
!22 = !DILocation(line: 3, column: 3, scope: !12)
|
||||
!23 = !DILocation(line: 3, column: 8, scope: !12)
|
||||
!24 = !DILocation(line: 4, column: 3, scope: !12)
|
||||
!25 = !DILocation(line: 4, column: 8, scope: !12)
|
||||
!26 = !DILocation(line: 5, column: 3, scope: !12)
|
||||
!27 = !DILocation(line: 5, column: 8, scope: !12)
|
||||
!28 = !DILocation(line: 6, column: 1, scope: !12)
|
Loading…
Reference in New Issue