llvm-project/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll

; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOOPT %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,OPT %s

; GCN-LABEL: {{^}}test_debug_value:
; NOOPT: .loc	1 1 42 prologue_end     ; /tmp/test_debug_value.cl:1:42
; NOOPT-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; NOOPT-NEXT: .Ltmp
; NOOPT-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- $sgpr4_sgpr5

; GCN: flat_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 !dbg !4 {
entry:
  tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, metadata !10, metadata !13), !dbg !14
  store i32 123, i32 addrspace(1)* %globalptr_arg, align 4
  ret void
}

; Check for infinite loop in some cases with dbg_value in
; SIOptimizeExecMaskingPreRA (somehow related to undef argument).

; GCN-LABEL: {{^}}only_undef_dbg_value:
; NOOPT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef] undef
; NOOPT-NEXT: s_endpgm

; OPT: s_endpgm
define amdgpu_kernel void @only_undef_dbg_value() #1 {
bb:
  call void @llvm.dbg.value(metadata <4 x float> undef, metadata !10, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !14
  ret void
}

declare void @llvm.dbg.value(metadata, metadata, metadata) #1

attributes #0 = { nounwind  }
attributes #1 = { nounwind readnone }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!11, !12}

!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 244715) (llvm/trunk 244718)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "/tmp/test_debug_value.cl", directory: "/Users/matt/src/llvm/build_debug")
!2 = !{}
!4 = distinct !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !9)
!5 = !DISubroutineType(types: !6)
!6 = !{null, !7}
!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64, align: 32)
!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10}
!10 = !DILocalVariable(name: "globalptr_arg", arg: 1, scope: !4, file: !1, line: 1, type: !7)
!11 = !{i32 2, !"Dwarf Version", i32 4}
!12 = !{i32 2, !"Debug Info Version", i32 3}
!13 = !DIExpression()
!14 = !DILocation(line: 1, column: 42, scope: !4)
AMDGPU: Fix infinite loop with dbg_value Surprisingly SIOptimizeExecMaskingPreRA can infinite loop in some case with DBG_VALUE. Most tests using dbg_value are run at -O0, so don't run this pass. This seems to only happen when the value argument is undef. llvm-svn: 319808 2017-12-06 02:23:17 +08:00			`; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN,NOOPT %s`
			`; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN,OPT %s`
AMDGPU: Fix assert on dbg_value instructions llvm-svn: 244728 2015-08-12 17:04:44 +08:00
AMDGPU: Fix infinite loop with dbg_value Surprisingly SIOptimizeExecMaskingPreRA can infinite loop in some case with DBG_VALUE. Most tests using dbg_value are run at -O0, so don't run this pass. This seems to only happen when the value argument is undef. llvm-svn: 319808 2017-12-06 02:23:17 +08:00			`; GCN-LABEL: {{^}}test_debug_value:`
AMDGPU: Add pass to lower kernel arguments to loads This replaces most argument uses with loads, but for now not all. The code in SelectionDAG for calling convention lowering is actively harmful for amdgpu_kernel. It attempts to split the argument types into register legal types, which results in low quality code for arbitary types. Since all kernel arguments are passed in memory, we just want the raw types. I've tried a couple of methods of mitigating this in SelectionDAG, but it's easier to just bypass this problem alltogether. It's possible to hack around the problem in the initial lowering, but the real problem is the DAG then expects to be able to use CopyToReg/CopyFromReg for uses of the arguments outside the block. Exposing the argument loads in the IR also has the advantage that the LoadStoreVectorizer can merge them. I'm not sure the best approach to dealing with the IR argument list is. The patch as-is just leaves the IR arguments in place, so all the existing code will still compute the same kernarg size and pointlessly lowers the arguments. Arguably the frontend should emit kernels with an empty argument list in the first place. Alternatively a dummy array could be inserted as a single argument just to reserve space. This does have some disadvantages. Local pointer kernel arguments can no longer have AssertZext placed on them as the equivalent !range metadata is not valid on pointer typed loads. This is mostly bad for SI which needs to know about the known bits in order to use the DS instruction offset, so in this case this is not done. More importantly, this skips noalias arguments since this pass does not yet convert this to the equivalent !alias.scope and !noalias metadata. Producing this metadata correctly seems to be tricky, although this logically is the same as inlining into a function which doesn't exist. Additionally, exposing these loads to the vectorizer may result in degraded aliasing information if a pointer load is merged with another argument load. I'm also not entirely sure this is preserving the current clover ABI, although I would greatly prefer if it would stop widening arguments and match the HSA ABI. As-is I think it is extending < 4-byte arguments to 4-bytes but doesn't align them to 4-bytes. llvm-svn: 335650 2018-06-27 03:10:00 +08:00			`; NOOPT: .loc 1 1 42 prologue_end ; /tmp/test_debug_value.cl:1:42`
			`; NOOPT-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0`
[DebugInfo] Stop changing labels for register-described parameter DBG_VALUEs Summary: This is a follow-up to D57510. This patch stops DebugHandlerBase from changing the starting label for the first non-overlapping, register-described parameter DBG_VALUEs to the beginning of the function. That code did not consider what defined the registers, which could result in the ranges for the debug values starting before their defining instructions. We currently do not emit debug values for constant values directly at the start of the function, so this code is still useful for such values, but my intention is to remove the code from DebugHandlerBase completely when we get there. One reason for removing it is that the code violates the history map's ranges, which I think can make it quite confusing when troubleshooting. In D57510, PrologEpilogInserter was amended so that parameter DBG_VALUEs now are kept at the start of the entry block, even after emission of prologue code. That was done to reduce the degradation of debug completeness from this patch. PR40638 is another example, where the lexical-scope trimming that LDV does, in combination with scheduling, results in instructions after the prologue being left without locations. There might be other cases where the DBG_VALUEs are pushed further down, for which the DebugHandlerBase code may be helpful, but as it now quite often result in incorrect locations, even after the prologue, it seems better to remove that code, and try to work our way up with accurate locations. In the long run we should maybe not aim to provide accurate locations inside the prologue. Some single location descriptions, at least those referring to stack values, generate inaccurate values inside the epilogue, so we maybe should not aim to achieve accuracy for location lists. However, it seems that we now emit line number programs that can result in GDB and LLDB stopping inside the prologue when doing line number stepping into functions. See PR40188 for more information. A summary of some of the changed test cases is available in PR40188#c2. Reviewers: aprantl, dblaikie, rnk, jmorse Reviewed By: aprantl Subscribers: jdoerfert, jholewinski, jvesely, javed.absar, llvm-commits Tags: #debug-info, #llvm Differential Revision: https://reviews.llvm.org/D57511 llvm-svn: 353928 2019-02-13 17:34:07 +08:00			`; NOOPT-NEXT: .Ltmp`
AMDGPU: Add pass to lower kernel arguments to loads This replaces most argument uses with loads, but for now not all. The code in SelectionDAG for calling convention lowering is actively harmful for amdgpu_kernel. It attempts to split the argument types into register legal types, which results in low quality code for arbitary types. Since all kernel arguments are passed in memory, we just want the raw types. I've tried a couple of methods of mitigating this in SelectionDAG, but it's easier to just bypass this problem alltogether. It's possible to hack around the problem in the initial lowering, but the real problem is the DAG then expects to be able to use CopyToReg/CopyFromReg for uses of the arguments outside the block. Exposing the argument loads in the IR also has the advantage that the LoadStoreVectorizer can merge them. I'm not sure the best approach to dealing with the IR argument list is. The patch as-is just leaves the IR arguments in place, so all the existing code will still compute the same kernarg size and pointlessly lowers the arguments. Arguably the frontend should emit kernels with an empty argument list in the first place. Alternatively a dummy array could be inserted as a single argument just to reserve space. This does have some disadvantages. Local pointer kernel arguments can no longer have AssertZext placed on them as the equivalent !range metadata is not valid on pointer typed loads. This is mostly bad for SI which needs to know about the known bits in order to use the DS instruction offset, so in this case this is not done. More importantly, this skips noalias arguments since this pass does not yet convert this to the equivalent !alias.scope and !noalias metadata. Producing this metadata correctly seems to be tricky, although this logically is the same as inlining into a function which doesn't exist. Additionally, exposing these loads to the vectorizer may result in degraded aliasing information if a pointer load is merged with another argument load. I'm also not entirely sure this is preserving the current clover ABI, although I would greatly prefer if it would stop widening arguments and match the HSA ABI. As-is I think it is extending < 4-byte arguments to 4-bytes but doesn't align them to 4-bytes. llvm-svn: 335650 2018-06-27 03:10:00 +08:00			`; NOOPT-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- $sgpr4_sgpr5`
AMDGPU: Disallow exec as SMEM instruction operand This is not in the list of valid inputs for the encoding. When spilling, copies from exec can be folded directly into the spill instruction which results in broken stores. This only fixes the operand constraints, more codegen work is required to avoid emitting the invalid spills. This sort of breaks the dbg.value test. Because the register class of the s_load_dwordx2 changes, there is a copy to SReg_64, and the copy is the operand of dbg_value. The copy is later dead, and removed from the dbg_value. llvm-svn: 288191 2016-11-30 03:39:53 +08:00
AMDGPU: Fix infinite loop with dbg_value Surprisingly SIOptimizeExecMaskingPreRA can infinite loop in some case with DBG_VALUE. Most tests using dbg_value are run at -O0, so don't run this pass. This seems to only happen when the value argument is undef. llvm-svn: 319808 2017-12-06 02:23:17 +08:00			`; GCN: flat_store_dword`
			`; GCN: s_endpgm`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 !dbg !4 {`
AMDGPU: Fix assert on dbg_value instructions llvm-svn: 244728 2015-08-12 17:04:44 +08:00			`entry:`
AMDGPU: Fix infinite loop with dbg_value Surprisingly SIOptimizeExecMaskingPreRA can infinite loop in some case with DBG_VALUE. Most tests using dbg_value are run at -O0, so don't run this pass. This seems to only happen when the value argument is undef. llvm-svn: 319808 2017-12-06 02:23:17 +08:00			`tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, metadata !10, metadata !13), !dbg !14`
AMDGPU: Fix assert on dbg_value instructions llvm-svn: 244728 2015-08-12 17:04:44 +08:00			`store i32 123, i32 addrspace(1)* %globalptr_arg, align 4`
			`ret void`
			`}`

AMDGPU: Fix infinite loop with dbg_value Surprisingly SIOptimizeExecMaskingPreRA can infinite loop in some case with DBG_VALUE. Most tests using dbg_value are run at -O0, so don't run this pass. This seems to only happen when the value argument is undef. llvm-svn: 319808 2017-12-06 02:23:17 +08:00			`; Check for infinite loop in some cases with dbg_value in`
			`; SIOptimizeExecMaskingPreRA (somehow related to undef argument).`

			`; GCN-LABEL: {{^}}only_undef_dbg_value:`
			`; NOOPT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef] undef`
			`; NOOPT-NEXT: s_endpgm`

			`; OPT: s_endpgm`
			`define amdgpu_kernel void @only_undef_dbg_value() #1 {`
			`bb:`
			`call void @llvm.dbg.value(metadata <4 x float> undef, metadata !10, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !14`
			`ret void`
			`}`

			`declare void @llvm.dbg.value(metadata, metadata, metadata) #1`
AMDGPU: Fix assert on dbg_value instructions llvm-svn: 244728 2015-08-12 17:04:44 +08:00
AMDGPU: Remove superfluous string attributes from tests Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129 2016-07-12 07:35:48 +08:00			`attributes #0 = { nounwind }`
AMDGPU: Fix assert on dbg_value instructions llvm-svn: 244728 2015-08-12 17:04:44 +08:00			`attributes #1 = { nounwind readnone }`

			`!llvm.dbg.cu = !{!0}`
			`!llvm.module.flags = !{!11, !12}`

[PR27284] Reverse the ownership between DICompileUnit and DISubprogram. Currently each Function points to a DISubprogram and DISubprogram has a scope field. For member functions the scope is a DICompositeType. DIScopes point to the DICompileUnit to facilitate type uniquing. Distinct DISubprograms (with isDefinition: true) are not part of the type hierarchy and cannot be uniqued. This change removes the subprograms list from DICompileUnit and instead adds a pointer to the owning compile unit to distinct DISubprograms. This would make it easy for ThinLTO to strip unneeded DISubprograms and their transitively referenced debug info. Motivation ---------- Materializing DISubprograms is currently the most expensive operation when doing a ThinLTO build of clang. We want the DISubprogram to be stored in a separate Bitcode block (or the same block as the function body) so we can avoid having to expensively deserialize all DISubprograms together with the global metadata. If a function has been inlined into another subprogram we need to store a reference the block containing the inlined subprogram. Attached to https://llvm.org/bugs/show_bug.cgi?id=27284 is a python script that updates LLVM IR testcases to the new format. http://reviews.llvm.org/D19034 <rdar://problem/25256815> llvm-svn: 266446 2016-04-15 23:57:41 +08:00			`!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 244715) (llvm/trunk 244718)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)`
AMDGPU: Fix assert on dbg_value instructions llvm-svn: 244728 2015-08-12 17:04:44 +08:00			`!1 = !DIFile(filename: "/tmp/test_debug_value.cl", directory: "/Users/matt/src/llvm/build_debug")`
			`!2 = !{}`
[DebugInfo] Add DILabel metadata and intrinsic llvm.dbg.label. In order to set breakpoints on labels and list source code around labels, we need collect debug information for labels, i.e., label name, the function label belong, line number in the file, and the address label located. In order to keep these information in LLVM IR and to allow backend to generate debug information correctly. We create a new kind of metadata for labels, DILabel. The format of DILabel is !DILabel(scope: !1, name: "foo", file: !2, line: 3) We hope to keep debug information as much as possible even the code is optimized. So, we create a new kind of intrinsic for label metadata to avoid the metadata is eliminated with basic block. The intrinsic will keep existing if we keep it from optimized out. The format of the intrinsic is llvm.dbg.label(metadata !1) It has only one argument, that is the DILabel metadata. The intrinsic will follow the label immediately. Backend could get the label metadata through the intrinsic's parameter. We also create DIBuilder API for labels to be used by Frontend. Frontend could use createLabel() to allocate DILabel objects, and use insertLabel() to insert llvm.dbg.label intrinsic in LLVM IR. Differential Revision: https://reviews.llvm.org/D45024 Patch by Hsiangkai Wang. llvm-svn: 331841 2018-05-09 10:40:45 +08:00			`!4 = distinct !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !9)`
AMDGPU: Fix assert on dbg_value instructions llvm-svn: 244728 2015-08-12 17:04:44 +08:00			`!5 = !DISubroutineType(types: !6)`
			`!6 = !{null, !7}`
			`!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64, align: 32)`
			`!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)`
			`!9 = !{!10}`
			`!10 = !DILocalVariable(name: "globalptr_arg", arg: 1, scope: !4, file: !1, line: 1, type: !7)`
			`!11 = !{i32 2, !"Dwarf Version", i32 4}`
			`!12 = !{i32 2, !"Debug Info Version", i32 3}`
			`!13 = !DIExpression()`
DI: Require subprogram definitions to be distinct As a follow-up to r246098, require `DISubprogram` definitions (`isDefinition: true`) to be 'distinct'. Specifically, add an assembler check, a verifier check, and bitcode upgrading logic to combat testcase bitrot after the `DIBuilder` change. While working on the testcases, I realized that test/Linker/subprogram-linkonce-weak-odr.ll isn't relevant anymore. Its purpose was to check for a corner case in PR22792 where two subprogram definitions match exactly and share the same metadata node. The new verifier check, requiring that subprogram definitions are 'distinct', precludes that possibility. I updated almost all the IR with the following script: git grep -l -E -e '= !DISubprogram\(.* isDefinition: true' \| grep -v test/Bitcode \| xargs sed -i '' -e 's/= \(!DISubprogram(.*, isDefinition: true\)/= distinct \1/' Likely some variant of would work for out-of-tree testcases. llvm-svn: 246327 2015-08-29 04:26:49 +08:00			`!14 = !DILocation(line: 1, column: 42, scope: !4)`