[LoopRotate] Calls not lowered to calls should not block rotation.

83daa49758 made loop-rotate more conservative in the presence of
function calls in the prepare-for-lto stage. The code did not properly
account for calls that are not actual function calls, like calls to
intrinsics. This patch updates the code to ensure only calls that are
lowered to actual calls are considered inline candidates.
This commit is contained in:
Florian Hahn 2021-01-19 14:34:55 +00:00
parent c42f5ca3d8
commit 3747b69b53
No known key found for this signature in database
GPG Key ID: 61D7554B5CECDC0D
2 changed files with 61 additions and 2 deletions

View File

@ -125,12 +125,13 @@ void CodeMetrics::analyzeBasicBlock(
// Special handling for calls.
if (const auto *Call = dyn_cast<CallBase>(&I)) {
if (const Function *F = Call->getCalledFunction()) {
bool IsLoweredToCall = TTI.isLoweredToCall(F);
// If a function is both internal and has a single use, then it is
// extremely likely to get inlined in the future (it was probably
// exposed by an interleaved devirtualization pass).
// When preparing for LTO, liberally consider calls as inline
// candidates.
if (!Call->isNoInline() &&
if (!Call->isNoInline() && IsLoweredToCall &&
((F->hasInternalLinkage() && F->hasOneUse()) || PrepareForLTO)) {
++NumInlineCandidates;
}
@ -142,7 +143,7 @@ void CodeMetrics::analyzeBasicBlock(
if (F == BB->getParent())
isRecursive = true;
if (TTI.isLoweredToCall(F))
if (IsLoweredToCall)
++NumCalls;
} else {
// We don't want inline asm to count as a call - that would prevent loop

View File

@ -41,3 +41,61 @@ for.end: ; preds = %for.cond
define void @may_be_inlined() {
ret void
}
; Intrinsics, like @llvm.dbg.value, are never inlined and should not block loop
; rotation, even when preparing for LTO.
define void @test_prepare_for_lto_intrinsic() !dbg !7 {
; FULL-LABEL: @test_prepare_for_lto_intrinsic(
; FULL-NEXT: entry:
; FULL-NEXT: %array = alloca [20 x i32], align 16
; FULL-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata !12, metadata !DIExpression()), !dbg !13
; FULL-NEXT: %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0
; FULL-NEXT: br label %for.body
;
; PREPARE-LABEL: @test_prepare_for_lto_intrinsic(
; PREPARE-NEXT: entry:
; PREPARE-NEXT: %array = alloca [20 x i32], align 16
; PREPARE-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata !12, metadata !DIExpression()), !dbg !13
; PREPARE-NEXT: %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0
; PREPARE-NEXT: br label %for.body
;
; Both check prefixes above expect the same entry block: the header's
; llvm.dbg.value call (with %i.0 replaced by its initial value 0) and the
; getelementptr appear in entry, which then branches straight to %for.body
; instead of %for.cond.
; NOTE(review): the RUN lines are not visible in this hunk; presumably one
; prefix is the default run and the other the prepare-for-LTO run - confirm
; against the top of the test file.
;
; Input loop: %for.cond is the header (phi, debug intrinsic call, exit test),
; %for.body stores to the array and increments the induction variable.
entry:
%array = alloca [20 x i32], align 16
br label %for.cond
for.cond: ; preds = %for.body, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; The only call in the loop is this debug intrinsic, placed in the header.
call void @llvm.dbg.value(metadata i32 %i.0, metadata !12, metadata !DIExpression()), !dbg !13
%cmp = icmp slt i32 %i.0, 100
%arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
store i32 0, i32* %arrayidx, align 16
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "test.c", directory: "/tmp")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{i32 7, !"PIC Level", i32 2}
!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11)
!8 = !DISubroutineType(types: !9)
!9 = !{null, !10}
!10 = !DIBasicType(name: "long long int", size: 64, encoding: DW_ATE_signed)
!11 = !{!12}
!12 = !DILocalVariable(name: "input", arg: 1, scope: !7, file: !1, line: 2, type: !10)
!13 = !DILocation(line: 2, column: 15, scope: !7)