; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -correlated-propagation -S | FileCheck %s

; Check that debug locations are preserved. For more info see:
; https://llvm.org/docs/SourceLevelDebugging.html#fixing-errors
; RUN: opt < %s -enable-debugify -correlated-propagation -S 2>&1 | \
; RUN:   FileCheck %s -check-prefix=DEBUG
; DEBUG: CheckModuleDebugify: PASS

; Opaque external function; every test below passes its widened i64 value to it.
declare void @use64(i64)

;; Looping test: the branch on "%a > -1" means %a is non-negative whenever
;; for.body runs, so the sext there is expected to be rewritten as a zext.
;; Background for the fold: https://reviews.llvm.org/D68654
define void @test1(i32 %n) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[A:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[EXT:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A]], -1
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[EXT_WIDE1:%.*]] = zext i32 [[A]] to i64
; CHECK-NEXT:    call void @use64(i64 [[EXT_WIDE1]])
; CHECK-NEXT:    [[EXT]] = trunc i64 [[EXT_WIDE1]] to i32
; CHECK-NEXT:    br label [[FOR_COND]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %a = phi i32 [ %n, %entry ], [ %ext, %for.body ]
  %cmp = icmp sgt i32 %a, -1
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  %ext.wide = sext i32 %a to i64
  call void @use64(i64 %ext.wide)
  %ext = trunc i64 %ext.wide to i32
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}

;; Negative test to show the transform doesn't happen unless n >= 0.
;; The loop guard here is "%a > -2", which still admits -1, so the sext in
;; for.body is expected to be left untouched.
define void @test2(i32 %n) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[A:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[EXT:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A]], -2
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[EXT_WIDE:%.*]] = sext i32 [[A]] to i64
; CHECK-NEXT:    call void @use64(i64 [[EXT_WIDE]])
; CHECK-NEXT:    [[EXT]] = trunc i64 [[EXT_WIDE]] to i32
; CHECK-NEXT:    br label [[FOR_COND]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %a = phi i32 [ %n, %entry ], [ %ext, %for.body ]
  %cmp = icmp sgt i32 %a, -2
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  %ext.wide = sext i32 %a to i64
  call void @use64(i64 %ext.wide)
  %ext = trunc i64 %ext.wide to i32
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}

;; Non-looping test case.
;; Block bb is only entered when "%n > -1", so the sext of %n there is
;; expected to be rewritten as a zext.
define void @test3(i32 %n) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1
; CHECK-NEXT:    br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[EXT_WIDE1:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    call void @use64(i64 [[EXT_WIDE1]])
; CHECK-NEXT:    [[EXT:%.*]] = trunc i64 [[EXT_WIDE1]] to i32
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp sgt i32 %n, -1
  br i1 %cmp, label %bb, label %exit

bb:
  %ext.wide = sext i32 %n to i64
  call void @use64(i64 %ext.wide)
  %ext = trunc i64 %ext.wide to i32
  br label %exit

exit:
  ret void
}

;; Non-looping negative test case.
;; Block bb is entered when "%n > -2", which still admits -1, so the sext of
;; %n is expected to be left untouched.
define void @test4(i32 %n) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -2
; CHECK-NEXT:    br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[EXT_WIDE:%.*]] = sext i32 [[N]] to i64
; CHECK-NEXT:    call void @use64(i64 [[EXT_WIDE]])
; CHECK-NEXT:    [[EXT:%.*]] = trunc i64 [[EXT_WIDE]] to i32
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp sgt i32 %n, -2
  br i1 %cmp, label %bb, label %exit

bb:
  %ext.wide = sext i32 %n to i64
  call void @use64(i64 %ext.wide)
  %ext = trunc i64 %ext.wide to i32
  br label %exit

exit:
  ret void
}