2019-09-07 21:35:54 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
|
|
; RUN: opt -S -simplifycfg < %s | FileCheck %s --check-prefix=ALL --check-prefix=EXPENSIVE
|
|
|
|
; RUN: opt -S -simplifycfg -speculate-one-expensive-inst=false < %s | FileCheck %s --check-prefix=ALL --check-prefix=CHEAP
|
2019-04-17 12:52:47 +08:00
|
|
|
|
|
|
|
; Declarations of the float intrinsics called by the test functions in this file.
; NOTE(review): every declaration here is `nounwind readonly`, while the call
; sites in the test bodies are annotated `nounwind readnone` -- confirm that
; this difference is intentional.
declare float @llvm.sqrt.f32(float) nounwind readonly
|
|
|
|
declare float @llvm.fma.f32(float, float, float) nounwind readonly
|
|
|
|
declare float @llvm.fmuladd.f32(float, float, float) nounwind readonly
|
|
|
|
declare float @llvm.fabs.f32(float) nounwind readonly
|
|
|
|
declare float @llvm.minnum.f32(float, float) nounwind readonly
|
|
|
|
declare float @llvm.maxnum.f32(float, float) nounwind readonly
|
|
|
|
declare float @llvm.minimum.f32(float, float) nounwind readonly
|
|
|
|
declare float @llvm.maximum.f32(float, float) nounwind readonly
|
|
|
|
|
|
|
|
define double @fdiv_test(double %a, double %b) {
|
[SimplifyCFG] FoldTwoEntryPHINode(): consider *total* speculation cost, not per-BB cost
Summary:
Previously, if the threshold was 2, we were willing to speculatively
execute 2 cheap instructions in both basic blocks (thus we were willing
to speculatively execute cost = 4), but weren't willing to speculate
when one BB had 3 instructions and other one had no instructions,
even though that would have a total cost of 3.
This looks inconsistent to me.
I don't think `cmov`-like instructions will start executing
until both of their inputs are available: https://godbolt.org/z/zgHePf
So I don't see why the existing behavior is the correct one.
Also, let's add its own `cl::opt` for this threshold,
with default=4, so it is not stricter than the previous threshold:
it will allow folding when there are 2 BBs, each with cost=2.
And since the logic has changed, it will also allow folding when
one BB has cost=3 and the other cost=1, or there is only one BB with cost=4.
This is an alternative solution to D65148:
This fix is mainly motivated by `signbit-like-value-extension.ll` test.
That pattern comes up in JPEG decoding, see e.g.
`Figure F.12 – Extending the sign bit of a decoded value in V`
of `ITU T.81` (JPEG specification).
That branch is not predictable, and it is within the innermost loop,
so the fact that that pattern ends up being stuck with a branch
instead of `select` (i.e. `CMOV` for x86) is unlikely to be beneficial.
This has great results on the final assembly (vanilla test-suite + RawSpeed): (metric pass - D67240)
| metric | old | new | delta | % |
| x86-mi-counting.NumMachineFunctions | 37720 | 37721 | 1 | 0.00% |
| x86-mi-counting.NumMachineBasicBlocks | 773545 | 771181 | -2364 | -0.31% |
| x86-mi-counting.NumMachineInstructions | 7488843 | 7486442 | -2401 | -0.03% |
| x86-mi-counting.NumUncondBR | 135770 | 135543 | -227 | -0.17% |
| x86-mi-counting.NumCondBR | 423753 | 422187 | -1566 | -0.37% |
| x86-mi-counting.NumCMOV | 24815 | 25731 | 916 | 3.69% |
| x86-mi-counting.NumVecBlend | 17 | 17 | 0 | 0.00% |
We significantly decrease basic block count, notably decrease instruction count,
significantly decrease branch count and very significantly increase `cmov` count.
Performance-wise, unsurprisingly, this has great effect on
target RawSpeed benchmark. I'm seeing 5 **major** improvements:
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_mean -0.3064 -0.3064 226.9913 157.4452 226.9800 157.4384
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_median -0.3057 -0.3057 226.8407 157.4926 226.8282 157.4828
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_stddev -0.4985 -0.4954 0.3051 0.1530 0.3040 0.1534
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_mean -0.1747 -0.1747 80.4787 66.4227 80.4771 66.4146
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_median -0.1742 -0.1743 80.4686 66.4542 80.4690 66.4436
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_stddev +0.6089 +0.5797 0.0670 0.1078 0.0673 0.1062
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_mean -0.1598 -0.1598 171.6996 144.2575 171.6915 144.2538
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_median -0.1598 -0.1597 171.7109 144.2755 171.7018 144.2766
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_stddev +0.4024 +0.3850 0.0847 0.1187 0.0848 0.1175
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_mean -0.0550 -0.0551 280.3046 264.8800 280.3017 264.8559
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_median -0.0554 -0.0554 280.2628 264.7360 280.2574 264.7297
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_stddev +0.7005 +0.7041 0.2779 0.4725 0.2775 0.4729
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_mean -0.0354 -0.0355 316.7396 305.5208 316.7342 305.4890
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_median -0.0354 -0.0356 316.6969 305.4798 316.6917 305.4324
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_stddev +0.0493 +0.0330 0.3562 0.3737 0.3563 0.3681
```
That being said, it's always best-effort, so there will likely
be cases where this worsens things.
Reviewers: efriedma, craig.topper, dmgreen, jmolloy, fhahn, Carrot, hfinkel, chandlerc
Reviewed By: jmolloy
Subscribers: xbolva00, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67318
llvm-svn: 372009
2019-09-17 00:18:24 +08:00
|
|
|
; ALL-LABEL: @fdiv_test(
|
|
|
|
; ALL-NEXT: entry:
|
|
|
|
; ALL-NEXT: [[CMP:%.*]] = fcmp ogt double [[A:%.*]], 0.000000e+00
|
|
|
|
; ALL-NEXT: [[DIV:%.*]] = fdiv double [[B:%.*]], [[A]]
|
2019-11-18 00:23:44 +08:00
|
|
|
; ALL-NEXT: [[COND:%.*]] = select nsz i1 [[CMP]], double [[DIV]], double 0.000000e+00
|
[SimplifyCFG] FoldTwoEntryPHINode(): consider *total* speculation cost, not per-BB cost
Summary:
Previously, if the threshold was 2, we were willing to speculatively
execute 2 cheap instructions in both basic blocks (thus we were willing
to speculatively execute cost = 4), but weren't willing to speculate
when one BB had 3 instructions and other one had no instructions,
even though that would have a total cost of 3.
This looks inconsistent to me.
I don't think `cmov`-like instructions will start executing
until both of their inputs are available: https://godbolt.org/z/zgHePf
So I don't see why the existing behavior is the correct one.
Also, let's add its own `cl::opt` for this threshold,
with default=4, so it is not stricter than the previous threshold:
it will allow folding when there are 2 BBs, each with cost=2.
And since the logic has changed, it will also allow folding when
one BB has cost=3 and the other cost=1, or there is only one BB with cost=4.
This is an alternative solution to D65148:
This fix is mainly motivated by `signbit-like-value-extension.ll` test.
That pattern comes up in JPEG decoding, see e.g.
`Figure F.12 – Extending the sign bit of a decoded value in V`
of `ITU T.81` (JPEG specification).
That branch is not predictable, and it is within the innermost loop,
so the fact that that pattern ends up being stuck with a branch
instead of `select` (i.e. `CMOV` for x86) is unlikely to be beneficial.
This has great results on the final assembly (vanilla test-suite + RawSpeed): (metric pass - D67240)
| metric | old | new | delta | % |
| x86-mi-counting.NumMachineFunctions | 37720 | 37721 | 1 | 0.00% |
| x86-mi-counting.NumMachineBasicBlocks | 773545 | 771181 | -2364 | -0.31% |
| x86-mi-counting.NumMachineInstructions | 7488843 | 7486442 | -2401 | -0.03% |
| x86-mi-counting.NumUncondBR | 135770 | 135543 | -227 | -0.17% |
| x86-mi-counting.NumCondBR | 423753 | 422187 | -1566 | -0.37% |
| x86-mi-counting.NumCMOV | 24815 | 25731 | 916 | 3.69% |
| x86-mi-counting.NumVecBlend | 17 | 17 | 0 | 0.00% |
We significantly decrease basic block count, notably decrease instruction count,
significantly decrease branch count and very significantly increase `cmov` count.
Performance-wise, unsurprisingly, this has great effect on
target RawSpeed benchmark. I'm seeing 5 **major** improvements:
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_mean -0.3064 -0.3064 226.9913 157.4452 226.9800 157.4384
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_median -0.3057 -0.3057 226.8407 157.4926 226.8282 157.4828
Samsung/NX3000/_3184416.SRW/threads:8/process_time/real_time_stddev -0.4985 -0.4954 0.3051 0.1530 0.3040 0.1534
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_mean -0.1747 -0.1747 80.4787 66.4227 80.4771 66.4146
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_median -0.1742 -0.1743 80.4686 66.4542 80.4690 66.4436
Kodak/DCS760C/86L57188.DCR/threads:8/process_time/real_time_stddev +0.6089 +0.5797 0.0670 0.1078 0.0673 0.1062
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_mean -0.1598 -0.1598 171.6996 144.2575 171.6915 144.2538
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_median -0.1598 -0.1597 171.7109 144.2755 171.7018 144.2766
Sony/DSLR-A230/DSC08026.ARW/threads:8/process_time/real_time_stddev +0.4024 +0.3850 0.0847 0.1187 0.0848 0.1175
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_mean -0.0550 -0.0551 280.3046 264.8800 280.3017 264.8559
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_median -0.0554 -0.0554 280.2628 264.7360 280.2574 264.7297
Canon/EOS 77D/IMG_4049.CR2/threads:8/process_time/real_time_stddev +0.7005 +0.7041 0.2779 0.4725 0.2775 0.4729
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 49 vs 49
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_mean -0.0354 -0.0355 316.7396 305.5208 316.7342 305.4890
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_median -0.0354 -0.0356 316.6969 305.4798 316.6917 305.4324
Canon/EOS 5DS/2K4A9929.CR2/threads:8/process_time/real_time_stddev +0.0493 +0.0330 0.3562 0.3737 0.3563 0.3681
```
That being said, it's always best-effort, so there will likely
be cases where this worsens things.
Reviewers: efriedma, craig.topper, dmgreen, jmolloy, fhahn, Carrot, hfinkel, chandlerc
Reviewed By: jmolloy
Subscribers: xbolva00, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67318
llvm-svn: 372009
2019-09-17 00:18:24 +08:00
|
|
|
; ALL-NEXT: ret double [[COND]]
|
2019-09-07 21:35:54 +08:00
|
|
|
;
|
2019-04-17 12:52:47 +08:00
|
|
|
; Body of @fdiv_test: returns %b / %a when %a is ordered-greater-than 0.0,
; and 0.0 otherwise. The phi that merges the two values carries the `nsz`
; fast-math flag. The autogenerated assertions for this function expect the
; fdiv to be executed unconditionally in the entry block and the phi to be
; replaced by a `select nsz`.
entry:
|
|
|
|
%cmp = fcmp ogt double %a, 0.0
|
|
|
|
br i1 %cmp, label %cond.true, label %cond.end
|
|
|
|
|
|
|
|
cond.true:
|
|
|
|
%div = fdiv double %b, %a
|
|
|
|
br label %cond.end
|
|
|
|
|
|
|
|
cond.end:
|
2019-11-17 23:37:42 +08:00
|
|
|
%cond = phi nsz double [ %div, %cond.true ], [ 0.0, %entry ]
|
2019-04-17 12:52:47 +08:00
|
|
|
ret double %cond
|
|
|
|
}
|
|
|
|
|
|
|
|
; sqrt_test: when %a is ordered-less-than 0.0 the function stores the constant
; 0x7FF8000000000000 (a quiet-NaN bit pattern) to %out; otherwise it stores
; llvm.sqrt.f32(%a). The phi that merges the two values carries the `afn`
; fast-math flag. The autogenerated assertions below expect the intrinsic call
; to be emitted unconditionally in the entry block and the phi to be replaced
; by a `select afn`.
define void @sqrt_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-LABEL: @sqrt_test(
|
|
|
|
; ALL-NEXT: entry:
|
|
|
|
; ALL-NEXT: [[CMP_I:%.*]] = fcmp olt float [[A:%.*]], 0.000000e+00
|
|
|
|
; ALL-NEXT: [[TMP0:%.*]] = tail call float @llvm.sqrt.f32(float [[A]]) #2
|
2019-11-18 00:23:44 +08:00
|
|
|
; ALL-NEXT: [[COND_I:%.*]] = select afn i1 [[CMP_I]], float 0x7FF8000000000000, float [[TMP0]]
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-NEXT: store float [[COND_I]], float addrspace(1)* [[OUT:%.*]], align 4
|
|
|
|
; ALL-NEXT: ret void
|
|
|
|
;
|
2019-04-17 12:52:47 +08:00
|
|
|
entry:
|
|
|
|
%cmp.i = fcmp olt float %a, 0.000000e+00
|
|
|
|
br i1 %cmp.i, label %test_sqrt.exit, label %cond.else.i
|
|
|
|
|
|
|
|
cond.else.i: ; preds = %entry
|
|
|
|
%0 = tail call float @llvm.sqrt.f32(float %a) nounwind readnone
|
|
|
|
br label %test_sqrt.exit
|
|
|
|
|
|
|
|
test_sqrt.exit: ; preds = %cond.else.i, %entry
|
2019-11-17 23:37:42 +08:00
|
|
|
%cond.i = phi afn float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
|
2019-04-17 12:52:47 +08:00
|
|
|
store float %cond.i, float addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; fabs_test: when %a is ordered-less-than 0.0 the function stores the constant
; 0x7FF8000000000000 (a quiet-NaN bit pattern) to %out; otherwise it stores
; llvm.fabs.f32(%a). The phi that merges the two values carries the `reassoc`
; fast-math flag. The autogenerated assertions below expect the intrinsic call
; to be emitted unconditionally in the entry block and the phi to be replaced
; by a `select reassoc`.
define void @fabs_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-LABEL: @fabs_test(
|
|
|
|
; ALL-NEXT: entry:
|
|
|
|
; ALL-NEXT: [[CMP_I:%.*]] = fcmp olt float [[A:%.*]], 0.000000e+00
|
|
|
|
; ALL-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[A]]) #2
|
2019-11-18 00:23:44 +08:00
|
|
|
; ALL-NEXT: [[COND_I:%.*]] = select reassoc i1 [[CMP_I]], float 0x7FF8000000000000, float [[TMP0]]
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-NEXT: store float [[COND_I]], float addrspace(1)* [[OUT:%.*]], align 4
|
|
|
|
; ALL-NEXT: ret void
|
|
|
|
;
|
2019-04-17 12:52:47 +08:00
|
|
|
entry:
|
|
|
|
%cmp.i = fcmp olt float %a, 0.000000e+00
|
|
|
|
br i1 %cmp.i, label %test_fabs.exit, label %cond.else.i
|
|
|
|
|
|
|
|
cond.else.i: ; preds = %entry
|
|
|
|
%0 = tail call float @llvm.fabs.f32(float %a) nounwind readnone
|
|
|
|
br label %test_fabs.exit
|
|
|
|
|
|
|
|
test_fabs.exit: ; preds = %cond.else.i, %entry
|
2019-11-17 23:37:42 +08:00
|
|
|
%cond.i = phi reassoc float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
|
2019-04-17 12:52:47 +08:00
|
|
|
store float %cond.i, float addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; fma_test: when %a is ordered-less-than 0.0 the function stores the constant
; 0x7FF8000000000000 (a quiet-NaN bit pattern) to %out; otherwise it stores
; llvm.fma.f32(%a, %b, %c). The phi that merges the two values is written with
; the fast-math flags `nsz reassoc`. The autogenerated assertions below expect
; the intrinsic call to be emitted unconditionally in the entry block and the
; phi to be replaced by a select carrying the same two flags (listed there as
; `reassoc nsz`).
define void @fma_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-LABEL: @fma_test(
|
|
|
|
; ALL-NEXT: entry:
|
|
|
|
; ALL-NEXT: [[CMP_I:%.*]] = fcmp olt float [[A:%.*]], 0.000000e+00
|
|
|
|
; ALL-NEXT: [[TMP0:%.*]] = tail call float @llvm.fma.f32(float [[A]], float [[B:%.*]], float [[C:%.*]]) #2
|
2019-11-18 00:23:44 +08:00
|
|
|
; ALL-NEXT: [[COND_I:%.*]] = select reassoc nsz i1 [[CMP_I]], float 0x7FF8000000000000, float [[TMP0]]
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-NEXT: store float [[COND_I]], float addrspace(1)* [[OUT:%.*]], align 4
|
|
|
|
; ALL-NEXT: ret void
|
|
|
|
;
|
2019-04-17 12:52:47 +08:00
|
|
|
entry:
|
|
|
|
%cmp.i = fcmp olt float %a, 0.000000e+00
|
|
|
|
br i1 %cmp.i, label %test_fma.exit, label %cond.else.i
|
|
|
|
|
|
|
|
cond.else.i: ; preds = %entry
|
|
|
|
%0 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
|
|
|
|
br label %test_fma.exit
|
|
|
|
|
|
|
|
test_fma.exit: ; preds = %cond.else.i, %entry
|
2019-11-17 23:37:42 +08:00
|
|
|
%cond.i = phi nsz reassoc float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
|
2019-04-17 12:52:47 +08:00
|
|
|
store float %cond.i, float addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; fmuladd_test: when %a is ordered-less-than 0.0 the function stores the
; constant 0x7FF8000000000000 (a quiet-NaN bit pattern) to %out; otherwise it
; stores llvm.fmuladd.f32(%a, %b, %c). The phi that merges the two values
; carries the `ninf` fast-math flag. The autogenerated assertions below expect
; the intrinsic call to be emitted unconditionally in the entry block and the
; phi to be replaced by a `select ninf`.
define void @fmuladd_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-LABEL: @fmuladd_test(
|
|
|
|
; ALL-NEXT: entry:
|
|
|
|
; ALL-NEXT: [[CMP_I:%.*]] = fcmp olt float [[A:%.*]], 0.000000e+00
|
|
|
|
; ALL-NEXT: [[TMP0:%.*]] = tail call float @llvm.fmuladd.f32(float [[A]], float [[B:%.*]], float [[C:%.*]]) #2
|
2019-11-18 00:23:44 +08:00
|
|
|
; ALL-NEXT: [[COND_I:%.*]] = select ninf i1 [[CMP_I]], float 0x7FF8000000000000, float [[TMP0]]
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-NEXT: store float [[COND_I]], float addrspace(1)* [[OUT:%.*]], align 4
|
|
|
|
; ALL-NEXT: ret void
|
|
|
|
;
|
2019-04-17 12:52:47 +08:00
|
|
|
entry:
|
|
|
|
%cmp.i = fcmp olt float %a, 0.000000e+00
|
|
|
|
br i1 %cmp.i, label %test_fmuladd.exit, label %cond.else.i
|
|
|
|
|
|
|
|
cond.else.i: ; preds = %entry
|
|
|
|
%0 = tail call float @llvm.fmuladd.f32(float %a, float %b, float %c) nounwind readnone
|
|
|
|
br label %test_fmuladd.exit
|
|
|
|
|
|
|
|
test_fmuladd.exit: ; preds = %cond.else.i, %entry
|
2019-11-17 23:37:42 +08:00
|
|
|
%cond.i = phi ninf float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
|
2019-04-17 12:52:47 +08:00
|
|
|
store float %cond.i, float addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; minnum_test: when %a is ordered-less-than 0.0 the function stores the
; constant 0x7FF8000000000000 (a quiet-NaN bit pattern) to %out; otherwise it
; stores llvm.minnum.f32(%a, %b). The phi in this test has no fast-math flags.
; The autogenerated assertions below expect the intrinsic call to be emitted
; unconditionally in the entry block and the phi to be replaced by a plain
; select without flags.
define void @minnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-LABEL: @minnum_test(
|
|
|
|
; ALL-NEXT: entry:
|
|
|
|
; ALL-NEXT: [[CMP_I:%.*]] = fcmp olt float [[A:%.*]], 0.000000e+00
|
|
|
|
; ALL-NEXT: [[TMP0:%.*]] = tail call float @llvm.minnum.f32(float [[A]], float [[B:%.*]]) #2
|
|
|
|
; ALL-NEXT: [[COND_I:%.*]] = select i1 [[CMP_I]], float 0x7FF8000000000000, float [[TMP0]]
|
|
|
|
; ALL-NEXT: store float [[COND_I]], float addrspace(1)* [[OUT:%.*]], align 4
|
|
|
|
; ALL-NEXT: ret void
|
|
|
|
;
|
2019-04-17 12:52:47 +08:00
|
|
|
entry:
|
|
|
|
%cmp.i = fcmp olt float %a, 0.000000e+00
|
|
|
|
br i1 %cmp.i, label %test_minnum.exit, label %cond.else.i
|
|
|
|
|
|
|
|
cond.else.i: ; preds = %entry
|
|
|
|
%0 = tail call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
|
|
|
|
br label %test_minnum.exit
|
|
|
|
|
|
|
|
test_minnum.exit: ; preds = %cond.else.i, %entry
|
|
|
|
%cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
|
|
|
|
store float %cond.i, float addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; maxnum_test: when %a is ordered-less-than 0.0 the function stores the
; constant 0x7FF8000000000000 (a quiet-NaN bit pattern) to %out; otherwise it
; stores llvm.maxnum.f32(%a, %b). The phi that merges the two values carries
; the `ninf nsz` fast-math flags. The autogenerated assertions below expect
; the intrinsic call to be emitted unconditionally in the entry block and the
; phi to be replaced by a `select ninf nsz`.
define void @maxnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-LABEL: @maxnum_test(
|
|
|
|
; ALL-NEXT: entry:
|
|
|
|
; ALL-NEXT: [[CMP_I:%.*]] = fcmp olt float [[A:%.*]], 0.000000e+00
|
|
|
|
; ALL-NEXT: [[TMP0:%.*]] = tail call float @llvm.maxnum.f32(float [[A]], float [[B:%.*]]) #2
|
2019-11-18 00:23:44 +08:00
|
|
|
; ALL-NEXT: [[COND_I:%.*]] = select ninf nsz i1 [[CMP_I]], float 0x7FF8000000000000, float [[TMP0]]
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-NEXT: store float [[COND_I]], float addrspace(1)* [[OUT:%.*]], align 4
|
|
|
|
; ALL-NEXT: ret void
|
|
|
|
;
|
2019-04-17 12:52:47 +08:00
|
|
|
entry:
|
|
|
|
%cmp.i = fcmp olt float %a, 0.000000e+00
|
|
|
|
br i1 %cmp.i, label %test_maxnum.exit, label %cond.else.i
|
|
|
|
|
|
|
|
cond.else.i: ; preds = %entry
|
|
|
|
%0 = tail call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
|
|
|
|
br label %test_maxnum.exit
|
|
|
|
|
|
|
|
test_maxnum.exit: ; preds = %cond.else.i, %entry
|
2019-11-17 23:37:42 +08:00
|
|
|
%cond.i = phi ninf nsz float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
|
2019-04-17 12:52:47 +08:00
|
|
|
store float %cond.i, float addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; minimum_test: when %a is ordered-less-than 0.0 the function stores the
; constant 0x7FF8000000000000 (a quiet-NaN bit pattern) to %out; otherwise it
; stores llvm.minimum.f32(%a, %b). The phi that merges the two values carries
; the `reassoc` fast-math flag. The autogenerated assertions below expect the
; intrinsic call to be emitted unconditionally in the entry block and the phi
; to be replaced by a `select reassoc`.
define void @minimum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-LABEL: @minimum_test(
|
|
|
|
; ALL-NEXT: entry:
|
|
|
|
; ALL-NEXT: [[CMP_I:%.*]] = fcmp olt float [[A:%.*]], 0.000000e+00
|
|
|
|
; ALL-NEXT: [[TMP0:%.*]] = tail call float @llvm.minimum.f32(float [[A]], float [[B:%.*]]) #2
|
2019-11-18 00:23:44 +08:00
|
|
|
; ALL-NEXT: [[COND_I:%.*]] = select reassoc i1 [[CMP_I]], float 0x7FF8000000000000, float [[TMP0]]
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-NEXT: store float [[COND_I]], float addrspace(1)* [[OUT:%.*]], align 4
|
|
|
|
; ALL-NEXT: ret void
|
|
|
|
;
|
2019-04-17 12:52:47 +08:00
|
|
|
entry:
|
|
|
|
%cmp.i = fcmp olt float %a, 0.000000e+00
|
|
|
|
br i1 %cmp.i, label %test_minimum.exit, label %cond.else.i
|
|
|
|
|
|
|
|
cond.else.i: ; preds = %entry
|
|
|
|
%0 = tail call float @llvm.minimum.f32(float %a, float %b) nounwind readnone
|
|
|
|
br label %test_minimum.exit
|
|
|
|
|
|
|
|
test_minimum.exit: ; preds = %cond.else.i, %entry
|
2019-11-17 23:37:42 +08:00
|
|
|
%cond.i = phi reassoc float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
|
2019-04-17 12:52:47 +08:00
|
|
|
store float %cond.i, float addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; maximum_test: when %a is ordered-less-than 0.0 the function stores the
; constant 0x7FF8000000000000 (a quiet-NaN bit pattern) to %out; otherwise it
; stores llvm.maximum.f32(%a, %b). The phi that merges the two values carries
; the `nsz` fast-math flag. The autogenerated assertions below expect the
; intrinsic call to be emitted unconditionally in the entry block and the phi
; to be replaced by a `select nsz`.
define void @maximum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-LABEL: @maximum_test(
|
|
|
|
; ALL-NEXT: entry:
|
|
|
|
; ALL-NEXT: [[CMP_I:%.*]] = fcmp olt float [[A:%.*]], 0.000000e+00
|
|
|
|
; ALL-NEXT: [[TMP0:%.*]] = tail call float @llvm.maximum.f32(float [[A]], float [[B:%.*]]) #2
|
2019-11-18 00:23:44 +08:00
|
|
|
; ALL-NEXT: [[COND_I:%.*]] = select nsz i1 [[CMP_I]], float 0x7FF8000000000000, float [[TMP0]]
|
2019-09-07 21:35:54 +08:00
|
|
|
; ALL-NEXT: store float [[COND_I]], float addrspace(1)* [[OUT:%.*]], align 4
|
|
|
|
; ALL-NEXT: ret void
|
|
|
|
;
|
2019-04-17 12:52:47 +08:00
|
|
|
entry:
|
|
|
|
%cmp.i = fcmp olt float %a, 0.000000e+00
|
|
|
|
br i1 %cmp.i, label %test_maximum.exit, label %cond.else.i
|
|
|
|
|
|
|
|
cond.else.i: ; preds = %entry
|
|
|
|
%0 = tail call float @llvm.maximum.f32(float %a, float %b) nounwind readnone
|
|
|
|
br label %test_maximum.exit
|
|
|
|
|
|
|
|
test_maximum.exit: ; preds = %cond.else.i, %entry
|
2019-11-17 23:37:42 +08:00
|
|
|
%cond.i = phi nsz float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
|
2019-04-17 12:52:47 +08:00
|
|
|
store float %cond.i, float addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|