2020-01-28 07:11:45 +08:00
|
|
|
; RUN: llc -aarch64-load-store-renaming=true -verify-machineinstrs -mtriple=arm64-linux-gnu -pre-RA-sched=linearize -enable-misched=false -disable-post-ra < %s | FileCheck %s
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
%va_list = type {i8*, i8*, i8*, i32, i32}
|
|
|
|
|
|
|
|
@var = global %va_list zeroinitializer, align 8
|
|
|
|
|
|
|
|
declare void @llvm.va_start(i8*)
|
|
|
|
|
|
|
|
define void @test_simple(i32 %n, ...) {
|
|
|
|
; CHECK-LABEL: test_simple:
|
|
|
|
; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
|
|
|
|
; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
|
|
|
|
|
2018-04-11 00:19:30 +08:00
|
|
|
; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
|
|
|
|
; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
|
|
|
|
|
[DAGCombiner] If a TokenFactor would be merged into its user, consider the user later.
Summary:
A number of optimizations are inhibited by single-use TokenFactors not
being merged into the TokenFactor using it. This makes we consider if
we can do the merge immediately.
Most tests changes here are due to the change in visitation causing
minor reorderings and associated reassociation of paired memory
operations.
CodeGen tests with non-reordering changes:
X86/aligned-variadic.ll -- memory-based add folded into stored leaq
value.
X86/constant-combiners.ll -- Optimizes out overlap between stores.
X86/pr40631_deadstore_elision -- folds constant byte store into
preceding quad word constant store.
Reviewers: RKSimon, craig.topper, spatel, efriedma, courbet
Reviewed By: courbet
Subscribers: dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, javed.absar, eraman, hiraditya, kbarton, jrtc27, atanasyan, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59260
llvm-svn: 356068
2019-03-14 01:07:09 +08:00
|
|
|
; CHECK-DAG: stp x6, x7, [sp, #
|
2014-03-29 18:18:08 +08:00
|
|
|
; ... omit middle ones ...
|
[DAGCombiner] If a TokenFactor would be merged into its user, consider the user later.
Summary:
A number of optimizations are inhibited by single-use TokenFactors not
being merged into the TokenFactor using it. This makes we consider if
we can do the merge immediately.
Most tests changes here are due to the change in visitation causing
minor reorderings and associated reassociation of paired memory
operations.
CodeGen tests with non-reordering changes:
X86/aligned-variadic.ll -- memory-based add folded into stored leaq
value.
X86/constant-combiners.ll -- Optimizes out overlap between stores.
X86/pr40631_deadstore_elision -- folds constant byte store into
preceding quad word constant store.
Reviewers: RKSimon, craig.topper, spatel, efriedma, courbet
Reviewed By: courbet
Subscribers: dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, javed.absar, eraman, hiraditya, kbarton, jrtc27, atanasyan, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59260
llvm-svn: 356068
2019-03-14 01:07:09 +08:00
|
|
|
; CHECK-DAG: str x1, [sp, #[[GR_BASE:[0-9]+]]]
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[DAGCombiner] If a TokenFactor would be merged into its user, consider the user later.
Summary:
A number of optimizations are inhibited by single-use TokenFactors not
being merged into the TokenFactor using it. This makes we consider if
we can do the merge immediately.
Most tests changes here are due to the change in visitation causing
minor reorderings and associated reassociation of paired memory
operations.
CodeGen tests with non-reordering changes:
X86/aligned-variadic.ll -- memory-based add folded into stored leaq
value.
X86/constant-combiners.ll -- Optimizes out overlap between stores.
X86/pr40631_deadstore_elision -- folds constant byte store into
preceding quad word constant store.
Reviewers: RKSimon, craig.topper, spatel, efriedma, courbet
Reviewed By: courbet
Subscribers: dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, javed.absar, eraman, hiraditya, kbarton, jrtc27, atanasyan, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59260
llvm-svn: 356068
2019-03-14 01:07:09 +08:00
|
|
|
; CHECK-DAG: stp q0, q1, [sp]
|
2014-03-29 18:18:08 +08:00
|
|
|
; ... omit middle ones ...
|
[DAGCombiner] If a TokenFactor would be merged into its user, consider the user later.
Summary:
A number of optimizations are inhibited by single-use TokenFactors not
being merged into the TokenFactor using it. This makes we consider if
we can do the merge immediately.
Most tests changes here are due to the change in visitation causing
minor reorderings and associated reassociation of paired memory
operations.
CodeGen tests with non-reordering changes:
X86/aligned-variadic.ll -- memory-based add folded into stored leaq
value.
X86/constant-combiners.ll -- Optimizes out overlap between stores.
X86/pr40631_deadstore_elision -- folds constant byte store into
preceding quad word constant store.
Reviewers: RKSimon, craig.topper, spatel, efriedma, courbet
Reviewed By: courbet
Subscribers: dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, javed.absar, eraman, hiraditya, kbarton, jrtc27, atanasyan, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59260
llvm-svn: 356068
2019-03-14 01:07:09 +08:00
|
|
|
; CHECK-DAG: stp q6, q7, [sp, #
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-04-11 00:19:30 +08:00
|
|
|
; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
|
|
|
|
; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56
|
[AArch64] Teach Load/Store optimizier to rename store operands for pairing.
In some cases, we can rename a store operand, in order to enable pairing
of stores. For store pairs, that cannot be merged because the first
tored register is defined in between the second store, we try to find
suitable rename register.
First, we check if we can rename the given register:
1. The first store register must be killed at the store, which means we
do not have to rename instructions after the first store.
2. We scan backwards from the first store, to find the definition of the
stored register and check all uses in between are renamable. Along
they way, we collect the minimal register classes of the uses for
overlapping (sub/super)registers.
Second, we try to find an available register from the minimal physical
register class of the original register. A suitable register must not be
1. defined before FirstMI
2. between the previous definition of the register to rename
3. a callee saved register.
We use KILL flags to clear defined registers while scanning from the
beginning to the end of the block.
This triggers quite often, here are the top changes for MultiSource,
SPEC2000, SPEC2006 compiled with -O3 for iOS:
Metric: aarch64-ldst-opt.NumPairCreated
Program base patch diff
test-suite...nch/fourinarow/fourinarow.test 2.00 39.00 1850.0%
test-suite...s/ASC_Sequoia/IRSmk/IRSmk.test 46.00 80.00 73.9%
test-suite...chmarks/Olden/power/power.test 70.00 96.00 37.1%
test-suite...cations/hexxagon/hexxagon.test 29.00 39.00 34.5%
test-suite...nchmarks/McCat/05-eks/eks.test 100.00 132.00 32.0%
test-suite.../Trimaran/enc-rc4/enc-rc4.test 46.00 59.00 28.3%
test-suite...T2006/473.astar/473.astar.test 160.00 200.00 25.0%
test-suite.../Trimaran/enc-md5/enc-md5.test 8.00 10.00 25.0%
test-suite...telecomm-gsm/telecomm-gsm.test 113.00 139.00 23.0%
test-suite...ediabench/gsm/toast/toast.test 113.00 139.00 23.0%
test-suite...Source/Benchmarks/sim/sim.test 91.00 111.00 22.0%
test-suite...C/CFP2000/179.art/179.art.test 41.00 49.00 19.5%
test-suite...peg2/mpeg2dec/mpeg2decode.test 245.00 279.00 13.9%
test-suite...marks/Olden/health/health.test 16.00 18.00 12.5%
test-suite...ks/Prolangs-C/cdecl/cdecl.test 90.00 101.00 12.2%
test-suite...fice-ispell/office-ispell.test 91.00 100.00 9.9%
test-suite...oxyApps-C/miniGMG/miniGMG.test 430.00 465.00 8.1%
test-suite...lowfish/security-blowfish.test 39.00 42.00 7.7%
test-suite.../Applications/spiff/spiff.test 42.00 45.00 7.1%
test-suite...arks/mafft/pairlocalalign.test 2473.00 2646.00 7.0%
test-suite.../VersaBench/ecbdes/ecbdes.test 29.00 31.00 6.9%
test-suite...nch/beamformer/beamformer.test 220.00 235.00 6.8%
test-suite...CFP2000/177.mesa/177.mesa.test 2110.00 2252.00 6.7%
test-suite...ve-susan/automotive-susan.test 109.00 116.00 6.4%
test-suite...s-C/unix-smail/unix-smail.test 65.00 69.00 6.2%
test-suite...CI_Purple/SMG2000/smg2000.test 1194.00 1265.00 5.9%
test-suite.../Benchmarks/nbench/nbench.test 472.00 500.00 5.9%
test-suite...oxyApps-C/miniAMR/miniAMR.test 248.00 262.00 5.6%
test-suite...quoia/CrystalMk/CrystalMk.test 18.00 19.00 5.6%
test-suite...rks/tramp3d-v4/tramp3d-v4.test 7331.00 7710.00 5.2%
test-suite.../Benchmarks/Bullet/bullet.test 5651.00 5938.00 5.1%
test-suite...ternal/HMMER/hmmcalibrate.test 750.00 788.00 5.1%
test-suite...T2006/456.hmmer/456.hmmer.test 764.00 802.00 5.0%
test-suite...ications/JM/ldecod/ldecod.test 1028.00 1079.00 5.0%
test-suite...CFP2006/444.namd/444.namd.test 1368.00 1434.00 4.8%
test-suite...marks/7zip/7zip-benchmark.test 4471.00 4685.00 4.8%
test-suite...6/464.h264ref/464.h264ref.test 3122.00 3271.00 4.8%
test-suite...pplications/oggenc/oggenc.test 1497.00 1565.00 4.5%
test-suite...T2000/300.twolf/300.twolf.test 742.00 774.00 4.3%
test-suite.../Prolangs-C/loader/loader.test 24.00 25.00 4.2%
test-suite...0.perlbench/400.perlbench.test 1983.00 2058.00 3.8%
test-suite...ications/JM/lencod/lencod.test 4612.00 4785.00 3.8%
test-suite...yApps-C++/PENNANT/PENNANT.test 995.00 1032.00 3.7%
test-suite...arks/VersaBench/dbms/dbms.test 54.00 56.00 3.7%
Reviewers: efriedma, thegameg, samparker, dmgreen, paquette, evandro
Reviewed By: paquette
Differential Revision: https://reviews.llvm.org/D70450
2019-12-11 17:59:18 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
|
|
|
|
; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128
|
[AArch64] Teach Load/Store optimizier to rename store operands for pairing.
In some cases, we can rename a store operand, in order to enable pairing
of stores. For store pairs, that cannot be merged because the first
tored register is defined in between the second store, we try to find
suitable rename register.
First, we check if we can rename the given register:
1. The first store register must be killed at the store, which means we
do not have to rename instructions after the first store.
2. We scan backwards from the first store, to find the definition of the
stored register and check all uses in between are renamable. Along
they way, we collect the minimal register classes of the uses for
overlapping (sub/super)registers.
Second, we try to find an available register from the minimal physical
register class of the original register. A suitable register must not be
1. defined before FirstMI
2. between the previous definition of the register to rename
3. a callee saved register.
We use KILL flags to clear defined registers while scanning from the
beginning to the end of the block.
This triggers quite often, here are the top changes for MultiSource,
SPEC2000, SPEC2006 compiled with -O3 for iOS:
Metric: aarch64-ldst-opt.NumPairCreated
Program base patch diff
test-suite...nch/fourinarow/fourinarow.test 2.00 39.00 1850.0%
test-suite...s/ASC_Sequoia/IRSmk/IRSmk.test 46.00 80.00 73.9%
test-suite...chmarks/Olden/power/power.test 70.00 96.00 37.1%
test-suite...cations/hexxagon/hexxagon.test 29.00 39.00 34.5%
test-suite...nchmarks/McCat/05-eks/eks.test 100.00 132.00 32.0%
test-suite.../Trimaran/enc-rc4/enc-rc4.test 46.00 59.00 28.3%
test-suite...T2006/473.astar/473.astar.test 160.00 200.00 25.0%
test-suite.../Trimaran/enc-md5/enc-md5.test 8.00 10.00 25.0%
test-suite...telecomm-gsm/telecomm-gsm.test 113.00 139.00 23.0%
test-suite...ediabench/gsm/toast/toast.test 113.00 139.00 23.0%
test-suite...Source/Benchmarks/sim/sim.test 91.00 111.00 22.0%
test-suite...C/CFP2000/179.art/179.art.test 41.00 49.00 19.5%
test-suite...peg2/mpeg2dec/mpeg2decode.test 245.00 279.00 13.9%
test-suite...marks/Olden/health/health.test 16.00 18.00 12.5%
test-suite...ks/Prolangs-C/cdecl/cdecl.test 90.00 101.00 12.2%
test-suite...fice-ispell/office-ispell.test 91.00 100.00 9.9%
test-suite...oxyApps-C/miniGMG/miniGMG.test 430.00 465.00 8.1%
test-suite...lowfish/security-blowfish.test 39.00 42.00 7.7%
test-suite.../Applications/spiff/spiff.test 42.00 45.00 7.1%
test-suite...arks/mafft/pairlocalalign.test 2473.00 2646.00 7.0%
test-suite.../VersaBench/ecbdes/ecbdes.test 29.00 31.00 6.9%
test-suite...nch/beamformer/beamformer.test 220.00 235.00 6.8%
test-suite...CFP2000/177.mesa/177.mesa.test 2110.00 2252.00 6.7%
test-suite...ve-susan/automotive-susan.test 109.00 116.00 6.4%
test-suite...s-C/unix-smail/unix-smail.test 65.00 69.00 6.2%
test-suite...CI_Purple/SMG2000/smg2000.test 1194.00 1265.00 5.9%
test-suite.../Benchmarks/nbench/nbench.test 472.00 500.00 5.9%
test-suite...oxyApps-C/miniAMR/miniAMR.test 248.00 262.00 5.6%
test-suite...quoia/CrystalMk/CrystalMk.test 18.00 19.00 5.6%
test-suite...rks/tramp3d-v4/tramp3d-v4.test 7331.00 7710.00 5.2%
test-suite.../Benchmarks/Bullet/bullet.test 5651.00 5938.00 5.1%
test-suite...ternal/HMMER/hmmcalibrate.test 750.00 788.00 5.1%
test-suite...T2006/456.hmmer/456.hmmer.test 764.00 802.00 5.0%
test-suite...ications/JM/ldecod/ldecod.test 1028.00 1079.00 5.0%
test-suite...CFP2006/444.namd/444.namd.test 1368.00 1434.00 4.8%
test-suite...marks/7zip/7zip-benchmark.test 4471.00 4685.00 4.8%
test-suite...6/464.h264ref/464.h264ref.test 3122.00 3271.00 4.8%
test-suite...pplications/oggenc/oggenc.test 1497.00 1565.00 4.5%
test-suite...T2000/300.twolf/300.twolf.test 742.00 774.00 4.3%
test-suite.../Prolangs-C/loader/loader.test 24.00 25.00 4.2%
test-suite...0.perlbench/400.perlbench.test 1983.00 2058.00 3.8%
test-suite...ications/JM/lencod/lencod.test 4612.00 4785.00 3.8%
test-suite...yApps-C++/PENNANT/PENNANT.test 995.00 1032.00 3.7%
test-suite...arks/VersaBench/dbms/dbms.test 54.00 56.00 3.7%
Reviewers: efriedma, thegameg, samparker, dmgreen, paquette, evandro
Reviewed By: paquette
Differential Revision: https://reviews.llvm.org/D70450
2019-12-11 17:59:18 +08:00
|
|
|
; CHECK: stp [[GR_TOP]], [[VR_TOP]], [x[[VA_LIST]], #8]
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-05-25 03:38:23 +08:00
|
|
|
; CHECK: mov [[GRVR:x[0-9]+]], #-56
|
|
|
|
; CHECK: movk [[GRVR]], #65408, lsl #32
|
2018-04-11 00:19:30 +08:00
|
|
|
; CHECK: str [[GRVR]], [x[[VA_LIST]], #24]
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
%addr = bitcast %va_list* @var to i8*
|
|
|
|
call void @llvm.va_start(i8* %addr)
|
|
|
|
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
|
|
|
|
; CHECK-LABEL: test_fewargs:
|
|
|
|
; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
|
|
|
|
; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
|
|
|
|
|
2018-04-11 00:19:30 +08:00
|
|
|
; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
|
|
|
|
; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
|
|
|
|
|
[DAGCombiner] If a TokenFactor would be merged into its user, consider the user later.
Summary:
A number of optimizations are inhibited by single-use TokenFactors not
being merged into the TokenFactor using it. This makes we consider if
we can do the merge immediately.
Most tests changes here are due to the change in visitation causing
minor reorderings and associated reassociation of paired memory
operations.
CodeGen tests with non-reordering changes:
X86/aligned-variadic.ll -- memory-based add folded into stored leaq
value.
X86/constant-combiners.ll -- Optimizes out overlap between stores.
X86/pr40631_deadstore_elision -- folds constant byte store into
preceding quad word constant store.
Reviewers: RKSimon, craig.topper, spatel, efriedma, courbet
Reviewed By: courbet
Subscribers: dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, javed.absar, eraman, hiraditya, kbarton, jrtc27, atanasyan, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59260
llvm-svn: 356068
2019-03-14 01:07:09 +08:00
|
|
|
; CHECK-DAG: stp x6, x7, [sp, #
|
2014-03-29 18:18:08 +08:00
|
|
|
; ... omit middle ones ...
|
[DAGCombiner] If a TokenFactor would be merged into its user, consider the user later.
Summary:
A number of optimizations are inhibited by single-use TokenFactors not
being merged into the TokenFactor using it. This makes we consider if
we can do the merge immediately.
Most tests changes here are due to the change in visitation causing
minor reorderings and associated reassociation of paired memory
operations.
CodeGen tests with non-reordering changes:
X86/aligned-variadic.ll -- memory-based add folded into stored leaq
value.
X86/constant-combiners.ll -- Optimizes out overlap between stores.
X86/pr40631_deadstore_elision -- folds constant byte store into
preceding quad word constant store.
Reviewers: RKSimon, craig.topper, spatel, efriedma, courbet
Reviewed By: courbet
Subscribers: dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, javed.absar, eraman, hiraditya, kbarton, jrtc27, atanasyan, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59260
llvm-svn: 356068
2019-03-14 01:07:09 +08:00
|
|
|
; CHECK-DAG: str x3, [sp, #[[GR_BASE:[0-9]+]]]
|
2014-03-29 18:18:08 +08:00
|
|
|
|
[DAGCombiner] If a TokenFactor would be merged into its user, consider the user later.
Summary:
A number of optimizations are inhibited by single-use TokenFactors not
being merged into the TokenFactor using it. This makes we consider if
we can do the merge immediately.
Most tests changes here are due to the change in visitation causing
minor reorderings and associated reassociation of paired memory
operations.
CodeGen tests with non-reordering changes:
X86/aligned-variadic.ll -- memory-based add folded into stored leaq
value.
X86/constant-combiners.ll -- Optimizes out overlap between stores.
X86/pr40631_deadstore_elision -- folds constant byte store into
preceding quad word constant store.
Reviewers: RKSimon, craig.topper, spatel, efriedma, courbet
Reviewed By: courbet
Subscribers: dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, javed.absar, eraman, hiraditya, kbarton, jrtc27, atanasyan, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59260
llvm-svn: 356068
2019-03-14 01:07:09 +08:00
|
|
|
; CHECK-DAG: stp q6, q7, [sp, #80]
|
2014-03-29 18:18:08 +08:00
|
|
|
; ... omit middle ones ...
|
[DAGCombiner] If a TokenFactor would be merged into its user, consider the user later.
Summary:
A number of optimizations are inhibited by single-use TokenFactors not
being merged into the TokenFactor using it. This makes we consider if
we can do the merge immediately.
Most tests changes here are due to the change in visitation causing
minor reorderings and associated reassociation of paired memory
operations.
CodeGen tests with non-reordering changes:
X86/aligned-variadic.ll -- memory-based add folded into stored leaq
value.
X86/constant-combiners.ll -- Optimizes out overlap between stores.
X86/pr40631_deadstore_elision -- folds constant byte store into
preceding quad word constant store.
Reviewers: RKSimon, craig.topper, spatel, efriedma, courbet
Reviewed By: courbet
Subscribers: dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, javed.absar, eraman, hiraditya, kbarton, jrtc27, atanasyan, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59260
llvm-svn: 356068
2019-03-14 01:07:09 +08:00
|
|
|
; CHECK-DAG: str q1, [sp]
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-04-11 00:19:30 +08:00
|
|
|
; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
|
|
|
|
; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40
|
|
|
|
|
|
|
|
; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
|
|
|
|
; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112
|
[AArch64] Teach Load/Store optimizier to rename store operands for pairing.
In some cases, we can rename a store operand, in order to enable pairing
of stores. For store pairs, that cannot be merged because the first
tored register is defined in between the second store, we try to find
suitable rename register.
First, we check if we can rename the given register:
1. The first store register must be killed at the store, which means we
do not have to rename instructions after the first store.
2. We scan backwards from the first store, to find the definition of the
stored register and check all uses in between are renamable. Along
they way, we collect the minimal register classes of the uses for
overlapping (sub/super)registers.
Second, we try to find an available register from the minimal physical
register class of the original register. A suitable register must not be
1. defined before FirstMI
2. between the previous definition of the register to rename
3. a callee saved register.
We use KILL flags to clear defined registers while scanning from the
beginning to the end of the block.
This triggers quite often, here are the top changes for MultiSource,
SPEC2000, SPEC2006 compiled with -O3 for iOS:
Metric: aarch64-ldst-opt.NumPairCreated
Program base patch diff
test-suite...nch/fourinarow/fourinarow.test 2.00 39.00 1850.0%
test-suite...s/ASC_Sequoia/IRSmk/IRSmk.test 46.00 80.00 73.9%
test-suite...chmarks/Olden/power/power.test 70.00 96.00 37.1%
test-suite...cations/hexxagon/hexxagon.test 29.00 39.00 34.5%
test-suite...nchmarks/McCat/05-eks/eks.test 100.00 132.00 32.0%
test-suite.../Trimaran/enc-rc4/enc-rc4.test 46.00 59.00 28.3%
test-suite...T2006/473.astar/473.astar.test 160.00 200.00 25.0%
test-suite.../Trimaran/enc-md5/enc-md5.test 8.00 10.00 25.0%
test-suite...telecomm-gsm/telecomm-gsm.test 113.00 139.00 23.0%
test-suite...ediabench/gsm/toast/toast.test 113.00 139.00 23.0%
test-suite...Source/Benchmarks/sim/sim.test 91.00 111.00 22.0%
test-suite...C/CFP2000/179.art/179.art.test 41.00 49.00 19.5%
test-suite...peg2/mpeg2dec/mpeg2decode.test 245.00 279.00 13.9%
test-suite...marks/Olden/health/health.test 16.00 18.00 12.5%
test-suite...ks/Prolangs-C/cdecl/cdecl.test 90.00 101.00 12.2%
test-suite...fice-ispell/office-ispell.test 91.00 100.00 9.9%
test-suite...oxyApps-C/miniGMG/miniGMG.test 430.00 465.00 8.1%
test-suite...lowfish/security-blowfish.test 39.00 42.00 7.7%
test-suite.../Applications/spiff/spiff.test 42.00 45.00 7.1%
test-suite...arks/mafft/pairlocalalign.test 2473.00 2646.00 7.0%
test-suite.../VersaBench/ecbdes/ecbdes.test 29.00 31.00 6.9%
test-suite...nch/beamformer/beamformer.test 220.00 235.00 6.8%
test-suite...CFP2000/177.mesa/177.mesa.test 2110.00 2252.00 6.7%
test-suite...ve-susan/automotive-susan.test 109.00 116.00 6.4%
test-suite...s-C/unix-smail/unix-smail.test 65.00 69.00 6.2%
test-suite...CI_Purple/SMG2000/smg2000.test 1194.00 1265.00 5.9%
test-suite.../Benchmarks/nbench/nbench.test 472.00 500.00 5.9%
test-suite...oxyApps-C/miniAMR/miniAMR.test 248.00 262.00 5.6%
test-suite...quoia/CrystalMk/CrystalMk.test 18.00 19.00 5.6%
test-suite...rks/tramp3d-v4/tramp3d-v4.test 7331.00 7710.00 5.2%
test-suite.../Benchmarks/Bullet/bullet.test 5651.00 5938.00 5.1%
test-suite...ternal/HMMER/hmmcalibrate.test 750.00 788.00 5.1%
test-suite...T2006/456.hmmer/456.hmmer.test 764.00 802.00 5.0%
test-suite...ications/JM/ldecod/ldecod.test 1028.00 1079.00 5.0%
test-suite...CFP2006/444.namd/444.namd.test 1368.00 1434.00 4.8%
test-suite...marks/7zip/7zip-benchmark.test 4471.00 4685.00 4.8%
test-suite...6/464.h264ref/464.h264ref.test 3122.00 3271.00 4.8%
test-suite...pplications/oggenc/oggenc.test 1497.00 1565.00 4.5%
test-suite...T2000/300.twolf/300.twolf.test 742.00 774.00 4.3%
test-suite.../Prolangs-C/loader/loader.test 24.00 25.00 4.2%
test-suite...0.perlbench/400.perlbench.test 1983.00 2058.00 3.8%
test-suite...ications/JM/lencod/lencod.test 4612.00 4785.00 3.8%
test-suite...yApps-C++/PENNANT/PENNANT.test 995.00 1032.00 3.7%
test-suite...arks/VersaBench/dbms/dbms.test 54.00 56.00 3.7%
Reviewers: efriedma, thegameg, samparker, dmgreen, paquette, evandro
Reviewed By: paquette
Differential Revision: https://reviews.llvm.org/D70450
2019-12-11 17:59:18 +08:00
|
|
|
; CHECK: stp [[GR_TOP]], [[VR_TOP]], [x[[VA_LIST]], #8]
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-04-11 00:19:30 +08:00
|
|
|
; CHECK: mov [[GRVR_OFFS:x[0-9]+]], #-40
|
|
|
|
; CHECK: movk [[GRVR_OFFS]], #65424, lsl #32
|
|
|
|
; CHECK: str [[GRVR_OFFS]], [x[[VA_LIST]], #24]
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
%addr = bitcast %va_list* @var to i8*
|
|
|
|
call void @llvm.va_start(i8* %addr)
|
|
|
|
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
define void @test_nospare([8 x i64], [8 x float], ...) {
|
|
|
|
; CHECK-LABEL: test_nospare:
|
|
|
|
|
|
|
|
%addr = bitcast %va_list* @var to i8*
|
|
|
|
call void @llvm.va_start(i8* %addr)
|
|
|
|
; CHECK-NOT: sub sp, sp
|
|
|
|
; CHECK: mov [[STACK:x[0-9]+]], sp
|
2018-04-11 00:19:30 +08:00
|
|
|
; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
|
|
|
|
; CHECK: str [[STACK]], [x[[VAR]]]
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; If there are non-variadic arguments on the stack (here two i64s) then the
|
|
|
|
; __stack field should point just past them.
|
2014-11-28 05:02:42 +08:00
|
|
|
define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
|
2014-03-29 18:18:08 +08:00
|
|
|
; CHECK-LABEL: test_offsetstack:
|
[DAGCombiner] If a TokenFactor would be merged into its user, consider the user later.
Summary:
A number of optimizations are inhibited by single-use TokenFactors not
being merged into the TokenFactor using it. This makes we consider if
we can do the merge immediately.
Most tests changes here are due to the change in visitation causing
minor reorderings and associated reassociation of paired memory
operations.
CodeGen tests with non-reordering changes:
X86/aligned-variadic.ll -- memory-based add folded into stored leaq
value.
X86/constant-combiners.ll -- Optimizes out overlap between stores.
X86/pr40631_deadstore_elision -- folds constant byte store into
preceding quad word constant store.
Reviewers: RKSimon, craig.topper, spatel, efriedma, courbet
Reviewed By: courbet
Subscribers: dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, javed.absar, eraman, hiraditya, kbarton, jrtc27, atanasyan, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59260
llvm-svn: 356068
2019-03-14 01:07:09 +08:00
|
|
|
|
|
|
|
; CHECK-DAG: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #48]
|
|
|
|
; CHECK-DAG: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #16]
|
|
|
|
; CHECK-DAG: str {{q[0-9]+}}, [sp]
|
|
|
|
; CHECK-DAG: add [[STACK_TOP:x[0-9]+]], sp, #96
|
|
|
|
; CHECK-DAG: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
|
|
|
|
; CHECK-DAG: str [[STACK_TOP]], [x[[VAR]]]
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
%addr = bitcast %va_list* @var to i8*
|
|
|
|
call void @llvm.va_start(i8* %addr)
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare void @llvm.va_end(i8*)
|
|
|
|
|
|
|
|
define void @test_va_end() nounwind {
|
|
|
|
; CHECK-LABEL: test_va_end:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK-NEXT: %bb.0
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
%addr = bitcast %va_list* @var to i8*
|
|
|
|
call void @llvm.va_end(i8* %addr)
|
|
|
|
|
|
|
|
ret void
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
}
|
|
|
|
|
|
|
|
declare void @llvm.va_copy(i8* %dest, i8* %src)
|
|
|
|
|
|
|
|
@second_list = global %va_list zeroinitializer
|
|
|
|
|
|
|
|
define void @test_va_copy() {
|
|
|
|
; CHECK-LABEL: test_va_copy:
|
|
|
|
%srcaddr = bitcast %va_list* @var to i8*
|
|
|
|
%dstaddr = bitcast %va_list* @second_list to i8*
|
|
|
|
call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr)
|
|
|
|
|
|
|
|
; CHECK: add x[[SRC:[0-9]+]], {{x[0-9]+}}, :lo12:var
|
2018-04-11 00:19:30 +08:00
|
|
|
|
2018-05-16 23:36:52 +08:00
|
|
|
; CHECK: ldp [[BLOCK:q[0-9]+]], [[BLOCK:q[0-9]+]], [x[[SRC]]]
|
2014-03-29 18:18:08 +08:00
|
|
|
; CHECK: add x[[DST:[0-9]+]], {{x[0-9]+}}, :lo12:second_list
|
2018-05-16 23:36:52 +08:00
|
|
|
; CHECK: stp [[BLOCK:q[0-9]+]], [[BLOCK:q[0-9]+]], [x[[DST]]]
|
2014-03-29 18:18:08 +08:00
|
|
|
ret void
|
|
|
|
; CHECK: ret
|
|
|
|
}
|