2019-05-22 04:14:54 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
|
|
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
|
|
|
|
|
2019-05-22 22:42:41 +08:00
|
|
|
; Scalar tests.
|
2019-05-22 04:14:54 +08:00
|
|
|
|
2019-05-22 22:42:41 +08:00
|
|
|
; add (add %x, C), %y
|
|
|
|
; Outer 'add' is commutative - 2 variants.
|
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes (%a + 32) + %b, with the inner add as the LHS of the outer add.
; The CHECK lines expect a register-register add first, then the #32 immediate.
define i32 @sink_add_of_const_to_add0(i32 %a, i32 %b) {
; CHECK-LABEL: sink_add_of_const_to_add0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, w1
; CHECK-NEXT:    add w0, w8, #32 // =32
; CHECK-NEXT:    ret
  %t0 = add i32 %a, 32 ; constant always on RHS
  %r = add i32 %t0, %b
  ret i32 %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes %b + (%a + 32), the commuted form of sink_add_of_const_to_add0.
; The CHECK lines expect a register-register add first, then the #32 immediate.
define i32 @sink_add_of_const_to_add1(i32 %a, i32 %b) {
; CHECK-LABEL: sink_add_of_const_to_add1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, w1
; CHECK-NEXT:    add w0, w8, #32 // =32
; CHECK-NEXT:    ret
  %t0 = add i32 %a, 32 ; constant always on RHS
  %r = add i32 %b, %t0
  ret i32 %r
}
|
|
|
|
|
|
|
|
; add (sub %x, C), %y
|
|
|
|
; Outer 'add' is commutative - 2 variants.
|
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes (%a - 32) + %b, with the inner sub as the LHS of the outer add.
; The CHECK lines expect a register-register add first, then a subtract of #32.
define i32 @sink_sub_of_const_to_add0(i32 %a, i32 %b) {
; CHECK-LABEL: sink_sub_of_const_to_add0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, w1
; CHECK-NEXT:    sub w0, w8, #32 // =32
; CHECK-NEXT:    ret
  %t0 = sub i32 %a, 32
  %r = add i32 %t0, %b
  ret i32 %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes %b + (%a - 32), the commuted form of sink_sub_of_const_to_add0.
; The CHECK lines expect a register-register add first, then a subtract of #32.
define i32 @sink_sub_of_const_to_add1(i32 %a, i32 %b) {
; CHECK-LABEL: sink_sub_of_const_to_add1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, w1
; CHECK-NEXT:    sub w0, w8, #32 // =32
; CHECK-NEXT:    ret
  %t0 = sub i32 %a, 32
  %r = add i32 %b, %t0
  ret i32 %r
}
|
2019-05-22 04:14:54 +08:00
|
|
|
|
2019-05-22 22:42:41 +08:00
|
|
|
; add (sub C, %x), %y
|
|
|
|
; Outer 'add' is commutative - 2 variants.
|
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes (32 - %a) + %b, with the inner sub as the LHS of the outer add.
; The CHECK lines expect w1 - w0 first, then an add of the #32 immediate.
define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b) {
; CHECK-LABEL: sink_sub_from_const_to_add0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub w8, w1, w0
; CHECK-NEXT:    add w0, w8, #32 // =32
; CHECK-NEXT:    ret
  %t0 = sub i32 32, %a
  %r = add i32 %t0, %b
  ret i32 %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes %b + (32 - %a), the commuted form of sink_sub_from_const_to_add0.
; The CHECK lines expect w1 - w0 first, then an add of the #32 immediate.
define i32 @sink_sub_from_const_to_add1(i32 %a, i32 %b) {
; CHECK-LABEL: sink_sub_from_const_to_add1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub w8, w1, w0
; CHECK-NEXT:    add w0, w8, #32 // =32
; CHECK-NEXT:    ret
  %t0 = sub i32 32, %a
  %r = add i32 %b, %t0
  ret i32 %r
}
|
|
|
|
|
2019-05-22 22:42:41 +08:00
|
|
|
; sub (add %x, C), %y
|
|
|
|
; sub %y, (add %x, C)
|
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes (%a + 32) - %b, with the inner add as the minuend.
; The CHECK lines expect w0 - w1 first, then an add of the #32 immediate.
define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b) {
; CHECK-LABEL: sink_add_of_const_to_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub w8, w0, w1
; CHECK-NEXT:    add w0, w8, #32 // =32
; CHECK-NEXT:    ret
  %t0 = add i32 %a, 32 ; constant always on RHS
  %r = sub i32 %t0, %b
  ret i32 %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes %b - (%a + 32), with the inner add as the subtrahend.
; The CHECK lines expect w1 - w0 first, then a subtract of the #32 immediate.
define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b) {
; CHECK-LABEL: sink_add_of_const_to_sub2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub w8, w1, w0
; CHECK-NEXT:    sub w0, w8, #32 // =32
; CHECK-NEXT:    ret
  %t0 = add i32 %a, 32 ; constant always on RHS
  %r = sub i32 %b, %t0
  ret i32 %r
}
|
2019-05-22 22:42:41 +08:00
|
|
|
|
|
|
|
; sub (sub %x, C), %y
|
|
|
|
; sub %y, (sub %x, C)
|
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes (%a - 32) - %b, with the inner sub as the minuend.
; The CHECK lines expect w0 - w1 first, then a subtract of the #32 immediate.
define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b) {
; CHECK-LABEL: sink_sub_of_const_to_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub w8, w0, w1
; CHECK-NEXT:    sub w0, w8, #32 // =32
; CHECK-NEXT:    ret
  %t0 = sub i32 %a, 32
  %r = sub i32 %t0, %b
  ret i32 %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes %b - (%a - 32), with the inner sub as the subtrahend.
; The CHECK lines expect w1 - w0 first, then an add of the #32 immediate.
define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
; CHECK-LABEL: sink_sub_of_const_to_sub2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub w8, w1, w0
; CHECK-NEXT:    add w0, w8, #32 // =32
; CHECK-NEXT:    ret
  %t0 = sub i32 %a, 32
  %r = sub i32 %b, %t0
  ret i32 %r
}
|
2019-05-22 04:14:54 +08:00
|
|
|
|
2019-05-22 22:42:41 +08:00
|
|
|
; sub (sub C, %x), %y
|
|
|
|
; sub %y, (sub C, %x)
|
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes (32 - %a) - %b, with the inner sub as the minuend.
; The CHECK lines expect w0 + w1 first, then 32 materialized with mov and the
; sum subtracted from it.
define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b) {
; CHECK-LABEL: sink_sub_from_const_to_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, w1
; CHECK-NEXT:    mov w9, #32
; CHECK-NEXT:    sub w0, w9, w8
; CHECK-NEXT:    ret
  %t0 = sub i32 32, %a
  %r = sub i32 %t0, %b
  ret i32 %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Computes %b - (32 - %a), with the inner sub as the subtrahend.
; The CHECK lines expect w0 + w1 first, then a subtract of the #32 immediate.
define i32 @sink_sub_from_const_to_sub2(i32 %a, i32 %b) {
; CHECK-LABEL: sink_sub_from_const_to_sub2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, w1
; CHECK-NEXT:    sub w0, w8, #32 // =32
; CHECK-NEXT:    ret
  %t0 = sub i32 32, %a
  %r = sub i32 %b, %t0
  ret i32 %r
}
|
|
|
|
|
|
|
|
;------------------------------------------------------------------------------;
|
2019-05-22 04:14:54 +08:00
|
|
|
; Basic vector tests. Here it is easier to see where the constant operand is.
|
2019-05-22 22:42:41 +08:00
|
|
|
;------------------------------------------------------------------------------;
|
|
|
|
|
|
|
|
; add (add %x, C), %y
|
|
|
|
; Outer 'add' is commutative - 2 variants.
|
2019-05-22 04:14:54 +08:00
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of (%a + C) + %b, with the inner add as the LHS of the outer add.
; The CHECK lines expect the constant loaded from .LCPI12_0 into q2, v0 + v1
; computed first, and the constant added last.
define <4 x i32> @vec_sink_add_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_add_of_const_to_add0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI12_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI12_0]
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    ret
  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
  %r = add <4 x i32> %t0, %b
  ret <4 x i32> %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of %b + (%a + C), the commuted variant of
; vec_sink_add_of_const_to_add0.
; The CHECK lines expect the constant loaded from .LCPI13_0 into q2, v0 + v1
; computed first, and the constant added last.
define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_add_of_const_to_add1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI13_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    ret
  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
  %r = add <4 x i32> %b, %t0
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; add (sub %x, C), %y
|
|
|
|
; Outer 'add' is commutative - 2 variants.
|
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of (%a - C) + %b, with the inner sub as the LHS of the outer add.
; The CHECK lines expect the constant loaded from .LCPI14_0 into q2, v0 + v1
; computed first, and the constant subtracted last.
define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_of_const_to_add0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI14_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI14_0]
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    ret
  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
  %r = add <4 x i32> %t0, %b
  ret <4 x i32> %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of %b + (%a - C), the commuted variant of
; vec_sink_sub_of_const_to_add0.
; The CHECK lines expect the constant loaded from .LCPI15_0 into q2, v0 + v1
; computed first, and the constant subtracted last.
define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_of_const_to_add1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI15_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    ret
  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
  %r = add <4 x i32> %b, %t0
  ret <4 x i32> %r
}
|
2019-05-22 04:14:54 +08:00
|
|
|
|
2019-05-22 22:42:41 +08:00
|
|
|
; add (sub C, %x), %y
|
|
|
|
; Outer 'add' is commutative - 2 variants.
|
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of (C - %a) + %b, with the inner sub as the LHS of the outer add.
; The CHECK lines expect the constant loaded from .LCPI16_0 into q2, v1 - v0
; computed first, and the constant added last.
define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_from_const_to_add0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI16_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    ret
  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
  %r = add <4 x i32> %t0, %b
  ret <4 x i32> %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of %b + (C - %a), the commuted variant of
; vec_sink_sub_from_const_to_add0.
; The CHECK lines expect the constant loaded from .LCPI17_0 into q2, v1 - v0
; computed first, and the constant added last.
define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_from_const_to_add1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI17_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    ret
  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
  %r = add <4 x i32> %b, %t0
  ret <4 x i32> %r
}
|
|
|
|
|
2019-05-22 22:42:41 +08:00
|
|
|
; sub (add %x, C), %y
|
|
|
|
; sub %y, (add %x, C)
|
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of (%a + C) - %b, with the inner add as the minuend.
; The CHECK lines expect the constant loaded from .LCPI18_0 into q2, v0 - v1
; computed first, and the constant added last.
define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_add_of_const_to_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI18_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    ret
  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
  %r = sub <4 x i32> %t0, %b
  ret <4 x i32> %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of %b - (%a + C), with the inner add as the subtrahend.
; The CHECK lines expect the constant loaded from .LCPI19_0 into q2, v1 - v0
; computed first, and the constant subtracted last.
define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_add_of_const_to_sub2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI19_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI19_0]
; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    ret
  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
  %r = sub <4 x i32> %b, %t0
  ret <4 x i32> %r
}
|
2019-05-22 22:42:41 +08:00
|
|
|
|
|
|
|
; sub (sub %x, C), %y
|
|
|
|
; sub %y, (sub %x, C)
|
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of (%a - C) - %b, with the inner sub as the minuend.
; The CHECK lines expect the constant loaded from .LCPI20_0 into q2, v0 - v1
; computed first, and the constant subtracted last.
define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_of_const_to_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI20_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI20_0]
; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    ret
  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
  %r = sub <4 x i32> %t0, %b
  ret <4 x i32> %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of %b - (%a - C), with the inner sub as the subtrahend.
; The CHECK lines expect the constant loaded from .LCPI21_0 into q2, v1 - v0
; computed first, and the constant added last.
define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_of_const_to_sub2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI21_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI21_0]
; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    ret
  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
  %r = sub <4 x i32> %b, %t0
  ret <4 x i32> %r
}
|
|
|
|
|
|
|
|
; sub (sub C, %x), %y
|
|
|
|
; sub %y, (sub C, %x)
|
|
|
|
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of (C - %a) - %b, with the inner sub as the minuend.
; The CHECK lines expect the constant loaded from .LCPI22_0 into q2, v0 + v1
; computed first, and that sum subtracted from the constant.
define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_from_const_to_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI22_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    ret
  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
  %r = sub <4 x i32> %t0, %b
  ret <4 x i32> %r
}
|
2019-06-01 19:08:29 +08:00
|
|
|
; Vector form of %b - (C - %a), with the inner sub as the subtrahend.
; The CHECK lines expect the constant loaded from .LCPI23_0 into q2, v0 + v1
; computed first, and the constant subtracted last.
define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_from_const_to_sub2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI23_0
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI23_0]
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    ret
  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
  %r = sub <4 x i32> %b, %t0
  ret <4 x i32> %r
}
|