; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64

; Test of widening loads and stores of illegal vector types, based on pr5626.
;

%i32vec3 = type <3 x i32>
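
; Aligned <3 x i32>: the add is widened to <4 x i32>. The 16-byte-aligned
; loads can use a full movdqa, but the 12-byte store is split into
; movd/pextrd pieces so nothing is written past the third element.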
define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; X86-LABEL: add3i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movdqa (%edx), %xmm0
; X86-NEXT: paddd (%ecx), %xmm0
; X86-NEXT: pextrd $2, %xmm0, 8(%eax)
; X86-NEXT: pextrd $1, %xmm0, 4(%eax)
; X86-NEXT: movd %xmm0, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add3i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movdqa (%rsi), %xmm0
; X64-NEXT: paddd (%rdx), %xmm0
; X64-NEXT: pextrd $2, %xmm0, 8(%rdi)
; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: retq
  %a = load %i32vec3, %i32vec3* %ap, align 16
  %b = load %i32vec3, %i32vec3* %bp, align 16
  %x = add %i32vec3 %a, %b
  store %i32vec3 %x, %i32vec3* %ret, align 16
  ret void
}
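
; The same <3 x i32> add with only 8-byte alignment: the loads can no longer
; be widened to a full 16-byte movdqa, so the vectors are assembled with
; movd/movq plus pinsrd and the store is split the same way.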
define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; X86-LABEL: add3i32_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: pinsrd $1, 4(%edx), %xmm0
; X86-NEXT: pinsrd $2, 8(%edx), %xmm0
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: pinsrd $1, 4(%ecx), %xmm1
; X86-NEXT: pinsrd $2, 8(%ecx), %xmm1
; X86-NEXT: paddd %xmm0, %xmm1
; X86-NEXT: movd %xmm1, (%eax)
; X86-NEXT: pextrd $1, %xmm1, 4(%eax)
; X86-NEXT: pextrd $2, %xmm1, 8(%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add3i32_2:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: pinsrd $2, 8(%rsi), %xmm0
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: pinsrd $2, 8(%rdx), %xmm1
; X64-NEXT: paddd %xmm0, %xmm1
; X64-NEXT: pextrd $2, %xmm1, 8(%rdi)
; X64-NEXT: movq %xmm1, (%rdi)
; X64-NEXT: retq
  %a = load %i32vec3, %i32vec3* %ap, align 8
  %b = load %i32vec3, %i32vec3* %bp, align 8
  %x = add %i32vec3 %a, %b
  store %i32vec3 %x, %i32vec3* %ret, align 8
  ret void
}

%i32vec7 = type <7 x i32>
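
; <7 x i32> is widened to two <4 x i32> halves; the low 16 bytes are stored
; with a full movdqa and the 12-byte tail is split.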
define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
; X86-LABEL: add7i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movdqa (%edx), %xmm0
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddd (%ecx), %xmm0
; X86-NEXT: paddd 16(%ecx), %xmm1
; X86-NEXT: movd %xmm1, 16(%eax)
; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: pextrd $2, %xmm1, 24(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add7i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movdqa (%rsi), %xmm0
; X64-NEXT: movdqa 16(%rsi), %xmm1
; X64-NEXT: paddd (%rdx), %xmm0
; X64-NEXT: paddd 16(%rdx), %xmm1
; X64-NEXT: movq %xmm1, 16(%rdi)
; X64-NEXT: pextrd $2, %xmm1, 24(%rdi)
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
  %a = load %i32vec7, %i32vec7* %ap, align 16
  %b = load %i32vec7, %i32vec7* %bp, align 16
  %x = add %i32vec7 %a, %b
  store %i32vec7 %x, %i32vec7* %ret, align 16
  ret void
}

%i32vec12 = type <12 x i32>
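
; <12 x i32> maps exactly onto three <4 x i32> registers, so the loads and
; stores stay full width.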
define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
; X86-LABEL: add12i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movdqa 32(%edx), %xmm0
; X86-NEXT: movdqa (%edx), %xmm1
; X86-NEXT: movdqa 16(%edx), %xmm2
; X86-NEXT: paddd (%ecx), %xmm1
; X86-NEXT: paddd 32(%ecx), %xmm0
; X86-NEXT: paddd 16(%ecx), %xmm2
; X86-NEXT: movdqa %xmm2, 16(%eax)
; X86-NEXT: movdqa %xmm0, 32(%eax)
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add12i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movdqa (%rsi), %xmm0
; X64-NEXT: movdqa 16(%rsi), %xmm1
; X64-NEXT: movdqa 32(%rsi), %xmm2
; X64-NEXT: paddd (%rdx), %xmm0
; X64-NEXT: paddd 32(%rdx), %xmm2
; X64-NEXT: paddd 16(%rdx), %xmm1
; X64-NEXT: movdqa %xmm1, 16(%rdi)
; X64-NEXT: movdqa %xmm2, 32(%rdi)
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
  %a = load %i32vec12, %i32vec12* %ap, align 16
  %b = load %i32vec12, %i32vec12* %bp, align 16
  %x = add %i32vec12 %a, %b
  store %i32vec12 %x, %i32vec12* %ret, align 16
  ret void
}

%i16vec3 = type <3 x i16>
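
; <3 x i16> is widened for the paddw; the 6-byte store becomes a 4-byte movd
; plus a pextrw of the third element.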
define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
; X86-LABEL: add3i16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: pinsrw $2, 4(%edx), %xmm0
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: pinsrw $2, 4(%ecx), %xmm1
; X86-NEXT: paddw %xmm0, %xmm1
; X86-NEXT: pextrw $2, %xmm1, 4(%eax)
; X86-NEXT: movd %xmm1, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add3i16:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: paddw %xmm0, %xmm1
; X64-NEXT: pextrw $2, %xmm1, 4(%rdi)
; X64-NEXT: movd %xmm1, (%rdi)
; X64-NEXT: retq
  %a = load %i16vec3, %i16vec3* %ap, align 16
  %b = load %i16vec3, %i16vec3* %bp, align 16
  %x = add %i16vec3 %a, %b
  store %i16vec3 %x, %i16vec3* %ret, align 16
  ret void
}

%i16vec4 = type <4 x i16>
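
; <4 x i16> fills an 8-byte movq exactly, so no store splitting is needed.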
define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
; X86-LABEL: add4i16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: paddw %xmm0, %xmm1
; X86-NEXT: movq %xmm1, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add4i16:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: paddw %xmm0, %xmm1
; X64-NEXT: movq %xmm1, (%rdi)
; X64-NEXT: retq
  %a = load %i16vec4, %i16vec4* %ap, align 16
  %b = load %i16vec4, %i16vec4* %bp, align 16
  %x = add %i16vec4 %a, %b
  store %i16vec4 %x, %i16vec4* %ret, align 16
  ret void
}

%i16vec12 = type <12 x i16>
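
; <12 x i16> (24 bytes): one full movdqa plus an 8-byte tail (a single movq
; on x86-64, movd/pextrd on i686).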
define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
; X86-LABEL: add12i16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movdqa (%edx), %xmm0
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddw (%ecx), %xmm0
; X86-NEXT: paddw 16(%ecx), %xmm1
; X86-NEXT: movd %xmm1, 16(%eax)
; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add12i16:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movdqa (%rsi), %xmm0
; X64-NEXT: movdqa 16(%rsi), %xmm1
; X64-NEXT: paddw (%rdx), %xmm0
; X64-NEXT: paddw 16(%rdx), %xmm1
; X64-NEXT: movq %xmm1, 16(%rdi)
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
  %a = load %i16vec12, %i16vec12* %ap, align 16
  %b = load %i16vec12, %i16vec12* %bp, align 16
  %x = add %i16vec12 %a, %b
  store %i16vec12 %x, %i16vec12* %ret, align 16
  ret void
}

%i16vec18 = type <18 x i16>
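
; <18 x i16> (36 bytes): two full movdqa stores plus a 4-byte movd tail.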
define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
; X86-LABEL: add18i16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movdqa 32(%edx), %xmm0
; X86-NEXT: movdqa (%edx), %xmm1
; X86-NEXT: movdqa 16(%edx), %xmm2
; X86-NEXT: paddw (%ecx), %xmm1
; X86-NEXT: paddw 32(%ecx), %xmm0
; X86-NEXT: paddw 16(%ecx), %xmm2
; X86-NEXT: movdqa %xmm2, 16(%eax)
; X86-NEXT: movd %xmm0, 32(%eax)
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add18i16:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movdqa (%rsi), %xmm0
; X64-NEXT: movdqa 16(%rsi), %xmm1
; X64-NEXT: movdqa 32(%rsi), %xmm2
; X64-NEXT: paddw (%rdx), %xmm0
; X64-NEXT: paddw 32(%rdx), %xmm2
; X64-NEXT: paddw 16(%rdx), %xmm1
; X64-NEXT: movdqa %xmm1, 16(%rdi)
; X64-NEXT: movd %xmm2, 32(%rdi)
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
  %a = load %i16vec18, %i16vec18* %ap, align 16
  %b = load %i16vec18, %i16vec18* %bp, align 16
  %x = add %i16vec18 %a, %b
  store %i16vec18 %x, %i16vec18* %ret, align 16
  ret void
}

%i8vec3 = type <3 x i8>
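
; <3 x i8>: the three result bytes are stored as a 2-byte pextrw plus a
; 1-byte pextrb.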
define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
; X86-LABEL: add3i8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: paddb %xmm0, %xmm1
; X86-NEXT: pextrb $2, %xmm1, 2(%eax)
; X86-NEXT: pextrw $0, %xmm1, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add3i8:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: paddb %xmm0, %xmm1
; X64-NEXT: pextrb $2, %xmm1, 2(%rdi)
; X64-NEXT: pextrw $0, %xmm1, (%rdi)
; X64-NEXT: retq
  %a = load %i8vec3, %i8vec3* %ap, align 16
  %b = load %i8vec3, %i8vec3* %bp, align 16
  %x = add %i8vec3 %a, %b
  store %i8vec3 %x, %i8vec3* %ret, align 16
  ret void
}

%i8vec31 = type <31 x i8>
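
; <31 x i8>: one full movdqa for the low 16 bytes, with the remaining 15
; bytes stored in progressively smaller pieces (down to pextrw/pextrb).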
define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
; X86-LABEL: add31i8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movdqa (%edx), %xmm0
; X86-NEXT: movdqa 16(%edx), %xmm1
; X86-NEXT: paddb (%ecx), %xmm0
; X86-NEXT: paddb 16(%ecx), %xmm1
; X86-NEXT: movd %xmm1, 16(%eax)
; X86-NEXT: pextrd $1, %xmm1, 20(%eax)
; X86-NEXT: pextrd $2, %xmm1, 24(%eax)
; X86-NEXT: pextrw $6, %xmm1, 28(%eax)
; X86-NEXT: pextrb $14, %xmm1, 30(%eax)
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add31i8:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movdqa (%rsi), %xmm0
; X64-NEXT: movdqa 16(%rsi), %xmm1
; X64-NEXT: paddb (%rdx), %xmm0
; X64-NEXT: paddb 16(%rdx), %xmm1
; X64-NEXT: movq %xmm1, 16(%rdi)
; X64-NEXT: pextrd $2, %xmm1, 24(%rdi)
; X64-NEXT: pextrw $6, %xmm1, 28(%rdi)
; X64-NEXT: pextrb $14, %xmm1, 30(%rdi)
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
  %a = load %i8vec31, %i8vec31* %ap, align 16
  %b = load %i8vec31, %i8vec31* %bp, align 16
  %x = add %i8vec31 %a, %b
  store %i8vec31 %x, %i8vec31* %ret, align 16
  ret void
}

%i8vec3pack = type { <3 x i8>, i8 }
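
; The constant stores into the packed <3 x i8> structs are emitted as
; immediate byte/word stores, and the lshr by <1, 1, 1> is lowered as a
; widened psrlw plus a pand that masks off the bits shifted across byte
; boundaries.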
define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pack* %rot) nounwind {
; X86-LABEL: rot:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movb $-98, 2(%edx)
; X86-NEXT: movw $-24930, (%edx) # imm = 0x9E9E
; X86-NEXT: movb $1, 2(%ecx)
; X86-NEXT: movw $257, (%ecx) # imm = 0x101
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: psrlw $1, %xmm0
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: pextrb $2, %xmm0, 2(%eax)
; X86-NEXT: pextrw $0, %xmm0, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: rot:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movb $-98, 2(%rsi)
; X64-NEXT: movw $-24930, (%rsi) # imm = 0x9E9E
; X64-NEXT: movb $1, 2(%rdx)
; X64-NEXT: movw $257, (%rdx) # imm = 0x101
; X64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: psrlw $1, %xmm0
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: pextrb $2, %xmm0, 2(%rdi)
; X64-NEXT: pextrw $0, %xmm0, (%rdi)
; X64-NEXT: retq
entry:
  %storetmp = bitcast %i8vec3pack* %X to <3 x i8>*
  store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp
  %storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>*
  store <3 x i8> <i8 1, i8 1, i8 1>, <3 x i8>* %storetmp1
  %tmp = load %i8vec3pack, %i8vec3pack* %X
  %extractVec = extractvalue %i8vec3pack %tmp, 0
  %tmp2 = load %i8vec3pack, %i8vec3pack* %rot
  %extractVec3 = extractvalue %i8vec3pack %tmp2, 0
  %shr = lshr <3 x i8> %extractVec, %extractVec3
  %storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>*
  store <3 x i8> %shr, <3 x i8>* %storetmp4
  ret void
}