; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
; Splat a loaded 128-bit <2 x double> into both halves of a 256-bit vector;
; expect a single vbroadcastf128 from memory.
define <4 x double> @test_broadcast_2f64_4f64(<2 x double> *%p) nounwind {
; X32-LABEL: test_broadcast_2f64_4f64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
  %1 = load <2 x double>, <2 x double> *%p
  %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %2
}
; Splat a loaded 128-bit <2 x i64> into both halves of a 256-bit vector;
; with only AVX1, integer broadcasts still use the FP-domain vbroadcastf128.
define <4 x i64> @test_broadcast_2i64_4i64(<2 x i64> *%p) nounwind {
; X32-LABEL: test_broadcast_2i64_4i64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64> *%p
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i64> %2
}
; Splat a loaded 128-bit <4 x float> into both halves of a 256-bit vector;
; expect a single vbroadcastf128 from memory.
define <8 x float> @test_broadcast_4f32_8f32(<4 x float> *%p) nounwind {
; X32-LABEL: test_broadcast_4f32_8f32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
  %1 = load <4 x float>, <4 x float> *%p
  %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %2
}
; Splat a loaded 128-bit <4 x i32> into both halves of a 256-bit vector;
; with only AVX1, integer broadcasts still use the FP-domain vbroadcastf128.
define <8 x i32> @test_broadcast_4i32_8i32(<4 x i32> *%p) nounwind {
; X32-LABEL: test_broadcast_4i32_8i32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32> *%p
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %2
}
; Splat a loaded 128-bit <8 x i16> into both halves of a 256-bit vector;
; with only AVX1, integer broadcasts still use the FP-domain vbroadcastf128.
define <16 x i16> @test_broadcast_8i16_16i16(<8 x i16> *%p) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16> *%p
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i16> %2
}
; Splat a loaded 128-bit <16 x i8> into both halves of a 256-bit vector;
; with only AVX1, integer broadcasts still use the FP-domain vbroadcastf128.
define <32 x i8> @test_broadcast_16i8_32i8(<16 x i8> *%p) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8> *%p
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <32 x i8> %2
}
; PR38949 - https://bugs.llvm.org/show_bug.cgi?id=38949
; Don't limit the transform based on extra uses of the load itself (the store is a user of the load's chain value).
; The broadcast result is also stored; the store only uses the load's chain
; value, so the broadcast transform should still fire (PR38949).
define void @subv_reuse_is_ok(<4 x float>* %a, <8 x float>* %b) {
; X32-LABEL: subv_reuse_is_ok:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vmovups %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: subv_reuse_is_ok:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vmovups %ymm0, (%rsi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %ld = load <4 x float>, <4 x float>* %a, align 1
  %splat128 = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  store <8 x float> %splat128, <8 x float>* %b, align 16
  ret void
}
; The loaded 128-bit value is reused by a store, so the load is kept in xmm1
; and the 256-bit splat is built with vinsertf128 instead of a broadcast.
define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x double>* %p1) {
; X32-LABEL: test_broadcast_2f64_4f64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %p0
  %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  store <2 x double> %1, <2 x double>* %p1
  ret <4 x double> %2
}
; The loaded 128-bit value is reused by a store, so the load is kept in xmm1
; and the 256-bit splat is built with vinsertf128 instead of a broadcast.
define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1) {
; X32-LABEL: test_broadcast_2i64_4i64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64>* %p0
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  store <2 x i64> %1, <2 x i64>* %p1
  ret <4 x i64> %2
}
; The loaded 128-bit value is reused by a store, so the load is kept in xmm1
; and the 256-bit splat is built with vinsertf128 instead of a broadcast.
define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float>* %p1) {
; X32-LABEL: test_broadcast_4f32_8f32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %p0
  %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  store <4 x float> %1, <4 x float>* %p1
  ret <8 x float> %2
}
; The loaded 128-bit value is reused by a store, so the load is kept in xmm1
; and the 256-bit splat is built with vinsertf128 instead of a broadcast.
define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1) {
; X32-LABEL: test_broadcast_4i32_8i32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %p0
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  store <4 x i32> %1, <4 x i32>* %p1
  ret <8 x i32> %2
}
; The loaded 128-bit value is reused by a store, so the load is kept in xmm1
; and the 256-bit splat is built with vinsertf128 instead of a broadcast.
define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p1) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16> *%p0
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  store <8 x i16> %1, <8 x i16>* %p1
  ret <16 x i16> %2
}
; The loaded 128-bit value is reused by a store, so the load is kept in xmm1
; and the 256-bit splat is built with vinsertf128 instead of a broadcast.
define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8> *%p0
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  store <16 x i8> %1, <16 x i8>* %p1
  ret <32 x i8> %2
}
; An intervening zero store must not block the load-fold: the splat of the
; loaded value still becomes a single vbroadcastf128 (PR29088).
define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
; X32-LABEL: PR29088:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vmovaps %ymm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: PR29088:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vmovaps %ymm1, (%rsi)
; X64-NEXT:    retq
  %ld = load <4 x i32>, <4 x i32>* %p0
  store <8 x float> zeroinitializer, <8 x float>* %p1
  %shuf = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %shuf
}