; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,slow-unaligned-mem-32 | FileCheck %s
; RUN: llc -O0 < %s -mtriple=x86_64-unknown-unknown -mattr=avx,slow-unaligned-mem-32 | FileCheck %s -check-prefix=CHECK_O0
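
; The first RUN line checks the optimized output; the second checks the -O0
; (fast-isel) output under the CHECK_O0 prefix. Both use slow-unaligned-mem-32,
; which models unaligned 32-byte memory accesses as expensive, so splitting
; them into 16-byte operations is the expected codegen below.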
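; test_256_load: the three 32-byte-aligned vector loads should each stay a
; single aligned ymm load, survive the call to @dummy via spills and reloads,
; and be written back with single aligned ymm stores.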
define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* nocapture %i) nounwind {
; CHECK-LABEL: test_256_load:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $96, %rsp
; CHECK-NEXT: movq %rdx, %r14
; CHECK-NEXT: movq %rsi, %r15
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps (%rsi), %ymm1
; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps (%rdx), %ymm2
; CHECK-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill
; CHECK-NEXT: callq dummy
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm0, (%rbx)
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm0, (%r15)
; CHECK-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm0, (%r14)
; CHECK-NEXT: addq $96, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
;
; CHECK_O0-LABEL: test_256_load:
; CHECK_O0: # %bb.0: # %entry
; CHECK_O0-NEXT: subq $152, %rsp
; CHECK_O0-NEXT: vmovapd (%rdi), %ymm0
; CHECK_O0-NEXT: vmovaps (%rsi), %ymm1
; CHECK_O0-NEXT: vmovdqa (%rdx), %ymm2
; CHECK_O0-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK_O0-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK_O0-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK_O0-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
; CHECK_O0-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
; CHECK_O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; CHECK_O0-NEXT: callq dummy
; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx # 8-byte Reload
; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; CHECK_O0-NEXT: vmovapd %ymm0, (%rdx)
; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 # 32-byte Reload
; CHECK_O0-NEXT: vmovaps %ymm1, (%rsi)
; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2 # 32-byte Reload
; CHECK_O0-NEXT: vmovdqa %ymm2, (%rdi)
; CHECK_O0-NEXT: addq $152, %rsp
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
entry:
  %0 = bitcast double* %d to <4 x double>*
  %tmp1.i = load <4 x double>, <4 x double>* %0, align 32
  %1 = bitcast float* %f to <8 x float>*
  %tmp1.i17 = load <8 x float>, <8 x float>* %1, align 32
  %tmp1.i16 = load <4 x i64>, <4 x i64>* %i, align 32
  tail call void @dummy(<4 x double> %tmp1.i, <8 x float> %tmp1.i17, <4 x i64> %tmp1.i16) nounwind
  store <4 x double> %tmp1.i, <4 x double>* %0, align 32
  store <8 x float> %tmp1.i17, <8 x float>* %1, align 32
  store <4 x i64> %tmp1.i16, <4 x i64>* %i, align 32
  ret void
}
declare void @dummy(<4 x double>, <8 x float>, <4 x i64>)
;;
;; The two tests below check that a load + scalar_to_vector + ins_subvec +
;; zext sequence is folded into a single vmovss, vmovsd, or vinsertps from
;; memory.
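; A reminder of the underlying semantics: a VEX-encoded vmovss/vmovsd load
; writes the low element and zeroes all remaining lanes of the destination
; register, which matches the insertelement-into-zeroinitializer pattern below.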
define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
; CHECK-LABEL: mov00:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq
;
; CHECK_O0-LABEL: mov00:
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK_O0-NEXT: # implicit-def: $ymm1
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
; CHECK_O0-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK_O0-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; CHECK_O0-NEXT: retq
  %val = load float, float* %ptr
  %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0
  ret <8 x float> %i0
}
define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
; CHECK-LABEL: mov01:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: retq
;
; CHECK_O0-LABEL: mov01:
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK_O0-NEXT: # implicit-def: $ymm1
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
; CHECK_O0-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK_O0-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm2[1,2,3]
; CHECK_O0-NEXT: retq
  %val = load double, double* %ptr
  %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0
  ret <4 x double> %i0
}
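; The four store tests below cover 32-byte vector stores. With a known 32-byte
; alignment a single aligned ymm store suffices; an underaligned store is
; split into two 16-byte stores when optimizing (a vextractf128 of the high
; half plus a vmovups of the low half), while the -O0 path keeps a single
; unaligned vmovdqu.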
define void @storev16i16(<16 x i16> %a) nounwind {
; CHECK-LABEL: storev16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %ymm0, (%rax)
;
; CHECK_O0-LABEL: storev16i16:
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: # implicit-def: $rax
; CHECK_O0-NEXT: vmovdqa %ymm0, (%rax)
  store <16 x i16> %a, <16 x i16>* undef, align 32
  unreachable
}
define void @storev16i16_01(<16 x i16> %a) nounwind {
; CHECK-LABEL: storev16i16_01:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, (%rax)
; CHECK-NEXT: vmovups %xmm0, (%rax)
;
; CHECK_O0-LABEL: storev16i16_01:
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: # implicit-def: $rax
; CHECK_O0-NEXT: vmovdqu %ymm0, (%rax)
  store <16 x i16> %a, <16 x i16>* undef, align 4
  unreachable
}
define void @storev32i8(<32 x i8> %a) nounwind {
; CHECK-LABEL: storev32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %ymm0, (%rax)
;
; CHECK_O0-LABEL: storev32i8:
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: # implicit-def: $rax
; CHECK_O0-NEXT: vmovdqa %ymm0, (%rax)
  store <32 x i8> %a, <32 x i8>* undef, align 32
  unreachable
}
define void @storev32i8_01(<32 x i8> %a) nounwind {
; CHECK-LABEL: storev32i8_01:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, (%rax)
; CHECK-NEXT: vmovups %xmm0, (%rax)
;
; CHECK_O0-LABEL: storev32i8_01:
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: # implicit-def: $rax
; CHECK_O0-NEXT: vmovdqu %ymm0, (%rax)
  store <32 x i8> %a, <32 x i8>* undef, align 4
  unreachable
}
; It is faster to make two 16-byte stores if the data is already in xmm
; registers, for example after an integer operation.
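; double_save checks exactly that: when optimizing, the two xmm halves are
; stored with two vmovaps; at -O0 they are first recombined with vinsertf128
; and stored with one unaligned vmovdqu.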
define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp {
; CHECK-LABEL: double_save:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, 16(%rdi)
; CHECK-NEXT: vmovaps %xmm0, (%rdi)
; CHECK-NEXT: retq
;
; CHECK_O0-LABEL: double_save:
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: # implicit-def: $ymm2
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
  %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  store <8 x i32> %Z, <8 x i32>* %P, align 16
  ret void
}
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
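; f_f checks that the llvm.x86.avx.maskstore.ps.256 intrinsic is selected to
; vmaskmovps. The branch conditions are undef and every block ends in
; unreachable, so only the instruction selection within each block matters.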
define void @f_f() nounwind {
; CHECK-LABEL: f_f:
; CHECK: # %bb.0: # %allocas
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB8_2
; CHECK-NEXT: # %bb.1: # %cif_mask_all
; CHECK-NEXT: .LBB8_2: # %cif_mask_mixed
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB8_4
; CHECK-NEXT: # %bb.3: # %cif_mixed_test_all
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: vmovd %eax, %xmm0
; CHECK-NEXT: vmaskmovps %ymm0, %ymm0, (%rax)
; CHECK-NEXT: .LBB8_4: # %cif_mixed_test_any_check
;
; CHECK_O0-LABEL: f_f:
; CHECK_O0: # %bb.0: # %allocas
; CHECK_O0-NEXT: # implicit-def: $al
; CHECK_O0-NEXT: testb $1, %al
; CHECK_O0-NEXT: jne .LBB8_1
; CHECK_O0-NEXT: jmp .LBB8_2
; CHECK_O0-NEXT: .LBB8_1: # %cif_mask_all
; CHECK_O0-NEXT: .LBB8_2: # %cif_mask_mixed
; CHECK_O0-NEXT: # implicit-def: $al
; CHECK_O0-NEXT: testb $1, %al
; CHECK_O0-NEXT: jne .LBB8_3
; CHECK_O0-NEXT: jmp .LBB8_4
; CHECK_O0-NEXT: .LBB8_3: # %cif_mixed_test_all
; CHECK_O0-NEXT: movl $-1, %eax
; CHECK_O0-NEXT: vmovd %eax, %xmm0
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
; CHECK_O0-NEXT: # implicit-def: $rcx
; CHECK_O0-NEXT: # implicit-def: $ymm2
; CHECK_O0-NEXT: vmaskmovps %ymm2, %ymm1, (%rcx)
; CHECK_O0-NEXT: .LBB8_4: # %cif_mixed_test_any_check
allocas:
  br i1 undef, label %cif_mask_all, label %cif_mask_mixed

cif_mask_all:
  unreachable

cif_mask_mixed:
  br i1 undef, label %cif_mixed_test_all, label %cif_mixed_test_any_check

cif_mixed_test_all:
  call void @llvm.x86.avx.maskstore.ps.256(i8* undef, <8 x i32> <i32 -1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, <8 x float> undef) nounwind
  unreachable

cif_mixed_test_any_check:
  unreachable
}
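; The three add tests below differ only in the known alignment of the loaded
; and stored values. With slow-unaligned-mem-32, align 1 and align 16 accesses
; are split into two 16-byte operations when optimizing, while align 64
; accesses use full 32-byte ymm operations.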
define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
; CHECK-LABEL: add8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups (%rsi), %xmm0
; CHECK-NEXT: vmovups 16(%rsi), %xmm1
; CHECK-NEXT: vmovups %xmm1, 16(%rdi)
; CHECK-NEXT: vmovups %xmm0, (%rdi)
; CHECK-NEXT: retq
;
; CHECK_O0-LABEL: add8i32:
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: vmovdqu (%rsi), %xmm0
; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1
; CHECK_O0-NEXT: # implicit-def: $ymm2
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
  %b = load <8 x i32>, <8 x i32>* %bp, align 1
  %x = add <8 x i32> zeroinitializer, %b
  store <8 x i32> %x, <8 x i32>* %ret, align 1
  ret void
}
define void @add4i64a64(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
; CHECK-LABEL: add4i64a64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rsi), %ymm0
; CHECK-NEXT: vmovaps %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
;
; CHECK_O0-LABEL: add4i64a64:
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: vmovaps (%rsi), %ymm0
; CHECK_O0-NEXT: vmovdqa %ymm0, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
  %b = load <4 x i64>, <4 x i64>* %bp, align 64
  %x = add <4 x i64> zeroinitializer, %b
  store <4 x i64> %x, <4 x i64>* %ret, align 64
  ret void
}
define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
; CHECK-LABEL: add4i64a16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rsi), %xmm0
; CHECK-NEXT: vmovaps 16(%rsi), %xmm1
; CHECK-NEXT: vmovaps %xmm1, 16(%rdi)
; CHECK-NEXT: vmovaps %xmm0, (%rdi)
; CHECK-NEXT: retq
;
; CHECK_O0-LABEL: add4i64a16:
; CHECK_O0: # %bb.0:
; CHECK_O0-NEXT: vmovdqa (%rsi), %xmm0
; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1
; CHECK_O0-NEXT: # implicit-def: $ymm2
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
  %b = load <4 x i64>, <4 x i64>* %bp, align 16
  %x = add <4 x i64> zeroinitializer, %b
  store <4 x i64> %x, <4 x i64>* %ret, align 16
  ret void
}