2017-09-14 21:00:27 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -verify-machineinstrs | FileCheck %s
|
2012-01-07 11:02:36 +08:00
|
|
|
|
|
|
|
; After tail duplication, two copies in an early exit BB can be cancelled out.
|
|
|
|
; rdar://10640363
|
|
|
|
; t1: i32 remainder loop. Returns %a unchanged when %b is zero; otherwise
; returns the divisor that was in use on the iteration whose remainder was zero.
define i32 @t1(i32 %a, i32 %b) nounwind {
|
2013-07-14 14:24:09 +08:00
|
|
|
; The expected assembly below keeps a single movl %edi, %eax in the entry
; block, so the early-exit block LBB0_1 consists of nothing but retq.
; CHECK-LABEL: t1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0: ## %entry
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: movl %edi, %eax
|
2018-02-28 00:59:10 +08:00
|
|
|
; CHECK-NEXT: testl %esi, %esi
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: je LBB0_1
|
[CodeGen] Add a new pass for PostRA sink
Summary:
This pass sinks COPY instructions into a successor block, if the COPY is not
used in the current block and the COPY is live-in to a single successor
(i.e., doesn't require the COPY to be duplicated). This avoids executing the
the copy on paths where their results aren't needed. This also exposes
additional opportunites for dead copy elimination and shrink wrapping.
These copies were either not handled by or are inserted after the MachineSink
pass. As an example of the former case, the MachineSink pass cannot sink
COPY instructions with allocatable source registers; for AArch64 these type
of copy instructions are frequently used to move function parameters (PhyReg)
into virtual registers in the entry block..
For the machine IR below, this pass will sink %w19 in the entry into its
successor (%bb.1) because %w19 is only live-in in %bb.1.
```
%bb.0:
%wzr = SUBSWri %w1, 1
%w19 = COPY %w0
Bcc 11, %bb.2
%bb.1:
Live Ins: %w19
BL @fun
%w0 = ADDWrr %w0, %w19
RET %w0
%bb.2:
%w0 = COPY %wzr
RET %w0
```
As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
able to see %bb.0 as a candidate.
With this change I observed 12% more shrink-wrapping candidate and 13% more dead copies deleted in spec2000/2006/2017 on AArch64.
Reviewers: qcolombet, MatzeB, thegameg, mcrosier, gberry, hfinkel, john.brawn, twoh, RKSimon, sebpop, kparzysz
Reviewed By: sebpop
Subscribers: evandro, sebpop, sfertile, aemerson, mgorny, javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D41463
llvm-svn: 328237
2018-03-23 04:06:47 +08:00
|
|
|
; CHECK-NEXT: ## %bb.2: ## %while.body.preheader
|
2018-05-22 05:40:51 +08:00
|
|
|
; CHECK-NEXT: movl %esi, %edx
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: .p2align 4, 0x90
|
[CodeGen] Add a new pass for PostRA sink
Summary:
This pass sinks COPY instructions into a successor block, if the COPY is not
used in the current block and the COPY is live-in to a single successor
(i.e., doesn't require the COPY to be duplicated). This avoids executing the
the copy on paths where their results aren't needed. This also exposes
additional opportunites for dead copy elimination and shrink wrapping.
These copies were either not handled by or are inserted after the MachineSink
pass. As an example of the former case, the MachineSink pass cannot sink
COPY instructions with allocatable source registers; for AArch64 these type
of copy instructions are frequently used to move function parameters (PhyReg)
into virtual registers in the entry block..
For the machine IR below, this pass will sink %w19 in the entry into its
successor (%bb.1) because %w19 is only live-in in %bb.1.
```
%bb.0:
%wzr = SUBSWri %w1, 1
%w19 = COPY %w0
Bcc 11, %bb.2
%bb.1:
Live Ins: %w19
BL @fun
%w0 = ADDWrr %w0, %w19
RET %w0
%bb.2:
%w0 = COPY %wzr
RET %w0
```
As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
able to see %bb.0 as a candidate.
With this change I observed 12% more shrink-wrapping candidate and 13% more dead copies deleted in spec2000/2006/2017 on AArch64.
Reviewers: qcolombet, MatzeB, thegameg, mcrosier, gberry, hfinkel, john.brawn, twoh, RKSimon, sebpop, kparzysz
Reviewed By: sebpop
Subscribers: evandro, sebpop, sfertile, aemerson, mgorny, javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D41463
llvm-svn: 328237
2018-03-23 04:06:47 +08:00
|
|
|
; CHECK-NEXT: LBB0_3: ## %while.body
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
|
|
|
; CHECK-NEXT: movl %edx, %ecx
|
|
|
|
; CHECK-NEXT: cltd
|
|
|
|
; CHECK-NEXT: idivl %ecx
|
|
|
|
; CHECK-NEXT: testl %edx, %edx
|
|
|
|
; CHECK-NEXT: movl %ecx, %eax
|
[CodeGen] Add a new pass for PostRA sink
Summary:
This pass sinks COPY instructions into a successor block, if the COPY is not
used in the current block and the COPY is live-in to a single successor
(i.e., doesn't require the COPY to be duplicated). This avoids executing the
the copy on paths where their results aren't needed. This also exposes
additional opportunites for dead copy elimination and shrink wrapping.
These copies were either not handled by or are inserted after the MachineSink
pass. As an example of the former case, the MachineSink pass cannot sink
COPY instructions with allocatable source registers; for AArch64 these type
of copy instructions are frequently used to move function parameters (PhyReg)
into virtual registers in the entry block..
For the machine IR below, this pass will sink %w19 in the entry into its
successor (%bb.1) because %w19 is only live-in in %bb.1.
```
%bb.0:
%wzr = SUBSWri %w1, 1
%w19 = COPY %w0
Bcc 11, %bb.2
%bb.1:
Live Ins: %w19
BL @fun
%w0 = ADDWrr %w0, %w19
RET %w0
%bb.2:
%w0 = COPY %wzr
RET %w0
```
As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
able to see %bb.0 as a candidate.
With this change I observed 12% more shrink-wrapping candidate and 13% more dead copies deleted in spec2000/2006/2017 on AArch64.
Reviewers: qcolombet, MatzeB, thegameg, mcrosier, gberry, hfinkel, john.brawn, twoh, RKSimon, sebpop, kparzysz
Reviewed By: sebpop
Subscribers: evandro, sebpop, sfertile, aemerson, mgorny, javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D41463
llvm-svn: 328237
2018-03-23 04:06:47 +08:00
|
|
|
; CHECK-NEXT: jne LBB0_3
|
|
|
|
; CHECK-NEXT: ## %bb.4: ## %while.end
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: movl %ecx, %eax
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
; CHECK-NEXT: LBB0_1:
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
entry:
|
2012-01-07 11:02:36 +08:00
|
|
|
; Bypass the loop entirely when the divisor %b is zero.
%cmp1 = icmp eq i32 %b, 0
|
|
|
|
br i1 %cmp1, label %while.end, label %while.body
|
|
|
|
|
|
|
|
while.body: ; preds = %entry, %while.body
|
|
|
|
; Dividend for this iteration: %a on entry, afterwards the previous divisor.
%a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
|
|
|
|
; Divisor for this iteration: %b on entry, afterwards the previous remainder.
%b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
|
|
|
|
%rem = srem i32 %a.addr.03, %b.addr.02
|
|
|
|
; Leave the loop as soon as the signed remainder is zero.
%cmp = icmp eq i32 %rem, 0
|
|
|
|
br i1 %cmp, label %while.end, label %while.body
|
|
|
|
|
|
|
|
while.end: ; preds = %while.body, %entry
|
|
|
|
; Result: %a on the early-exit edge, the final divisor on the loop-exit edge.
%a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
|
|
|
|
ret i32 %a.addr.0.lcssa
|
|
|
|
}
|
|
|
|
|
|
|
|
; Two movdqa (from phi-elimination) in the entry BB cancel out.
|
|
|
|
; rdar://10428165
|
|
|
|
; t2: single-block shuffle of two <8 x i16> vectors. The expected assembly
; below is just pshufd / pshuflw / punpcklqdq / retq, with no movdqa copies.
define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
|
2013-07-14 14:24:09 +08:00
|
|
|
; CHECK-LABEL: t2:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0: ## %entry
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
|
|
|
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
|
|
|
|
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
entry:
|
2012-01-07 11:02:36 +08:00
|
|
|
; Only result lanes 2, 3 and 4 are defined by the mask: they select %T0[7],
; %T0[2] and %T1[0] (mask index 8 is the first lane of the second operand).
; Every other lane is undef.
%tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
|
|
|
|
ret <8 x i16> %tmp8
|
|
|
|
}
|
MachineCopyPropagation has special logic for removing COPY instructions. It will remove plain COPYs using eraseFromParent(), but if the COPY has imp-defs/imp-uses it will convert it to a KILL, to keep the imp-def around.
This actually totally breaks and causes the machine verifier to cry in several cases, one of which being:
%RAX<def> = COPY %RCX<kill>
%ECX<def> = COPY %EAX<kill>, %RAX<imp-use,kill>
These subregister copies are together identified as no-ops, so both are removed. However, the second one, because it has an imp-use, gets converted into a KILL:
%ECX<def> = KILL %EAX<kill>, %RAX<imp-use,kill>
As the original COPY has been removed, the verifier goes into tears at the use of undefined EAX and RAX.
There are several hacky solutions to this hacky problem (which is all to do with imp-use/def weirdnesses), but the least hacky I've come up with is to *always* remove COPYs by converting to KILLs. KILLs are no-ops to the code generator so the generated code doesn't change (which is why they were partially used in the first place), but using them also keeps the def/use and imp-def/imp-use chains alive:
%RAX<def> = KILL %RCX<kill>
%ECX<def> = KILL %EAX<kill>, %RAX<imp-use,kill>
The patch passes all test cases including the ones that check the removal of MOVs in this circumstance, along with an extra test I added to check subregister behaviour (which made the machine verifier fall over before my patch).
The patch also adds some DEBUG() statements because the file hadn't got any.
llvm-svn: 199797
2014-01-22 17:12:27 +08:00
|
|
|
|
|
|
|
; t3: i64 variant of the remainder loop in t1. Returns trunc(%a) when %b is
; zero; otherwise the truncated divisor from the iteration whose remainder
; was zero.
define i32 @t3(i64 %a, i64 %b) nounwind {
|
|
|
|
; CHECK-LABEL: t3:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0: ## %entry
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: movq %rdi, %rax
|
2018-02-28 00:59:10 +08:00
|
|
|
; CHECK-NEXT: testq %rsi, %rsi
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: je LBB2_1
|
[CodeGen] Add a new pass for PostRA sink
Summary:
This pass sinks COPY instructions into a successor block, if the COPY is not
used in the current block and the COPY is live-in to a single successor
(i.e., doesn't require the COPY to be duplicated). This avoids executing the
the copy on paths where their results aren't needed. This also exposes
additional opportunites for dead copy elimination and shrink wrapping.
These copies were either not handled by or are inserted after the MachineSink
pass. As an example of the former case, the MachineSink pass cannot sink
COPY instructions with allocatable source registers; for AArch64 these type
of copy instructions are frequently used to move function parameters (PhyReg)
into virtual registers in the entry block..
For the machine IR below, this pass will sink %w19 in the entry into its
successor (%bb.1) because %w19 is only live-in in %bb.1.
```
%bb.0:
%wzr = SUBSWri %w1, 1
%w19 = COPY %w0
Bcc 11, %bb.2
%bb.1:
Live Ins: %w19
BL @fun
%w0 = ADDWrr %w0, %w19
RET %w0
%bb.2:
%w0 = COPY %wzr
RET %w0
```
As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
able to see %bb.0 as a candidate.
With this change I observed 12% more shrink-wrapping candidate and 13% more dead copies deleted in spec2000/2006/2017 on AArch64.
Reviewers: qcolombet, MatzeB, thegameg, mcrosier, gberry, hfinkel, john.brawn, twoh, RKSimon, sebpop, kparzysz
Reviewed By: sebpop
Subscribers: evandro, sebpop, sfertile, aemerson, mgorny, javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D41463
llvm-svn: 328237
2018-03-23 04:06:47 +08:00
|
|
|
; CHECK-NEXT: ## %bb.2: ## %while.body.preheader
|
|
|
|
; CHECK-NEXT: movq %rsi, %rdx
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: .p2align 4, 0x90
|
[CodeGen] Add a new pass for PostRA sink
Summary:
This pass sinks COPY instructions into a successor block, if the COPY is not
used in the current block and the COPY is live-in to a single successor
(i.e., doesn't require the COPY to be duplicated). This avoids executing the
the copy on paths where their results aren't needed. This also exposes
additional opportunites for dead copy elimination and shrink wrapping.
These copies were either not handled by or are inserted after the MachineSink
pass. As an example of the former case, the MachineSink pass cannot sink
COPY instructions with allocatable source registers; for AArch64 these type
of copy instructions are frequently used to move function parameters (PhyReg)
into virtual registers in the entry block..
For the machine IR below, this pass will sink %w19 in the entry into its
successor (%bb.1) because %w19 is only live-in in %bb.1.
```
%bb.0:
%wzr = SUBSWri %w1, 1
%w19 = COPY %w0
Bcc 11, %bb.2
%bb.1:
Live Ins: %w19
BL @fun
%w0 = ADDWrr %w0, %w19
RET %w0
%bb.2:
%w0 = COPY %wzr
RET %w0
```
As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
able to see %bb.0 as a candidate.
With this change I observed 12% more shrink-wrapping candidate and 13% more dead copies deleted in spec2000/2006/2017 on AArch64.
Reviewers: qcolombet, MatzeB, thegameg, mcrosier, gberry, hfinkel, john.brawn, twoh, RKSimon, sebpop, kparzysz
Reviewed By: sebpop
Subscribers: evandro, sebpop, sfertile, aemerson, mgorny, javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D41463
llvm-svn: 328237
2018-03-23 04:06:47 +08:00
|
|
|
; CHECK-NEXT: LBB2_3: ## %while.body
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
|
|
|
; CHECK-NEXT: movq %rdx, %rcx
|
|
|
|
; CHECK-NEXT: cqto
|
|
|
|
; CHECK-NEXT: idivq %rcx
|
|
|
|
; CHECK-NEXT: testq %rdx, %rdx
|
|
|
|
; CHECK-NEXT: movq %rcx, %rax
|
[CodeGen] Add a new pass for PostRA sink
Summary:
This pass sinks COPY instructions into a successor block, if the COPY is not
used in the current block and the COPY is live-in to a single successor
(i.e., doesn't require the COPY to be duplicated). This avoids executing the
the copy on paths where their results aren't needed. This also exposes
additional opportunites for dead copy elimination and shrink wrapping.
These copies were either not handled by or are inserted after the MachineSink
pass. As an example of the former case, the MachineSink pass cannot sink
COPY instructions with allocatable source registers; for AArch64 these type
of copy instructions are frequently used to move function parameters (PhyReg)
into virtual registers in the entry block..
For the machine IR below, this pass will sink %w19 in the entry into its
successor (%bb.1) because %w19 is only live-in in %bb.1.
```
%bb.0:
%wzr = SUBSWri %w1, 1
%w19 = COPY %w0
Bcc 11, %bb.2
%bb.1:
Live Ins: %w19
BL @fun
%w0 = ADDWrr %w0, %w19
RET %w0
%bb.2:
%w0 = COPY %wzr
RET %w0
```
As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
able to see %bb.0 as a candidate.
With this change I observed 12% more shrink-wrapping candidate and 13% more dead copies deleted in spec2000/2006/2017 on AArch64.
Reviewers: qcolombet, MatzeB, thegameg, mcrosier, gberry, hfinkel, john.brawn, twoh, RKSimon, sebpop, kparzysz
Reviewed By: sebpop
Subscribers: evandro, sebpop, sfertile, aemerson, mgorny, javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D41463
llvm-svn: 328237
2018-03-23 04:06:47 +08:00
|
|
|
; CHECK-NEXT: jne LBB2_3
|
|
|
|
; CHECK-NEXT: ## %bb.4: ## %while.end
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: movl %ecx, %eax
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
; CHECK-NEXT: LBB2_1:
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
entry:
|
MachineCopyPropagation has special logic for removing COPY instructions. It will remove plain COPYs using eraseFromParent(), but if the COPY has imp-defs/imp-uses it will convert it to a KILL, to keep the imp-def around.
This actually totally breaks and causes the machine verifier to cry in several cases, one of which being:
%RAX<def> = COPY %RCX<kill>
%ECX<def> = COPY %EAX<kill>, %RAX<imp-use,kill>
These subregister copies are together identified as noops, so are both removed. However, the second one as it has an imp-use gets converted into a kill:
%ECX<def> = KILL %EAX<kill>, %RAX<imp-use,kill>
As the original COPY has been removed, the verifier goes into tears at the use of undefined EAX and RAX.
There are several hacky solutions to this hacky problem (which is all to do with imp-use/def weirdnesses), but the least hacky I've come up with is to *always* remove COPYs by converting to KILLs. KILLs are no-ops to the code generator so the generated code doesn't change (which is why they were partially used in the first place), but using them also keeps the def/use and imp-def/imp-use chains alive:
%RAX<def> = KILL %RCX<kill>
%ECX<def> = KILL %EAX<kill>, %RAX<imp-use,kill>
The patch passes all test cases including the ones that check the removal of MOVs in this circumstance, along with an extra test I added to check subregister behaviour (which made the machine verifier fall over before my patch).
The patch also adds some DEBUG() statements because the file hadn't got any.
llvm-svn: 199797
2014-01-22 17:12:27 +08:00
|
|
|
; Bypass the loop entirely when the divisor %b is zero.
%cmp1 = icmp eq i64 %b, 0
|
|
|
|
br i1 %cmp1, label %while.end, label %while.body
|
|
|
|
|
|
|
|
while.body: ; preds = %entry, %while.body
|
|
|
|
; Dividend for this iteration: %a on entry, afterwards the previous divisor.
%a.addr.03 = phi i64 [ %b.addr.02, %while.body ], [ %a, %entry ]
|
|
|
|
; Divisor for this iteration: %b on entry, afterwards the previous remainder.
%b.addr.02 = phi i64 [ %rem, %while.body ], [ %b, %entry ]
|
|
|
|
%rem = srem i64 %a.addr.03, %b.addr.02
|
|
|
|
; Leave the loop as soon as the signed remainder is zero.
%cmp = icmp eq i64 %rem, 0
|
|
|
|
br i1 %cmp, label %while.end, label %while.body
|
|
|
|
|
|
|
|
while.end: ; preds = %while.body, %entry
|
|
|
|
; 64-bit result: %a on the early-exit edge, the final divisor on the loop-exit edge.
%a.addr.0.lcssa = phi i64 [ %a, %entry ], [ %b.addr.02, %while.body ]
|
|
|
|
; Only the low 32 bits are returned; the expected assembly does this with
; movl %ecx, %eax in the while.end block.
%t = trunc i64 %a.addr.0.lcssa to i32
|
|
|
|
ret i32 %t
|
|
|
|
}
|
2015-05-29 09:40:00 +08:00
|
|
|
|
|
|
|
; Check that copy propagation does not kill things like:
|
|
|
|
; dst = copy src <-- do not kill that.
|
2017-12-07 18:40:31 +08:00
|
|
|
; ... = op1 undef dst
|
2015-05-29 09:40:00 +08:00
|
|
|
; ... = op2 dst <-- this is used here.
|
|
|
|
; foo: takes and returns <16 x float>; a single basic block of vector
; compare / convert / bitwise operations whose lowering is pinned by the
; assertions below.
define <16 x float> @foo(<16 x float> %x) {
|
2017-09-14 21:00:27 +08:00
|
|
|
; The expected code starts by copying xmm3, xmm2 and xmm0 aside (into xmm9,
; xmm8 and xmm7) before xmm0 is zeroed with xorps.
; NOTE(review): presumably these are the copies this test guards against
; being removed - confirm against the copy-propagation pass.
; CHECK-LABEL: foo:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0: ## %bb
|
2018-05-22 05:41:02 +08:00
|
|
|
; CHECK-NEXT: movaps %xmm3, %xmm9
|
|
|
|
; CHECK-NEXT: movaps %xmm2, %xmm8
|
|
|
|
; CHECK-NEXT: movaps %xmm0, %xmm7
|
|
|
|
; CHECK-NEXT: xorps %xmm0, %xmm0
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: movaps %xmm3, %xmm2
|
|
|
|
; CHECK-NEXT: cmpltps %xmm0, %xmm2
|
|
|
|
; CHECK-NEXT: movaps %xmm2, %xmm4
|
2018-05-22 05:41:02 +08:00
|
|
|
; CHECK-NEXT: orps {{.*}}(%rip), %xmm4
|
|
|
|
; CHECK-NEXT: movaps %xmm4, %xmm10
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: andnps %xmm2, %xmm10
|
|
|
|
; CHECK-NEXT: movaps %xmm8, %xmm5
|
|
|
|
; CHECK-NEXT: cmpltps %xmm0, %xmm5
|
2018-05-22 05:41:02 +08:00
|
|
|
; CHECK-NEXT: movaps {{.*#+}} xmm11 = [9,10,11,12]
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: movaps %xmm5, %xmm2
|
|
|
|
; CHECK-NEXT: orps %xmm11, %xmm2
|
|
|
|
; CHECK-NEXT: movaps %xmm2, %xmm14
|
|
|
|
; CHECK-NEXT: andnps %xmm5, %xmm14
|
|
|
|
; CHECK-NEXT: cvttps2dq %xmm1, %xmm12
|
|
|
|
; CHECK-NEXT: cmpltps %xmm0, %xmm1
|
2018-05-22 05:41:02 +08:00
|
|
|
; CHECK-NEXT: movaps {{.*#+}} xmm13 = [5,6,7,8]
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: movaps %xmm1, %xmm6
|
|
|
|
; CHECK-NEXT: orps %xmm13, %xmm6
|
|
|
|
; CHECK-NEXT: movaps %xmm6, %xmm5
|
|
|
|
; CHECK-NEXT: andnps %xmm1, %xmm5
|
|
|
|
; CHECK-NEXT: cvttps2dq %xmm7, %xmm3
|
2018-05-22 05:41:02 +08:00
|
|
|
; CHECK-NEXT: cmpltps %xmm0, %xmm7
|
|
|
|
; CHECK-NEXT: movaps {{.*#+}} xmm15 = [1,2,3,4]
|
|
|
|
; CHECK-NEXT: movaps %xmm7, %xmm0
|
|
|
|
; CHECK-NEXT: orps %xmm15, %xmm0
|
|
|
|
; CHECK-NEXT: movaps %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: andnps %xmm7, %xmm1
|
|
|
|
; CHECK-NEXT: andps %xmm15, %xmm0
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: cvtdq2ps %xmm3, %xmm3
|
|
|
|
; CHECK-NEXT: andps %xmm3, %xmm0
|
|
|
|
; CHECK-NEXT: movaps {{.*#+}} xmm3 = [1,1,1,1]
|
|
|
|
; CHECK-NEXT: andps %xmm3, %xmm1
|
2018-05-22 05:41:02 +08:00
|
|
|
; CHECK-NEXT: orps %xmm1, %xmm0
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: andps %xmm13, %xmm6
|
2018-05-22 05:41:02 +08:00
|
|
|
; CHECK-NEXT: cvtdq2ps %xmm12, %xmm1
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: andps %xmm1, %xmm6
|
|
|
|
; CHECK-NEXT: andps %xmm3, %xmm5
|
|
|
|
; CHECK-NEXT: orps %xmm5, %xmm6
|
|
|
|
; CHECK-NEXT: andps %xmm11, %xmm2
|
2018-05-22 05:41:02 +08:00
|
|
|
; CHECK-NEXT: cvttps2dq %xmm8, %xmm1
|
|
|
|
; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: andps %xmm1, %xmm2
|
|
|
|
; CHECK-NEXT: andps %xmm3, %xmm14
|
|
|
|
; CHECK-NEXT: orps %xmm14, %xmm2
|
|
|
|
; CHECK-NEXT: andps %xmm3, %xmm10
|
2018-05-22 05:41:02 +08:00
|
|
|
; CHECK-NEXT: andps {{.*}}(%rip), %xmm4
|
|
|
|
; CHECK-NEXT: cvttps2dq %xmm9, %xmm1
|
|
|
|
; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: andps %xmm1, %xmm4
|
|
|
|
; CHECK-NEXT: orps %xmm10, %xmm4
|
2018-09-20 02:59:08 +08:00
|
|
|
; CHECK-NEXT: movaps %xmm6, %xmm1
|
2018-05-22 05:41:02 +08:00
|
|
|
; CHECK-NEXT: movaps %xmm4, %xmm3
|
2017-09-14 21:00:27 +08:00
|
|
|
; CHECK-NEXT: retq
|
2015-05-29 09:40:00 +08:00
|
|
|
bb:
|
2018-02-10 23:36:23 +08:00
|
|
|
; Lane-wise signed "less than zero" test of the constants 0..15.
%v3 = icmp slt <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, zeroinitializer
|
2015-05-29 09:40:00 +08:00
|
|
|
%v14 = zext <16 x i1> %v3 to <16 x i32>
|
|
|
|
; Mask of lanes where %x is (ordered) less than 0.0; %v17 and %v18 are the
; sign-extended and zero-extended forms of that same mask.
%v16 = fcmp olt <16 x float> %x, zeroinitializer
|
|
|
|
%v17 = sext <16 x i1> %v16 to <16 x i32>
|
|
|
|
%v18 = zext <16 x i1> %v16 to <16 x i32>
|
|
|
|
%v19 = xor <16 x i32> %v14, %v18
|
2018-02-10 23:36:23 +08:00
|
|
|
%v20 = or <16 x i32> %v17, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
|
2015-05-29 09:40:00 +08:00
|
|
|
; Round-trip %x through i32: fptosi followed by sitofp.
%v21 = fptosi <16 x float> %x to <16 x i32>
|
|
|
|
%v22 = sitofp <16 x i32> %v21 to <16 x float>
|
|
|
|
%v69 = fcmp ogt <16 x float> %v22, zeroinitializer
|
|
|
|
%v75 = and <16 x i1> %v69, %v3
|
|
|
|
; Reinterpret the round-tripped floats as integers for the bitwise mixing below.
%v77 = bitcast <16 x float> %v22 to <16 x i32>
|
|
|
|
%v79 = sext <16 x i1> %v75 to <16 x i32>
|
2018-02-10 23:36:23 +08:00
|
|
|
%v80 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v79
|
2015-05-29 09:40:00 +08:00
|
|
|
%v81 = xor <16 x i32> %v77, %v80
|
2018-02-10 23:36:23 +08:00
|
|
|
%v82 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v81
|
2015-05-29 09:40:00 +08:00
|
|
|
%v83 = xor <16 x i32> %v19, %v82
|
|
|
|
%v84 = and <16 x i32> %v83, %v20
|
|
|
|
%v85 = xor <16 x i32> %v19, %v84
|
|
|
|
; Reinterpret the final integer lanes as the <16 x float> return value.
%v86 = bitcast <16 x i32> %v85 to <16 x float>
|
|
|
|
ret <16 x float> %v86
|
|
|
|
}
|