[DAGCombine] narrowInsertExtractVectorBinOp - add CONCAT_VECTORS support
We already split extract_subvector(binop(insert_subvector(v,x),insert_subvector(w,y))) -> binop(x,y).

This patch adds support for extract_subvector(binop(concat_vectors(),concat_vectors())) cases as well.

In particular this means we don't have to wait for X86 lowering to convert concat_vectors to insert_subvector chains, which helps avoid some cases where demandedelts/combine calls occur too late to split large vector ops.

The fast-isel-store.ll load folding regression is annoying, but I don't think it is that critical.

Differential Revision: https://reviews.llvm.org/D63653

llvm-svn: 365785
parent 6eb8ae8f17
commit d0307f93a7
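As an informal illustration (not part of the patch), the standalone C++ sketch below models the rewrite on plain arrays: extracting the upper half of binop(concat_vectors(a,b), concat_vectors(c,d)) gives the same result as applying the binop directly to the matching narrow halves, which is the shape the combine now produces without waiting for X86 lowering. The helper names (concat, extractSubvector, add4, add2) are invented for the example.

// Illustrative model only (plain std::array, not SelectionDAG nodes).
#include <array>
#include <cassert>

using V2 = std::array<float, 2>; // narrow subvector type
using V4 = std::array<float, 4>; // wide concatenated type

static V4 concat(V2 Lo, V2 Hi) { return {Lo[0], Lo[1], Hi[0], Hi[1]}; }

static V2 extractSubvector(V4 V, unsigned Index) {
  assert(Index % 2 == 0 && "extract index must be subvector-aligned");
  return {V[Index], V[Index + 1]};
}

static V4 add4(V4 A, V4 B) {
  return {A[0] + B[0], A[1] + B[1], A[2] + B[2], A[3] + B[3]};
}
static V2 add2(V2 A, V2 B) { return {A[0] + B[0], A[1] + B[1]}; }

int main() {
  V2 A{1, 2}, B{3, 4}, C{5, 6}, D{7, 8};
  // Wide form: extract_subvector(binop(concat_vectors(A,B), concat_vectors(C,D)), 2)
  V2 Wide = extractSubvector(add4(concat(A, B), concat(C, D)), 2);
  // Narrow form after the combine: binop(B, D) on the matching subvectors.
  V2 Narrow = add2(B, D);
  assert(Wide == Narrow);
  return 0;
}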
@@ -18002,11 +18002,21 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
  SDValue Index = Extract->getOperand(1);
  EVT VT = Extract->getValueType(0);

  // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
  // if the source subvector is the same type as the one being extracted.
  auto GetSubVector = [VT, Index](SDValue V) -> SDValue {
    if (V.getOpcode() != ISD::INSERT_SUBVECTOR ||
        V.getOperand(1).getValueType() != VT || V.getOperand(2) != Index)
      return SDValue();
    return V.getOperand(1);
    if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
        V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) {
      return V.getOperand(1);
    }
    auto *IndexC = dyn_cast<ConstantSDNode>(Index);
    if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
        V.getOperand(0).getValueType() == VT &&
        (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) {
      uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements();
      return V.getOperand(SubIdx);
    }
    return SDValue();
  };
  SDValue Sub0 = GetSubVector(Bop0);
  SDValue Sub1 = GetSubVector(Bop1);

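The CONCAT_VECTORS case above only fires when the extract index lands exactly on one concat operand. The standalone sketch below (simplified stand-ins, not the real SelectionDAG types or API) spells out that operand-selection arithmetic.

// Standalone sketch of the new index arithmetic: the extract index must be a
// multiple of the subvector element count, and the matching CONCAT_VECTORS
// operand is simply Index / NumSubElts.
#include <cassert>
#include <cstdint>
#include <optional>

// Returns which concat operand covers the extracted subvector, or
// std::nullopt when the extract straddles two operands (combine must bail).
std::optional<uint64_t> concatOperandForExtract(uint64_t ExtractIdx,
                                                uint64_t NumSubElts) {
  if (ExtractIdx % NumSubElts != 0)
    return std::nullopt;
  return ExtractIdx / NumSubElts;
}

int main() {
  // e.g. a 16-element vector built from four 4-element operands: elements
  // [8,12) live in operand 2, while an unaligned index such as 6 is rejected.
  assert(concatOperandForExtract(8, 4) == 2u);
  assert(!concatOperandForExtract(6, 4).has_value());
  return 0;
}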
@@ -153,8 +153,8 @@ define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v2
; GFX9-NNAN-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v2
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:

@@ -154,8 +154,8 @@ define <3 x half> @test_fmin_legacy_ule_v3f16(<3 x half> %a, <3 x half> %b) #0 {
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:

@@ -336,9 +336,9 @@ define <8 x float> @test_store_8xf32_aligned(<8 x float>* nocapture %addr, <8 x
define <4 x double> @test_store_4xf64(<4 x double>* nocapture %addr, <4 x double> %value, <4 x double> %value2) {
; SSE32-LABEL: test_store_4xf64:
; SSE32: # %bb.0:
; SSE32-NEXT: addpd %xmm3, %xmm1
; SSE32-NEXT: addpd %xmm2, %xmm0
; SSE32-NEXT: movupd %xmm0, (%rdi)
; SSE32-NEXT: addpd %xmm3, %xmm1
; SSE32-NEXT: movupd %xmm1, 16(%rdi)
; SSE32-NEXT: retq
;

@@ -346,10 +346,11 @@ define <4 x double> @test_store_4xf64(<4 x double>* nocapture %addr, <4 x double
; SSE64: # %bb.0:
; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 16
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm1
; SSE64-NEXT: addpd %xmm2, %xmm0
; SSE64-NEXT: movupd %xmm0, (%eax)
; SSE64-NEXT: addpd %xmm3, %xmm1
; SSE64-NEXT: movupd %xmm1, 16(%eax)
; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 4

@@ -375,9 +376,9 @@ define <4 x double> @test_store_4xf64(<4 x double>* nocapture %addr, <4 x double
define <4 x double> @test_store_4xf64_aligned(<4 x double>* nocapture %addr, <4 x double> %value, <4 x double> %value2) {
; SSE32-LABEL: test_store_4xf64_aligned:
; SSE32: # %bb.0:
; SSE32-NEXT: addpd %xmm3, %xmm1
; SSE32-NEXT: addpd %xmm2, %xmm0
; SSE32-NEXT: movapd %xmm0, (%rdi)
; SSE32-NEXT: addpd %xmm3, %xmm1
; SSE32-NEXT: movapd %xmm1, 16(%rdi)
; SSE32-NEXT: retq
;

@@ -385,10 +386,11 @@ define <4 x double> @test_store_4xf64_aligned(<4 x double>* nocapture %addr, <4
; SSE64: # %bb.0:
; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 16
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm1
; SSE64-NEXT: addpd %xmm2, %xmm0
; SSE64-NEXT: movapd %xmm0, (%eax)
; SSE64-NEXT: addpd %xmm3, %xmm1
; SSE64-NEXT: movapd %xmm1, 16(%eax)
; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 4

@@ -614,13 +616,13 @@ define <16 x float> @test_store_16xf32_aligned(<16 x float>* nocapture %addr, <1
define <8 x double> @test_store_8xf64(<8 x double>* nocapture %addr, <8 x double> %value, <8 x double> %value2) {
; SSE32-LABEL: test_store_8xf64:
; SSE32: # %bb.0:
; SSE32-NEXT: addpd %xmm7, %xmm3
; SSE32-NEXT: addpd %xmm6, %xmm2
; SSE32-NEXT: addpd %xmm5, %xmm1
; SSE32-NEXT: addpd %xmm4, %xmm0
; SSE32-NEXT: movupd %xmm0, (%rdi)
; SSE32-NEXT: addpd %xmm5, %xmm1
; SSE32-NEXT: movupd %xmm1, 16(%rdi)
; SSE32-NEXT: addpd %xmm6, %xmm2
; SSE32-NEXT: movupd %xmm2, 32(%rdi)
; SSE32-NEXT: addpd %xmm7, %xmm3
; SSE32-NEXT: movupd %xmm3, 48(%rdi)
; SSE32-NEXT: retq
;

@@ -628,14 +630,17 @@ define <8 x double> @test_store_8xf64(<8 x double>* nocapture %addr, <8 x double
; SSE64: # %bb.0:
; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 16
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm4
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm5
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm6
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: addpd %xmm4, %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm2
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm1
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm0
; SSE64-NEXT: movupd %xmm0, (%eax)
; SSE64-NEXT: addpd %xmm6, %xmm1
; SSE64-NEXT: movupd %xmm1, 16(%eax)
; SSE64-NEXT: addpd %xmm5, %xmm2
; SSE64-NEXT: movupd %xmm2, 32(%eax)
; SSE64-NEXT: movupd %xmm3, 48(%eax)
; SSE64-NEXT: addl $12, %esp

@@ -644,9 +649,9 @@ define <8 x double> @test_store_8xf64(<8 x double>* nocapture %addr, <8 x double
;
; AVXONLY32-LABEL: test_store_8xf64:
; AVXONLY32: # %bb.0:
; AVXONLY32-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY32-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVXONLY32-NEXT: vmovupd %ymm0, (%rdi)
; AVXONLY32-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY32-NEXT: vmovupd %ymm1, 32(%rdi)
; AVXONLY32-NEXT: retq
;

@@ -659,10 +664,11 @@ define <8 x double> @test_store_8xf64(<8 x double>* nocapture %addr, <8 x double
; AVXONLY64-NEXT: .cfi_def_cfa_register %ebp
; AVXONLY64-NEXT: andl $-32, %esp
; AVXONLY64-NEXT: subl $32, %esp
; AVXONLY64-NEXT: vmovapd 40(%ebp), %ymm3
; AVXONLY64-NEXT: movl 8(%ebp), %eax
; AVXONLY64-NEXT: vaddpd 40(%ebp), %ymm1, %ymm1
; AVXONLY64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVXONLY64-NEXT: vmovupd %ymm0, (%eax)
; AVXONLY64-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY64-NEXT: vmovupd %ymm1, 32(%eax)
; AVXONLY64-NEXT: movl %ebp, %esp
; AVXONLY64-NEXT: popl %ebp

@@ -689,13 +695,13 @@ define <8 x double> @test_store_8xf64(<8 x double>* nocapture %addr, <8 x double
define <8 x double> @test_store_8xf64_aligned(<8 x double>* nocapture %addr, <8 x double> %value, <8 x double> %value2) {
; SSE32-LABEL: test_store_8xf64_aligned:
; SSE32: # %bb.0:
; SSE32-NEXT: addpd %xmm7, %xmm3
; SSE32-NEXT: addpd %xmm6, %xmm2
; SSE32-NEXT: addpd %xmm5, %xmm1
; SSE32-NEXT: addpd %xmm4, %xmm0
; SSE32-NEXT: movapd %xmm0, (%rdi)
; SSE32-NEXT: addpd %xmm5, %xmm1
; SSE32-NEXT: movapd %xmm1, 16(%rdi)
; SSE32-NEXT: addpd %xmm6, %xmm2
; SSE32-NEXT: movapd %xmm2, 32(%rdi)
; SSE32-NEXT: addpd %xmm7, %xmm3
; SSE32-NEXT: movapd %xmm3, 48(%rdi)
; SSE32-NEXT: retq
;

@@ -703,14 +709,17 @@ define <8 x double> @test_store_8xf64_aligned(<8 x double>* nocapture %addr, <8
; SSE64: # %bb.0:
; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 16
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm4
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm5
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm6
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: addpd %xmm4, %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm2
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm1
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm0
; SSE64-NEXT: movapd %xmm0, (%eax)
; SSE64-NEXT: addpd %xmm6, %xmm1
; SSE64-NEXT: movapd %xmm1, 16(%eax)
; SSE64-NEXT: addpd %xmm5, %xmm2
; SSE64-NEXT: movapd %xmm2, 32(%eax)
; SSE64-NEXT: movapd %xmm3, 48(%eax)
; SSE64-NEXT: addl $12, %esp

@@ -719,9 +728,9 @@ define <8 x double> @test_store_8xf64_aligned(<8 x double>* nocapture %addr, <8
;
; AVXONLY32-LABEL: test_store_8xf64_aligned:
; AVXONLY32: # %bb.0:
; AVXONLY32-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY32-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVXONLY32-NEXT: vmovapd %ymm0, (%rdi)
; AVXONLY32-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY32-NEXT: vmovapd %ymm1, 32(%rdi)
; AVXONLY32-NEXT: retq
;

@@ -734,10 +743,11 @@ define <8 x double> @test_store_8xf64_aligned(<8 x double>* nocapture %addr, <8
; AVXONLY64-NEXT: .cfi_def_cfa_register %ebp
; AVXONLY64-NEXT: andl $-32, %esp
; AVXONLY64-NEXT: subl $32, %esp
; AVXONLY64-NEXT: vmovapd 40(%ebp), %ymm3
; AVXONLY64-NEXT: movl 8(%ebp), %eax
; AVXONLY64-NEXT: vaddpd 40(%ebp), %ymm1, %ymm1
; AVXONLY64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVXONLY64-NEXT: vmovapd %ymm0, (%eax)
; AVXONLY64-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY64-NEXT: vmovapd %ymm1, 32(%eax)
; AVXONLY64-NEXT: movl %ebp, %esp
; AVXONLY64-NEXT: popl %ebp

@@ -73,10 +73,10 @@ define <4 x i32> @reassociate_xor_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
define <8 x i32> @reassociate_and_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
; SSE-LABEL: reassociate_and_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: pand %xmm6, %xmm4
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: pand %xmm7, %xmm5
; SSE-NEXT: pand %xmm5, %xmm1
; SSE-NEXT: retq

@@ -97,10 +97,10 @@ define <8 x i32> @reassociate_and_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
define <8 x i32> @reassociate_or_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
; SSE-LABEL: reassociate_or_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: por %xmm6, %xmm4
; SSE-NEXT: por %xmm4, %xmm0
; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: por %xmm7, %xmm5
; SSE-NEXT: por %xmm5, %xmm1
; SSE-NEXT: retq

@@ -121,10 +121,10 @@ define <8 x i32> @reassociate_or_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %
define <8 x i32> @reassociate_xor_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
; SSE-LABEL: reassociate_xor_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: pxor %xmm6, %xmm4
; SSE-NEXT: pxor %xmm4, %xmm0
; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: pxor %xmm7, %xmm5
; SSE-NEXT: pxor %xmm5, %xmm1
; SSE-NEXT: retq

@@ -164,10 +164,10 @@ define <16 x i32> @reassociate_and_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
;
; AVX2-LABEL: reassociate_and_v16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm7, %ymm5, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq

@@ -204,10 +204,10 @@ define <16 x i32> @reassociate_or_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i
;
; AVX2-LABEL: reassociate_or_v16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpor %ymm7, %ymm5, %ymm2
; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq

@@ -244,10 +244,10 @@ define <16 x i32> @reassociate_xor_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
;
; AVX2-LABEL: reassociate_xor_v16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm7, %ymm5, %ymm2
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq

@@ -1234,9 +1234,9 @@ define void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
define void @test_unaligned_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
; SSE-LABEL: test_unaligned_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: addps %xmm3, %xmm1
; SSE-NEXT: movntps %xmm1, 16(%rdi)
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: movntps %xmm0, (%rdi)
; SSE-NEXT: retq
;

@@ -807,50 +807,46 @@ define <16 x i32> @saddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
; AVX1-NEXT: vpcmpgtd %xmm7, %xmm5, %xmm4
; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm10
; AVX1-NEXT: vpcmpeqd %xmm8, %xmm10, %xmm8
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm5, %xmm4
; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm11
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm5, %xmm4
; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm11, %xmm4, %xmm11
; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm11, %ymm11
; AVX1-NEXT: vpcmpeqd %xmm8, %xmm4, %xmm8
; AVX1-NEXT: vpaddd %xmm9, %xmm7, %xmm9
; AVX1-NEXT: vpcmpgtd %xmm9, %xmm5, %xmm7
; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm10, %xmm7
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm4, %xmm4
; AVX1-NEXT: vpandn %xmm8, %xmm4, %xmm8
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm5, %xmm7
; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm5, %xmm4
; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm4, %xmm7
; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm10
; AVX1-NEXT: vpcmpgtd %xmm10, %xmm5, %xmm1
; AVX1-NEXT: vpxor %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm1, %ymm1
; AVX1-NEXT: vandnps %ymm11, %ymm1, %ymm1
; AVX1-NEXT: vpandn %xmm8, %xmm7, %xmm4
; AVX1-NEXT: vpackssdw %xmm4, %xmm1, %xmm8
; AVX1-NEXT: vpandn %xmm7, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm8, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm7
; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm5, %xmm3
; AVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm3, %xmm11
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm5, %xmm7
; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm12
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm5, %xmm7
; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
; AVX1-NEXT: vpcmpeqd %xmm12, %xmm7, %xmm12
; AVX1-NEXT: vinsertf128 $1, %xmm11, %ymm12, %ymm12
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm3, %xmm7
; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm1
; AVX1-NEXT: vpxor %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpandn %xmm7, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm5, %xmm3
; AVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm5, %xmm7
; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm7, %xmm3
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm5, %xmm0
; AVX1-NEXT: vpxor %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm7, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandnps %ymm12, %ymm0, %ymm0
; AVX1-NEXT: vpandn %xmm11, %xmm1, %xmm1
; AVX1-NEXT: vpandn %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm8, %xmm0, %xmm1
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm0

@@ -826,75 +826,71 @@ define <16 x i32> @ssubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
;
; AVX1-LABEL: ssubo_v16i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm9
; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtd %xmm9, %xmm6, %xmm7
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpxor %xmm9, %xmm9, %xmm9
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm9, %xmm7
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
; AVX1-NEXT: vpcmpgtd %xmm7, %xmm6, %xmm4
; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm10
; AVX1-NEXT: vpcmpeqd %xmm8, %xmm10, %xmm8
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm4
; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm11
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm6, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm7, %xmm9, %xmm6
; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpeqd %xmm8, %xmm6, %xmm8
; AVX1-NEXT: vpsubd %xmm4, %xmm7, %xmm10
; AVX1-NEXT: vpcmpgtd %xmm10, %xmm9, %xmm7
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm6, %xmm6
; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpandn %xmm6, %xmm8, %xmm6
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm9, %xmm7
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm9, %xmm4
; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm11, %xmm4, %xmm11
; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm11, %ymm11
; AVX1-NEXT: vpsubd %xmm9, %xmm7, %xmm9
; AVX1-NEXT: vpcmpgtd %xmm9, %xmm6, %xmm7
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm10, %xmm7
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm10
; AVX1-NEXT: vpcmpgtd %xmm10, %xmm6, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm4, %xmm7
; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm9, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm1, %ymm1
; AVX1-NEXT: vandnps %ymm1, %ymm11, %ymm1
; AVX1-NEXT: vpandn %xmm7, %xmm8, %xmm4
; AVX1-NEXT: vpackssdw %xmm4, %xmm1, %xmm8
; AVX1-NEXT: vpandn %xmm1, %xmm7, %xmm1
; AVX1-NEXT: vpackssdw %xmm6, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm6, %xmm7
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm6, %xmm3
; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm3, %xmm11
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm6, %xmm7
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm12
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm6, %xmm7
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
; AVX1-NEXT: vpcmpeqd %xmm12, %xmm7, %xmm12
; AVX1-NEXT: vinsertf128 $1, %xmm11, %ymm12, %ymm12
; AVX1-NEXT: vpsubd %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm6, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm9, %xmm6
; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
; AVX1-NEXT: vpcmpgtd %xmm7, %xmm9, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm6, %xmm1, %xmm6
; AVX1-NEXT: vpsubd %xmm4, %xmm7, %xmm7
; AVX1-NEXT: vpcmpgtd %xmm7, %xmm9, %xmm4
; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpandn %xmm1, %xmm6, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm9, %xmm4
; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm9, %xmm6
; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm6, %xmm0
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm9, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm7, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm6, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandnps %ymm0, %ymm12, %ymm0
; AVX1-NEXT: vpandn %xmm1, %xmm11, %xmm1
; AVX1-NEXT: vpandn %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm8, %xmm0, %xmm1
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vmovdqa %xmm9, 48(%rdi)
; AVX1-NEXT: vmovdqa %xmm10, 32(%rdi)
; AVX1-NEXT: vmovdqa %xmm4, 16(%rdi)
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm4, %ymm1
; AVX1-NEXT: vmovdqa %xmm10, 48(%rdi)
; AVX1-NEXT: vmovdqa %xmm3, 32(%rdi)
; AVX1-NEXT: vmovdqa %xmm7, 16(%rdi)
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
; AVX1-NEXT: retq
;