forked from OSchip/llvm-project
[X86][SSE] Allow folding of store/zext with PEXTRW of 0'th element
Under normal circumstances we prefer the higher performance MOVD to extract the 0'th element of a v8i16 vector instead of PEXTRW. But as detailed in PR27265, this prevents the SSE41 implementation of PEXTRW from folding the store of the 0'th element. Additionally it prevents us from making use of the fact that the (SSE2) reg-reg version of PEXTRW implicitly zero-extends the i16 element to the i32/i64 destination register. This patch only preferentially lowers to MOVD if we will not be zero-extending the extracted i16, nor preventing a store from being folded (on SSE41). Fix for PR27265. Differential Revision: https://reviews.llvm.org/D22509 llvm-svn: 276289
This commit is contained in:
parent
5ad891f719
commit
88e0940d3b
|
@ -3779,6 +3779,14 @@ static bool MayFoldIntoStore(SDValue Op) {
|
|||
return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
|
||||
}
|
||||
|
||||
/// Return true if \p Op has exactly one use and that single user is an
/// ISD::ZERO_EXTEND node; return false otherwise (including when \p Op has
/// zero or multiple uses).
static bool MayFoldIntoZeroExtend(SDValue Op) {
  // Only inspect the user when there is exactly one of them.
  if (!Op.hasOneUse())
    return false;

  const unsigned UserOpcode = Op.getNode()->use_begin()->getOpcode();
  return UserOpcode == ISD::ZERO_EXTEND;
}
|
||||
|
||||
static bool isTargetShuffle(unsigned Opcode) {
|
||||
switch(Opcode) {
|
||||
default: return false;
|
||||
|
@ -12501,12 +12509,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
|
|||
MVT VT = Op.getSimpleValueType();
|
||||
|
||||
if (VT.getSizeInBits() == 16) {
|
||||
// If IdxVal is 0, it's cheaper to do a move instead of a pextrw.
|
||||
if (IdxVal == 0)
|
||||
return DAG.getNode(
|
||||
ISD::TRUNCATE, dl, MVT::i16,
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
|
||||
DAG.getBitcast(MVT::v4i32, Vec), Idx));
|
||||
// If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless
|
||||
// we're going to zero extend the register or fold the store (SSE41 only).
|
||||
if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) &&
|
||||
!(Subtarget.hasSSE41() && MayFoldIntoStore(Op)))
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
|
||||
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
|
||||
DAG.getBitcast(MVT::v4i32, Vec), Idx));
|
||||
|
||||
// Transform it so it match pextrw which produces a 32-bit result.
|
||||
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
|
||||
|
|
|
@ -19,8 +19,7 @@ define void @prom_bug(<4 x i8> %t, i16* %p) {
|
|||
; SSE41: ## BB#0:
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; SSE41-NEXT: movd %xmm0, %eax
|
||||
; SSE41-NEXT: movw %ax, (%rdi)
|
||||
; SSE41-NEXT: pextrw $0, %xmm0, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
%r = bitcast <4 x i8> %t to <2 x i16>
|
||||
%o = extractelement <2 x i16> %r, i32 0
|
||||
|
|
|
@ -96,8 +96,7 @@ define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
|
|||
; KNL-LABEL: trunc_qb_128_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; KNL-NEXT: vmovd %xmm0, %eax
|
||||
; KNL-NEXT: movw %ax, (%rdi)
|
||||
; KNL-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_qb_128_mem:
|
||||
|
|
|
@ -48,16 +48,20 @@ define void @extract_i8_15(i8* nocapture %dst, <16 x i8> %foo) {
|
|||
}
|
||||
|
||||
define void @extract_i16_0(i16* nocapture %dst, <8 x i16> %foo) {
|
||||
; SSE-LABEL: extract_i16_0:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movd %xmm0, %eax
|
||||
; SSE-NEXT: movw %ax, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
; SSE2-LABEL: extract_i16_0:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movd %xmm0, %eax
|
||||
; SSE2-NEXT: movw %ax, (%rdi)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: extract_i16_0:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pextrw $0, %xmm0, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: extract_i16_0:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd %xmm0, %eax
|
||||
; AVX-NEXT: movw %ax, (%rdi)
|
||||
; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%vecext = extractelement <8 x i16> %foo, i32 0
|
||||
store i16 %vecext, i16* %dst, align 1
|
||||
|
|
|
@ -5,8 +5,7 @@
|
|||
define <8 x i16> @test1(<8 x i16> %A, <8 x i16> %B) {
|
||||
; SSE2-LABEL: test1:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
; SSE2-NEXT: movzwl %ax, %eax
|
||||
; SSE2-NEXT: pextrw $0, %xmm1, %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm1
|
||||
; SSE2-NEXT: psllw %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
|
@ -62,8 +61,7 @@ entry:
|
|||
define <8 x i16> @test4(<8 x i16> %A, <8 x i16> %B) {
|
||||
; SSE2-LABEL: test4:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
; SSE2-NEXT: movzwl %ax, %eax
|
||||
; SSE2-NEXT: pextrw $0, %xmm1, %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm1
|
||||
; SSE2-NEXT: psrlw %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
|
@ -119,8 +117,7 @@ entry:
|
|||
define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) {
|
||||
; SSE2-LABEL: test7:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
; SSE2-NEXT: movzwl %ax, %eax
|
||||
; SSE2-NEXT: pextrw $0, %xmm1, %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm1
|
||||
; SSE2-NEXT: psraw %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
|
|
|
@ -9230,8 +9230,7 @@ define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %
|
|||
; AVX-NEXT: testb $1, %al
|
||||
; AVX-NEXT: je LBB59_2
|
||||
; AVX-NEXT: ## BB#1: ## %cond.store
|
||||
; AVX-NEXT: vmovd %xmm1, %eax
|
||||
; AVX-NEXT: movw %ax, (%rdi)
|
||||
; AVX-NEXT: vpextrw $0, %xmm1, (%rdi)
|
||||
; AVX-NEXT: LBB59_2: ## %else
|
||||
; AVX-NEXT: vpextrb $2, %xmm0, %eax
|
||||
; AVX-NEXT: testb $1, %al
|
||||
|
@ -9288,8 +9287,7 @@ define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %
|
|||
; AVX512F-NEXT: testb %al, %al
|
||||
; AVX512F-NEXT: je LBB59_2
|
||||
; AVX512F-NEXT: ## BB#1: ## %cond.store
|
||||
; AVX512F-NEXT: vmovd %xmm1, %eax
|
||||
; AVX512F-NEXT: movw %ax, (%rdi)
|
||||
; AVX512F-NEXT: vpextrw $0, %xmm1, (%rdi)
|
||||
; AVX512F-NEXT: LBB59_2: ## %else
|
||||
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
|
||||
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
|
||||
|
@ -9367,8 +9365,7 @@ define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i1
|
|||
; AVX1-NEXT: testb $1, %al
|
||||
; AVX1-NEXT: je LBB60_2
|
||||
; AVX1-NEXT: ## BB#1: ## %cond.store
|
||||
; AVX1-NEXT: vmovd %xmm1, %eax
|
||||
; AVX1-NEXT: movw %ax, (%rdi)
|
||||
; AVX1-NEXT: vpextrw $0, %xmm1, (%rdi)
|
||||
; AVX1-NEXT: LBB60_2: ## %else
|
||||
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
|
||||
; AVX1-NEXT: testb $1, %al
|
||||
|
@ -9417,8 +9414,7 @@ define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i1
|
|||
; AVX1-NEXT: je LBB60_18
|
||||
; AVX1-NEXT: ## BB#17: ## %cond.store15
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; AVX1-NEXT: vmovd %xmm2, %eax
|
||||
; AVX1-NEXT: movw %ax, 16(%rdi)
|
||||
; AVX1-NEXT: vpextrw $0, %xmm2, 16(%rdi)
|
||||
; AVX1-NEXT: LBB60_18: ## %else16
|
||||
; AVX1-NEXT: vpextrb $9, %xmm0, %eax
|
||||
; AVX1-NEXT: testb $1, %al
|
||||
|
@ -9478,8 +9474,7 @@ define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i1
|
|||
; AVX2-NEXT: testb $1, %al
|
||||
; AVX2-NEXT: je LBB60_2
|
||||
; AVX2-NEXT: ## BB#1: ## %cond.store
|
||||
; AVX2-NEXT: vmovd %xmm1, %eax
|
||||
; AVX2-NEXT: movw %ax, (%rdi)
|
||||
; AVX2-NEXT: vpextrw $0, %xmm1, (%rdi)
|
||||
; AVX2-NEXT: LBB60_2: ## %else
|
||||
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
|
||||
; AVX2-NEXT: testb $1, %al
|
||||
|
@ -9528,8 +9523,7 @@ define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i1
|
|||
; AVX2-NEXT: je LBB60_18
|
||||
; AVX2-NEXT: ## BB#17: ## %cond.store15
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX2-NEXT: vmovd %xmm2, %eax
|
||||
; AVX2-NEXT: movw %ax, 16(%rdi)
|
||||
; AVX2-NEXT: vpextrw $0, %xmm2, 16(%rdi)
|
||||
; AVX2-NEXT: LBB60_18: ## %else16
|
||||
; AVX2-NEXT: vpextrb $9, %xmm0, %eax
|
||||
; AVX2-NEXT: testb $1, %al
|
||||
|
@ -9594,8 +9588,7 @@ define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i1
|
|||
; AVX512F-NEXT: testb %al, %al
|
||||
; AVX512F-NEXT: je LBB60_2
|
||||
; AVX512F-NEXT: ## BB#1: ## %cond.store
|
||||
; AVX512F-NEXT: vmovd %xmm1, %eax
|
||||
; AVX512F-NEXT: movw %ax, (%rdi)
|
||||
; AVX512F-NEXT: vpextrw $0, %xmm1, (%rdi)
|
||||
; AVX512F-NEXT: LBB60_2: ## %else
|
||||
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
|
||||
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
|
||||
|
@ -9660,8 +9653,7 @@ define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i1
|
|||
; AVX512F-NEXT: je LBB60_18
|
||||
; AVX512F-NEXT: ## BB#17: ## %cond.store15
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
|
||||
; AVX512F-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512F-NEXT: movw %ax, 16(%rdi)
|
||||
; AVX512F-NEXT: vpextrw $0, %xmm0, 16(%rdi)
|
||||
; AVX512F-NEXT: LBB60_18: ## %else16
|
||||
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
|
||||
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
|
||||
|
@ -9746,8 +9738,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX1-NEXT: testb $1, %al
|
||||
; AVX1-NEXT: je LBB61_2
|
||||
; AVX1-NEXT: ## BB#1: ## %cond.store
|
||||
; AVX1-NEXT: vmovd %xmm1, %eax
|
||||
; AVX1-NEXT: movw %ax, (%rdi)
|
||||
; AVX1-NEXT: vpextrw $0, %xmm1, (%rdi)
|
||||
; AVX1-NEXT: LBB61_2: ## %else
|
||||
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
|
||||
; AVX1-NEXT: testb $1, %al
|
||||
|
@ -9796,8 +9787,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX1-NEXT: je LBB61_18
|
||||
; AVX1-NEXT: ## BB#17: ## %cond.store15
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; AVX1-NEXT: vmovd %xmm3, %eax
|
||||
; AVX1-NEXT: movw %ax, 16(%rdi)
|
||||
; AVX1-NEXT: vpextrw $0, %xmm3, 16(%rdi)
|
||||
; AVX1-NEXT: LBB61_18: ## %else16
|
||||
; AVX1-NEXT: vpextrb $9, %xmm0, %eax
|
||||
; AVX1-NEXT: testb $1, %al
|
||||
|
@ -9853,8 +9843,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX1-NEXT: testb $1, %al
|
||||
; AVX1-NEXT: je LBB61_34
|
||||
; AVX1-NEXT: ## BB#33: ## %cond.store31
|
||||
; AVX1-NEXT: vmovd %xmm2, %eax
|
||||
; AVX1-NEXT: movw %ax, 32(%rdi)
|
||||
; AVX1-NEXT: vpextrw $0, %xmm2, 32(%rdi)
|
||||
; AVX1-NEXT: LBB61_34: ## %else32
|
||||
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
|
||||
; AVX1-NEXT: testb $1, %al
|
||||
|
@ -9903,8 +9892,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX1-NEXT: je LBB61_50
|
||||
; AVX1-NEXT: ## BB#49: ## %cond.store47
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
|
||||
; AVX1-NEXT: vmovd %xmm1, %eax
|
||||
; AVX1-NEXT: movw %ax, 48(%rdi)
|
||||
; AVX1-NEXT: vpextrw $0, %xmm1, 48(%rdi)
|
||||
; AVX1-NEXT: LBB61_50: ## %else48
|
||||
; AVX1-NEXT: vpextrb $9, %xmm0, %eax
|
||||
; AVX1-NEXT: testb $1, %al
|
||||
|
@ -9964,8 +9952,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX2-NEXT: testb $1, %al
|
||||
; AVX2-NEXT: je LBB61_2
|
||||
; AVX2-NEXT: ## BB#1: ## %cond.store
|
||||
; AVX2-NEXT: vmovd %xmm1, %eax
|
||||
; AVX2-NEXT: movw %ax, (%rdi)
|
||||
; AVX2-NEXT: vpextrw $0, %xmm1, (%rdi)
|
||||
; AVX2-NEXT: LBB61_2: ## %else
|
||||
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
|
||||
; AVX2-NEXT: testb $1, %al
|
||||
|
@ -10014,8 +10001,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX2-NEXT: je LBB61_18
|
||||
; AVX2-NEXT: ## BB#17: ## %cond.store15
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
|
||||
; AVX2-NEXT: vmovd %xmm3, %eax
|
||||
; AVX2-NEXT: movw %ax, 16(%rdi)
|
||||
; AVX2-NEXT: vpextrw $0, %xmm3, 16(%rdi)
|
||||
; AVX2-NEXT: LBB61_18: ## %else16
|
||||
; AVX2-NEXT: vpextrb $9, %xmm0, %eax
|
||||
; AVX2-NEXT: testb $1, %al
|
||||
|
@ -10071,8 +10057,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX2-NEXT: testb $1, %al
|
||||
; AVX2-NEXT: je LBB61_34
|
||||
; AVX2-NEXT: ## BB#33: ## %cond.store31
|
||||
; AVX2-NEXT: vmovd %xmm2, %eax
|
||||
; AVX2-NEXT: movw %ax, 32(%rdi)
|
||||
; AVX2-NEXT: vpextrw $0, %xmm2, 32(%rdi)
|
||||
; AVX2-NEXT: LBB61_34: ## %else32
|
||||
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
|
||||
; AVX2-NEXT: testb $1, %al
|
||||
|
@ -10121,8 +10106,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX2-NEXT: je LBB61_50
|
||||
; AVX2-NEXT: ## BB#49: ## %cond.store47
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1
|
||||
; AVX2-NEXT: vmovd %xmm1, %eax
|
||||
; AVX2-NEXT: movw %ax, 48(%rdi)
|
||||
; AVX2-NEXT: vpextrw $0, %xmm1, 48(%rdi)
|
||||
; AVX2-NEXT: LBB61_50: ## %else48
|
||||
; AVX2-NEXT: vpextrb $9, %xmm0, %eax
|
||||
; AVX2-NEXT: testb $1, %al
|
||||
|
@ -10182,8 +10166,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX512F-NEXT: testb $1, %al
|
||||
; AVX512F-NEXT: je LBB61_2
|
||||
; AVX512F-NEXT: ## BB#1: ## %cond.store
|
||||
; AVX512F-NEXT: vmovd %xmm1, %eax
|
||||
; AVX512F-NEXT: movw %ax, (%rdi)
|
||||
; AVX512F-NEXT: vpextrw $0, %xmm1, (%rdi)
|
||||
; AVX512F-NEXT: LBB61_2: ## %else
|
||||
; AVX512F-NEXT: vpextrb $1, %xmm0, %eax
|
||||
; AVX512F-NEXT: testb $1, %al
|
||||
|
@ -10232,8 +10215,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX512F-NEXT: je LBB61_18
|
||||
; AVX512F-NEXT: ## BB#17: ## %cond.store15
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
|
||||
; AVX512F-NEXT: vmovd %xmm3, %eax
|
||||
; AVX512F-NEXT: movw %ax, 16(%rdi)
|
||||
; AVX512F-NEXT: vpextrw $0, %xmm3, 16(%rdi)
|
||||
; AVX512F-NEXT: LBB61_18: ## %else16
|
||||
; AVX512F-NEXT: vpextrb $9, %xmm0, %eax
|
||||
; AVX512F-NEXT: testb $1, %al
|
||||
|
@ -10289,8 +10271,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX512F-NEXT: testb $1, %al
|
||||
; AVX512F-NEXT: je LBB61_34
|
||||
; AVX512F-NEXT: ## BB#33: ## %cond.store31
|
||||
; AVX512F-NEXT: vmovd %xmm2, %eax
|
||||
; AVX512F-NEXT: movw %ax, 32(%rdi)
|
||||
; AVX512F-NEXT: vpextrw $0, %xmm2, 32(%rdi)
|
||||
; AVX512F-NEXT: LBB61_34: ## %else32
|
||||
; AVX512F-NEXT: vpextrb $1, %xmm0, %eax
|
||||
; AVX512F-NEXT: testb $1, %al
|
||||
|
@ -10339,8 +10320,7 @@ define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i1
|
|||
; AVX512F-NEXT: je LBB61_50
|
||||
; AVX512F-NEXT: ## BB#49: ## %cond.store47
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1
|
||||
; AVX512F-NEXT: vmovd %xmm1, %eax
|
||||
; AVX512F-NEXT: movw %ax, 48(%rdi)
|
||||
; AVX512F-NEXT: vpextrw $0, %xmm1, 48(%rdi)
|
||||
; AVX512F-NEXT: LBB61_50: ## %else48
|
||||
; AVX512F-NEXT: vpextrb $9, %xmm0, %eax
|
||||
; AVX512F-NEXT: testb $1, %al
|
||||
|
|
|
@ -25,8 +25,7 @@ define void @load_2_i8(<2 x i8>* %A) {
|
|||
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: paddq {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; SSE41-NEXT: movd %xmm0, %eax
|
||||
; SSE41-NEXT: movw %ax, (%rdi)
|
||||
; SSE41-NEXT: pextrw $0, %xmm0, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
%T = load <2 x i8>, <2 x i8>* %A
|
||||
%G = add <2 x i8> %T, <i8 9, i8 7>
|
||||
|
|
|
@ -699,8 +699,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
|
|||
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
|
||||
; SSE2-LABEL: splatvar_shift_v8i16:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
; SSE2-NEXT: movzwl %ax, %eax
|
||||
; SSE2-NEXT: pextrw $0, %xmm1, %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm1
|
||||
; SSE2-NEXT: psraw %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
|
@ -735,8 +734,7 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
|
|||
;
|
||||
; X32-SSE-LABEL: splatvar_shift_v8i16:
|
||||
; X32-SSE: # BB#0:
|
||||
; X32-SSE-NEXT: movd %xmm1, %eax
|
||||
; X32-SSE-NEXT: movzwl %ax, %eax
|
||||
; X32-SSE-NEXT: pextrw $0, %xmm1, %eax
|
||||
; X32-SSE-NEXT: movd %eax, %xmm1
|
||||
; X32-SSE-NEXT: psraw %xmm1, %xmm0
|
||||
; X32-SSE-NEXT: retl
|
||||
|
|
|
@ -473,8 +473,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
; AVX1-LABEL: splatvar_shift_v16i16:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vmovd %xmm1, %eax
|
||||
; AVX1-NEXT: movzwl %ax, %eax
|
||||
; AVX1-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm1
|
||||
; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
|
||||
|
@ -483,8 +482,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
;
|
||||
; AVX2-LABEL: splatvar_shift_v16i16:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovd %xmm1, %eax
|
||||
; AVX2-NEXT: movzwl %ax, %eax
|
||||
; AVX2-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
||||
; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
|
@ -492,8 +490,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
; XOPAVX1-LABEL: splatvar_shift_v16i16:
|
||||
; XOPAVX1: # BB#0:
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; XOPAVX1-NEXT: vmovd %xmm1, %eax
|
||||
; XOPAVX1-NEXT: movzwl %ax, %eax
|
||||
; XOPAVX1-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; XOPAVX1-NEXT: vmovd %eax, %xmm1
|
||||
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
|
||||
|
@ -502,16 +499,14 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
;
|
||||
; XOPAVX2-LABEL: splatvar_shift_v16i16:
|
||||
; XOPAVX2: # BB#0:
|
||||
; XOPAVX2-NEXT: vmovd %xmm1, %eax
|
||||
; XOPAVX2-NEXT: movzwl %ax, %eax
|
||||
; XOPAVX2-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; XOPAVX2-NEXT: vmovd %eax, %xmm1
|
||||
; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatvar_shift_v16i16:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovd %xmm1, %eax
|
||||
; AVX512-NEXT: movzwl %ax, %eax
|
||||
; AVX512-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
|
|
|
@ -140,8 +140,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
|
|||
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
|
||||
; AVX512DQ-LABEL: splatvar_shift_v32i16:
|
||||
; AVX512DQ: ## BB#0:
|
||||
; AVX512DQ-NEXT: vmovd %xmm2, %eax
|
||||
; AVX512DQ-NEXT: movzwl %ax, %eax
|
||||
; AVX512DQ-NEXT: vpextrw $0, %xmm2, %eax
|
||||
; AVX512DQ-NEXT: vmovd %eax, %xmm2
|
||||
; AVX512DQ-NEXT: vpsraw %xmm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsraw %xmm2, %ymm1, %ymm1
|
||||
|
@ -149,8 +148,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_shift_v32i16:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vmovd %xmm1, %eax
|
||||
; AVX512BW-NEXT: movzwl %ax, %eax
|
||||
; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX512BW-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
|
|
|
@ -551,8 +551,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
|
|||
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
|
||||
; SSE2-LABEL: splatvar_shift_v8i16:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
; SSE2-NEXT: movzwl %ax, %eax
|
||||
; SSE2-NEXT: pextrw $0, %xmm1, %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm1
|
||||
; SSE2-NEXT: psrlw %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
|
@ -587,8 +586,7 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
|
|||
;
|
||||
; X32-SSE-LABEL: splatvar_shift_v8i16:
|
||||
; X32-SSE: # BB#0:
|
||||
; X32-SSE-NEXT: movd %xmm1, %eax
|
||||
; X32-SSE-NEXT: movzwl %ax, %eax
|
||||
; X32-SSE-NEXT: pextrw $0, %xmm1, %eax
|
||||
; X32-SSE-NEXT: movd %eax, %xmm1
|
||||
; X32-SSE-NEXT: psrlw %xmm1, %xmm0
|
||||
; X32-SSE-NEXT: retl
|
||||
|
|
|
@ -384,8 +384,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
; AVX1-LABEL: splatvar_shift_v16i16:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vmovd %xmm1, %eax
|
||||
; AVX1-NEXT: movzwl %ax, %eax
|
||||
; AVX1-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm1
|
||||
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
|
||||
|
@ -394,8 +393,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
;
|
||||
; AVX2-LABEL: splatvar_shift_v16i16:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovd %xmm1, %eax
|
||||
; AVX2-NEXT: movzwl %ax, %eax
|
||||
; AVX2-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
||||
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
|
@ -403,8 +401,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
; XOPAVX1-LABEL: splatvar_shift_v16i16:
|
||||
; XOPAVX1: # BB#0:
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; XOPAVX1-NEXT: vmovd %xmm1, %eax
|
||||
; XOPAVX1-NEXT: movzwl %ax, %eax
|
||||
; XOPAVX1-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; XOPAVX1-NEXT: vmovd %eax, %xmm1
|
||||
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
|
||||
|
@ -413,16 +410,14 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
;
|
||||
; XOPAVX2-LABEL: splatvar_shift_v16i16:
|
||||
; XOPAVX2: # BB#0:
|
||||
; XOPAVX2-NEXT: vmovd %xmm1, %eax
|
||||
; XOPAVX2-NEXT: movzwl %ax, %eax
|
||||
; XOPAVX2-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; XOPAVX2-NEXT: vmovd %eax, %xmm1
|
||||
; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatvar_shift_v16i16:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovd %xmm1, %eax
|
||||
; AVX512-NEXT: movzwl %ax, %eax
|
||||
; AVX512-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
|
|
|
@ -121,8 +121,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
|
|||
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
|
||||
; AVX512DQ-LABEL: splatvar_shift_v32i16:
|
||||
; AVX512DQ: ## BB#0:
|
||||
; AVX512DQ-NEXT: vmovd %xmm2, %eax
|
||||
; AVX512DQ-NEXT: movzwl %ax, %eax
|
||||
; AVX512DQ-NEXT: vpextrw $0, %xmm2, %eax
|
||||
; AVX512DQ-NEXT: vmovd %eax, %xmm2
|
||||
; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
|
||||
|
@ -130,8 +129,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_shift_v32i16:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vmovd %xmm1, %eax
|
||||
; AVX512BW-NEXT: movzwl %ax, %eax
|
||||
; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX512BW-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
|
|
|
@ -499,8 +499,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
|
|||
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
|
||||
; SSE2-LABEL: splatvar_shift_v8i16:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
; SSE2-NEXT: movzwl %ax, %eax
|
||||
; SSE2-NEXT: pextrw $0, %xmm1, %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm1
|
||||
; SSE2-NEXT: psllw %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
|
@ -535,8 +534,7 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
|
|||
;
|
||||
; X32-SSE-LABEL: splatvar_shift_v8i16:
|
||||
; X32-SSE: # BB#0:
|
||||
; X32-SSE-NEXT: movd %xmm1, %eax
|
||||
; X32-SSE-NEXT: movzwl %ax, %eax
|
||||
; X32-SSE-NEXT: pextrw $0, %xmm1, %eax
|
||||
; X32-SSE-NEXT: movd %eax, %xmm1
|
||||
; X32-SSE-NEXT: psllw %xmm1, %xmm0
|
||||
; X32-SSE-NEXT: retl
|
||||
|
|
|
@ -348,8 +348,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
; AVX1-LABEL: splatvar_shift_v16i16:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vmovd %xmm1, %eax
|
||||
; AVX1-NEXT: movzwl %ax, %eax
|
||||
; AVX1-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm1
|
||||
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
|
||||
|
@ -358,8 +357,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
;
|
||||
; AVX2-LABEL: splatvar_shift_v16i16:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovd %xmm1, %eax
|
||||
; AVX2-NEXT: movzwl %ax, %eax
|
||||
; AVX2-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
||||
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
|
@ -367,8 +365,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
; XOPAVX1-LABEL: splatvar_shift_v16i16:
|
||||
; XOPAVX1: # BB#0:
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; XOPAVX1-NEXT: vmovd %xmm1, %eax
|
||||
; XOPAVX1-NEXT: movzwl %ax, %eax
|
||||
; XOPAVX1-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; XOPAVX1-NEXT: vmovd %eax, %xmm1
|
||||
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
|
||||
|
@ -377,16 +374,14 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
;
|
||||
; XOPAVX2-LABEL: splatvar_shift_v16i16:
|
||||
; XOPAVX2: # BB#0:
|
||||
; XOPAVX2-NEXT: vmovd %xmm1, %eax
|
||||
; XOPAVX2-NEXT: movzwl %ax, %eax
|
||||
; XOPAVX2-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; XOPAVX2-NEXT: vmovd %eax, %xmm1
|
||||
; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatvar_shift_v16i16:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovd %xmm1, %eax
|
||||
; AVX512-NEXT: movzwl %ax, %eax
|
||||
; AVX512-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
|
|
|
@ -117,8 +117,7 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
|
|||
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
|
||||
; AVX512DQ-LABEL: splatvar_shift_v32i16:
|
||||
; AVX512DQ: ## BB#0:
|
||||
; AVX512DQ-NEXT: vmovd %xmm2, %eax
|
||||
; AVX512DQ-NEXT: movzwl %ax, %eax
|
||||
; AVX512DQ-NEXT: vpextrw $0, %xmm2, %eax
|
||||
; AVX512DQ-NEXT: vmovd %eax, %xmm2
|
||||
; AVX512DQ-NEXT: vpsllw %xmm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsllw %xmm2, %ymm1, %ymm1
|
||||
|
@ -126,8 +125,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_shift_v32i16:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vmovd %xmm1, %eax
|
||||
; AVX512BW-NEXT: movzwl %ax, %eax
|
||||
; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX512BW-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
|
|
|
@ -39,8 +39,7 @@ define void @convert_v3i32_to_v3i8(<3 x i8>* %dst.addr, <3 x i32>* %src.addr) no
|
|||
; X86-NEXT: pextrb $8, %xmm0, 2(%eax)
|
||||
; X86-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; X86-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; X86-NEXT: movd %xmm0, %ecx
|
||||
; X86-NEXT: movw %cx, (%eax)
|
||||
; X86-NEXT: pextrw $0, %xmm0, (%eax)
|
||||
; X86-NEXT: popl %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
|
@ -51,8 +50,7 @@ define void @convert_v3i32_to_v3i8(<3 x i8>* %dst.addr, <3 x i32>* %src.addr) no
|
|||
; X64-NEXT: pextrb $8, %xmm0, 2(%rdi)
|
||||
; X64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; X64-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; X64-NEXT: movd %xmm0, %eax
|
||||
; X64-NEXT: movw %ax, (%rdi)
|
||||
; X64-NEXT: pextrw $0, %xmm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%load = load <3 x i32>, <3 x i32>* %src.addr
|
||||
|
|
|
@ -172,8 +172,7 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
|
|||
; CHECK-NEXT: pextrb $8, %xmm1, 2(%rdi)
|
||||
; CHECK-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; CHECK-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: movw %ax, (%rdi)
|
||||
; CHECK-NEXT: pextrw $0, %xmm0, (%rdi)
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = load %i8vec3, %i8vec3* %ap, align 16
|
||||
|
@ -214,14 +213,12 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
|
|||
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <158,158,158,u>
|
||||
; CHECK-NEXT: pshufb %xmm0, %xmm1
|
||||
; CHECK-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; CHECK-NEXT: movd %xmm1, %eax
|
||||
; CHECK-NEXT: movw %ax, (%rsi)
|
||||
; CHECK-NEXT: pextrw $0, %xmm1, (%rsi)
|
||||
; CHECK-NEXT: movb $-98, 2(%rsi)
|
||||
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <1,1,1,u>
|
||||
; CHECK-NEXT: pshufb %xmm0, %xmm1
|
||||
; CHECK-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: movw %ax, (%rdx)
|
||||
; CHECK-NEXT: pextrw $0, %xmm0, (%rdx)
|
||||
; CHECK-NEXT: movb $1, 2(%rdx)
|
||||
; CHECK-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
|
||||
; CHECK-NEXT: movdqa %xmm0, %xmm1
|
||||
|
@ -230,8 +227,7 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
|
|||
; CHECK-NEXT: pextrb $8, %xmm1, 2(%rdi)
|
||||
; CHECK-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; CHECK-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: movw %ax, (%rdi)
|
||||
; CHECK-NEXT: pextrw $0, %xmm0, (%rdi)
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue