[X86][SSE] combineExtractWithShuffle - extract(bitcast(scalar_to_vector(x))) --> x

Removes some unnecessary gpr<-->fpu traffic
This commit is contained in:
Simon Pilgrim 2020-01-22 15:47:59 +00:00
parent 52ec7379ad
commit a14aa7dabd
6 changed files with 199 additions and 214 deletions

View File

@ -37126,6 +37126,28 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
}
}
// Handle extract(scalar_to_vector(scalar_value)) for integers.
// TODO: Move to DAGCombine?
if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
SrcBC.getValueType().isInteger() &&
(SrcBC.getScalarValueSizeInBits() % SrcSVT.getSizeInBits()) == 0 &&
SrcBC.getScalarValueSizeInBits() ==
SrcBC.getOperand(0).getValueSizeInBits()) {
unsigned Scale = SrcBC.getScalarValueSizeInBits() / SrcSVT.getSizeInBits();
if (IdxC.ult(Scale)) {
unsigned Offset = IdxC.getZExtValue() * SrcVT.getScalarSizeInBits();
SDValue Scl = SrcBC.getOperand(0);
EVT SclVT = Scl.getValueType();
if (Offset) {
Scl = DAG.getNode(ISD::SRL, dl, SclVT, Scl,
DAG.getShiftAmountConstant(Offset, SclVT, dl));
}
Scl = DAG.getZExtOrTrunc(Scl, dl, SrcVT.getScalarType());
Scl = DAG.getZExtOrTrunc(Scl, dl, VT);
return Scl;
}
}
// Handle extract(truncate(x)) for 0'th index.
// TODO: Treat this as a faux shuffle?
// TODO: When can we use this for general indices?

View File

@ -17,18 +17,18 @@ target triple = "x86_64-unknown-linux-gnu"
define i32 @main() nounwind uwtable {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pextrb $1, %xmm0, %ecx
; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: pextrb $1, %xmm1, %eax
; CHECK-NEXT: movq {{.*}}(%rip), %rsi
; CHECK-NEXT: movq {{.*}}(%rip), %rax
; CHECK-NEXT: movq %rsi, %rdx
; CHECK-NEXT: shrq $8, %rdx
; CHECK-NEXT: movsbl %al, %ecx
; CHECK-NEXT: shrq $8, %rax
; CHECK-NEXT: cbtw
; CHECK-NEXT: pextrb $0, %xmm0, %edx
; CHECK-NEXT: pextrb $0, %xmm1, %esi
; CHECK-NEXT: idivb %cl
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: movsbl %sil, %eax
; CHECK-NEXT: idivb %dl
; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: movl %eax, %edx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: idivb %sil
; CHECK-NEXT: movzbl %dl, %ecx
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: pinsrb $1, %ecx, %xmm0

View File

@ -61,14 +61,10 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
;
; AVX12-LABEL: bitcast_v4i32_to_v2i2:
; AVX12: # %bb.0:
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $2, %ecx
; AVX12-NEXT: vmovd %ecx, %xmm0
; AVX12-NEXT: andl $3, %eax
; AVX12-NEXT: vmovd %eax, %xmm1
; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: vmovmskps %xmm0, %ecx
; AVX12-NEXT: movl %ecx, %eax
; AVX12-NEXT: shrl $2, %eax
; AVX12-NEXT: andl $3, %ecx
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
@ -77,15 +73,13 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $2, %ecx
; AVX512-NEXT: andl $3, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: kmovd %k0, %ecx
; AVX512-NEXT: movzbl %cl, %eax
; AVX512-NEXT: shrl $2, %eax
; AVX512-NEXT: andl $3, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
; AVX512-NEXT: andl $3, %ecx
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
@ -116,14 +110,10 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; AVX12-LABEL: bitcast_v8i16_to_v2i4:
; AVX12: # %bb.0:
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: movzbl %al, %ecx
; AVX12-NEXT: shrl $4, %ecx
; AVX12-NEXT: vmovd %ecx, %xmm0
; AVX12-NEXT: andl $15, %eax
; AVX12-NEXT: vmovd %eax, %xmm1
; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: vpmovmskb %xmm0, %ecx
; AVX12-NEXT: movzbl %cl, %eax
; AVX12-NEXT: shrl $4, %eax
; AVX12-NEXT: andl $15, %ecx
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
@ -131,14 +121,12 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; AVX512-LABEL: bitcast_v8i16_to_v2i4:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovw2m %xmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $4, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $15, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: kmovd %k0, %ecx
; AVX512-NEXT: movzbl %cl, %eax
; AVX512-NEXT: shrl $4, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
@ -162,10 +150,9 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
;
; AVX12-LABEL: bitcast_v16i8_to_v2i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: vmovd %eax, %xmm0
; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
; AVX12-NEXT: vpextrb $1, %xmm0, %eax
; AVX12-NEXT: vpmovmskb %xmm0, %ecx
; AVX12-NEXT: movl %ecx, %eax
; AVX12-NEXT: shrl $8, %eax
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
@ -210,14 +197,10 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
;
; AVX12-LABEL: bitcast_v4i64_to_v2i2:
; AVX12: # %bb.0:
; AVX12-NEXT: vmovmskpd %ymm0, %eax
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $2, %ecx
; AVX12-NEXT: vmovd %ecx, %xmm0
; AVX12-NEXT: andl $3, %eax
; AVX12-NEXT: vmovd %eax, %xmm1
; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: vmovmskpd %ymm0, %ecx
; AVX12-NEXT: movl %ecx, %eax
; AVX12-NEXT: shrl $2, %eax
; AVX12-NEXT: andl $3, %ecx
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: vzeroupper
@ -227,15 +210,13 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $2, %ecx
; AVX512-NEXT: andl $3, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: kmovd %k0, %ecx
; AVX512-NEXT: movzbl %cl, %eax
; AVX512-NEXT: shrl $2, %eax
; AVX512-NEXT: andl $3, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
; AVX512-NEXT: andl $3, %ecx
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@ -267,14 +248,10 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
;
; AVX12-LABEL: bitcast_v8i32_to_v2i4:
; AVX12: # %bb.0:
; AVX12-NEXT: vmovmskps %ymm0, %eax
; AVX12-NEXT: movl %eax, %ecx
; AVX12-NEXT: shrl $4, %ecx
; AVX12-NEXT: vmovd %ecx, %xmm0
; AVX12-NEXT: andl $15, %eax
; AVX12-NEXT: vmovd %eax, %xmm1
; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: vmovmskps %ymm0, %ecx
; AVX12-NEXT: movl %ecx, %eax
; AVX12-NEXT: shrl $4, %eax
; AVX12-NEXT: andl $15, %ecx
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: vzeroupper
@ -284,14 +261,12 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $4, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $15, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: kmovd %k0, %ecx
; AVX512-NEXT: movzbl %cl, %eax
; AVX512-NEXT: shrl $4, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@ -319,10 +294,9 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $8, %eax
; AVX1-NEXT: addb %cl, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@ -334,10 +308,9 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: vpmovmskb %xmm0, %ecx
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: addb %cl, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@ -365,23 +338,17 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %ecx
; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %eax
; SSE2-SSSE3-NEXT: shll $16, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: addl %ecx, %eax
; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v32i8_to_v2i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpmovmskb %xmm1, %ecx
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpextrw $1, %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@ -390,8 +357,8 @@ define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; AVX2-LABEL: bitcast_v32i8_to_v2i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: vpextrw $1, %xmm0, %eax
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@ -455,14 +422,10 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $4, %ecx
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: vmovmskps %ymm0, %ecx
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $4, %eax
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: addb %cl, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@ -472,14 +435,10 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $4, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: vmovmskps %ymm0, %ecx
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $4, %eax
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: addb %cl, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@ -489,14 +448,12 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $4, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $15, %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: kmovd %k0, %ecx
; AVX512-NEXT: movzbl %cl, %eax
; AVX512-NEXT: shrl $4, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
; AVX512-NEXT: andl $15, %ecx
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@ -529,10 +486,9 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $8, %eax
; AVX1-NEXT: addb %cl, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@ -547,10 +503,9 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: vpmovmskb %xmm0, %ecx
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: addb %cl, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@ -579,28 +534,22 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %ecx
; SSE2-SSSE3-NEXT: packsswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
; SSE2-SSSE3-NEXT: shll $16, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %ecx
; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: addl %ecx, %eax
; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v32i16_to_v2i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmovmskb %xmm1, %ecx
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpextrw $1, %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@ -611,8 +560,8 @@ define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: vpextrw $1, %xmm0, %eax
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper

View File

@ -9,65 +9,73 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+xop | FileCheck %s --check-prefixes=AVX,XOP
define void @insert_v7i8_v2i16_2(<7 x i8> *%a0, <2 x i16> *%a1) nounwind {
; SSE2-LABEL: insert_v7i8_v2i16_2:
; SSE2: # %bb.0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: pextrw $3, %xmm1, %eax
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: movd %xmm1, (%rdi)
; SSE2-NEXT: movb %al, 6(%rdi)
; SSE2-NEXT: pextrw $1, %xmm0, %eax
; SSE2-NEXT: movw %ax, 4(%rdi)
; SSE2-NEXT: retq
;
; SSE42-LABEL: insert_v7i8_v2i16_2:
; SSE42: # %bb.0:
; SSE42-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE42-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE42-NEXT: pextrb $6, %xmm1, 6(%rdi)
; SSE42-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE42-NEXT: pextrw $1, %xmm0, 4(%rdi)
; SSE42-NEXT: movd %xmm1, (%rdi)
; SSE42-NEXT: retq
; SSE-LABEL: insert_v7i8_v2i16_2:
; SSE: # %bb.0:
; SSE-NEXT: movl (%rsi), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movq (%rdi), %rcx
; SSE-NEXT: movq %rcx, %xmm1
; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT: shrq $48, %rcx
; SSE-NEXT: movb %cl, 6(%rdi)
; SSE-NEXT: shrl $16, %eax
; SSE-NEXT: movw %ax, 4(%rdi)
; SSE-NEXT: movd %xmm1, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_v7i8_v2i16_2:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX1-NEXT: vpextrb $6, %xmm1, 6(%rdi)
; AVX1-NEXT: vpextrw $1, %xmm0, 4(%rdi)
; AVX1-NEXT: vmovd %xmm2, (%rdi)
; AVX1-NEXT: movl (%rsi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: movq (%rdi), %rcx
; AVX1-NEXT: vmovq %rcx, %xmm1
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX1-NEXT: shrq $48, %rcx
; AVX1-NEXT: movb %cl, 6(%rdi)
; AVX1-NEXT: shrl $16, %eax
; AVX1-NEXT: movw %ax, 4(%rdi)
; AVX1-NEXT: vmovd %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_v7i8_v2i16_2:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX2-NEXT: vpextrb $6, %xmm1, 6(%rdi)
; AVX2-NEXT: vpextrw $1, %xmm0, 4(%rdi)
; AVX2-NEXT: vmovd %xmm2, (%rdi)
; AVX2-NEXT: movl (%rsi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: movq (%rdi), %rcx
; AVX2-NEXT: vmovq %rcx, %xmm1
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX2-NEXT: shrq $48, %rcx
; AVX2-NEXT: movb %cl, 6(%rdi)
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: movw %ax, 4(%rdi)
; AVX2-NEXT: vmovd %xmm0, (%rdi)
; AVX2-NEXT: retq
;
; AVX512-LABEL: insert_v7i8_v2i16_2:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX512-NEXT: vpextrb $6, %xmm1, 6(%rdi)
; AVX512-NEXT: vpextrw $1, %xmm0, 4(%rdi)
; AVX512-NEXT: vmovd %xmm2, (%rdi)
; AVX512-NEXT: movl (%rsi), %eax
; AVX512-NEXT: vmovd %eax, %xmm0
; AVX512-NEXT: movq (%rdi), %rcx
; AVX512-NEXT: vmovq %rcx, %xmm1
; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX512-NEXT: shrq $48, %rcx
; AVX512-NEXT: movb %cl, 6(%rdi)
; AVX512-NEXT: shrl $16, %eax
; AVX512-NEXT: movw %ax, 4(%rdi)
; AVX512-NEXT: vmovd %xmm0, (%rdi)
; AVX512-NEXT: retq
;
; XOP-LABEL: insert_v7i8_v2i16_2:
; XOP: # %bb.0:
; XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; XOP-NEXT: vpextrb $6, %xmm1, 6(%rdi)
; XOP-NEXT: movl (%rsi), %eax
; XOP-NEXT: vmovd %eax, %xmm0
; XOP-NEXT: movq (%rdi), %rcx
; XOP-NEXT: vmovq %rcx, %xmm1
; XOP-NEXT: insertq {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,1,2,3],xmm1[6,7,u,u,u,u,u,u,u,u]
; XOP-NEXT: vpextrw $1, %xmm0, 4(%rdi)
; XOP-NEXT: shrq $48, %rcx
; XOP-NEXT: movb %cl, 6(%rdi)
; XOP-NEXT: shrl $16, %eax
; XOP-NEXT: movw %ax, 4(%rdi)
; XOP-NEXT: vmovd %xmm1, (%rdi)
; XOP-NEXT: retq
%1 = load <2 x i16>, <2 x i16> *%a1

View File

@ -13,19 +13,21 @@ define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)
; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: pextrd $1, %xmm0, %eax
; CHECK-NEXT: pextrd $1, %xmm1, %esi
; CHECK-NEXT: movq (%rdi,%rcx,8), %rdi
; CHECK-NEXT: movq (%rsi,%rcx,8), %r10
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: movq %r10, %rsi
; CHECK-NEXT: shrq $32, %rsi
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %esi
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movd %xmm1, %edi
; CHECK-NEXT: movl %eax, %r9d
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %edi
; CHECK-NEXT: idivl %r10d
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: pinsrd $1, %esi, %xmm0
; CHECK-NEXT: pinsrd $1, %r9d, %xmm0
; CHECK-NEXT: movq %xmm0, (%r8,%rcx,8)
; CHECK-NEXT: retq
entry:

View File

@ -324,10 +324,11 @@ define void @test_udiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
; X64-LABEL: test_udiv_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: movd %xmm1, %esi
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, %xmm0
; X64-NEXT: movq (%rsi), %rsi
; X64-NEXT: movq %rsi, %xmm1
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: movd %eax, %xmm2
@ -377,10 +378,11 @@ define void @test_urem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
; X64-LABEL: test_urem_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: movd %xmm1, %esi
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, %xmm0
; X64-NEXT: movq (%rsi), %rsi
; X64-NEXT: movq %rsi, %xmm1
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: movd %edx, %xmm2
@ -430,10 +432,11 @@ define void @test_sdiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
; X64-LABEL: test_sdiv_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: movd %xmm1, %esi
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, %xmm0
; X64-NEXT: movq (%rsi), %rsi
; X64-NEXT: movq %rsi, %xmm1
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: movd %eax, %xmm2
@ -488,10 +491,11 @@ define void @test_srem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
; X64-LABEL: test_srem_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: movd %xmm1, %esi
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, %xmm0
; X64-NEXT: movq (%rsi), %rsi
; X64-NEXT: movq %rsi, %xmm1
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: movd %eax, %xmm2