diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cebb4e20e59f..54613776a3e4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37126,6 +37126,28 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // Handle extract(scalar_to_vector(scalar_value)) for integers.
+  // TODO: Move to DAGCombine?
+  if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
+      SrcBC.getValueType().isInteger() &&
+      (SrcBC.getScalarValueSizeInBits() % SrcSVT.getSizeInBits()) == 0 &&
+      SrcBC.getScalarValueSizeInBits() ==
+          SrcBC.getOperand(0).getValueSizeInBits()) {
+    unsigned Scale = SrcBC.getScalarValueSizeInBits() / SrcSVT.getSizeInBits();
+    if (IdxC.ult(Scale)) {
+      unsigned Offset = IdxC.getZExtValue() * SrcVT.getScalarSizeInBits();
+      SDValue Scl = SrcBC.getOperand(0);
+      EVT SclVT = Scl.getValueType();
+      if (Offset) {
+        Scl = DAG.getNode(ISD::SRL, dl, SclVT, Scl,
+                          DAG.getShiftAmountConstant(Offset, SclVT, dl));
+      }
+      Scl = DAG.getZExtOrTrunc(Scl, dl, SrcVT.getScalarType());
+      Scl = DAG.getZExtOrTrunc(Scl, dl, VT);
+      return Scl;
+    }
+  }
+
   // Handle extract(truncate(x)) for 0'th index.
   // TODO: Treat this as a faux shuffle?
   // TODO: When can we use this for general indices?
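The combine above turns an integer extract_vector_elt of a bitcast scalar_to_vector back into operations on the original scalar: shift the selected lane down by Offset bits, then zero-extend or truncate to the result type. A minimal sketch of the node rewrite, assuming a hypothetical i64 value viewed as v8i8 with element index 1 (so Scale = 64 / 8 = 8 lanes and Offset = 1 * 8 = 8 bits):

      t1: i64 = <scalar source>
      t2: v1i64 = scalar_to_vector t1
      t3: v8i8 = bitcast t2
      t4: i8 = extract_vector_elt t3, Constant:i64<1>
    becomes
      t5: i64 = srl t1, Constant:i8<8>     ; shift lane 1 down to bit 0
      t4: i8 = truncate t5                 ; getZExtOrTrunc narrows to i8

This is why the regenerated tests below trade pextrb/pextrw/pextrd instructions for plain shrq/shrl on general-purpose registers.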
diff --git a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
index d181af7c75e7..2fd61879c163 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -17,18 +17,18 @@ target triple = "x86_64-unknown-linux-gnu"
 define i32 @main() nounwind uwtable {
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    pextrb $1, %xmm0, %ecx
-; CHECK-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    pextrb $1, %xmm1, %eax
+; CHECK-NEXT:    movq {{.*}}(%rip), %rsi
+; CHECK-NEXT:    movq {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    shrq $8, %rdx
+; CHECK-NEXT:    movsbl %al, %ecx
+; CHECK-NEXT:    shrq $8, %rax
 ; CHECK-NEXT:    cbtw
-; CHECK-NEXT:    pextrb $0, %xmm0, %edx
-; CHECK-NEXT:    pextrb $0, %xmm1, %esi
-; CHECK-NEXT:    idivb %cl
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    movsbl %sil, %eax
 ; CHECK-NEXT:    idivb %dl
-; CHECK-NEXT:    movzbl %cl, %ecx
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    idivb %sil
+; CHECK-NEXT:    movzbl %dl, %ecx
 ; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    movd %eax, %xmm0
 ; CHECK-NEXT:    pinsrb $1, %ecx, %xmm0
diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index 083d0c6f3d16..8dde976912b0 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -61,14 +61,10 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
 ;
 ; AVX12-LABEL: bitcast_v4i32_to_v2i2:
 ; AVX12:       # %bb.0:
-; AVX12-NEXT:    vmovmskps %xmm0, %eax
-; AVX12-NEXT:    movl %eax, %ecx
-; AVX12-NEXT:    shrl $2, %ecx
-; AVX12-NEXT:    vmovd %ecx, %xmm0
-; AVX12-NEXT:    andl $3, %eax
-; AVX12-NEXT:    vmovd %eax, %xmm1
-; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX12-NEXT:    vmovmskps %xmm0, %ecx
+; AVX12-NEXT:    movl %ecx, %eax
+; AVX12-NEXT:    shrl $2, %eax
+; AVX12-NEXT:    andl $3, %ecx
 ; AVX12-NEXT:    addb %cl, %al
 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX12-NEXT:    retq
@@ -77,15 +73,13 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
-; AVX512-NEXT:    kmovd %k0, %eax
-; AVX512-NEXT:    movzbl %al, %ecx
-; AVX512-NEXT:    shrl $2, %ecx
-; AVX512-NEXT:    andl $3, %ecx
-; AVX512-NEXT:    vmovd %ecx, %xmm0
+; AVX512-NEXT:    kmovd %k0, %ecx
+; AVX512-NEXT:    movzbl %cl, %eax
+; AVX512-NEXT:    shrl $2, %eax
 ; AVX512-NEXT:    andl $3, %eax
-; AVX512-NEXT:    vmovd %eax, %xmm1
-; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX512-NEXT:    vpbroadcastq %rax, %xmm0
+; AVX512-NEXT:    andl $3, %ecx
+; AVX512-NEXT:    vpextrb $8, %xmm0, %eax
 ; AVX512-NEXT:    addb %cl, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
@@ -116,14 +110,10 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
 ; AVX12-LABEL: bitcast_v8i16_to_v2i4:
 ; AVX12:       # %bb.0:
 ; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX12-NEXT:    vpmovmskb %xmm0, %eax
-; AVX12-NEXT:    movzbl %al, %ecx
-; AVX12-NEXT:    shrl $4, %ecx
-; AVX12-NEXT:    vmovd %ecx, %xmm0
-; AVX12-NEXT:    andl $15, %eax
-; AVX12-NEXT:    vmovd %eax, %xmm1
-; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX12-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX12-NEXT:    movzbl %cl, %eax
+; AVX12-NEXT:    shrl $4, %eax
+; AVX12-NEXT:    andl $15, %ecx
 ; AVX12-NEXT:    addb %cl, %al
 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX12-NEXT:    retq
@@ -131,14 +121,12 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
 ; AVX512-LABEL: bitcast_v8i16_to_v2i4:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpmovw2m %xmm0, %k0
-; AVX512-NEXT:    kmovd %k0, %eax
-; AVX512-NEXT:    movzbl %al, %ecx
-; AVX512-NEXT:    shrl $4, %ecx
-; AVX512-NEXT:    vmovd %ecx, %xmm0
-; AVX512-NEXT:    andl $15, %eax
-; AVX512-NEXT:    vmovd %eax, %xmm1
-; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX512-NEXT:    kmovd %k0, %ecx
+; AVX512-NEXT:    movzbl %cl, %eax
+; AVX512-NEXT:    shrl $4, %eax
+; AVX512-NEXT:    vpbroadcastq %rax, %xmm0
+; AVX512-NEXT:    andl $15, %ecx
+; AVX512-NEXT:    vpextrb $8, %xmm0, %eax
 ; AVX512-NEXT:    addb %cl, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
@@ -162,10 +150,9 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
 ;
 ; AVX12-LABEL: bitcast_v16i8_to_v2i8:
 ; AVX12:       # %bb.0:
-; AVX12-NEXT:    vpmovmskb %xmm0, %eax
-; AVX12-NEXT:    vmovd %eax, %xmm0
-; AVX12-NEXT:    vpextrb $0, %xmm0, %ecx
-; AVX12-NEXT:    vpextrb $1, %xmm0, %eax
+; AVX12-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX12-NEXT:    movl %ecx, %eax
+; AVX12-NEXT:    shrl $8, %eax
 ; AVX12-NEXT:    addb %cl, %al
 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX12-NEXT:    retq
@@ -210,14 +197,10 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
 ;
 ; AVX12-LABEL: bitcast_v4i64_to_v2i2:
 ; AVX12:       # %bb.0:
-; AVX12-NEXT:    vmovmskpd %ymm0, %eax
-; AVX12-NEXT:    movl %eax, %ecx
-; AVX12-NEXT:    shrl $2, %ecx
-; AVX12-NEXT:    vmovd %ecx, %xmm0
-; AVX12-NEXT:    andl $3, %eax
-; AVX12-NEXT:    vmovd %eax, %xmm1
-; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX12-NEXT:    vmovmskpd %ymm0, %ecx
+; AVX12-NEXT:    movl %ecx, %eax
+; AVX12-NEXT:    shrl $2, %eax
+; AVX12-NEXT:    andl $3, %ecx
 ; AVX12-NEXT:    addb %cl, %al
 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX12-NEXT:    vzeroupper
@@ -227,15 +210,13 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT:    vpcmpgtq %ymm0, %ymm1, %k0
-; AVX512-NEXT:    kmovd %k0, %eax
-; AVX512-NEXT:    movzbl %al, %ecx
-; AVX512-NEXT:    shrl $2, %ecx
-; AVX512-NEXT:    andl $3, %ecx
-; AVX512-NEXT:    vmovd %ecx, %xmm0
+; AVX512-NEXT:    kmovd %k0, %ecx
+; AVX512-NEXT:    movzbl %cl, %eax
+; AVX512-NEXT:    shrl $2, %eax
 ; AVX512-NEXT:    andl $3, %eax
-; AVX512-NEXT:    vmovd %eax, %xmm1
-; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX512-NEXT:    vpbroadcastq %rax, %xmm0
+; AVX512-NEXT:    andl $3, %ecx
+; AVX512-NEXT:    vpextrb $8, %xmm0, %eax
 ; AVX512-NEXT:    addb %cl, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
@@ -267,14 +248,10 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
 ;
 ; AVX12-LABEL: bitcast_v8i32_to_v2i4:
 ; AVX12:       # %bb.0:
-; AVX12-NEXT:    vmovmskps %ymm0, %eax
-; AVX12-NEXT:    movl %eax, %ecx
-; AVX12-NEXT:    shrl $4, %ecx
-; AVX12-NEXT:    vmovd %ecx, %xmm0
-; AVX12-NEXT:    andl $15, %eax
-; AVX12-NEXT:    vmovd %eax, %xmm1
-; AVX12-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX12-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX12-NEXT:    vmovmskps %ymm0, %ecx
+; AVX12-NEXT:    movl %ecx, %eax
+; AVX12-NEXT:    shrl $4, %eax
+; AVX12-NEXT:    andl $15, %ecx
 ; AVX12-NEXT:    addb %cl, %al
 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX12-NEXT:    vzeroupper
@@ -284,14 +261,12 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
-; AVX512-NEXT:    kmovd %k0, %eax
-; AVX512-NEXT:    movzbl %al, %ecx
-; AVX512-NEXT:    shrl $4, %ecx
-; AVX512-NEXT:    vmovd %ecx, %xmm0
-; AVX512-NEXT:    andl $15, %eax
-; AVX512-NEXT:    vmovd %eax, %xmm1
-; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX512-NEXT:    kmovd %k0, %ecx
+; AVX512-NEXT:    movzbl %cl, %eax
+; AVX512-NEXT:    shrl $4, %eax
+; AVX512-NEXT:    vpbroadcastq %rax, %xmm0
+; AVX512-NEXT:    andl $15, %ecx
+; AVX512-NEXT:    vpextrb $8, %xmm0, %eax
 ; AVX512-NEXT:    addb %cl, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
@@ -319,10 +294,9 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    vmovd %eax, %xmm0
-; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
-; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    movl %ecx, %eax
+; AVX1-NEXT:    shrl $8, %eax
 ; AVX1-NEXT:    addb %cl, %al
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -334,10 +308,9 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
 ; AVX2-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
-; AVX2-NEXT:    vmovd %eax, %xmm0
-; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
-; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX2-NEXT:    movl %ecx, %eax
+; AVX2-NEXT:    shrl $8, %eax
 ; AVX2-NEXT:    addb %cl, %al
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
@@ -365,23 +338,17 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
 define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
 ; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
 ; SSE2-SSSE3:       # %bb.0:
-; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %ecx
-; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %eax
-; SSE2-SSSE3-NEXT:    shll $16, %eax
-; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
-; SSE2-SSSE3-NEXT:    pextrw $1, %xmm0, %eax
+; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
+; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
 ; SSE2-SSSE3-NEXT:    addl %ecx, %eax
 ; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE2-SSSE3-NEXT:    retq
 ;
 ; AVX1-LABEL: bitcast_v32i8_to_v2i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    shll $16, %eax
-; AVX1-NEXT:    vmovd %eax, %xmm0
-; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
 ; AVX1-NEXT:    addl %ecx, %eax
 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -390,8 +357,8 @@ define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
 ; AVX2-LABEL: bitcast_v32i8_to_v2i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
-; AVX2-NEXT:    vmovd %ecx, %xmm0
-; AVX2-NEXT:    vpextrw $1, %xmm0, %eax
+; AVX2-NEXT:    movl %ecx, %eax
+; AVX2-NEXT:    shrl $16, %eax
 ; AVX2-NEXT:    addl %ecx, %eax
 ; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX2-NEXT:    vzeroupper
@@ -455,14 +422,10 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovmskps %ymm0, %eax
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $4, %ecx
-; AVX1-NEXT:    vmovd %ecx, %xmm0
-; AVX1-NEXT:    andl $15, %eax
-; AVX1-NEXT:    vmovd %eax, %xmm1
-; AVX1-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX1-NEXT:    vmovmskps %ymm0, %ecx
+; AVX1-NEXT:    movl %ecx, %eax
+; AVX1-NEXT:    shrl $4, %eax
+; AVX1-NEXT:    andl $15, %ecx
 ; AVX1-NEXT:    addb %cl, %al
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -472,14 +435,10 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $4, %ecx
-; AVX2-NEXT:    vmovd %ecx, %xmm0
-; AVX2-NEXT:    andl $15, %eax
-; AVX2-NEXT:    vmovd %eax, %xmm1
-; AVX2-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX2-NEXT:    vmovmskps %ymm0, %ecx
+; AVX2-NEXT:    movl %ecx, %eax
+; AVX2-NEXT:    shrl $4, %eax
+; AVX2-NEXT:    andl $15, %ecx
 ; AVX2-NEXT:    addb %cl, %al
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
@@ -489,14 +448,12 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
-; AVX512-NEXT:    kmovd %k0, %eax
-; AVX512-NEXT:    movzbl %al, %ecx
-; AVX512-NEXT:    shrl $4, %ecx
-; AVX512-NEXT:    vmovd %ecx, %xmm0
-; AVX512-NEXT:    andl $15, %eax
-; AVX512-NEXT:    vmovd %eax, %xmm1
-; AVX512-NEXT:    vpextrb $0, %xmm1, %ecx
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX512-NEXT:    kmovd %k0, %ecx
+; AVX512-NEXT:    movzbl %cl, %eax
+; AVX512-NEXT:    shrl $4, %eax
+; AVX512-NEXT:    vpbroadcastq %rax, %xmm0
+; AVX512-NEXT:    andl $15, %ecx
+; AVX512-NEXT:    vpextrb $8, %xmm0, %eax
 ; AVX512-NEXT:    addb %cl, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
@@ -529,10 +486,9 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    vmovd %eax, %xmm0
-; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
-; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    movl %ecx, %eax
+; AVX1-NEXT:    shrl $8, %eax
 ; AVX1-NEXT:    addb %cl, %al
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -547,10 +503,9 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
-; AVX2-NEXT:    vmovd %eax, %xmm0
-; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
-; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
+; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX2-NEXT:    movl %ecx, %eax
+; AVX2-NEXT:    shrl $8, %eax
 ; AVX2-NEXT:    addb %cl, %al
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
@@ -579,28 +534,22 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
 define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
 ; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
 ; SSE2-SSSE3:       # %bb.0:
-; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
-; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %ecx
 ; SSE2-SSSE3-NEXT:    packsswb %xmm3, %xmm2
-; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %eax
-; SSE2-SSSE3-NEXT:    shll $16, %eax
-; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
-; SSE2-SSSE3-NEXT:    pextrw $1, %xmm0, %eax
+; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %ecx
+; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
 ; SSE2-SSSE3-NEXT:    addl %ecx, %eax
 ; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE2-SSSE3-NEXT:    retq
 ;
 ; AVX1-LABEL: bitcast_v32i16_to_v2i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    shll $16, %eax
-; AVX1-NEXT:    vmovd %eax, %xmm0
-; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
 ; AVX1-NEXT:    addl %ecx, %eax
 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -611,8 +560,8 @@ define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
 ; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
-; AVX2-NEXT:    vmovd %ecx, %xmm0
-; AVX2-NEXT:    vpextrw $1, %xmm0, %eax
+; AVX2-NEXT:    movl %ecx, %eax
+; AVX2-NEXT:    shrl $16, %eax
 ; AVX2-NEXT:    addl %ecx, %eax
 ; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX2-NEXT:    vzeroupper
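Every hunk in bitcast-vector-bool.ll above exercises the same IR shape: a compare mask is bitcast to a two-element vector of narrow integers, both halves are extracted, and the halves are added. A reduced sketch of that pattern (reconstructed for illustration, not quoted from the test file):

      define i2 @sketch_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
        %cmp  = icmp slt <4 x i32> %a0, zeroinitializer
        %mask = bitcast <4 x i1> %cmp to <2 x i2>
        %lo   = extractelement <2 x i2> %mask, i32 0
        %hi   = extractelement <2 x i2> %mask, i32 1
        %sum  = add i2 %lo, %hi
        ret i2 %sum
      }

The mask already lives in a general-purpose register as a movmsk result, so with the combine the two extracts become a shrl/andl pair on that register instead of a round trip through vmovd/vpextrb.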
diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll
index e4cc9b84773b..674c79de4c73 100644
--- a/llvm/test/CodeGen/X86/oddsubvector.ll
+++ b/llvm/test/CodeGen/X86/oddsubvector.ll
@@ -9,65 +9,73 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+xop | FileCheck %s --check-prefixes=AVX,XOP
 
 define void @insert_v7i8_v2i16_2(<7 x i8> *%a0, <2 x i16> *%a1) nounwind {
-; SSE2-LABEL: insert_v7i8_v2i16_2:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; SSE2-NEXT:    pextrw $3, %xmm1, %eax
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    movd %xmm1, (%rdi)
-; SSE2-NEXT:    movb %al, 6(%rdi)
-; SSE2-NEXT:    pextrw $1, %xmm0, %eax
-; SSE2-NEXT:    movw %ax, 4(%rdi)
-; SSE2-NEXT:    retq
-;
-; SSE42-LABEL: insert_v7i8_v2i16_2:
-; SSE42:       # %bb.0:
-; SSE42-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE42-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; SSE42-NEXT:    pextrb $6, %xmm1, 6(%rdi)
-; SSE42-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE42-NEXT:    pextrw $1, %xmm0, 4(%rdi)
-; SSE42-NEXT:    movd %xmm1, (%rdi)
-; SSE42-NEXT:    retq
+; SSE-LABEL: insert_v7i8_v2i16_2:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl (%rsi), %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    movq (%rdi), %rcx
+; SSE-NEXT:    movq %rcx, %xmm1
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE-NEXT:    shrq $48, %rcx
+; SSE-NEXT:    movb %cl, 6(%rdi)
+; SSE-NEXT:    shrl $16, %eax
+; SSE-NEXT:    movw %ax, 4(%rdi)
+; SSE-NEXT:    movd %xmm1, (%rdi)
+; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: insert_v7i8_v2i16_2:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; AVX1-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
-; AVX1-NEXT:    vpextrw $1, %xmm0, 4(%rdi)
-; AVX1-NEXT:    vmovd %xmm2, (%rdi)
+; AVX1-NEXT:    movl (%rsi), %eax
+; AVX1-NEXT:    vmovd %eax, %xmm0
+; AVX1-NEXT:    movq (%rdi), %rcx
+; AVX1-NEXT:    vmovq %rcx, %xmm1
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX1-NEXT:    shrq $48, %rcx
+; AVX1-NEXT:    movb %cl, 6(%rdi)
+; AVX1-NEXT:    shrl $16, %eax
+; AVX1-NEXT:    movw %ax, 4(%rdi)
+; AVX1-NEXT:    vmovd %xmm0, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: insert_v7i8_v2i16_2:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; AVX2-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
-; AVX2-NEXT:    vpextrw $1, %xmm0, 4(%rdi)
-; AVX2-NEXT:    vmovd %xmm2, (%rdi)
+; AVX2-NEXT:    movl (%rsi), %eax
+; AVX2-NEXT:    vmovd %eax, %xmm0
+; AVX2-NEXT:    movq (%rdi), %rcx
+; AVX2-NEXT:    vmovq %rcx, %xmm1
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX2-NEXT:    shrq $48, %rcx
+; AVX2-NEXT:    movb %cl, 6(%rdi)
+; AVX2-NEXT:    shrl $16, %eax
+; AVX2-NEXT:    movw %ax, 4(%rdi)
+; AVX2-NEXT:    vmovd %xmm0, (%rdi)
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: insert_v7i8_v2i16_2:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; AVX512-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
-; AVX512-NEXT:    vpextrw $1, %xmm0, 4(%rdi)
-; AVX512-NEXT:    vmovd %xmm2, (%rdi)
+; AVX512-NEXT:    movl (%rsi), %eax
+; AVX512-NEXT:    vmovd %eax, %xmm0
+; AVX512-NEXT:    movq (%rdi), %rcx
+; AVX512-NEXT:    vmovq %rcx, %xmm1
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX512-NEXT:    shrq $48, %rcx
+; AVX512-NEXT:    movb %cl, 6(%rdi)
+; AVX512-NEXT:    shrl $16, %eax
+; AVX512-NEXT:    movw %ax, 4(%rdi)
+; AVX512-NEXT:    vmovd %xmm0, (%rdi)
 ; AVX512-NEXT:    retq
 ;
 ; XOP-LABEL: insert_v7i8_v2i16_2:
 ; XOP:       # %bb.0:
-; XOP-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; XOP-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; XOP-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
+; XOP-NEXT:    movl (%rsi), %eax
+; XOP-NEXT:    vmovd %eax, %xmm0
+; XOP-NEXT:    movq (%rdi), %rcx
+; XOP-NEXT:    vmovq %rcx, %xmm1
 ; XOP-NEXT:    insertq {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,1,2,3],xmm1[6,7,u,u,u,u,u,u,u,u]
-; XOP-NEXT:    vpextrw $1, %xmm0, 4(%rdi)
+; XOP-NEXT:    shrq $48, %rcx
+; XOP-NEXT:    movb %cl, 6(%rdi)
+; XOP-NEXT:    shrl $16, %eax
+; XOP-NEXT:    movw %ax, 4(%rdi)
 ; XOP-NEXT:    vmovd %xmm1, (%rdi)
 ; XOP-NEXT:    retq
   %1 = load <2 x i16>, <2 x i16> *%a1
diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll
index c6deb686e961..f658df20990b 100644
--- a/llvm/test/CodeGen/X86/scalar_widen_div.ll
+++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll
@@ -13,19 +13,21 @@ define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)
 ; CHECK-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movslq -{{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    pextrd $1, %xmm0, %eax
-; CHECK-NEXT:    pextrd $1, %xmm1, %esi
+; CHECK-NEXT:    movq (%rdi,%rcx,8), %rdi
+; CHECK-NEXT:    movq (%rsi,%rcx,8), %r10
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    shrq $32, %rax
+; CHECK-NEXT:    movq %r10, %rsi
+; CHECK-NEXT:    shrq $32, %rsi
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    cltd
 ; CHECK-NEXT:    idivl %esi
-; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    movd %xmm1, %edi
+; CHECK-NEXT:    movl %eax, %r9d
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    cltd
-; CHECK-NEXT:    idivl %edi
+; CHECK-NEXT:    idivl %r10d
 ; CHECK-NEXT:    movd %eax, %xmm0
-; CHECK-NEXT:    pinsrd $1, %esi, %xmm0
+; CHECK-NEXT:    pinsrd $1, %r9d, %xmm0
 ; CHECK-NEXT:    movq %xmm0, (%r8,%rcx,8)
 ; CHECK-NEXT:    retq
 entry:
diff --git a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
index 6e6dd6982993..b3accf80744f 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
@@ -324,10 +324,11 @@ define void @test_udiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
 ; X64-LABEL: test_udiv_v2i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT:    movd %xmm0, %eax
-; X64-NEXT:    movd %xmm1, %esi
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq %rax, %xmm0
+; X64-NEXT:    movq (%rsi), %rsi
+; X64-NEXT:    movq %rsi, %xmm1
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divl %esi
 ; X64-NEXT:    movd %eax, %xmm2
@@ -377,10 +378,11 @@ define void @test_urem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
 ; X64-LABEL: test_urem_v2i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT:    movd %xmm0, %eax
-; X64-NEXT:    movd %xmm1, %esi
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq %rax, %xmm0
+; X64-NEXT:    movq (%rsi), %rsi
+; X64-NEXT:    movq %rsi, %xmm1
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divl %esi
 ; X64-NEXT:    movd %edx, %xmm2
@@ -430,10 +432,11 @@ define void @test_sdiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
 ; X64-LABEL: test_sdiv_v2i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT:    movd %xmm0, %eax
-; X64-NEXT:    movd %xmm1, %esi
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq %rax, %xmm0
+; X64-NEXT:    movq (%rsi), %rsi
+; X64-NEXT:    movq %rsi, %xmm1
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    cltd
 ; X64-NEXT:    idivl %esi
 ; X64-NEXT:    movd %eax, %xmm2
@@ -488,10 +491,11 @@ define void @test_srem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
 ; X64-LABEL: test_srem_v2i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT:    movd %xmm0, %eax
-; X64-NEXT:    movd %xmm1, %esi
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq %rax, %xmm0
+; X64-NEXT:    movq (%rsi), %rsi
+; X64-NEXT:    movq %rsi, %xmm1
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    cltd
 ; X64-NEXT:    idivl %esi
 ; X64-NEXT:    movd %eax, %xmm2
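All of these CHECK blocks are in utils/update_llc_test_checks.py format, so after a codegen change like the one above they are regenerated mechanically rather than edited by hand; a typical invocation (illustrative, paths assume an LLVM source checkout) is:

      $ python llvm/utils/update_llc_test_checks.py \
          llvm/test/CodeGen/X86/vector-idiv-v2i32.ll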