diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 769fc7262499..979f79478b05 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6982,13 +6982,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); + EVT VT = InVec.getValueType(); + EVT NVT = N->getValueType(0); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. SDValue InOp = InVec.getOperand(0); - EVT NVT = N->getValueType(0); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); @@ -6996,6 +6997,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return InOp; } + SDValue EltNo = N->getOperand(1); + bool ConstEltNo = isa(EltNo); + + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. + // We only perform this optimization before the op legalization phase because + // we may introduce new vector instructions which are not backed by TD patterns. + // For example on AVX, extracting elements from a wide vector without using + // extract_subvector. + if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE + && ConstEltNo && !LegalOperations) { + int Elt = cast(EltNo)->getZExtValue(); + int NumElem = VT.getVectorNumElements(); + ShuffleVectorSDNode *SVOp = cast(InVec); + // Find the new index to extract from. + int OrigElt = SVOp->getMaskElt(Elt); + + // Extracting an undef index is undef. + if (OrigElt == -1) + return DAG.getUNDEF(NVT); + + // Select the right vector half to extract from. + if (OrigElt < NumElem) { + InVec = InVec->getOperand(0); + } else { + InVec = InVec->getOperand(1); + OrigElt -= NumElem; + } + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, + InVec, DAG.getConstant(OrigElt, MVT::i32)); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); @@ -7003,13 +7036,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) - SDValue EltNo = N->getOperand(1); - if (isa(EltNo)) { + if (ConstEltNo) { int Elt = cast(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; - EVT VT = InVec.getValueType(); EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; diff --git a/llvm/test/CodeGen/CellSPU/rotate_ops.ll b/llvm/test/CodeGen/CellSPU/rotate_ops.ll index b1219e6f56e5..8b7af20b4a98 100644 --- a/llvm/test/CodeGen/CellSPU/rotate_ops.ll +++ b/llvm/test/CodeGen/CellSPU/rotate_ops.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep rot %t1.s | count 86 +; RUN: grep rot %t1.s | count 85 ; RUN: grep roth %t1.s | count 8 ; RUN: grep roti.*5 %t1.s | count 1 ; RUN: grep roti.*27 %t1.s | count 1 @@ -163,7 +163,7 @@ define i8 @rotri8(i8 %A) { define <2 x float> @test1(<4 x float> %param ) { ; CHECK: test1 -; CHECK: rotqbyi +; CHECK: shufb %el = extractelement <4 x float> %param, i32 1 %vec1 = insertelement <1 x float> undef, float %el, i32 0 %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32> diff --git a/llvm/test/CodeGen/X86/avx-shuffle.ll b/llvm/test/CodeGen/X86/avx-shuffle.ll index 232fc7851205..ee038ce21ee5 100644 --- a/llvm/test/CodeGen/X86/avx-shuffle.ll +++ b/llvm/test/CodeGen/X86/avx-shuffle.ll @@ -12,11 +12,11 @@ define <4 x float> @test1(<4 x float> %a) nounwind { ; rdar://10538417 define <3 x i64> @test2(<2 x i64> %v) nounwind readnone { ; CHECK: test2: -; CHECK: vxorpd -; CHECK: vperm2f128 +; CHECK: vinsertf128 %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32> %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> ret <3 x i64> %2 +; CHECK: ret } define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind { @@ -24,6 +24,7 @@ define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind { ret <4 x i64> %c ; CHECK: test3: ; CHECK: vperm2f128 +; CHECK: ret } define <8 x float> @test4(float %a) nounwind { @@ -75,3 +76,23 @@ entry: ; CHECK: ret ret void } + +; Extract a value from a shufflevector.. +define i32 @test9(<4 x i32> %a) nounwind { +; CHECK: test9 +; CHECK: vpextrd + %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> + %r = extractelement <8 x i32> %b, i32 2 +; CHECK: ret + ret i32 %r +} + +; Extract a value which is the result of an undef mask. +define i32 @test10(<4 x i32> %a) nounwind { +; CHECK: @test10 +; CHECK-NEXT: # +; CHECK-NEXT: ret + %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> + %r = extractelement <8 x i32> %b, i32 2 + ret i32 %r +} diff --git a/llvm/test/CodeGen/X86/widen_shuffle-1.ll b/llvm/test/CodeGen/X86/widen_shuffle-1.ll index 24608d03bf10..7bebb274f6ec 100644 --- a/llvm/test/CodeGen/X86/widen_shuffle-1.ll +++ b/llvm/test/CodeGen/X86/widen_shuffle-1.ll @@ -10,6 +10,7 @@ entry: %val = fadd <3 x float> %x, %src2 store <3 x float> %val, <3 x float>* %dst.addr ret void +; CHECK: ret } @@ -23,6 +24,7 @@ entry: %val = fadd <3 x float> %x, %src2 store <3 x float> %val, <3 x float>* %dst.addr ret void +; CHECK: ret } ; Example of when widening a v3float operation causes the DAG to replace a node @@ -31,7 +33,7 @@ entry: define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind { entry: ; CHECK: shuf3: -; CHECK: pshufd +; CHECK: shufps %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> @@ -45,6 +47,7 @@ entry: %shuffle.i.i.i21 = shufflevector <4 x float> %tmp2.i18, <4 x float> undef, <4 x i32> store <4 x float> %shuffle.i.i.i21, <4 x float>* %dst ret void +; CHECK: ret } ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS @@ -53,6 +56,7 @@ define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone { ; CHECK-NOT: punpckldq %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> ret <8 x i8> %vshuf +; CHECK: ret } ; PR11389: another CONCAT_VECTORS case @@ -61,4 +65,5 @@ define void @shuf5(<8 x i8>* %p) nounwind { %v = shufflevector <2 x i8> , <2 x i8> undef, <8 x i32> store <8 x i8> %v, <8 x i8>* %p, align 8 ret void +; CHECK: ret }