From 16ef94f4e8c1d980cd19201fe5ede97262ef7af9 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Sat, 22 Apr 2006 06:21:46 +0000 Subject: [PATCH] Fix a performance regression. Use {p}shuf* when there are only two distinct elements in a build_vector. llvm-svn: 27945 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 30 ++++++++++++++----------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1eaa51e00f70..248393b89933 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3274,22 +3274,26 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); } - // Expand into a number of unpckl*. - // e.g. for v4f32 - // Step 1: unpcklps 0, 2 ==> X: - // : unpcklps 1, 3 ==> Y: - // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); - for (unsigned i = 0; i < NumElems; ++i) - V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); - NumElems >>= 1; - while (NumElems != 0) { + if (Values.size() > 2) { + // Expand into a number of unpckl*. + // e.g. for v4f32 + // Step 1: unpcklps 0, 2 ==> X: + // : unpcklps 1, 3 ==> Y: + // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> + SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); for (unsigned i = 0; i < NumElems; ++i) - V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], - UnpckMask); + V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); NumElems >>= 1; + while (NumElems != 0) { + for (unsigned i = 0; i < NumElems; ++i) + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], + UnpckMask); + NumElems >>= 1; + } + return V[0]; } - return V[0]; + + return SDOperand(); } case ISD::EXTRACT_VECTOR_ELT: { if (!isa(Op.getOperand(1)))