From 6652e22bad8962b79430108a7fed10f83a59b3a3 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 19 Oct 2011 09:45:11 +0000 Subject: [PATCH] Add support for the vector-widening of vselect and vector-setcc llvm-svn: 142488 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 28 +++++++- .../CodeGen/X86/2011-10-19-widen_vselect.ll | 68 +++++++++++++++++++ 3 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index abacdac686bc..0a8db0897b5b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -633,6 +633,7 @@ private: SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_SETCC(SDNode* N, unsigned ResNo); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 107a42b2951c..7fc7525a5b90 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1239,6 +1239,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::VSELECT: case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; @@ -1928,7 +1929,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == 
WidenVT && InOp2.getValueType() == WidenVT); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, Cond1, InOp1, InOp2); } @@ -2032,6 +2033,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::SETCC: Res = WidenVecOp_SETCC(N, ResNo); break; case ISD::FP_EXTEND: case ISD::FP_TO_SINT: @@ -2165,6 +2167,30 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { MVT::Other,&StChain[0],StChain.size()); } +SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N, unsigned ResNo) { + assert(ResNo < 2 && "Invalid res num to widen"); + SDValue InOp0 = GetWidenedVector(N->getOperand(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + EVT VT = InOp0.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // WARNING: In this code we widen the compare instruction with garbage. + // This garbage may contain denormal floats which may be slow. Is this a real + // concern? Should we zero the unused lanes if this is a float compare? 
+ + SDValue Zero = DAG.getIntPtrConstant(0); + EVT ResVT = EVT::getVectorVT(*DAG.getContext(), + N->getValueType(0).getVectorElementType(), + VT.getVectorNumElements()); + + SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(), + ResVT, InOp0, InOp1, N->getOperand(2)); + + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, N->getValueType(0), + WideSETCC, Zero); +} + + //===----------------------------------------------------------------------===// // Vector Widening Utilities //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll new file mode 100644 index 000000000000..e08c5b28c5ec --- /dev/null +++ b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +; Make sure that we don't crash when legalizing vselect and vsetcc and that +; we are able to generate vector blend instructions. 
+ +; CHECK: simple_widen +; CHECK: blend +; CHECK: ret +define void @simple_widen() { +entry: + %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef + store <2 x float> %0, <2 x float>* undef + ret void +} + +; CHECK: complex_inreg_work +; CHECK: blend +; CHECK: ret + +define void @complex_inreg_work() { +entry: + %0 = fcmp oeq <2 x float> undef, undef + %1 = select <2 x i1> %0, <2 x float> undef, <2 x float> undef + store <2 x float> %1, <2 x float>* undef + ret void +} + +; CHECK: zero_test +; CHECK: blend +; CHECK: ret + +define void @zero_test() { +entry: + %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer + store <2 x float> %0, <2 x float>* undef + ret void +} + +; CHECK: full_test +; CHECK: blend +; CHECK: ret + +define void @full_test() { + entry: + %Cy300 = alloca <4 x float> + %Cy11a = alloca <2 x float> + %Cy118 = alloca <2 x float> + %Cy119 = alloca <2 x float> + br label %B1 + + B1: ; preds = %entry + %0 = load <2 x float>* %Cy119 + %1 = fptosi <2 x float> %0 to <2 x i32> + %2 = sitofp <2 x i32> %1 to <2 x float> + %3 = fcmp ogt <2 x float> %0, zeroinitializer + %4 = fadd <2 x float> %2, <float 1.000000e+00, float 1.000000e+00> + %5 = select <2 x i1> %3, <2 x float> %4, <2 x float> %2 + %6 = fcmp oeq <2 x float> %2, %0 + %7 = select <2 x i1> %6, <2 x float> %0, <2 x float> %5 + store <2 x float> %7, <2 x float>* %Cy118 + %8 = load <2 x float>* %Cy118 + store <2 x float> %8, <2 x float>* %Cy11a + ret void +} + +