From dff28d215fa5812b72fec7e7299eab1c5654944e Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 1 Jul 2013 12:59:01 +0000 Subject: [PATCH] [NVPTX] Fix vector loads from parameters that span multiple loads, and fix some typos llvm-svn: 185332 --- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 5 +- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 160 +------------------- llvm/test/CodeGen/NVPTX/vec-param-load.ll | 13 ++ 3 files changed, 22 insertions(+), 156 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/vec-param-load.ll diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 03a3aa4f4da9..b613587f2d03 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1997,7 +1997,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { Ops.push_back(Flag); SDNode *Ret = - CurDAG->getMachineNode(Opc, DL, Node->getVTList(), Ops); + CurDAG->getMachineNode(Opc, DL, VTs, Ops); return Ret; } @@ -2270,8 +2270,9 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { } } + SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue); SDNode *Ret = - CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); + CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops); MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); cast(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 4590916a385e..f2578584bc64 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -340,158 +340,6 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); } -/* -std::string NVPTXTargetLowering::getPrototype( - Type *retTy, const ArgListTy &Args, - const SmallVectorImpl &Outs, unsigned retAlignment) const { - - bool isABI = (nvptxSubtarget.getSmVersion() >= 20); - - std::stringstream O; - O << "prototype_" << uniqueCallSite << " : .callprototype "; - - if (retTy->getTypeID() == Type::VoidTyID) - O << "()"; - else { - O << "("; - if (isABI) { - if (retTy->isPrimitiveType() || retTy->isIntegerTy()) { - unsigned size = 0; - if (const IntegerType *ITy = dyn_cast(retTy)) { - size = ITy->getBitWidth(); - if (size < 32) - size = 32; - } else { - assert(retTy->isFloatingPointTy() && - "Floating point type expected here"); - size = retTy->getPrimitiveSizeInBits(); - } - - O << ".param .b" << size << " _"; - } else if (isa(retTy)) - O << ".param .b" << getPointerTy().getSizeInBits() << " _"; - else { - if ((retTy->getTypeID() == Type::StructTyID) || - isa(retTy)) { - SmallVector vtparts; - ComputeValueVTs(*this, retTy, vtparts); - unsigned totalsz = 0; - for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { - unsigned elems = 1; - EVT elemtype = vtparts[i]; - if (vtparts[i].isVector()) { - elems = vtparts[i].getVectorNumElements(); - elemtype = vtparts[i].getVectorElementType(); - } - for (unsigned j = 0, je = elems; j != je; ++j) { - unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) - sz = 8; - totalsz += sz / 8; - } - } - O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; - } else { - assert(false && "Unknown return type"); - } - } - } else { - SmallVector vtparts; - ComputeValueVTs(*this, retTy, vtparts); - unsigned idx = 0; - for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { - unsigned elems = 1; - EVT elemtype = vtparts[i]; - if (vtparts[i].isVector()) { - elems = vtparts[i].getVectorNumElements(); - elemtype = vtparts[i].getVectorElementType(); - } - - for (unsigned j = 0, je = elems; j != je; ++j) { - unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) - sz = 32; - O << ".reg .b" << sz << " _"; - if (j < je - 1) - O << ", "; - ++idx; - } - if (i < e - 1) - O << ", "; - } - } - O << ") "; - } - O << "_ ("; - - bool first = true; - MVT thePointerTy = getPointerTy(); - - for (unsigned i = 0, e = Args.size(); i != e; ++i) { - const Type *Ty = Args[i].Ty; - if (!first) { - O << ", "; - } - first = false; - - if (Outs[i].Flags.isByVal() == false) { - unsigned sz = 0; - if (isa(Ty)) { - sz = cast(Ty)->getBitWidth(); - if (sz < 32) - sz = 32; - } else if (isa(Ty)) - sz = thePointerTy.getSizeInBits(); - else - sz = Ty->getPrimitiveSizeInBits(); - if (isABI) - O << ".param .b" << sz << " "; - else - O << ".reg .b" << sz << " "; - O << "_"; - continue; - } - const PointerType *PTy = dyn_cast(Ty); - assert(PTy && "Param with byval attribute should be a pointer type"); - Type *ETy = PTy->getElementType(); - - if (isABI) { - unsigned align = Outs[i].Flags.getByValAlign(); - unsigned sz = getDataLayout()->getTypeAllocSize(ETy); - O << ".param .align " << align << " .b8 "; - O << "_"; - O << "[" << sz << "]"; - continue; - } else { - SmallVector vtparts; - ComputeValueVTs(*this, ETy, vtparts); - for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { - unsigned elems = 1; - EVT elemtype = vtparts[i]; - if (vtparts[i].isVector()) { - elems = vtparts[i].getVectorNumElements(); - elemtype = vtparts[i].getVectorElementType(); - } - - for (unsigned j = 0, je = elems; j != je; ++j) { - unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) - sz = 32; - O << ".reg .b" << sz << " "; - O << "_"; - if (j < je - 1) - O << ", "; - } - if (i < e - 1) - O << ", "; - } - continue; - } - } - O << ");"; - return O.str(); -}*/ - std::string NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, const SmallVectorImpl &Outs, @@ -584,7 +432,9 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, OIdx += len - 1; continue; } - assert(getValueType(Ty) == Outs[OIdx].VT && + // i8 types in IR will be i16 types in SDAG + assert((getValueType(Ty) == Outs[OIdx].VT || + (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments"); // scalar type unsigned sz = 0; @@ -854,6 +704,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(StoreVal); } + Ops.push_back(InFlag); + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0], Ops.size(), MemVT, @@ -1733,8 +1585,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( InVals.push_back(Elt); } Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); - InsIdx += VecSize; } + InsIdx += VecSize; } if (NumElts > 0) diff --git a/llvm/test/CodeGen/NVPTX/vec-param-load.ll b/llvm/test/CodeGen/NVPTX/vec-param-load.ll new file mode 100644 index 000000000000..a384348a6590 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/vec-param-load.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + + +define <16 x float> @foo(<16 x float> %a) { +; Make sure we index into vectors properly +; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0]; +; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+16]; +; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+32]; +; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+48]; + ret <16 x float> %a +}