forked from OSchip/llvm-project
[NVPTX] Fix vector loads from parameters that span multiple loads, and fix some typos
llvm-svn: 185332
This commit is contained in:
parent a2911283e4
commit dff28d215f
|
@ -1997,7 +1997,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
|
||||||
Ops.push_back(Flag);
|
Ops.push_back(Flag);
|
||||||
|
|
||||||
SDNode *Ret =
|
SDNode *Ret =
|
||||||
CurDAG->getMachineNode(Opc, DL, Node->getVTList(), Ops);
|
CurDAG->getMachineNode(Opc, DL, VTs, Ops);
|
||||||
return Ret;
|
return Ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2270,8 +2270,9 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
|
||||||
SDNode *Ret =
|
SDNode *Ret =
|
||||||
CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
|
CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
|
||||||
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
|
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
|
||||||
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
|
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
|
||||||
cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
|
cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
|
||||||
|
|
|
@ -340,158 +340,6 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
|
||||||
return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
|
return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
std::string NVPTXTargetLowering::getPrototype(
|
|
||||||
Type *retTy, const ArgListTy &Args,
|
|
||||||
const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const {
|
|
||||||
|
|
||||||
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
|
|
||||||
|
|
||||||
std::stringstream O;
|
|
||||||
O << "prototype_" << uniqueCallSite << " : .callprototype ";
|
|
||||||
|
|
||||||
if (retTy->getTypeID() == Type::VoidTyID)
|
|
||||||
O << "()";
|
|
||||||
else {
|
|
||||||
O << "(";
|
|
||||||
if (isABI) {
|
|
||||||
if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
|
|
||||||
unsigned size = 0;
|
|
||||||
if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
|
|
||||||
size = ITy->getBitWidth();
|
|
||||||
if (size < 32)
|
|
||||||
size = 32;
|
|
||||||
} else {
|
|
||||||
assert(retTy->isFloatingPointTy() &&
|
|
||||||
"Floating point type expected here");
|
|
||||||
size = retTy->getPrimitiveSizeInBits();
|
|
||||||
}
|
|
||||||
|
|
||||||
O << ".param .b" << size << " _";
|
|
||||||
} else if (isa<PointerType>(retTy))
|
|
||||||
O << ".param .b" << getPointerTy().getSizeInBits() << " _";
|
|
||||||
else {
|
|
||||||
if ((retTy->getTypeID() == Type::StructTyID) ||
|
|
||||||
isa<VectorType>(retTy)) {
|
|
||||||
SmallVector<EVT, 16> vtparts;
|
|
||||||
ComputeValueVTs(*this, retTy, vtparts);
|
|
||||||
unsigned totalsz = 0;
|
|
||||||
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
|
|
||||||
unsigned elems = 1;
|
|
||||||
EVT elemtype = vtparts[i];
|
|
||||||
if (vtparts[i].isVector()) {
|
|
||||||
elems = vtparts[i].getVectorNumElements();
|
|
||||||
elemtype = vtparts[i].getVectorElementType();
|
|
||||||
}
|
|
||||||
for (unsigned j = 0, je = elems; j != je; ++j) {
|
|
||||||
unsigned sz = elemtype.getSizeInBits();
|
|
||||||
if (elemtype.isInteger() && (sz < 8))
|
|
||||||
sz = 8;
|
|
||||||
totalsz += sz / 8;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
|
|
||||||
} else {
|
|
||||||
assert(false && "Unknown return type");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SmallVector<EVT, 16> vtparts;
|
|
||||||
ComputeValueVTs(*this, retTy, vtparts);
|
|
||||||
unsigned idx = 0;
|
|
||||||
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
|
|
||||||
unsigned elems = 1;
|
|
||||||
EVT elemtype = vtparts[i];
|
|
||||||
if (vtparts[i].isVector()) {
|
|
||||||
elems = vtparts[i].getVectorNumElements();
|
|
||||||
elemtype = vtparts[i].getVectorElementType();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned j = 0, je = elems; j != je; ++j) {
|
|
||||||
unsigned sz = elemtype.getSizeInBits();
|
|
||||||
if (elemtype.isInteger() && (sz < 32))
|
|
||||||
sz = 32;
|
|
||||||
O << ".reg .b" << sz << " _";
|
|
||||||
if (j < je - 1)
|
|
||||||
O << ", ";
|
|
||||||
++idx;
|
|
||||||
}
|
|
||||||
if (i < e - 1)
|
|
||||||
O << ", ";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
O << ") ";
|
|
||||||
}
|
|
||||||
O << "_ (";
|
|
||||||
|
|
||||||
bool first = true;
|
|
||||||
MVT thePointerTy = getPointerTy();
|
|
||||||
|
|
||||||
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
|
|
||||||
const Type *Ty = Args[i].Ty;
|
|
||||||
if (!first) {
|
|
||||||
O << ", ";
|
|
||||||
}
|
|
||||||
first = false;
|
|
||||||
|
|
||||||
if (Outs[i].Flags.isByVal() == false) {
|
|
||||||
unsigned sz = 0;
|
|
||||||
if (isa<IntegerType>(Ty)) {
|
|
||||||
sz = cast<IntegerType>(Ty)->getBitWidth();
|
|
||||||
if (sz < 32)
|
|
||||||
sz = 32;
|
|
||||||
} else if (isa<PointerType>(Ty))
|
|
||||||
sz = thePointerTy.getSizeInBits();
|
|
||||||
else
|
|
||||||
sz = Ty->getPrimitiveSizeInBits();
|
|
||||||
if (isABI)
|
|
||||||
O << ".param .b" << sz << " ";
|
|
||||||
else
|
|
||||||
O << ".reg .b" << sz << " ";
|
|
||||||
O << "_";
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const PointerType *PTy = dyn_cast<PointerType>(Ty);
|
|
||||||
assert(PTy && "Param with byval attribute should be a pointer type");
|
|
||||||
Type *ETy = PTy->getElementType();
|
|
||||||
|
|
||||||
if (isABI) {
|
|
||||||
unsigned align = Outs[i].Flags.getByValAlign();
|
|
||||||
unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
|
|
||||||
O << ".param .align " << align << " .b8 ";
|
|
||||||
O << "_";
|
|
||||||
O << "[" << sz << "]";
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
SmallVector<EVT, 16> vtparts;
|
|
||||||
ComputeValueVTs(*this, ETy, vtparts);
|
|
||||||
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
|
|
||||||
unsigned elems = 1;
|
|
||||||
EVT elemtype = vtparts[i];
|
|
||||||
if (vtparts[i].isVector()) {
|
|
||||||
elems = vtparts[i].getVectorNumElements();
|
|
||||||
elemtype = vtparts[i].getVectorElementType();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned j = 0, je = elems; j != je; ++j) {
|
|
||||||
unsigned sz = elemtype.getSizeInBits();
|
|
||||||
if (elemtype.isInteger() && (sz < 32))
|
|
||||||
sz = 32;
|
|
||||||
O << ".reg .b" << sz << " ";
|
|
||||||
O << "_";
|
|
||||||
if (j < je - 1)
|
|
||||||
O << ", ";
|
|
||||||
}
|
|
||||||
if (i < e - 1)
|
|
||||||
O << ", ";
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
O << ");";
|
|
||||||
return O.str();
|
|
||||||
}*/
|
|
||||||
|
|
||||||
std::string
|
std::string
|
||||||
NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
|
NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
|
||||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||||
|
@ -584,7 +432,9 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
|
||||||
OIdx += len - 1;
|
OIdx += len - 1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
assert(getValueType(Ty) == Outs[OIdx].VT &&
|
// i8 types in IR will be i16 types in SDAG
|
||||||
|
assert((getValueType(Ty) == Outs[OIdx].VT ||
|
||||||
|
(getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
|
||||||
"type mismatch between callee prototype and arguments");
|
"type mismatch between callee prototype and arguments");
|
||||||
// scalar type
|
// scalar type
|
||||||
unsigned sz = 0;
|
unsigned sz = 0;
|
||||||
|
@ -854,6 +704,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||||
Ops.push_back(StoreVal);
|
Ops.push_back(StoreVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ops.push_back(InFlag);
|
||||||
|
|
||||||
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
|
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||||
Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0],
|
Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0],
|
||||||
Ops.size(), MemVT,
|
Ops.size(), MemVT,
|
||||||
|
@ -1733,8 +1585,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
|
||||||
InVals.push_back(Elt);
|
InVals.push_back(Elt);
|
||||||
}
|
}
|
||||||
Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
|
Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
|
||||||
InsIdx += VecSize;
|
|
||||||
}
|
}
|
||||||
|
InsIdx += VecSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NumElts > 0)
|
if (NumElts > 0)
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||||
|
|
||||||
|
|
||||||
|
define <16 x float> @foo(<16 x float> %a) {
|
||||||
|
; Make sure we index into vectors properly
|
||||||
|
; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0];
|
||||||
|
; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+16];
|
||||||
|
; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+32];
|
||||||
|
; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+48];
|
||||||
|
ret <16 x float> %a
|
||||||
|
}
|
Loading…
Reference in New Issue