forked from OSchip/llvm-project
[NVPTX] Allow libcalls that are defined in the current module.
This patch adds the ability to make library calls on NVPTX. An important restriction on library functions: they must be defined within the current module. This should guarantee that we produce valid PTX assembly (without calls to undefined functions). Anyone who wants to use the libcalls will probably have to link against compiler-rt or another implementation. Currently, it is completely impossible to make library calls because of the error LLVM ERROR: Cannot select: i32 = ExternalSymbol '...'. But we can lower ExternalSymbol to TargetExternalSymbol and verify that the function definition is available. Also, there was an issue with the DAG during legalisation. When we expand an instruction into a libcall, the inner call chain isn't "integrated" into the outer chain. Since the last "data-flow" node (the call retval load) is located earlier in the call chain than the CALLSEQ_END node, the latter becomes a leaf and therefore a dead node (and is removed quite quickly). The solution proposed here relies on additional data-flow pseudo nodes (ProxyReg) whose only purpose is to keep CALLSEQ_END alive during the legalisation and instruction selection phases - we remove the pseudo instructions before the register scheduling phase. Patch by Denys Zariaiev! Differential Revision: https://reviews.llvm.org/D34708 llvm-svn: 350069
This commit is contained in:
parent
423b65333d
commit
49fac56ea3
|
@ -1128,6 +1128,13 @@ public:
|
|||
/// Expand the specified \c ISD::VACOPY node as the Legalize pass would.
|
||||
SDValue expandVACopy(SDNode *Node);
|
||||
|
||||
/// Returns a GlobalAddress of the function from the current module with
|
||||
/// name matching the given ExternalSymbol. Additionally can provide the
|
||||
/// matched function.
|
||||
/// Reports a fatal error if the function doesn't exist.
|
||||
SDValue getSymbolFunctionGlobalAddress(SDValue Op,
|
||||
Function **TargetFunction = nullptr);
|
||||
|
||||
/// *Mutate* the specified node in-place to have the
|
||||
/// specified operands. If the resultant node already exists in the DAG,
|
||||
/// this does not modify the specified node, instead it returns the node that
|
||||
|
|
|
@ -8464,6 +8464,32 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
|
|||
return TokenFactor;
|
||||
}
|
||||
|
||||
/// Resolve an ExternalSymbol node to a GlobalAddress node for the function of
/// the same name in the module that owns the current machine function.
///
/// \param Op          Must be an ExternalSymbolSDNode (asserted).
/// \param OutFunction Optional out-parameter. When non-null it is assigned the
///                    result of the module lookup before the result is
///                    checked.
/// \returns the GlobalAddress of the matched function, using the pointer type
///          of the function's address space. If the module has no function
///          with that name, a fatal "Undefined external symbol" error is
///          reported instead of returning.
SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
                                                     Function **OutFunction) {
  assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol");

  auto *SymName = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  auto *Callee = MF->getFunction().getParent()->getFunction(SymName);

  if (OutFunction)
    *OutFunction = Callee;

  // Failure path first: nothing in the module carries this name.
  if (!Callee) {
    std::string Message;
    raw_string_ostream OS(Message);
    OS << "Undefined external symbol ";
    OS << '"' << SymName << '"';
    OS.flush();
    report_fatal_error(Message);
  }

  auto PtrTy = TLI->getPointerTy(getDataLayout(), Callee->getAddressSpace());
  return getGlobalAddress(Callee, SDLoc(Op), PtrTy);
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SDNode Class
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -32,6 +32,7 @@ set(NVPTXCodeGen_sources
|
|||
NVPTXUtilities.cpp
|
||||
NVVMIntrRange.cpp
|
||||
NVVMReflect.cpp
|
||||
NVPTXProxyRegErasure.cpp
|
||||
)
|
||||
|
||||
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
|
||||
|
|
|
@ -53,6 +53,7 @@ FunctionPass *createNVPTXImageOptimizerPass();
|
|||
FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM);
|
||||
BasicBlockPass *createNVPTXLowerAllocaPass();
|
||||
MachineFunctionPass *createNVPTXPeephole();
|
||||
MachineFunctionPass *createNVPTXProxyRegErasurePass();
|
||||
|
||||
Target &getTheNVPTXTarget32();
|
||||
Target &getTheNVPTXTarget64();
|
||||
|
|
|
@ -730,6 +730,11 @@ void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
|
|||
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
|
||||
const Function *F = &*FI;
|
||||
|
||||
if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
|
||||
emitDeclaration(F, O);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (F->isDeclaration()) {
|
||||
if (F->use_empty())
|
||||
continue;
|
||||
|
|
|
@ -663,6 +663,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
return "NVPTXISD::CallSeqEnd";
|
||||
case NVPTXISD::CallPrototype:
|
||||
return "NVPTXISD::CallPrototype";
|
||||
case NVPTXISD::ProxyReg:
|
||||
return "NVPTXISD::ProxyReg";
|
||||
case NVPTXISD::LoadV2:
|
||||
return "NVPTXISD::LoadV2";
|
||||
case NVPTXISD::LoadV4:
|
||||
|
@ -1666,6 +1668,18 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
// indirect calls but is always null for libcalls.
|
||||
bool isIndirectCall = !Func && CS;
|
||||
|
||||
if (isa<ExternalSymbolSDNode>(Callee)) {
|
||||
Function* CalleeFunc = nullptr;
|
||||
|
||||
// Try to find the callee in the current module.
|
||||
Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
|
||||
assert(CalleeFunc != nullptr && "Libcall callee must be set.");
|
||||
|
||||
// Set the "libcall callee" attribute to indicate that the function
|
||||
// must always have a declaration.
|
||||
CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
|
||||
}
|
||||
|
||||
if (isIndirectCall) {
|
||||
// This is indirect function call case : PTX requires a prototype of the
|
||||
// form
|
||||
|
@ -1738,6 +1752,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
SmallVector<SDValue, 16> ProxyRegOps;
|
||||
SmallVector<Optional<MVT>, 16> ProxyRegTruncates;
|
||||
|
||||
// Generate loads from param memory/moves from registers for result
|
||||
if (Ins.size() > 0) {
|
||||
SmallVector<EVT, 16> VTs;
|
||||
|
@ -1808,11 +1825,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
MachineMemOperand::MOLoad);
|
||||
|
||||
for (unsigned j = 0; j < NumElts; ++j) {
|
||||
SDValue Ret = RetVal.getValue(j);
|
||||
ProxyRegOps.push_back(RetVal.getValue(j));
|
||||
|
||||
if (needTruncate)
|
||||
Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret);
|
||||
InVals.push_back(Ret);
|
||||
ProxyRegTruncates.push_back(Optional<MVT>(Ins[VecIdx + j].VT));
|
||||
else
|
||||
ProxyRegTruncates.push_back(Optional<MVT>());
|
||||
}
|
||||
|
||||
Chain = RetVal.getValue(NumElts);
|
||||
InFlag = RetVal.getValue(NumElts + 1);
|
||||
|
||||
|
@ -1828,8 +1848,29 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
|
||||
true),
|
||||
InFlag, dl);
|
||||
InFlag = Chain.getValue(1);
|
||||
uniqueCallSite++;
|
||||
|
||||
// Append ProxyReg instructions to the chain to make sure that `callseq_end`
|
||||
// will not get lost. Otherwise, during libcalls expansion, the nodes can become
|
||||
// dangling.
|
||||
for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
|
||||
SDValue Ret = DAG.getNode(
|
||||
NVPTXISD::ProxyReg, dl,
|
||||
DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
|
||||
{ Chain, ProxyRegOps[i], InFlag }
|
||||
);
|
||||
|
||||
Chain = Ret.getValue(1);
|
||||
InFlag = Ret.getValue(2);
|
||||
|
||||
if (ProxyRegTruncates[i].hasValue()) {
|
||||
Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret);
|
||||
}
|
||||
|
||||
InVals.push_back(Ret);
|
||||
}
|
||||
|
||||
// set isTailCall to false for now, until we figure out how to express
|
||||
// tail call optimization in PTX
|
||||
isTailCall = false;
|
||||
|
|
|
@ -51,6 +51,7 @@ enum NodeType : unsigned {
|
|||
CallSeqBegin,
|
||||
CallSeqEnd,
|
||||
CallPrototype,
|
||||
ProxyReg,
|
||||
FUN_SHFL_CLAMP,
|
||||
FUN_SHFR_CLAMP,
|
||||
MUL_WIDE_SIGNED,
|
||||
|
|
|
@ -1885,6 +1885,7 @@ def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
|
|||
def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
|
||||
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
|
||||
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
|
||||
def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;
|
||||
|
||||
def DeclareParam :
|
||||
SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
|
||||
|
@ -1972,6 +1973,9 @@ def PseudoUseParam :
|
|||
def RETURNNode :
|
||||
SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
|
||||
[SDNPHasChain, SDNPSideEffect]>;
|
||||
def ProxyReg :
|
||||
SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
|
||||
let mayLoad = 1 in {
|
||||
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
|
||||
|
@ -2249,6 +2253,21 @@ def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
|
|||
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
|
||||
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
|
||||
|
||||
// Instruction class for the ProxyReg node: one output and one input register
// of the same register class, selected from `(ProxyReg regclass:$src)` and
// printed as `mov.<SzStr> $dst, $src;`.
class ProxyRegInst<string SzStr, NVPTXRegClass regclass> :
|
||||
NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
|
||||
!strconcat("mov.", SzStr, " \t$dst, $src;"),
|
||||
[(set regclass:$dst, (ProxyReg regclass:$src))]>;
|
||||
|
||||
// One ProxyReg instruction per register class, all marked as codegen-only
// pseudo instructions; the string argument is the `mov` type suffix.
let isCodeGenOnly=1, isPseudo=1 in {
|
||||
def ProxyRegI1 : ProxyRegInst<"pred", Int1Regs>;
|
||||
def ProxyRegI16 : ProxyRegInst<"b16", Int16Regs>;
|
||||
def ProxyRegI32 : ProxyRegInst<"b32", Int32Regs>;
|
||||
def ProxyRegI64 : ProxyRegInst<"b64", Int64Regs>;
|
||||
def ProxyRegF16 : ProxyRegInst<"b16", Float16Regs>;
|
||||
def ProxyRegF32 : ProxyRegInst<"f32", Float32Regs>;
|
||||
def ProxyRegF64 : ProxyRegInst<"f64", Float64Regs>;
|
||||
def ProxyRegF16x2 : ProxyRegInst<"b32", Float16x2Regs>;
|
||||
}
|
||||
|
||||
//
|
||||
// Load / Store Handling
|
||||
|
@ -2541,7 +2560,7 @@ let mayStore=1, hasSideEffects=0 in {
|
|||
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
|
||||
NVPTXRegClass regclassOut> :
|
||||
NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
|
||||
!strconcat("mov.b", !strconcat(SzStr, " \t$d, $a;")),
|
||||
!strconcat("mov.b", SzStr, " \t$d, $a;"),
|
||||
[(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
|
||||
|
||||
def BITCONVERT_16_I2F : F_BITCONVERT<"16", Int16Regs, Float16Regs>;
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
//===- NVPTXProxyRegErasure.cpp - NVPTX Proxy Register Instruction Erasure -==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The pass is needed to remove ProxyReg instructions and restore related
|
||||
// registers. The instructions were needed at instruction selection stage to
|
||||
// make sure that callseq_end nodes won't be removed as "dead nodes". This can
|
||||
// happen when we expand instructions into libcalls and the call site doesn't
|
||||
// care about the libcall chain. Call site cares about data flow only, and the
|
||||
// latest data flow node happens to be before callseq_end. Therefore the node
|
||||
// becomes dangling and "dead". The ProxyReg acts like an additional data flow
|
||||
// node *after* the callseq_end in the chain and ensures that everything will be
|
||||
// preserved.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "NVPTX.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace llvm {
|
||||
void initializeNVPTXProxyRegErasurePass(PassRegistry &);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
struct NVPTXProxyRegErasure : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
NVPTXProxyRegErasure() : MachineFunctionPass(ID) {
|
||||
initializeNVPTXProxyRegErasurePass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
StringRef getPassName() const override {
|
||||
return "NVPTX Proxy Register Instruction Erasure";
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
private:
|
||||
void replaceMachineInstructionUsage(MachineFunction &MF, MachineInstr &MI);
|
||||
|
||||
void replaceRegisterUsage(MachineInstr &Instr, MachineOperand &From,
|
||||
MachineOperand &To);
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
char NVPTXProxyRegErasure::ID = 0;
|
||||
|
||||
INITIALIZE_PASS(NVPTXProxyRegErasure, "nvptx-proxyreg-erasure", "NVPTX ProxyReg Erasure", false, false)
|
||||
|
||||
/// Scan every instruction of \p MF. For each ProxyReg pseudo instruction,
/// redirect the users of its result to its source register and queue the
/// instruction for deletion. Deletion happens only after the scan, so the
/// basic-block iteration is never invalidated.
///
/// \returns true if at least one instruction was erased.
bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
  // Opcode predicate covering every ProxyReg variant.
  const auto IsProxyReg = [](unsigned Opcode) {
    return Opcode == NVPTX::ProxyRegI1 || Opcode == NVPTX::ProxyRegI16 ||
           Opcode == NVPTX::ProxyRegI32 || Opcode == NVPTX::ProxyRegI64 ||
           Opcode == NVPTX::ProxyRegF16 || Opcode == NVPTX::ProxyRegF16x2 ||
           Opcode == NVPTX::ProxyRegF32 || Opcode == NVPTX::ProxyRegF64;
  };

  SmallVector<MachineInstr *, 16> Doomed;

  for (auto &Block : MF)
    for (auto &Inst : Block)
      if (IsProxyReg(Inst.getOpcode())) {
        replaceMachineInstructionUsage(MF, Inst);
        Doomed.push_back(&Inst);
      }

  const bool Changed = !Doomed.empty();

  for (MachineInstr *Inst : Doomed)
    Inst->eraseFromParent();

  return Changed;
}
|
||||
|
||||
/// Redirect every reference to the result register of the ProxyReg
/// instruction \p MI so that it names the register \p MI reads instead.
///
/// \param MF The machine function that contains \p MI.
/// \param MI A ProxyReg instruction; its first def is the proxy result and its
///           first use is the proxied source (both asserted to be registers).
void NVPTXProxyRegErasure::replaceMachineInstructionUsage(MachineFunction &MF,
                                                          MachineInstr &MI) {
  auto &InOp = *MI.uses().begin();
  auto &OutOp = *MI.defs().begin();

  assert(InOp.isReg() && "ProxyReg input operand should be a register.");
  assert(OutOp.isReg() && "ProxyReg output operand should be a register.");

  // Let MachineRegisterInfo walk the register's own use/def list rather than
  // rescanning every operand of every instruction in MF for each ProxyReg;
  // the latter is quadratic in function size. This also rewrites MI's own def
  // operand, which is harmless because runOnMachineFunction erases MI right
  // after this call. Source and result share a register class (see the
  // ProxyRegInst definition), so no class constraining should be needed.
  MF.getRegInfo().replaceRegWith(OutOp.getReg(), InOp.getReg());
}
|
||||
|
||||
/// Within \p Instr, retarget every use operand that is a register equal to
/// the register of \p From so that it refers to the register of \p To.
/// Non-register operands and operands naming other registers are left alone.
void NVPTXProxyRegErasure::replaceRegisterUsage(MachineInstr &Instr,
                                                MachineOperand &From,
                                                MachineOperand &To) {
  for (MachineOperand &Operand : Instr.uses()) {
    if (!Operand.isReg())
      continue;
    if (Operand.getReg() != From.getReg())
      continue;
    Operand.setReg(To.getReg());
  }
}
|
||||
|
||||
/// Factory for the ProxyReg erasure pass: returns a newly heap-allocated
/// NVPTXProxyRegErasure instance.
/// NOTE(review): presumably the pass manager takes ownership of the returned
/// pointer - confirm against the caller.
MachineFunctionPass *llvm::createNVPTXProxyRegErasurePass() {
|
||||
return new NVPTXProxyRegErasure();
|
||||
}
|
|
@ -68,6 +68,7 @@ void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
|
|||
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
|
||||
void initializeNVPTXLowerArgsPass(PassRegistry &);
|
||||
void initializeNVPTXLowerAllocaPass(PassRegistry &);
|
||||
void initializeNVPTXProxyRegErasurePass(PassRegistry &);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
|
@ -87,6 +88,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
|
|||
initializeNVPTXLowerArgsPass(PR);
|
||||
initializeNVPTXLowerAllocaPass(PR);
|
||||
initializeNVPTXLowerAggrCopiesPass(PR);
|
||||
initializeNVPTXProxyRegErasurePass(PR);
|
||||
}
|
||||
|
||||
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
|
||||
|
@ -160,6 +162,7 @@ public:
|
|||
|
||||
void addIRPasses() override;
|
||||
bool addInstSelector() override;
|
||||
void addPreRegAlloc() override;
|
||||
void addPostRegAlloc() override;
|
||||
void addMachineSSAOptimization() override;
|
||||
|
||||
|
@ -301,6 +304,11 @@ bool NVPTXPassConfig::addInstSelector() {
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Pre-register-allocation hook of the NVPTX pass pipeline: adds the ProxyReg
/// erasure pass.
void NVPTXPassConfig::addPreRegAlloc() {
|
||||
// Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
|
||||
addPass(createNVPTXProxyRegErasurePass());
|
||||
}
|
||||
|
||||
void NVPTXPassConfig::addPostRegAlloc() {
|
||||
addPass(createNVPTXPrologEpilogPass(), false);
|
||||
if (getOptLevel() != CodeGenOpt::None) {
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s
|
||||
; Make sure the example doesn't crash with segfault
|
||||
|
||||
; CHECK: .visible .func ({{.*}}) loop
|
||||
define i32 @loop(i32, i32) {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%i = phi i32 [ %0, %entry ], [ %res, %loop ]
|
||||
%res = call i32 @div(i32 %i, i32 %1)
|
||||
|
||||
%exitcond = icmp eq i32 %res, %0
|
||||
br i1 %exitcond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @div(i32, i32) {
|
||||
ret i32 0
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s
|
||||
; Allow to make libcalls that are defined in the current module
|
||||
|
||||
; Underlying libcall declaration
|
||||
; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3
|
||||
|
||||
define i128 @remainder(i128, i128) {
|
||||
bb0:
|
||||
; CHECK: { // callseq 0, 0
|
||||
; CHECK: call.uni (retval0),
|
||||
; CHECK-NEXT: __umodti3,
|
||||
; CHECK-NEXT: (
|
||||
; CHECK-NEXT: param0,
|
||||
; CHECK-NEXT: param1
|
||||
; CHECK-NEXT: );
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [retval0+0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
%a = urem i128 %0, %1
|
||||
br label %bb1
|
||||
|
||||
bb1:
|
||||
; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%[[REG0]], %[[REG1]]};
|
||||
; CHECK-NEXT: ret;
|
||||
ret i128 %a
|
||||
}
|
||||
|
||||
; Underlying libcall definition
|
||||
; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3(
|
||||
define i128 @__umodti3(i128, i128) {
|
||||
ret i128 0
|
||||
}
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s
|
||||
; used to panic on failed assetion and now fails with a "Cannot select"
|
||||
; used to panic on failed assertion and now fails with an "Undefined external symbol"
|
||||
|
||||
; CHECK: LLVM ERROR: Cannot select: {{t28|0x[0-9a-f]+}}: i32 = ExternalSymbol'__umodti3'
|
||||
; CHECK: LLVM ERROR: Undefined external symbol "__umodti3"
|
||||
define hidden i128 @remainder(i128, i128) {
|
||||
%3 = urem i128 %0, %1
|
||||
ret i128 %3
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s
|
||||
; used to segfault and now fails with an "Undefined external symbol"
|
||||
|
||||
; CHECK: LLVM ERROR: Undefined external symbol "__powidf2"
|
||||
define double @powi(double, i32) {
|
||||
%a = call double @llvm.powi.f64(double %0, i32 %1)
|
||||
ret double %a
|
||||
}
|
||||
|
||||
declare double @llvm.powi.f64(double, i32) nounwind readnone
|
|
@ -0,0 +1,25 @@
|
|||
; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefix=MIR --check-prefix=MIR-BEFORE
|
||||
|
||||
; RUN: llc -march=nvptx64 -stop-after=nvptx-proxyreg-erasure < %s 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefix=MIR --check-prefix=MIR-AFTER
|
||||
|
||||
; Check ProxyRegErasure pass MIR manipulation.
|
||||
|
||||
declare <4 x i32> @callee_vec_i32()
|
||||
define <4 x i32> @check_vec_i32() {
|
||||
; MIR: body:
|
||||
; MIR-DAG: Callseq_Start {{[0-9]+}}, {{[0-9]+}}
|
||||
; MIR-DAG: %0:int32regs, %1:int32regs, %2:int32regs, %3:int32regs = LoadParamMemV4I32 0
|
||||
; MIR-DAG: Callseq_End {{[0-9]+}}
|
||||
|
||||
; MIR-BEFORE-DAG: %4:int32regs = ProxyRegI32 killed %0
|
||||
; MIR-BEFORE-DAG: %5:int32regs = ProxyRegI32 killed %1
|
||||
; MIR-BEFORE-DAG: %6:int32regs = ProxyRegI32 killed %2
|
||||
; MIR-BEFORE-DAG: %7:int32regs = ProxyRegI32 killed %3
|
||||
; MIR-BEFORE-DAG: StoreRetvalV4I32 killed %4, killed %5, killed %6, killed %7, 0
|
||||
; MIR-AFTER-DAG: StoreRetvalV4I32 killed %0, killed %1, killed %2, killed %3, 0
|
||||
|
||||
%ret = call <4 x i32> @callee_vec_i32()
|
||||
ret <4 x i32> %ret
|
||||
}
|
|
@ -0,0 +1,183 @@
|
|||
; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \
|
||||
; RUN: | llc -x mir -march=nvptx64 -start-before=nvptx-proxyreg-erasure 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefix=PTX --check-prefix=PTX-WITH
|
||||
|
||||
; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \
|
||||
; RUN: | llc -x mir -march=nvptx64 -start-after=nvptx-proxyreg-erasure 2>&1 \
|
||||
; RUN: | FileCheck %s --check-prefix=PTX --check-prefix=PTX-WITHOUT
|
||||
|
||||
; Thorough testing of ProxyRegErasure: PTX assembly with and without the pass.
|
||||
|
||||
declare i1 @callee_i1()
|
||||
define i1 @check_i1() {
|
||||
; PTX-LABEL: check_i1
|
||||
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
|
||||
; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
|
||||
; PTX-DAG: } // callseq {{[0-9]+}}
|
||||
|
||||
; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
|
||||
; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 1;
|
||||
; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 1;
|
||||
|
||||
; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]];
|
||||
|
||||
%ret = call i1 @callee_i1()
|
||||
ret i1 %ret
|
||||
}
|
||||
|
||||
declare i16 @callee_i16()
|
||||
define i16 @check_i16() {
|
||||
; PTX-LABEL: check_i16
|
||||
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
|
||||
; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
|
||||
; PTX-DAG: } // callseq {{[0-9]+}}
|
||||
|
||||
; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
|
||||
; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 65535;
|
||||
; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 65535;
|
||||
|
||||
; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]];
|
||||
|
||||
%ret = call i16 @callee_i16()
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
declare i32 @callee_i32()
|
||||
define i32 @check_i32() {
|
||||
; PTX-LABEL: check_i32
|
||||
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
|
||||
; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
|
||||
; PTX-DAG: } // callseq {{[0-9]+}}
|
||||
|
||||
; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
|
||||
; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]];
|
||||
; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]];
|
||||
|
||||
%ret = call i32 @callee_i32()
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
declare i64 @callee_i64()
|
||||
define i64 @check_i64() {
|
||||
; PTX-LABEL: check_i64
|
||||
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
|
||||
; PTX-DAG: ld.param.b64 [[LD:%rd[0-9]+]], [retval0+0];
|
||||
; PTX-DAG: } // callseq {{[0-9]+}}
|
||||
|
||||
; PTX-WITHOUT-DAG: mov.b64 [[PROXY:%rd[0-9]+]], [[LD]];
|
||||
; PTX-WITHOUT-DAG: st.param.b64 [func_retval0+0], [[PROXY]];
|
||||
; PTX-WITH-DAG: st.param.b64 [func_retval0+0], [[LD]];
|
||||
|
||||
%ret = call i64 @callee_i64()
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
declare i128 @callee_i128()
|
||||
define i128 @check_i128() {
|
||||
; PTX-LABEL: check_i128
|
||||
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
|
||||
; PTX-DAG: ld.param.v2.b64 {[[LD0:%rd[0-9]+]], [[LD1:%rd[0-9]+]]}, [retval0+0];
|
||||
; PTX-DAG: } // callseq {{[0-9]+}}
|
||||
|
||||
; PTX-WITHOUT-DAG: mov.b64 [[PROXY0:%rd[0-9]+]], [[LD0]];
|
||||
; PTX-WITHOUT-DAG: mov.b64 [[PROXY1:%rd[0-9]+]], [[LD1]];
|
||||
; PTX-WITHOUT-DAG: st.param.v2.b64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]};
|
||||
; PTX-WITH-DAG: st.param.v2.b64 [func_retval0+0], {[[LD0]], [[LD1]]};
|
||||
|
||||
%ret = call i128 @callee_i128()
|
||||
ret i128 %ret
|
||||
}
|
||||
|
||||
declare half @callee_f16()
|
||||
define half @check_f16() {
|
||||
; PTX-LABEL: check_f16
|
||||
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
|
||||
; PTX-DAG: ld.param.b16 [[LD:%h[0-9]+]], [retval0+0];
|
||||
; PTX-DAG: } // callseq {{[0-9]+}}
|
||||
|
||||
; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%h[0-9]+]], [[LD]];
|
||||
; PTX-WITHOUT-DAG: st.param.b16 [func_retval0+0], [[PROXY]];
|
||||
; PTX-WITH-DAG: st.param.b16 [func_retval0+0], [[LD]];
|
||||
|
||||
%ret = call half @callee_f16()
|
||||
ret half %ret
|
||||
}
|
||||
|
||||
declare float @callee_f32()
|
||||
define float @check_f32() {
|
||||
; PTX-LABEL: check_f32
|
||||
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
|
||||
; PTX-DAG: ld.param.f32 [[LD:%f[0-9]+]], [retval0+0];
|
||||
; PTX-DAG: } // callseq {{[0-9]+}}
|
||||
|
||||
; PTX-WITHOUT-DAG: mov.f32 [[PROXY:%f[0-9]+]], [[LD]];
|
||||
; PTX-WITHOUT-DAG: st.param.f32 [func_retval0+0], [[PROXY]];
|
||||
; PTX-WITH-DAG: st.param.f32 [func_retval0+0], [[LD]];
|
||||
|
||||
%ret = call float @callee_f32()
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
declare double @callee_f64()
|
||||
define double @check_f64() {
|
||||
; PTX-LABEL: check_f64
|
||||
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
|
||||
; PTX-DAG: ld.param.f64 [[LD:%fd[0-9]+]], [retval0+0];
|
||||
; PTX-DAG: } // callseq {{[0-9]+}}
|
||||
|
||||
; PTX-WITHOUT-DAG: mov.f64 [[PROXY:%fd[0-9]+]], [[LD]];
|
||||
; PTX-WITHOUT-DAG: st.param.f64 [func_retval0+0], [[PROXY]];
|
||||
; PTX-WITH-DAG: st.param.f64 [func_retval0+0], [[LD]];
|
||||
|
||||
%ret = call double @callee_f64()
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
declare <4 x i32> @callee_vec_i32()
|
||||
define <4 x i32> @check_vec_i32() {
|
||||
; PTX-LABEL: check_vec_i32
|
||||
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
|
||||
; PTX-DAG: ld.param.v4.b32 {[[LD0:%r[0-9]+]], [[LD1:%r[0-9]+]], [[LD2:%r[0-9]+]], [[LD3:%r[0-9]+]]}, [retval0+0];
|
||||
; PTX-DAG: } // callseq {{[0-9]+}}
|
||||
|
||||
; PTX-WITHOUT-DAG: mov.b32 [[PROXY0:%r[0-9]+]], [[LD0]];
|
||||
; PTX-WITHOUT-DAG: mov.b32 [[PROXY1:%r[0-9]+]], [[LD1]];
|
||||
; PTX-WITHOUT-DAG: mov.b32 [[PROXY2:%r[0-9]+]], [[LD2]];
|
||||
; PTX-WITHOUT-DAG: mov.b32 [[PROXY3:%r[0-9]+]], [[LD3]];
|
||||
; PTX-WITHOUT-DAG: st.param.v4.b32 [func_retval0+0], {[[PROXY0]], [[PROXY1]], [[PROXY2]], [[PROXY3]]};
|
||||
; PTX-WITH-DAG: st.param.v4.b32 [func_retval0+0], {[[LD0]], [[LD1]], [[LD2]], [[LD3]]};
|
||||
|
||||
%ret = call <4 x i32> @callee_vec_i32()
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
declare <2 x half> @callee_vec_f16()
|
||||
define <2 x half> @check_vec_f16() {
|
||||
; PTX-LABEL: check_vec_f16
|
||||
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
|
||||
; PTX-DAG: ld.param.b32 [[LD:%hh[0-9]+]], [retval0+0];
|
||||
; PTX-DAG: } // callseq {{[0-9]+}}
|
||||
|
||||
; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%hh[0-9]+]], [[LD]];
|
||||
; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]];
|
||||
; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]];
|
||||
|
||||
%ret = call <2 x half> @callee_vec_f16()
|
||||
ret <2 x half> %ret
|
||||
}
|
||||
|
||||
declare <2 x double> @callee_vec_f64()
|
||||
define <2 x double> @check_vec_f64() {
|
||||
; PTX-LABEL: check_vec_f64
|
||||
; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
|
||||
; PTX-DAG: ld.param.v2.f64 {[[LD0:%fd[0-9]+]], [[LD1:%fd[0-9]+]]}, [retval0+0];
|
||||
; PTX-DAG: } // callseq {{[0-9]+}}
|
||||
|
||||
; PTX-WITHOUT-DAG: mov.f64 [[PROXY0:%fd[0-9]+]], [[LD0]];
|
||||
; PTX-WITHOUT-DAG: mov.f64 [[PROXY1:%fd[0-9]+]], [[LD1]];
|
||||
; PTX-WITHOUT-DAG: st.param.v2.f64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]};
|
||||
; PTX-WITH-DAG: st.param.v2.f64 [func_retval0+0], {[[LD0]], [[LD1]]};
|
||||
|
||||
%ret = call <2 x double> @callee_vec_f64()
|
||||
ret <2 x double> %ret
|
||||
}
|
|
@ -1,10 +0,0 @@
|
|||
; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s
|
||||
; used to seqfault and now fails with a "Cannot select"
|
||||
|
||||
; CHECK: LLVM ERROR: Cannot select: {{t7|0x[0-9a-f]+}}: i32 = ExternalSymbol'__powidf2'
|
||||
define double @powi() {
|
||||
%1 = call double @llvm.powi.f64(double 1.000000e+00, i32 undef)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
declare double @llvm.powi.f64(double, i32) nounwind readnone
|
Loading…
Reference in New Issue