2017-08-12 00:42:09 +08:00
|
|
|
//===- AMDGPULibCalls.cpp -------------------------------------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2017-08-12 00:42:09 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
/// \file
|
2018-05-01 23:54:18 +08:00
|
|
|
/// This file does AMD library function optimizations.
|
2017-08-12 00:42:09 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "AMDGPU.h"
|
|
|
|
#include "AMDGPULibFunc.h"
|
2019-06-18 01:57:50 +08:00
|
|
|
#include "AMDGPUSubtarget.h"
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-14 05:15:01 +08:00
|
|
|
#include "llvm/ADT/StringRef.h"
|
|
|
|
#include "llvm/ADT/StringSet.h"
|
2017-08-12 00:42:09 +08:00
|
|
|
#include "llvm/Analysis/AliasAnalysis.h"
|
|
|
|
#include "llvm/Analysis/Loads.h"
|
|
|
|
#include "llvm/IR/Constants.h"
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-14 05:15:01 +08:00
|
|
|
#include "llvm/IR/Function.h"
|
|
|
|
#include "llvm/IR/IRBuilder.h"
|
2017-08-12 00:42:09 +08:00
|
|
|
#include "llvm/IR/Instructions.h"
|
2019-06-18 01:57:50 +08:00
|
|
|
#include "llvm/IR/Intrinsics.h"
|
2017-08-12 00:42:09 +08:00
|
|
|
#include "llvm/IR/LLVMContext.h"
|
|
|
|
#include "llvm/IR/Module.h"
|
|
|
|
#include "llvm/IR/ValueSymbolTable.h"
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-14 05:15:01 +08:00
|
|
|
#include "llvm/InitializePasses.h"
|
2017-08-12 00:42:09 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2019-10-10 04:00:43 +08:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2017-08-12 00:42:09 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2019-06-18 01:57:50 +08:00
|
|
|
#include "llvm/Target/TargetMachine.h"
|
2017-08-12 00:42:09 +08:00
|
|
|
#include <cmath>
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-14 05:15:01 +08:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "amdgpu-simplifylib"
|
2017-08-12 00:42:09 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
// Hidden command-line flag "amdgpu-prelink": enables the pre-link mode
// optimizations. Off by default.
static cl::opt<bool> EnablePreLink("amdgpu-prelink",
  cl::desc("Enable pre-link mode optimizations"),
  cl::init(false),
  cl::Hidden);
|
|
|
|
|
|
|
|
// Hidden command-line list "amdgpu-use-native": a comma separated list of
// function names to replace with their native versions, or "all". The value
// is optional (cl::ValueOptional).
static cl::list<std::string> UseNative("amdgpu-use-native",
  cl::desc("Comma separated list of functions to replace with native, or all"),
  cl::CommaSeparated, cl::ValueOptional,
  cl::Hidden);
|
|
|
|
|
2019-10-10 04:00:43 +08:00
|
|
|
// Short aliases for the numeric constants in the `numbers` namespace; they are
// used by the constant-folding tables defined further down in this file.
#define MATH_PI      numbers::pi
#define MATH_E       numbers::e
#define MATH_SQRT2   numbers::sqrt2
#define MATH_SQRT1_2 numbers::inv_sqrt2
|
2017-08-12 00:42:09 +08:00
|
|
|
|
|
|
|
namespace llvm {
|
|
|
|
|
|
|
|
class AMDGPULibCalls {
|
|
|
|
private:
|
|
|
|
|
|
|
|
typedef llvm::AMDGPULibFunc FuncInfo;
|
|
|
|
|
2019-06-18 01:57:50 +08:00
|
|
|
const TargetMachine *TM;
|
|
|
|
|
2017-08-12 00:42:09 +08:00
|
|
|
// -fuse-native.
|
|
|
|
bool AllNative = false;
|
|
|
|
|
|
|
|
bool useNativeFunc(const StringRef F) const;
|
|
|
|
|
|
|
|
// Return a pointer (pointer expr) to the function if function defintion with
|
|
|
|
// "FuncName" exists. It may create a new function prototype in pre-link mode.
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
|
2017-08-12 00:42:09 +08:00
|
|
|
|
|
|
|
// Replace a normal function with its native version.
|
|
|
|
bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
bool parseFunctionName(const StringRef& FMangledName,
|
|
|
|
FuncInfo *FInfo=nullptr /*out*/);
|
|
|
|
|
|
|
|
bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
/* Specialized optimizations */
|
|
|
|
|
|
|
|
// recip (half or native)
|
|
|
|
bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// divide (half or native)
|
|
|
|
bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// pow/powr/pown
|
|
|
|
bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// rootn
|
|
|
|
bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// fma/mad
|
|
|
|
bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// -fuse-native for sincos
|
|
|
|
bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// evaluate calls if calls' arguments are constants.
|
|
|
|
bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
|
|
|
|
double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
|
|
|
|
bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// exp
|
|
|
|
bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// exp2
|
|
|
|
bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// exp10
|
|
|
|
bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// log
|
|
|
|
bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// log2
|
|
|
|
bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// log10
|
|
|
|
bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// sqrt
|
|
|
|
bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
|
|
|
|
|
|
|
|
// sin/cos
|
|
|
|
bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
|
|
|
|
|
2017-09-06 08:30:27 +08:00
|
|
|
// __read_pipe/__write_pipe
|
|
|
|
bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
|
|
|
|
|
2019-06-18 01:57:50 +08:00
|
|
|
// llvm.amdgcn.wavefrontsize
|
|
|
|
bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
|
|
|
|
|
2017-08-12 00:42:09 +08:00
|
|
|
// Get insertion point at entry.
|
|
|
|
BasicBlock::iterator getEntryIns(CallInst * UI);
|
|
|
|
// Insert an Alloc instruction.
|
|
|
|
AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
|
|
|
|
// Get a scalar native builtin signle argument FP function
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
|
2017-08-12 00:42:09 +08:00
|
|
|
|
|
|
|
protected:
|
|
|
|
CallInst *CI;
|
|
|
|
|
|
|
|
bool isUnsafeMath(const CallInst *CI) const;
|
|
|
|
|
|
|
|
void replaceCall(Value *With) {
|
|
|
|
CI->replaceAllUsesWith(With);
|
|
|
|
CI->eraseFromParent();
|
|
|
|
}
|
|
|
|
|
|
|
|
public:
|
2019-06-18 01:57:50 +08:00
|
|
|
AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
|
|
|
|
|
2017-08-12 00:42:09 +08:00
|
|
|
bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
|
|
|
|
|
|
|
|
void initNativeFuncs();
|
|
|
|
|
|
|
|
// Replace a normal math function call with that native version
|
|
|
|
bool useNative(CallInst *CI);
|
|
|
|
};
|
|
|
|
|
|
|
|
} // end llvm namespace
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
class AMDGPUSimplifyLibCalls : public FunctionPass {
|
|
|
|
|
2019-06-18 01:57:50 +08:00
|
|
|
AMDGPULibCalls Simplifier;
|
|
|
|
|
2017-08-12 00:42:09 +08:00
|
|
|
public:
|
|
|
|
static char ID; // Pass identification
|
|
|
|
|
2019-12-05 17:45:32 +08:00
|
|
|
AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
|
|
|
|
: FunctionPass(ID), Simplifier(TM) {
|
2017-08-12 00:42:09 +08:00
|
|
|
initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
|
|
|
|
}
|
|
|
|
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
|
|
AU.addRequired<AAResultsWrapperPass>();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool runOnFunction(Function &M) override;
|
|
|
|
};
|
|
|
|
|
|
|
|
class AMDGPUUseNativeCalls : public FunctionPass {
|
|
|
|
|
|
|
|
AMDGPULibCalls Simplifier;
|
|
|
|
|
|
|
|
public:
|
|
|
|
static char ID; // Pass identification
|
|
|
|
|
|
|
|
AMDGPUUseNativeCalls() : FunctionPass(ID) {
|
|
|
|
initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
|
|
|
|
Simplifier.initNativeFuncs();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool runOnFunction(Function &F) override;
|
|
|
|
};
|
|
|
|
|
|
|
|
} // end anonymous namespace.
|
|
|
|
|
|
|
|
char AMDGPUSimplifyLibCalls::ID = 0;
|
|
|
|
char AMDGPUUseNativeCalls::ID = 0;
|
|
|
|
|
|
|
|
INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
|
|
|
|
"Simplify well-known AMD library calls", false, false)
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
|
|
|
|
INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
|
|
|
|
"Simplify well-known AMD library calls", false, false)
|
|
|
|
|
|
|
|
INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
|
|
|
|
"Replace builtin math calls with that native versions.",
|
|
|
|
false, false)
|
|
|
|
|
|
|
|
template <typename IRB>
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
|
2017-11-24 22:55:41 +08:00
|
|
|
const Twine &Name = "") {
|
2017-08-12 00:42:09 +08:00
|
|
|
CallInst *R = B.CreateCall(Callee, Arg, Name);
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
if (Function *F = dyn_cast<Function>(Callee.getCallee()))
|
2017-08-12 00:42:09 +08:00
|
|
|
R->setCallingConv(F->getCallingConv());
|
|
|
|
return R;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename IRB>
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
|
|
|
|
Value *Arg2, const Twine &Name = "") {
|
2017-08-12 00:42:09 +08:00
|
|
|
CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
if (Function *F = dyn_cast<Function>(Callee.getCallee()))
|
2017-08-12 00:42:09 +08:00
|
|
|
R->setCallingConv(F->getCallingConv());
|
|
|
|
return R;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Data structures for table-driven optimizations.
// FuncTbl works for both f32 and f64 functions with 1 input argument

// One known {result, input} pair of a single-argument function. Note the
// member order: the result comes first, the input second.
struct TableEntry {
  double result;
  double input;
};
|
|
|
|
|
|
|
|
/* a list of {result, input} */
|
|
|
|
static const TableEntry tbl_acos[] = {
|
2019-10-10 04:00:43 +08:00
|
|
|
{MATH_PI / 2.0, 0.0},
|
|
|
|
{MATH_PI / 2.0, -0.0},
|
2017-08-12 00:42:09 +08:00
|
|
|
{0.0, 1.0},
|
|
|
|
{MATH_PI, -1.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_acosh[] = {
|
|
|
|
{0.0, 1.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_acospi[] = {
|
|
|
|
{0.5, 0.0},
|
|
|
|
{0.5, -0.0},
|
|
|
|
{0.0, 1.0},
|
|
|
|
{1.0, -1.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_asin[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0},
|
2019-10-10 04:00:43 +08:00
|
|
|
{MATH_PI / 2.0, 1.0},
|
|
|
|
{-MATH_PI / 2.0, -1.0}
|
2017-08-12 00:42:09 +08:00
|
|
|
};
|
|
|
|
static const TableEntry tbl_asinh[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_asinpi[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0},
|
|
|
|
{0.5, 1.0},
|
|
|
|
{-0.5, -1.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_atan[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0},
|
2019-10-10 04:00:43 +08:00
|
|
|
{MATH_PI / 4.0, 1.0},
|
|
|
|
{-MATH_PI / 4.0, -1.0}
|
2017-08-12 00:42:09 +08:00
|
|
|
};
|
|
|
|
static const TableEntry tbl_atanh[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_atanpi[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0},
|
|
|
|
{0.25, 1.0},
|
|
|
|
{-0.25, -1.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_cbrt[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0},
|
|
|
|
{1.0, 1.0},
|
|
|
|
{-1.0, -1.0},
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_cos[] = {
|
|
|
|
{1.0, 0.0},
|
|
|
|
{1.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_cosh[] = {
|
|
|
|
{1.0, 0.0},
|
|
|
|
{1.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_cospi[] = {
|
|
|
|
{1.0, 0.0},
|
|
|
|
{1.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_erfc[] = {
|
|
|
|
{1.0, 0.0},
|
|
|
|
{1.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_erf[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_exp[] = {
|
|
|
|
{1.0, 0.0},
|
|
|
|
{1.0, -0.0},
|
|
|
|
{MATH_E, 1.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_exp2[] = {
|
|
|
|
{1.0, 0.0},
|
|
|
|
{1.0, -0.0},
|
|
|
|
{2.0, 1.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_exp10[] = {
|
|
|
|
{1.0, 0.0},
|
|
|
|
{1.0, -0.0},
|
|
|
|
{10.0, 1.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_expm1[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_log[] = {
|
|
|
|
{0.0, 1.0},
|
|
|
|
{1.0, MATH_E}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_log2[] = {
|
|
|
|
{0.0, 1.0},
|
|
|
|
{1.0, 2.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_log10[] = {
|
|
|
|
{0.0, 1.0},
|
|
|
|
{1.0, 10.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_rsqrt[] = {
|
|
|
|
{1.0, 1.0},
|
2019-10-10 04:00:43 +08:00
|
|
|
{MATH_SQRT1_2, 2.0}
|
2017-08-12 00:42:09 +08:00
|
|
|
};
|
|
|
|
static const TableEntry tbl_sin[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_sinh[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_sinpi[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_sqrt[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{1.0, 1.0},
|
|
|
|
{MATH_SQRT2, 2.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_tan[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_tanh[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_tanpi[] = {
|
|
|
|
{0.0, 0.0},
|
|
|
|
{-0.0, -0.0}
|
|
|
|
};
|
|
|
|
static const TableEntry tbl_tgamma[] = {
|
|
|
|
{1.0, 1.0},
|
|
|
|
{1.0, 2.0},
|
|
|
|
{2.0, 3.0},
|
|
|
|
{6.0, 4.0}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Returns true when \p id is one of the function ids listed in the switch
/// below, and false for every other id.
/// NOTE(review): presumably these are the functions for which a native
/// variant exists -- confirm against the library function table.
static bool HasNative(AMDGPULibFunc::EFuncId id) {
  switch(id) {
  case AMDGPULibFunc::EI_DIVIDE:
  case AMDGPULibFunc::EI_COS:
  case AMDGPULibFunc::EI_EXP:
  case AMDGPULibFunc::EI_EXP2:
  case AMDGPULibFunc::EI_EXP10:
  case AMDGPULibFunc::EI_LOG:
  case AMDGPULibFunc::EI_LOG2:
  case AMDGPULibFunc::EI_LOG10:
  case AMDGPULibFunc::EI_POWR:
  case AMDGPULibFunc::EI_RECIP:
  case AMDGPULibFunc::EI_RSQRT:
  case AMDGPULibFunc::EI_SIN:
  case AMDGPULibFunc::EI_SINCOS:
  case AMDGPULibFunc::EI_SQRT:
  case AMDGPULibFunc::EI_TAN:
    return true;
  default:;
  }
  return false;
}
|
|
|
|
|
|
|
|
struct TableRef {
|
|
|
|
size_t size;
|
|
|
|
const TableEntry *table; // variable size: from 0 to (size - 1)
|
|
|
|
|
|
|
|
TableRef() : size(0), table(nullptr) {}
|
|
|
|
|
|
|
|
template <size_t N>
|
|
|
|
TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Map a library function id to its table of known {result, input} pairs.
///
/// The EI_N* ids handled here (EI_NCOS, EI_NEXP2, EI_NLOG2, EI_NRSQRT,
/// EI_NSIN, EI_NSQRT) share the table of the corresponding non-N id.
/// \returns a reference to the matching table, or an empty TableRef when
/// \p id has no table.
static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
  switch(id) {
  case AMDGPULibFunc::EI_ACOS:    return TableRef(tbl_acos);
  case AMDGPULibFunc::EI_ACOSH:   return TableRef(tbl_acosh);
  case AMDGPULibFunc::EI_ACOSPI:  return TableRef(tbl_acospi);
  case AMDGPULibFunc::EI_ASIN:    return TableRef(tbl_asin);
  case AMDGPULibFunc::EI_ASINH:   return TableRef(tbl_asinh);
  case AMDGPULibFunc::EI_ASINPI:  return TableRef(tbl_asinpi);
  case AMDGPULibFunc::EI_ATAN:    return TableRef(tbl_atan);
  case AMDGPULibFunc::EI_ATANH:   return TableRef(tbl_atanh);
  case AMDGPULibFunc::EI_ATANPI:  return TableRef(tbl_atanpi);
  case AMDGPULibFunc::EI_CBRT:    return TableRef(tbl_cbrt);
  case AMDGPULibFunc::EI_NCOS:
  case AMDGPULibFunc::EI_COS:     return TableRef(tbl_cos);
  case AMDGPULibFunc::EI_COSH:    return TableRef(tbl_cosh);
  case AMDGPULibFunc::EI_COSPI:   return TableRef(tbl_cospi);
  case AMDGPULibFunc::EI_ERFC:    return TableRef(tbl_erfc);
  case AMDGPULibFunc::EI_ERF:     return TableRef(tbl_erf);
  case AMDGPULibFunc::EI_EXP:     return TableRef(tbl_exp);
  case AMDGPULibFunc::EI_NEXP2:
  case AMDGPULibFunc::EI_EXP2:    return TableRef(tbl_exp2);
  case AMDGPULibFunc::EI_EXP10:   return TableRef(tbl_exp10);
  case AMDGPULibFunc::EI_EXPM1:   return TableRef(tbl_expm1);
  case AMDGPULibFunc::EI_LOG:     return TableRef(tbl_log);
  case AMDGPULibFunc::EI_NLOG2:
  case AMDGPULibFunc::EI_LOG2:    return TableRef(tbl_log2);
  case AMDGPULibFunc::EI_LOG10:   return TableRef(tbl_log10);
  case AMDGPULibFunc::EI_NRSQRT:
  case AMDGPULibFunc::EI_RSQRT:   return TableRef(tbl_rsqrt);
  case AMDGPULibFunc::EI_NSIN:
  case AMDGPULibFunc::EI_SIN:     return TableRef(tbl_sin);
  case AMDGPULibFunc::EI_SINH:    return TableRef(tbl_sinh);
  case AMDGPULibFunc::EI_SINPI:   return TableRef(tbl_sinpi);
  case AMDGPULibFunc::EI_NSQRT:
  case AMDGPULibFunc::EI_SQRT:    return TableRef(tbl_sqrt);
  case AMDGPULibFunc::EI_TAN:     return TableRef(tbl_tan);
  case AMDGPULibFunc::EI_TANH:    return TableRef(tbl_tanh);
  case AMDGPULibFunc::EI_TANPI:   return TableRef(tbl_tanpi);
  case AMDGPULibFunc::EI_TGAMMA:  return TableRef(tbl_tgamma);
  default:;
  }
  return TableRef();
}
|
|
|
|
|
|
|
|
static inline int getVecSize(const AMDGPULibFunc& FInfo) {
|
2017-09-06 08:30:27 +08:00
|
|
|
return FInfo.getLeads()[0].VectorSize;
|
2017-08-12 00:42:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the ArgType field of the first entry of \p FInfo's leads, converted
/// to AMDGPULibFunc::EType.
static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo) {
  const auto RawType = FInfo.getLeads()[0].ArgType;
  return static_cast<AMDGPULibFunc::EType>(RawType);
}
|
|
|
|
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaration with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
/// Obtains the callee described by \p fInfo in module \p M.
///
/// When EnablePreLink is set the function is external, so it is safe to use
/// getOrInsertFunction() at this stage. Otherwise the request is forwarded to
/// AMDGPULibFunc::getFunction(); callers in this file test the returned
/// callee before using it.
FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
  if (EnablePreLink)
    return AMDGPULibFunc::getOrInsertFunction(M, fInfo);
  return AMDGPULibFunc::getFunction(M, fInfo);
}
|
|
|
|
|
|
|
|
bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
|
|
|
|
FuncInfo *FInfo) {
|
|
|
|
return AMDGPULibFunc::parse(FMangledName, *FInfo);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
|
|
|
|
if (auto Op = dyn_cast<FPMathOperator>(CI))
|
[IR] redefine 'UnsafeAlgebra' / 'reassoc' fast-math-flags and add 'trans' fast-math-flag
As discussed on llvm-dev:
http://lists.llvm.org/pipermail/llvm-dev/2016-November/107104.html
and again more recently:
http://lists.llvm.org/pipermail/llvm-dev/2017-October/118118.html
...this is a step in cleaning up our fast-math-flags implementation in IR to better match
the capabilities of both clang's user-visible flags and the backend's flags for SDNode.
As proposed in the above threads, we're replacing the 'UnsafeAlgebra' bit (which had the
'umbrella' meaning that all flags are set) with a new bit that only applies to algebraic
reassociation - 'AllowReassoc'.
We're also adding a bit to allow approximations for library functions called 'ApproxFunc'
(this was initially proposed as 'libm' or similar).
...and we're out of bits. 7 bits ought to be enough for anyone, right? :) FWIW, I did
look at getting this out of SubclassOptionalData via SubclassData (spacious 16-bits),
but that's apparently already used for other purposes. Also, I don't think we can just
add a field to FPMathOperator because Operator is not intended to be instantiated.
We'll defer movement of FMF to another day.
We keep the 'fast' keyword. I thought about removing that, but seeing IR like this:
%f.fast = fadd reassoc nnan ninf nsz arcp contract afn float %op1, %op2
...made me think we want to keep the shortcut synonym.
Finally, this change is binary incompatible with existing IR as seen in the
compatibility tests. This statement:
"Newer releases can ignore features from older releases, but they cannot miscompile
them. For example, if nsw is ever replaced with something else, dropping it would be
a valid way to upgrade the IR."
( http://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility )
...provides the flexibility we want to make this change without requiring a new IR
version. Ie, we're not loosening the FP strictness of existing IR. At worst, we will
fail to optimize some previously 'fast' code because it's no longer recognized as
'fast'. This should get fixed as we audit/squash all of the uses of 'isFast()'.
Note: an inter-dependent clang commit to use the new API name should closely follow
commit.
Differential Revision: https://reviews.llvm.org/D39304
llvm-svn: 317488
2017-11-07 00:27:15 +08:00
|
|
|
if (Op->isFast())
|
2017-08-12 00:42:09 +08:00
|
|
|
return true;
|
|
|
|
const Function *F = CI->getParent()->getParent();
|
|
|
|
Attribute Attr = F->getFnAttribute("unsafe-fp-math");
|
|
|
|
return Attr.getValueAsString() == "true";
|
|
|
|
}
|
|
|
|
|
|
|
|
bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
|
2020-07-24 14:13:44 +08:00
|
|
|
return AllNative || llvm::is_contained(UseNative, F);
|
2017-08-12 00:42:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Computes AllNative. It becomes true when useNativeFunc("all") holds, or
/// when UseNative occurred at least once and contains exactly one entry that
/// is the empty string.
void AMDGPULibCalls::initNativeFuncs() {
  bool All = useNativeFunc("all");
  if (!All) {
    // UseNative was given, but with a single empty value.
    All = UseNative.getNumOccurrences() && UseNative.size() == 1 &&
          UseNative.begin()->empty();
  }
  AllNative = All;
}
|
|
|
|
|
|
|
|
/// Splits the sincos call \p aCI into a native sin call and a native cos call.
///
/// Only done when both "sin" and "cos" are selected via useNativeFunc() and
/// both native callees can be obtained. The cos result is stored through
/// operand 1 of \p aCI and the sin result is handed to replaceCall().
/// Returns true if the split was performed.
bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
  const bool WantNativeSin = useNativeFunc("sin");
  const bool WantNativeCos = useNativeFunc("cos");
  if (!WantNativeSin || !WantNativeCos)
    return false;

  Module *Mod = aCI->getModule();
  Value *Arg = aCI->getArgOperand(0);

  // Describe the native functions with the same argument type and vector
  // width as the original sincos.
  AMDGPULibFunc NativeInfo;
  NativeInfo.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
  NativeInfo.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;

  NativeInfo.setPrefix(AMDGPULibFunc::NATIVE);
  NativeInfo.setId(AMDGPULibFunc::EI_SIN);
  FunctionCallee SinFn = getFunction(Mod, NativeInfo);

  NativeInfo.setPrefix(AMDGPULibFunc::NATIVE);
  NativeInfo.setId(AMDGPULibFunc::EI_COS);
  FunctionCallee CosFn = getFunction(Mod, NativeInfo);

  if (!SinFn || !CosFn)
    return false;

  // Both new calls and the store are inserted before the original call.
  Value *SinVal = CallInst::Create(SinFn, Arg, "splitsin", aCI);
  Value *CosVal = CallInst::Create(CosFn, Arg, "splitcos", aCI);
  new StoreInst(CosVal, aCI->getArgOperand(1), aCI);

  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
                                      << " with native version of sin/cos");

  replaceCall(SinVal);
  return true;
}
|
|
|
|
|
|
|
|
/// Retargets the library call \p aCI to its native_ counterpart when allowed.
///
/// The call is left alone (and false is returned) when it is an indirect call,
/// when the callee name does not parse as a mangled, prefix-less library
/// function, when the argument type is F64, when HasNative() rejects the
/// function id, or when the function is not selected through AllNative /
/// useNativeFunc(). sincos is delegated to sincosUseNative().
/// Returns true if the call was changed.
bool AMDGPULibCalls::useNative(CallInst *aCI) {
  CI = aCI;
  Function *Callee = aCI->getCalledFunction();

  // Indirect calls have no statically known callee and therefore no name to
  // parse; fold() rejects them the same way.
  if (!Callee)
    return false;

  FuncInfo FInfo;
  if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
      FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
      getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
      !(AllNative || useNativeFunc(FInfo.getName()))) {
    return false;
  }

  if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
    return sincosUseNative(aCI, FInfo);

  FInfo.setPrefix(AMDGPULibFunc::NATIVE);
  FunctionCallee F = getFunction(aCI->getModule(), FInfo);
  if (!F)
    return false;

  // Report the call as it looked before retargeting, so the message really
  // shows what is being replaced.
  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
                                      << " with native version");
  aCI->setCalledFunction(F);
  return true;
}
|
|
|
|
|
2017-09-06 08:30:27 +08:00
|
|
|
// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
|
|
|
|
// builtin, with appended type size and alignment arguments, where 2 or 4
|
|
|
|
// indicates the original number of arguments. The library has optimized version
|
|
|
|
// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
|
|
|
|
// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
|
|
|
|
// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
|
|
|
|
// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
|
|
|
|
bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
|
|
|
|
FuncInfo &FInfo) {
|
|
|
|
auto *Callee = CI->getCalledFunction();
|
|
|
|
if (!Callee->isDeclaration())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
|
|
|
|
auto *M = Callee->getParent();
|
|
|
|
auto &Ctx = M->getContext();
|
2020-01-29 03:23:46 +08:00
|
|
|
std::string Name = std::string(Callee->getName());
|
2017-09-06 08:30:27 +08:00
|
|
|
auto NumArg = CI->getNumArgOperands();
|
|
|
|
if (NumArg != 4 && NumArg != 6)
|
|
|
|
return false;
|
|
|
|
auto *PacketSize = CI->getArgOperand(NumArg - 2);
|
|
|
|
auto *PacketAlign = CI->getArgOperand(NumArg - 1);
|
|
|
|
if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
|
|
|
|
return false;
|
|
|
|
unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
|
2020-07-03 16:06:43 +08:00
|
|
|
Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
|
|
|
|
if (Alignment != Size)
|
2017-09-06 08:30:27 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
Type *PtrElemTy;
|
|
|
|
if (Size <= 8)
|
|
|
|
PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
|
|
|
|
else
|
[SVE] Eliminate calls to default-false VectorType::get() from AMDGPU
Reviewers: efriedma, david-arm, fpetrogalli, arsenm
Reviewed By: david-arm
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, tschuett, hiraditya, rkruppe, psnobl, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80328
2020-05-30 08:44:51 +08:00
|
|
|
PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
|
2017-09-06 08:30:27 +08:00
|
|
|
unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
|
|
|
|
auto PtrArg = CI->getArgOperand(PtrArgLoc);
|
|
|
|
unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
|
|
|
|
auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
|
|
|
|
|
|
|
|
SmallVector<llvm::Type *, 6> ArgTys;
|
|
|
|
for (unsigned I = 0; I != PtrArgLoc; ++I)
|
|
|
|
ArgTys.push_back(CI->getArgOperand(I)->getType());
|
|
|
|
ArgTys.push_back(PtrTy);
|
|
|
|
|
|
|
|
Name = Name + "_" + std::to_string(Size);
|
|
|
|
auto *FTy = FunctionType::get(Callee->getReturnType(),
|
|
|
|
ArrayRef<Type *>(ArgTys), false);
|
|
|
|
AMDGPULibFunc NewLibFunc(Name, FTy);
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
|
2017-09-06 08:30:27 +08:00
|
|
|
if (!F)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
|
|
|
|
SmallVector<Value *, 6> Args;
|
|
|
|
for (unsigned I = 0; I != PtrArgLoc; ++I)
|
|
|
|
Args.push_back(CI->getArgOperand(I));
|
|
|
|
Args.push_back(BCast);
|
|
|
|
|
|
|
|
auto *NCI = B.CreateCall(F, Args);
|
|
|
|
NCI->setAttributes(CI->getAttributes());
|
|
|
|
CI->replaceAllUsesWith(NCI);
|
|
|
|
CI->dropAllReferences();
|
|
|
|
CI->eraseFromParent();
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-08-12 00:42:09 +08:00
|
|
|
// This function returns false if no change; return true otherwise.
|
|
|
|
bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
|
|
|
|
this->CI = CI;
|
|
|
|
Function *Callee = CI->getCalledFunction();
|
|
|
|
|
|
|
|
// Ignore indirect calls.
|
|
|
|
if (Callee == 0) return false;
|
|
|
|
|
|
|
|
BasicBlock *BB = CI->getParent();
|
|
|
|
LLVMContext &Context = CI->getParent()->getContext();
|
|
|
|
IRBuilder<> B(Context);
|
|
|
|
|
|
|
|
// Set the builder to the instruction after the call.
|
|
|
|
B.SetInsertPoint(BB, CI->getIterator());
|
|
|
|
|
|
|
|
// Copy fast flags from the original call.
|
|
|
|
if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
|
|
|
|
B.setFastMathFlags(FPOp->getFastMathFlags());
|
|
|
|
|
2019-06-18 01:57:50 +08:00
|
|
|
switch (Callee->getIntrinsicID()) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case Intrinsic::amdgcn_wavefrontsize:
|
|
|
|
return !EnablePreLink && fold_wavefrontsize(CI, B);
|
|
|
|
}
|
|
|
|
|
|
|
|
FuncInfo FInfo;
|
|
|
|
if (!parseFunctionName(Callee->getName(), &FInfo))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Further check the number of arguments to see if they match.
|
|
|
|
if (CI->getNumArgOperands() != FInfo.getNumArgs())
|
|
|
|
return false;
|
|
|
|
|
2017-08-12 00:42:09 +08:00
|
|
|
if (TDOFold(CI, FInfo))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// Under unsafe-math, evaluate calls if possible.
|
|
|
|
// According to Brian Sumner, we can do this for all f32 function calls
|
|
|
|
// using host's double function calls.
|
|
|
|
if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// Specilized optimizations for each function call
|
|
|
|
switch (FInfo.getId()) {
|
|
|
|
case AMDGPULibFunc::EI_RECIP:
|
|
|
|
// skip vector function
|
|
|
|
assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
|
|
|
|
FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
|
|
|
|
"recip must be an either native or half function");
|
|
|
|
return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
|
|
|
|
|
|
|
|
case AMDGPULibFunc::EI_DIVIDE:
|
|
|
|
// skip vector function
|
|
|
|
assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
|
|
|
|
FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
|
|
|
|
"divide must be an either native or half function");
|
|
|
|
return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
|
|
|
|
|
|
|
|
case AMDGPULibFunc::EI_POW:
|
|
|
|
case AMDGPULibFunc::EI_POWR:
|
|
|
|
case AMDGPULibFunc::EI_POWN:
|
|
|
|
return fold_pow(CI, B, FInfo);
|
|
|
|
|
|
|
|
case AMDGPULibFunc::EI_ROOTN:
|
|
|
|
// skip vector function
|
|
|
|
return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
|
|
|
|
|
|
|
|
case AMDGPULibFunc::EI_FMA:
|
|
|
|
case AMDGPULibFunc::EI_MAD:
|
|
|
|
case AMDGPULibFunc::EI_NFMA:
|
|
|
|
// skip vector function
|
|
|
|
return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
|
|
|
|
|
|
|
|
case AMDGPULibFunc::EI_SQRT:
|
|
|
|
return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
|
|
|
|
case AMDGPULibFunc::EI_COS:
|
|
|
|
case AMDGPULibFunc::EI_SIN:
|
|
|
|
if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
|
|
|
|
getArgType(FInfo) == AMDGPULibFunc::F64)
|
|
|
|
&& (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
|
|
|
|
return fold_sincos(CI, B, AA);
|
|
|
|
|
|
|
|
break;
|
2017-09-06 08:30:27 +08:00
|
|
|
case AMDGPULibFunc::EI_READ_PIPE_2:
|
|
|
|
case AMDGPULibFunc::EI_READ_PIPE_4:
|
|
|
|
case AMDGPULibFunc::EI_WRITE_PIPE_2:
|
|
|
|
case AMDGPULibFunc::EI_WRITE_PIPE_4:
|
|
|
|
return fold_read_write_pipe(CI, B, FInfo);
|
2017-08-12 00:42:09 +08:00
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Table-driven folding: replaces a call whose first operand is a constant
/// (scalar, or a constant data vector) by the precomputed result when every
/// input value has an exact match in the table returned by getOptTable() for
/// this function id. Returns true if the call was replaced.
///
/// Result constants are created with the type of the matched input constant,
/// so the vector path yields the operand's element type for any
/// floating-point element type rather than assuming f32 or f64.
bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
  // Table-Driven optimization
  const TableRef tr = getOptTable(FInfo.getId());
  if (tr.size == 0)
    return false;

  int const sz = (int)tr.size;
  const TableEntry *const ftbl = tr.table;
  Value *opr0 = CI->getArgOperand(0);

  // Returns the first table entry whose input exactly equals C, or nullptr.
  auto findEntry = [&](const ConstantFP *C) -> const TableEntry * {
    for (int i = 0; i < sz; ++i)
      if (C->isExactlyValue(ftbl[i].input))
        return &ftbl[i];
    return nullptr;
  };

  if (getVecSize(FInfo) > 1) {
    ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0);
    if (!CV)
      return false;

    SmallVector<Constant *, 16> Folded;
    for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
      ConstantFP *eltval =
          dyn_cast<ConstantFP>(CV->getElementAsConstant((unsigned)eltNo));
      assert(eltval && "Non-FP arguments in math function!");
      const TableEntry *Hit = findEntry(eltval);
      if (!Hit) {
        // This vector constant is not handled yet.
        return false;
      }
      // Same construction as the scalar path below: the table result is
      // converted to the element's own floating-point type.
      Folded.push_back(ConstantFP::get(eltval->getType(), Hit->result));
    }

    Constant *nval = ConstantVector::get(Folded);
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
    replaceCall(nval);
    return true;
  }

  // Scalar version
  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
    if (const TableEntry *Hit = findEntry(CF)) {
      Value *nval = ConstantFP::get(CF->getType(), Hit->result);
      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
      replaceCall(nval);
      return true;
    }
  }

  return false;
}
|
|
|
|
|
|
|
|
/// Retargets \p CI to the native_ variant of the function described by
/// \p FInfo. Applies only to prefix-less f32 functions accepted by
/// HasNative(), and only if the native callee can be obtained through
/// getFunction(). Returns true if the callee was changed.
bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
  Module *M = CI->getModule();

  const bool Eligible = getArgType(FInfo) == AMDGPULibFunc::F32 &&
                        FInfo.getPrefix() == AMDGPULibFunc::NOPFX &&
                        HasNative(FInfo.getId());
  if (!Eligible)
    return false;

  AMDGPULibFunc NativeInfo = FInfo;
  NativeInfo.setPrefix(AMDGPULibFunc::NATIVE);

  FunctionCallee NativeFn = getFunction(M, NativeInfo);
  if (!NativeFn)
    return false;

  // Log the call before and after the callee is swapped.
  LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
  CI->setCalledFunction(NativeFn);
  LLVM_DEBUG(dbgs() << *CI << '\n');

  return true;
}
|
|
|
|
|
|
|
|
// [native_]half_recip(c) ==> 1.0/c
|
|
|
|
bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
|
|
|
|
const FuncInfo &FInfo) {
|
|
|
|
Value *opr0 = CI->getArgOperand(0);
|
|
|
|
if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
|
|
|
|
// Just create a normal div. Later, InstCombine will be able
|
|
|
|
// to compute the divide into a constant (avoid check float infinity
|
|
|
|
// or subnormal at this point).
|
|
|
|
Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
|
|
|
|
opr0,
|
|
|
|
"recip2div");
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
replaceCall(nval);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// [native_]half_divide(x, c) ==> x/c
|
|
|
|
bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
|
|
|
|
const FuncInfo &FInfo) {
|
|
|
|
Value *opr0 = CI->getArgOperand(0);
|
|
|
|
Value *opr1 = CI->getArgOperand(1);
|
|
|
|
ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
|
|
|
|
ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
|
|
|
|
|
|
|
|
if ((CF0 && CF1) || // both are constants
|
|
|
|
(CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
|
|
|
|
// CF1 is constant && f32 divide
|
|
|
|
{
|
|
|
|
Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
|
|
|
|
opr1, "__div2recip");
|
|
|
|
Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
|
|
|
|
replaceCall(nval);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace llvm {
/// Base-2 logarithm of \p V. Uses the C library's log2() when the feature-test
/// macros below advertise it; otherwise computes log(V) / ln(2).
static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
  const double Result = ::log2(V);
#else
  const double Result = log(V) / numbers::ln2;
#endif
  return Result;
}
} // end namespace llvm
|
|
|
|
|
|
|
|
bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
|
|
|
|
const FuncInfo &FInfo) {
|
|
|
|
assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
|
|
|
|
FInfo.getId() == AMDGPULibFunc::EI_POWR ||
|
|
|
|
FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
|
|
|
|
"fold_pow: encounter a wrong function call");
|
|
|
|
|
|
|
|
Value *opr0, *opr1;
|
|
|
|
ConstantFP *CF;
|
|
|
|
ConstantInt *CINT;
|
|
|
|
ConstantAggregateZero *CZero;
|
|
|
|
Type *eltType;
|
|
|
|
|
|
|
|
opr0 = CI->getArgOperand(0);
|
|
|
|
opr1 = CI->getArgOperand(1);
|
|
|
|
CZero = dyn_cast<ConstantAggregateZero>(opr1);
|
|
|
|
if (getVecSize(FInfo) == 1) {
|
|
|
|
eltType = opr0->getType();
|
|
|
|
CF = dyn_cast<ConstantFP>(opr1);
|
|
|
|
CINT = dyn_cast<ConstantInt>(opr1);
|
|
|
|
} else {
|
|
|
|
VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
|
|
|
|
assert(VTy && "Oprand of vector function should be of vectortype");
|
|
|
|
eltType = VTy->getElementType();
|
|
|
|
ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
|
|
|
|
|
|
|
|
// Now, only Handle vector const whose elements have the same value.
|
|
|
|
CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
|
|
|
|
CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
// No unsafe math , no constant argument, do nothing
|
|
|
|
if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// 0x1111111 means that we don't do anything for this call.
|
|
|
|
int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
|
|
|
|
|
|
|
|
if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
|
|
|
|
// pow/powr/pown(x, 0) == 1
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
Constant *cnval = ConstantFP::get(eltType, 1.0);
|
|
|
|
if (getVecSize(FInfo) > 1) {
|
|
|
|
cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
|
|
|
|
}
|
|
|
|
replaceCall(cnval);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
|
|
|
|
// pow/powr/pown(x, 1.0) = x
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
replaceCall(opr0);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
|
|
|
|
// pow/powr/pown(x, 2.0) = x*x
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
|
|
|
|
<< "\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
|
|
|
|
replaceCall(nval);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
|
|
|
|
// pow/powr/pown(x, -1.0) = 1.0/x
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
Constant *cnval = ConstantFP::get(eltType, 1.0);
|
|
|
|
if (getVecSize(FInfo) > 1) {
|
|
|
|
cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
|
|
|
|
}
|
|
|
|
Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
|
|
|
|
replaceCall(nval);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
Module *M = CI->getModule();
|
|
|
|
if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
|
|
|
|
// pow[r](x, [-]0.5) = sqrt(x)
|
|
|
|
bool issqrt = CF->isExactlyValue(0.5);
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
if (FunctionCallee FPExpr =
|
|
|
|
getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
|
|
|
|
: AMDGPULibFunc::EI_RSQRT,
|
|
|
|
FInfo))) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
|
|
|
|
<< FInfo.getName().c_str() << "(" << *opr0 << ")\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
|
|
|
|
: "__pow2rsqrt");
|
|
|
|
replaceCall(nval);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!isUnsafeMath(CI))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Unsafe Math optimization
|
|
|
|
|
|
|
|
// Remember that ci_opr1 is set if opr1 is integral
|
|
|
|
if (CF) {
|
|
|
|
double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
|
|
|
|
? (double)CF->getValueAPF().convertToFloat()
|
|
|
|
: CF->getValueAPF().convertToDouble();
|
|
|
|
int ival = (int)dval;
|
|
|
|
if ((double)ival == dval) {
|
|
|
|
ci_opr1 = ival;
|
|
|
|
} else
|
|
|
|
ci_opr1 = 0x11111111;
|
|
|
|
}
|
|
|
|
|
|
|
|
// pow/powr/pown(x, c) = [1/](x*x*..x); where
|
|
|
|
// trunc(c) == c && the number of x == c && |c| <= 12
|
|
|
|
unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
|
|
|
|
if (abs_opr1 <= 12) {
|
|
|
|
Constant *cnval;
|
|
|
|
Value *nval;
|
|
|
|
if (abs_opr1 == 0) {
|
|
|
|
cnval = ConstantFP::get(eltType, 1.0);
|
|
|
|
if (getVecSize(FInfo) > 1) {
|
|
|
|
cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
|
|
|
|
}
|
|
|
|
nval = cnval;
|
|
|
|
} else {
|
|
|
|
Value *valx2 = nullptr;
|
|
|
|
nval = nullptr;
|
|
|
|
while (abs_opr1 > 0) {
|
|
|
|
valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
|
|
|
|
if (abs_opr1 & 1) {
|
|
|
|
nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
|
|
|
|
}
|
|
|
|
abs_opr1 >>= 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ci_opr1 < 0) {
|
|
|
|
cnval = ConstantFP::get(eltType, 1.0);
|
|
|
|
if (getVecSize(FInfo) > 1) {
|
|
|
|
cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
|
|
|
|
}
|
|
|
|
nval = B.CreateFDiv(cnval, nval, "__1powprod");
|
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
|
|
|
|
<< ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
|
|
|
|
<< ")\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
replaceCall(nval);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// powr ---> exp2(y * log2(x))
|
|
|
|
// pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
FunctionCallee ExpExpr =
|
|
|
|
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
|
2017-08-12 00:42:09 +08:00
|
|
|
if (!ExpExpr)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
bool needlog = false;
|
|
|
|
bool needabs = false;
|
|
|
|
bool needcopysign = false;
|
|
|
|
Constant *cnval = nullptr;
|
|
|
|
if (getVecSize(FInfo) == 1) {
|
|
|
|
CF = dyn_cast<ConstantFP>(opr0);
|
|
|
|
|
|
|
|
if (CF) {
|
|
|
|
double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
|
|
|
|
? (double)CF->getValueAPF().convertToFloat()
|
|
|
|
: CF->getValueAPF().convertToDouble();
|
|
|
|
|
|
|
|
V = log2(std::abs(V));
|
|
|
|
cnval = ConstantFP::get(eltType, V);
|
|
|
|
needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
|
|
|
|
CF->isNegative();
|
|
|
|
} else {
|
|
|
|
needlog = true;
|
|
|
|
needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
|
|
|
|
(!CF || CF->isNegative());
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
|
|
|
|
|
|
|
|
if (!CDV) {
|
|
|
|
needlog = true;
|
|
|
|
needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
|
|
|
|
} else {
|
|
|
|
assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
|
|
|
|
"Wrong vector size detected");
|
|
|
|
|
|
|
|
SmallVector<double, 0> DVal;
|
|
|
|
for (int i=0; i < getVecSize(FInfo); ++i) {
|
|
|
|
double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
|
|
|
|
? (double)CDV->getElementAsFloat(i)
|
|
|
|
: CDV->getElementAsDouble(i);
|
|
|
|
if (V < 0.0) needcopysign = true;
|
|
|
|
V = log2(std::abs(V));
|
|
|
|
DVal.push_back(V);
|
|
|
|
}
|
|
|
|
if (getArgType(FInfo) == AMDGPULibFunc::F32) {
|
|
|
|
SmallVector<float, 0> FVal;
|
|
|
|
for (unsigned i=0; i < DVal.size(); ++i) {
|
|
|
|
FVal.push_back((float)DVal[i]);
|
|
|
|
}
|
|
|
|
ArrayRef<float> tmp(FVal);
|
|
|
|
cnval = ConstantDataVector::get(M->getContext(), tmp);
|
|
|
|
} else {
|
|
|
|
ArrayRef<double> tmp(DVal);
|
|
|
|
cnval = ConstantDataVector::get(M->getContext(), tmp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
|
|
|
|
// We cannot handle corner cases for a general pow() function, give up
|
|
|
|
// unless y is a constant integral value. Then proceed as if it were pown.
|
|
|
|
if (getVecSize(FInfo) == 1) {
|
|
|
|
if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
|
|
|
|
double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
|
|
|
|
? (double)CF->getValueAPF().convertToFloat()
|
|
|
|
: CF->getValueAPF().convertToDouble();
|
|
|
|
if (y != (double)(int64_t)y)
|
|
|
|
return false;
|
|
|
|
} else
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
|
|
|
|
for (int i=0; i < getVecSize(FInfo); ++i) {
|
|
|
|
double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
|
|
|
|
? (double)CDV->getElementAsFloat(i)
|
|
|
|
: CDV->getElementAsDouble(i);
|
|
|
|
if (y != (double)(int64_t)y)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Value *nval;
|
|
|
|
if (needabs) {
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
FunctionCallee AbsExpr =
|
|
|
|
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
|
2017-08-12 00:42:09 +08:00
|
|
|
if (!AbsExpr)
|
|
|
|
return false;
|
|
|
|
nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
|
|
|
|
} else {
|
|
|
|
nval = cnval ? cnval : opr0;
|
|
|
|
}
|
|
|
|
if (needlog) {
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
FunctionCallee LogExpr =
|
|
|
|
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
|
2017-08-12 00:42:09 +08:00
|
|
|
if (!LogExpr)
|
|
|
|
return false;
|
|
|
|
nval = CreateCallEx(B,LogExpr, nval, "__log2");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
|
|
|
|
// convert int(32) to fp(f32 or f64)
|
|
|
|
opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
|
|
|
|
}
|
|
|
|
nval = B.CreateFMul(opr1, nval, "__ylogx");
|
|
|
|
nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
|
|
|
|
|
|
|
|
if (needcopysign) {
|
|
|
|
Value *opr_n;
|
|
|
|
Type* rTy = opr0->getType();
|
|
|
|
Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
|
|
|
|
Type *nTy = nTyS;
|
[SVE] Remove usages of VectorType::getNumElements() from AMDGPU
Reviewers: efriedma, arsenm, david-arm, fpetrogalli
Reviewed By: efriedma
Subscribers: dmgreen, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, tschuett, hiraditya, rkruppe, psnobl, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D79807
2020-05-14 06:19:07 +08:00
|
|
|
if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
|
|
|
|
nTy = FixedVectorType::get(nTyS, vTy);
|
2017-08-12 00:42:09 +08:00
|
|
|
unsigned size = nTy->getScalarSizeInBits();
|
|
|
|
opr_n = CI->getArgOperand(1);
|
|
|
|
if (opr_n->getType()->isIntegerTy())
|
|
|
|
opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
|
|
|
|
else
|
|
|
|
opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
|
|
|
|
|
|
|
|
Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
|
|
|
|
sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
|
|
|
|
nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
|
|
|
|
nval = B.CreateBitCast(nval, opr0->getType());
|
|
|
|
}
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
|
|
|
|
<< "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
replaceCall(nval);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
|
|
|
|
const FuncInfo &FInfo) {
|
|
|
|
Value *opr0 = CI->getArgOperand(0);
|
|
|
|
Value *opr1 = CI->getArgOperand(1);
|
|
|
|
|
|
|
|
ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
|
|
|
|
if (!CINT) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
int ci_opr1 = (int)CINT->getSExtValue();
|
|
|
|
if (ci_opr1 == 1) { // rootn(x, 1) = x
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
replaceCall(opr0);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
|
|
|
|
Module *M = CI->getModule();
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
if (FunctionCallee FPExpr =
|
|
|
|
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
|
|
|
|
replaceCall(nval);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
} else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
|
|
|
|
Module *M = CI->getModule();
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
if (FunctionCallee FPExpr =
|
|
|
|
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
|
|
|
|
replaceCall(nval);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
} else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
|
|
|
|
opr0,
|
|
|
|
"__rootn2div");
|
|
|
|
replaceCall(nval);
|
|
|
|
return true;
|
|
|
|
} else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
|
|
|
|
Module *M = CI->getModule();
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
if (FunctionCallee FPExpr =
|
|
|
|
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
|
|
|
|
<< ")\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
|
|
|
|
replaceCall(nval);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fold fma/mad(a, b, c) when one of the operands is a scalar FP constant
// with a trivial value:
//
//   a == 0 || b == 0  ->  c
//   a == 1            ->  b + c
//   b == 1            ->  a + c
//   c == 0            ->  a * b
//
// The checks are applied in the order listed. Returns true when the call
// has been replaced, false when nothing was changed.
bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
                                  const FuncInfo &FInfo) {
  Value *MulA = CI->getArgOperand(0);
  Value *MulB = CI->getArgOperand(1);
  Value *Addend = CI->getArgOperand(2);

  ConstantFP *ConstA = dyn_cast<ConstantFP>(MulA);
  ConstantFP *ConstB = dyn_cast<ConstantFP>(MulB);

  // Both predicates tolerate a null pointer (operand is not a ConstantFP).
  auto IsZero = [](const ConstantFP *C) { return C && C->isZero(); };
  auto IsOne = [](const ConstantFP *C) {
    return C && C->isExactlyValue(1.0f);
  };

  if (IsZero(ConstA) || IsZero(ConstB)) {
    // fma/mad(a, b, c) = c if a=0 || b=0
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *Addend << "\n");
    replaceCall(Addend);
    return true;
  }

  const bool AIsOne = IsOne(ConstA);
  if (AIsOne || IsOne(ConstB)) {
    // fma/mad(a, b, c) = b+c if a=1
    // fma/mad(a, b, c) = a+c if b=1
    // When both multiplicands are 1, the a=1 form takes priority.
    Value *Remaining = AIsOne ? MulB : MulA;
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *Remaining << " + "
                      << *Addend << "\n");
    Value *Sum = B.CreateFAdd(Remaining, Addend, "fmaadd");
    replaceCall(Sum);
    return true;
  }

  if (!IsZero(dyn_cast<ConstantFP>(Addend)))
    return false;

  // fma/mad(a, b, c) = a*b if c=0
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *MulA << " * " << *MulB
                    << "\n");
  Value *Product = B.CreateFMul(MulA, MulB, "fmamul");
  replaceCall(Product);
  return true;
}
|
|
|
|
|
|
|
|
// Get a scalar native builtin signle argument FP function
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
|
|
|
|
const FuncInfo &FInfo) {
|
2017-08-29 02:00:08 +08:00
|
|
|
if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
|
|
|
|
return nullptr;
|
2017-08-12 00:42:09 +08:00
|
|
|
FuncInfo nf = FInfo;
|
|
|
|
nf.setPrefix(AMDGPULibFunc::NATIVE);
|
|
|
|
return getFunction(M, nf);
|
|
|
|
}
|
|
|
|
|
|
|
|
// fold sqrt -> native_sqrt (x)
|
|
|
|
bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
|
|
|
|
const FuncInfo &FInfo) {
|
2017-08-29 02:00:08 +08:00
|
|
|
if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
|
2017-08-12 00:42:09 +08:00
|
|
|
(FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
if (FunctionCallee FPExpr = getNativeFunction(
|
|
|
|
CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
|
2017-08-12 00:42:09 +08:00
|
|
|
Value *opr0 = CI->getArgOperand(0);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
|
|
|
|
<< "sqrt(" << *opr0 << ")\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
|
|
|
|
replaceCall(nval);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// fold sin, cos -> sincos.
|
|
|
|
bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
|
|
|
|
AliasAnalysis *AA) {
|
|
|
|
AMDGPULibFunc fInfo;
|
|
|
|
if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
|
|
|
|
fInfo.getId() == AMDGPULibFunc::EI_COS);
|
|
|
|
bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
|
|
|
|
|
|
|
|
Value *CArgVal = CI->getArgOperand(0);
|
|
|
|
BasicBlock * const CBB = CI->getParent();
|
|
|
|
|
|
|
|
int const MaxScan = 30;
|
|
|
|
|
|
|
|
{ // fold in load value.
|
|
|
|
LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
|
|
|
|
if (LI && LI->getParent() == CBB) {
|
|
|
|
BasicBlock::iterator BBI = LI->getIterator();
|
|
|
|
Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
|
|
|
|
if (AvailableVal) {
|
|
|
|
CArgVal->replaceAllUsesWith(AvailableVal);
|
|
|
|
if (CArgVal->getNumUses() == 0)
|
|
|
|
LI->eraseFromParent();
|
|
|
|
CArgVal = CI->getArgOperand(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Module *M = CI->getModule();
|
|
|
|
fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
|
|
|
|
std::string const PairName = fInfo.mangle();
|
|
|
|
|
|
|
|
CallInst *UI = nullptr;
|
|
|
|
for (User* U : CArgVal->users()) {
|
|
|
|
CallInst *XI = dyn_cast_or_null<CallInst>(U);
|
|
|
|
if (!XI || XI == CI || XI->getParent() != CBB)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
Function *UCallee = XI->getCalledFunction();
|
|
|
|
if (!UCallee || !UCallee->getName().equals(PairName))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
BasicBlock::iterator BBI = CI->getIterator();
|
|
|
|
if (BBI == CI->getParent()->begin())
|
|
|
|
break;
|
|
|
|
--BBI;
|
|
|
|
for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
|
|
|
|
if (cast<Instruction>(BBI) == XI) {
|
|
|
|
UI = XI;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (UI) break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!UI) return false;
|
|
|
|
|
|
|
|
// Merge the sin and cos.
|
|
|
|
|
|
|
|
// for OpenCL 2.0 we have only generic implementation of sincos
|
|
|
|
// function.
|
|
|
|
AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
|
2018-08-31 13:49:54 +08:00
|
|
|
nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
FunctionCallee Fsincos = getFunction(M, nf);
|
2017-08-12 00:42:09 +08:00
|
|
|
if (!Fsincos) return false;
|
|
|
|
|
|
|
|
BasicBlock::iterator ItOld = B.GetInsertPoint();
|
|
|
|
AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
|
|
|
|
B.SetInsertPoint(UI);
|
|
|
|
|
|
|
|
Value *P = Alloc;
|
[opaque pointer types] Add a FunctionCallee wrapper type, and use it.
Recommit r352791 after tweaking DerivedTypes.h slightly, so that gcc
doesn't choke on it, hopefully.
Original Message:
The FunctionCallee type is effectively a {FunctionType*,Value*} pair,
and is a useful convenience to enable code to continue passing the
result of getOrInsertFunction() through to EmitCall, even once pointer
types lose their pointee-type.
Then:
- update the CallInst/InvokeInst instruction creation functions to
take a Callee,
- modify getOrInsertFunction to return FunctionCallee, and
- update all callers appropriately.
One area of particular note is the change to the sanitizer
code. Previously, they had been casting the result of
`getOrInsertFunction` to a `Function*` via
`checkSanitizerInterfaceFunction`, and storing that. That would report
an error if someone had already inserted a function declaraction with
a mismatching signature.
However, in general, LLVM allows for such mismatches, as
`getOrInsertFunction` will automatically insert a bitcast if
needed. As part of this cleanup, cause the sanitizer code to do the
same. (It will call its functions using the expected signature,
however they may have been declared.)
Finally, in a small number of locations, callers of
`getOrInsertFunction` actually were expecting/requiring that a brand
new function was being created. In such cases, I've switched them to
Function::Create instead.
Differential Revision: https://reviews.llvm.org/D57315
llvm-svn: 352827
2019-02-01 10:28:03 +08:00
|
|
|
Type *PTy = Fsincos.getFunctionType()->getParamType(1);
|
2017-08-12 00:42:09 +08:00
|
|
|
// The allocaInst allocates the memory in private address space. This need
|
|
|
|
// to be bitcasted to point to the address space of cos pointer type.
|
|
|
|
// In OpenCL 2.0 this is generic, while in 1.2 that is private.
|
2018-08-31 13:49:54 +08:00
|
|
|
if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
|
2017-08-12 00:42:09 +08:00
|
|
|
P = B.CreateAddrSpaceCast(Alloc, PTy);
|
|
|
|
CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
|
|
|
|
<< *Call << "\n");
|
2017-08-12 00:42:09 +08:00
|
|
|
|
|
|
|
if (!isSin) { // CI->cos, UI->sin
|
|
|
|
B.SetInsertPoint(&*ItOld);
|
|
|
|
UI->replaceAllUsesWith(&*Call);
|
2019-02-02 04:44:24 +08:00
|
|
|
Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
|
2017-08-12 00:42:09 +08:00
|
|
|
CI->replaceAllUsesWith(Reload);
|
|
|
|
UI->eraseFromParent();
|
|
|
|
CI->eraseFromParent();
|
|
|
|
} else { // CI->sin, UI->cos
|
2019-02-02 04:44:24 +08:00
|
|
|
Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
|
2017-08-12 00:42:09 +08:00
|
|
|
UI->replaceAllUsesWith(Reload);
|
|
|
|
CI->replaceAllUsesWith(Call);
|
|
|
|
UI->eraseFromParent();
|
|
|
|
CI->eraseFromParent();
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-06-18 01:57:50 +08:00
|
|
|
// Replace the call CI with the subtarget's wavefront size as an i32 constant.
//
// Nothing is folded (and false is returned) when no TargetMachine is
// available, or when the target CPU is empty/"generic" and the feature string
// does not mention "wavefrontsize". On success all uses of CI are rewritten to
// the constant, CI is erased, and true is returned.
bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
  if (!TM)
    return false;

  const StringRef TargetCPU = TM->getTargetCPU();
  const StringRef FeatureStr = TM->getTargetFeatureString();

  const bool IsGenericCPU =
      TargetCPU.empty() || TargetCPU.equals_lower("generic");
  const bool MentionsWaveSize =
      !FeatureStr.empty() &&
      FeatureStr.find_lower("wavefrontsize") != StringRef::npos;

  // A generic CPU without an explicit wavefrontsize feature: do not fold.
  if (IsGenericCPU && !MentionsWaveSize)
    return false;

  Function *ParentFn = CI->getParent()->getParent();
  const GCNSubtarget &Subtarget = TM->getSubtarget<GCNSubtarget>(*ParentFn);
  unsigned WaveSize = Subtarget.getWavefrontSize();

  LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
                    << WaveSize << "\n");

  Constant *WaveSizeC = ConstantInt::get(B.getInt32Ty(), WaveSize);
  CI->replaceAllUsesWith(WaveSizeC);
  CI->eraseFromParent();
  return true;
}
|
|
|
|
|
2017-08-12 00:42:09 +08:00
|
|
|
// Get insertion point at entry.
|
|
|
|
BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
|
|
|
|
Function * Func = UI->getParent()->getParent();
|
|
|
|
BasicBlock * BB = &Func->getEntryBlock();
|
|
|
|
assert(BB && "Entry block not found!");
|
|
|
|
BasicBlock::iterator ItNew = BB->begin();
|
|
|
|
return ItNew;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert a AllocsInst at the beginning of function entry block.
|
|
|
|
AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
|
|
|
|
const char *prefix) {
|
|
|
|
BasicBlock::iterator ItNew = getEntryIns(UI);
|
|
|
|
Function *UCallee = UI->getCalledFunction();
|
|
|
|
Type *RetType = UCallee->getReturnType();
|
|
|
|
B.SetInsertPoint(&*ItNew);
|
|
|
|
AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
|
|
|
|
std::string(prefix) + UI->getName());
|
2020-05-16 04:23:14 +08:00
|
|
|
Alloc->setAlignment(
|
|
|
|
Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
|
2017-08-12 00:42:09 +08:00
|
|
|
return Alloc;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Evaluate one scalar instance of the library function described by FInfo on
// constant operands, using host double-precision arithmetic.
//
// \param FInfo  Identifies the function (getId()) and its argument type.
// \param Res0   Receives the (first) result.
// \param Res1   Receives the second result; only written for EI_SINCOS (cos).
// \param copr0, copr1, copr2  Constant operands; may be null.
// \returns true if the function id is handled and a result was produced.
//          Returns false for unhandled ids, for pown/rootn whose second
//          operand is not a ConstantInt, and for rootn with n == 0.
//
// Operands that are not ConstantFP are read as 0.0 here; functions whose
// operands are not float/double (pown, rootn) decode them separately.
bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
                                            double& Res0, double& Res1,
                                            Constant *copr0, Constant *copr1,
                                            Constant *copr2) {
  const bool IsF64 = (getArgType(FInfo) == AMDGPULibFunc::F64);

  // Widen a floating-point constant to double; anything else yields 0.0.
  auto toDouble = [IsF64](Constant *C) -> double {
    ConstantFP *FP = dyn_cast_or_null<ConstantFP>(C);
    if (!FP)
      return 0.0;
    return IsF64 ? FP->getValueAPF().convertToDouble()
                 : (double)FP->getValueAPF().convertToFloat();
  };

  const double opr0 = toDouble(copr0);
  const double opr1 = toDouble(copr1);
  const double opr2 = toDouble(copr2);

  switch (FInfo.getId()) {
  case AMDGPULibFunc::EI_ACOS:
    Res0 = acos(opr0);
    return true;

  case AMDGPULibFunc::EI_ACOSH:
    // acosh(x) == log(x + sqrt(x*x - 1))
    Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
    return true;

  case AMDGPULibFunc::EI_ACOSPI:
    Res0 = acos(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_ASIN:
    Res0 = asin(opr0);
    return true;

  case AMDGPULibFunc::EI_ASINH:
    // asinh(x) == log(x + sqrt(x*x + 1))
    Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
    return true;

  case AMDGPULibFunc::EI_ASINPI:
    Res0 = asin(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_ATAN:
    Res0 = atan(opr0);
    return true;

  case AMDGPULibFunc::EI_ATANH:
    // atanh(x) == (log(1+x) - log(1-x))/2, defined for |x| < 1.
    // (Using log(x-1) here would yield NaN over the whole domain.)
    Res0 = (log(1.0 + opr0) - log(1.0 - opr0)) / 2.0;
    return true;

  case AMDGPULibFunc::EI_ATANPI:
    Res0 = atan(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_CBRT:
    // pow() rejects a negative base with a fractional exponent, so take the
    // root of the magnitude and restore the sign.
    Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
    return true;

  case AMDGPULibFunc::EI_COS:
    Res0 = cos(opr0);
    return true;

  case AMDGPULibFunc::EI_COSH:
    Res0 = cosh(opr0);
    return true;

  case AMDGPULibFunc::EI_COSPI:
    Res0 = cos(MATH_PI * opr0);
    return true;

  case AMDGPULibFunc::EI_EXP:
    Res0 = exp(opr0);
    return true;

  case AMDGPULibFunc::EI_EXP2:
    Res0 = pow(2.0, opr0);
    return true;

  case AMDGPULibFunc::EI_EXP10:
    Res0 = pow(10.0, opr0);
    return true;

  case AMDGPULibFunc::EI_EXPM1:
    Res0 = exp(opr0) - 1.0;
    return true;

  case AMDGPULibFunc::EI_LOG:
    Res0 = log(opr0);
    return true;

  case AMDGPULibFunc::EI_LOG2:
    Res0 = log(opr0) / log(2.0);
    return true;

  case AMDGPULibFunc::EI_LOG10:
    Res0 = log(opr0) / log(10.0);
    return true;

  case AMDGPULibFunc::EI_RSQRT:
    Res0 = 1.0 / sqrt(opr0);
    return true;

  case AMDGPULibFunc::EI_SIN:
    Res0 = sin(opr0);
    return true;

  case AMDGPULibFunc::EI_SINH:
    Res0 = sinh(opr0);
    return true;

  case AMDGPULibFunc::EI_SINPI:
    Res0 = sin(MATH_PI * opr0);
    return true;

  case AMDGPULibFunc::EI_SQRT:
    Res0 = sqrt(opr0);
    return true;

  case AMDGPULibFunc::EI_TAN:
    Res0 = tan(opr0);
    return true;

  case AMDGPULibFunc::EI_TANH:
    Res0 = tanh(opr0);
    return true;

  case AMDGPULibFunc::EI_TANPI:
    Res0 = tan(MATH_PI * opr0);
    return true;

  case AMDGPULibFunc::EI_RECIP:
    Res0 = 1.0 / opr0;
    return true;

  // two-arg functions
  case AMDGPULibFunc::EI_DIVIDE:
    Res0 = opr0 / opr1;
    return true;

  case AMDGPULibFunc::EI_POW:
  case AMDGPULibFunc::EI_POWR:
    Res0 = pow(opr0, opr1);
    return true;

  case AMDGPULibFunc::EI_POWN: {
    // The exponent is an integer operand, not a floating-point one.
    ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1);
    if (!iopr1)
      return false;
    Res0 = pow(opr0, (double)iopr1->getSExtValue());
    return true;
  }

  case AMDGPULibFunc::EI_ROOTN: {
    // The root degree is an integer operand, not a floating-point one.
    ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1);
    if (!iopr1)
      return false;
    const int64_t n = iopr1->getSExtValue();
    // 1.0/0 would turn this into pow(x, inf), which is not what rootn(x, 0)
    // means; leave the call unfolded rather than bake in a wrong constant.
    if (n == 0)
      return false;
    const double recip = 1.0 / (double)n;
    // An odd root of a negative value is real, but pow() returns NaN for a
    // negative base with a fractional exponent. Same approach as EI_CBRT.
    Res0 = (opr0 < 0.0 && (n % 2) != 0) ? -pow(-opr0, recip)
                                        : pow(opr0, recip);
    return true;
  }

  // with ptr arg
  case AMDGPULibFunc::EI_SINCOS:
    Res0 = sin(opr0);
    Res1 = cos(opr0);
    return true;

  // three-arg functions
  case AMDGPULibFunc::EI_FMA:
  case AMDGPULibFunc::EI_MAD:
    Res0 = opr0 * opr1 + opr2;
    return true;

  default:
    break;
  }

  return false;
}
|
|
|
|
|
|
|
|
// Try to constant-fold the library call aCI described by FInfo.
//
// All arguments must be constants (the second argument of sincos, which is
// the output pointer, is exempt). Each lane is evaluated with
// evaluateScalarMathFunc; the results are materialized as a ConstantFP
// (scalar) or ConstantDataVector (vector) and passed to replaceCall(). For
// sincos the second result is stored through the pointer argument before the
// call is replaced.
//
// \returns true if the call was folded, false if it was left untouched.
bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
  int numArgs = (int)aCI->getNumArgOperands();
  if (numArgs > 3)
    return false;

  Constant *copr0 = nullptr;
  Constant *copr1 = nullptr;
  Constant *copr2 = nullptr;
  if (numArgs > 0) {
    if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
      return false;
  }

  if (numArgs > 1) {
    if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
      // sincos takes a (non-constant) pointer as its second argument.
      if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
        return false;
    }
  }

  if (numArgs > 2) {
    if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
      return false;
  }

  // At this point, all arguments to aCI are constants.

  // max vector size is 16, and sincos will generate two results.
  const int MaxVecSize = 16;
  double DVal0[MaxVecSize], DVal1[MaxVecSize];
  const int VecSize = getVecSize(FInfo);
  // Never index past the fixed-size scratch buffers.
  if (VecSize > MaxVecSize)
    return false;

  bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
  if (VecSize == 1) {
    if (!evaluateScalarMathFunc(FInfo, DVal0[0],
                                DVal1[0], copr0, copr1, copr2)) {
      return false;
    }
  } else {
    ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
    ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
    ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
    for (int i = 0; i < VecSize; ++i) {
      Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
      Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
      Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
      if (!evaluateScalarMathFunc(FInfo, DVal0[i],
                                  DVal1[i], celt0, celt1, celt2)) {
        return false;
      }
    }
  }

  LLVMContext &context = CI->getParent()->getParent()->getContext();
  Constant *nval0 = nullptr;
  Constant *nval1 = nullptr;
  if (VecSize == 1) {
    nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
    if (hasTwoResults)
      nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
  } else {
    if (getArgType(FInfo) == AMDGPULibFunc::F32) {
      // Narrow the double results to float lane by lane.
      SmallVector <float, 0> FVal0, FVal1;
      for (int i = 0; i < VecSize; ++i)
        FVal0.push_back((float)DVal0[i]);
      ArrayRef<float> tmp0(FVal0);
      nval0 = ConstantDataVector::get(context, tmp0);
      if (hasTwoResults) {
        for (int i = 0; i < VecSize; ++i)
          FVal1.push_back((float)DVal1[i]);
        ArrayRef<float> tmp1(FVal1);
        nval1 = ConstantDataVector::get(context, tmp1);
      }
    } else {
      // Only the first VecSize entries are valid. Building the ArrayRef from
      // the whole array would always produce a 16-element vector (with
      // uninitialized lanes) that does not match the call's type.
      ArrayRef<double> tmp0(DVal0, (size_t)VecSize);
      nval0 = ConstantDataVector::get(context, tmp0);
      if (hasTwoResults) {
        ArrayRef<double> tmp1(DVal1, (size_t)VecSize);
        nval1 = ConstantDataVector::get(context, tmp1);
      }
    }
  }

  if (hasTwoResults) {
    // sincos
    assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
           "math function with ptr arg not supported yet");
    // Write the second result through the pointer argument, ahead of aCI.
    new StoreInst(nval1, aCI->getArgOperand(1), aCI);
  }

  replaceCall(nval0);
  return true;
}
|
|
|
|
|
|
|
|
// Public interface to the Simplify LibCalls pass.
|
2019-12-05 17:45:32 +08:00
|
|
|
FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) {
|
|
|
|
return new AMDGPUSimplifyLibCalls(TM);
|
2017-08-12 00:42:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Public interface to the Use Native Calls pass: returns a newly allocated
// AMDGPUUseNativeCalls instance.
FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
  FunctionPass *NativePass = new AMDGPUUseNativeCalls();
  return NativePass;
}
|
|
|
|
|
|
|
|
// Walk every instruction of F and hand each direct call to Simplifier.fold().
//
// Debug-info intrinsics, lifetime start/end markers and indirect calls are
// skipped. Returns true if at least one call was folded.
bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
             F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);

  bool Changed = false;
  for (auto &BB : F) {
    BasicBlock::iterator It = BB.begin();
    const BasicBlock::iterator End = BB.end();
    while (It != End) {
      // Step past the instruction before touching it: folding may erase it.
      CallInst *Call = dyn_cast<CallInst>(It);
      ++It;

      // Ignore non-calls.
      if (!Call)
        continue;

      // Ignore intrinsics that do not become real instructions.
      if (isa<DbgInfoIntrinsic>(Call) || Call->isLifetimeStartOrEnd())
        continue;

      // Ignore indirect calls.
      if (Call->getCalledFunction() == nullptr)
        continue;

      LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *Call << "\n";
                 dbgs().flush());

      if (Simplifier.fold(Call, AA))
        Changed = true;
    }
  }
  return Changed;
}
|
|
|
|
|
|
|
|
// Walk every instruction of F and hand each direct call to
// Simplifier.useNative().
//
// Does nothing when the function is skipped or the UseNative list is empty.
// Returns true if at least one call was changed.
bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;
  if (UseNative.empty())
    return false;

  bool Changed = false;
  for (auto &BB : F) {
    BasicBlock::iterator It = BB.begin();
    const BasicBlock::iterator End = BB.end();
    while (It != End) {
      // Step past the instruction before touching it: it may be replaced.
      CallInst *Call = dyn_cast<CallInst>(It);
      ++It;

      // Ignore non-calls.
      if (!Call)
        continue;

      // Ignore indirect calls.
      if (Call->getCalledFunction() == nullptr)
        continue;

      if (Simplifier.useNative(Call))
        Changed = true;
    }
  }
  return Changed;
}
|