forked from mindspore-Ecosystem/mindspore
!1840 Add op pre compiler process
Merge pull request !1840 from wangcong/master
This commit is contained in:
commit
8de8289cfd
|
@ -28,7 +28,8 @@ build_in_impl_path = get_build_in_impl_path()
|
|||
# op function list
|
||||
op_build = "compile"
|
||||
op_pre_build = "pre_build"
|
||||
|
||||
fusion_type_map = {'Convolution': 0, 'ElemWise': 1, 'CommReduce': 2,
|
||||
'Segment': 3, 'Opaque': 4}
|
||||
|
||||
def _initialize(impl_path):
|
||||
"""Initialize"""
|
||||
|
@ -108,7 +109,7 @@ def build_op(build_type, json_str):
|
|||
|
||||
# pre build
|
||||
if build_type == op_pre_build:
|
||||
op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name)
|
||||
op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
|
||||
# disable only pattern configuration
|
||||
op_build_cfg_en()
|
||||
return get_op_pattern()
|
||||
|
@ -159,11 +160,16 @@ def compile_with_json(json_str):
|
|||
json_info = json.loads(json_str)
|
||||
if "fusion_op" in json_info:
|
||||
ret = compile_fusion_op(json_str)
|
||||
elif "compile_type" in json_info:
|
||||
ret = build_op(op_pre_build, json_str)
|
||||
else:
|
||||
ret = build_op(op_build, json_str)
|
||||
return ret
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
in_args = sys.stdin.readline()
|
||||
compile_with_json(in_args)
|
||||
result = compile_with_json(in_args)
|
||||
if result in fusion_type_map:
|
||||
exit(fusion_type_map[result])
|
||||
else:
|
||||
exit(100)
|
||||
|
|
|
@ -75,7 +75,6 @@ def check_supported(op_json: str):
|
|||
|
||||
return ret
|
||||
|
||||
|
||||
def run_compiler(op_json):
|
||||
"""
|
||||
run compiler to compile op with subprocess
|
||||
|
@ -88,15 +87,16 @@ def run_compiler(op_json):
|
|||
"""
|
||||
try:
|
||||
tbe_compiler = os.path.join(os.path.split(os.path.realpath(__file__))[0], "compiler.py")
|
||||
subprocess.run([sys.executable, tbe_compiler], input=op_json, timeout=300,
|
||||
text=True, capture_output=True, check=True)
|
||||
return "Success", "Success"
|
||||
completed_object = subprocess.run([sys.executable, tbe_compiler], input=op_json, timeout=300,
|
||||
text=True, capture_output=True, check=False)
|
||||
if completed_object:
|
||||
code = completed_object.returncode
|
||||
return "Success", str(code)
|
||||
except subprocess.TimeoutExpired:
|
||||
tb = traceback.format_exc()
|
||||
return "TBEException", "CompileTimeOut: " + tb + "\ninput_args: " + op_json
|
||||
return "TBEException", "PreCompileTimeOut: " + tb + "\ninput_args: " + op_json
|
||||
except subprocess.CalledProcessError as e:
|
||||
return "TBEException", "CompileProcessFailed:\n" + e.stdout + "\n" + e.stderr + "\ninput_args: " + op_json
|
||||
|
||||
return "TBEException", "PreCompileProcessFailed:\n" + e.stdout + "\n" + e.stderr + "\ninput_args: " + op_json
|
||||
|
||||
class CompilerPool:
|
||||
"""compiler pool"""
|
||||
|
@ -154,11 +154,11 @@ class CompilerPool:
|
|||
task_id, task_future = self.__running_tasks.pop(0)
|
||||
ret_type, result = task_future.get(330)
|
||||
if ret_type == "Success":
|
||||
ret = task_id, "Success"
|
||||
ret = task_id, "Success", result
|
||||
elif ret_type in ("Exception", "TBEException"):
|
||||
ret = task_id, ret_type + ":" + result
|
||||
ret = task_id, ret_type + ":" + result, "_"
|
||||
else:
|
||||
ret = task_id, "Exception: Not support return type:" + str(ret_type)
|
||||
ret = task_id, "Exception: Not support return type:" + str(ret_type), "_"
|
||||
return ret
|
||||
|
||||
def reset_task_info(self):
|
||||
|
|
|
@ -62,6 +62,31 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) {
|
|||
return kernel_mod_ptr;
|
||||
}
|
||||
|
||||
// Collects, in execution order, every node of the graph that passes AnfAlgo::IsRealKernel, has kernel
// type TBE_KERNEL and does not yet own a kernel mod, then hands the whole batch to
// kernel::TbeOpParallelPreBuild.
//
// @param kernel_graph_ptr graph whose execution order is scanned; must not be null.
// @return the value returned by kernel::TbeOpParallelPreBuild for the collected nodes.
static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  std::vector<AnfNodePtr> pending_tbe_nodes;
  for (const auto &node : kernel_graph_ptr->execution_order()) {
    MS_EXCEPTION_IF_NULL(node);
    if (!AnfAlgo::IsRealKernel(node)) {
      continue;
    }
    // Only TBE kernels take part in pre-build; every other kernel type is ignored here.
    const bool is_tbe_kernel = AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL;
    // The kernel mod is queried only for TBE kernels (short-circuit evaluation).
    if (is_tbe_kernel && AnfAlgo::GetKernelMod(node) == nullptr) {
      pending_tbe_nodes.push_back(node);
    }
  }
  return kernel::TbeOpParallelPreBuild(pending_tbe_nodes);
}
|
||||
|
||||
static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
std::vector<AnfNodePtr> tbe_nodes;
|
||||
|
@ -188,6 +213,12 @@ bool IsAtomicNode(const CNodePtr &kernel_node) {
|
|||
return atomic_flag;
|
||||
}
|
||||
|
||||
// Public entry point of the Ascend kernel pre-build step.
// Validates the graph pointer and forwards it to KernelPreBuildParallelCompile, returning that
// function's result unchanged.
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  return device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr);
}
|
||||
|
||||
bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
TbeUtils::LoadCache();
|
||||
|
|
|
@ -22,6 +22,10 @@
|
|||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
/**
|
||||
* @brief kernel pre build for ascend.
|
||||
*/
|
||||
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr);
|
||||
/**
|
||||
* @brief kernel build for ascend.
|
||||
*/
|
||||
|
|
|
@ -102,7 +102,8 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
|
|||
while (!build_manger->IsAllTaskFinish()) {
|
||||
int task_id = -1;
|
||||
char *task_result = nullptr;
|
||||
auto ret = build_manger->WaitOne(&task_id, &task_result);
|
||||
char *pre_build_result = nullptr;
|
||||
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
|
||||
if (!ret) {
|
||||
MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id;
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
// the TBE back-end operator implementation difference
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED };
|
||||
enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
|
||||
namespace tbe {
|
||||
using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
|
||||
nlohmann::json *attrs_json);
|
||||
|
|
|
@ -42,6 +42,40 @@ constexpr auto kStartCompileOp = "start_compile_op";
|
|||
constexpr auto kWaitOne = "wait_one";
|
||||
constexpr auto kResetTaskInfo = "reset_task_info";
|
||||
|
||||
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) {
|
||||
auto build_manger = std::make_shared<ParallelBuildManager>();
|
||||
MS_EXCEPTION_IF_NULL(build_manger);
|
||||
for (const auto &anf_node : anf_nodes) {
|
||||
// gen kernel json
|
||||
nlohmann::json kernel_json;
|
||||
TbeKernelJsonCreator creator(OP_PRE_COMPILE);
|
||||
if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) {
|
||||
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
|
||||
return false;
|
||||
}
|
||||
kernel_json["compile_type"] = "pre_build";
|
||||
// op build
|
||||
auto task_id = build_manger->StartCompileOp(kernel_json);
|
||||
build_manger->SavePreTaskInfo(task_id, anf_node);
|
||||
}
|
||||
while (!build_manger->IsAllPreTaskFinish()) {
|
||||
int task_id = -1;
|
||||
char *task_result = nullptr;
|
||||
char *pre_build_result = nullptr;
|
||||
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
|
||||
if (!ret) {
|
||||
MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id;
|
||||
}
|
||||
|
||||
if ((task_result != nullptr) && (strcmp(task_result, "Success") != 0)) {
|
||||
MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result;
|
||||
}
|
||||
|
||||
build_manger->PreTaskFinishProcess(task_id, pre_build_result);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TbeOpParallelBuild(std::vector<AnfNodePtr> anf_nodes) {
|
||||
auto build_manger = std::make_shared<ParallelBuildManager>();
|
||||
MS_EXCEPTION_IF_NULL(build_manger);
|
||||
|
@ -82,7 +116,8 @@ bool TbeOpParallelBuild(std::vector<AnfNodePtr> anf_nodes) {
|
|||
while (!build_manger->IsAllTaskFinish()) {
|
||||
int task_id = -1;
|
||||
char *task_result = nullptr;
|
||||
auto ret = build_manger->WaitOne(&task_id, &task_result);
|
||||
char *pre_build_result = nullptr;
|
||||
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
|
||||
if (!ret) {
|
||||
MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id;
|
||||
}
|
||||
|
@ -116,7 +151,7 @@ int32_t ParallelBuildManager::StartCompileOp(const nlohmann::json &kernel_json)
|
|||
return task_id;
|
||||
}
|
||||
|
||||
bool ParallelBuildManager::WaitOne(int *task_id, char **task_result) const {
|
||||
bool ParallelBuildManager::WaitOne(int *task_id, char **task_result, char **pre_build_result) const {
|
||||
MS_LOG(INFO) << "wait task start.";
|
||||
MS_EXCEPTION_IF_NULL(task_id);
|
||||
MS_EXCEPTION_IF_NULL(task_result);
|
||||
|
@ -128,10 +163,15 @@ bool ParallelBuildManager::WaitOne(int *task_id, char **task_result) const {
|
|||
MS_EXCEPTION(ArgumentError) << "Failed to call function wait_one";
|
||||
return false;
|
||||
}
|
||||
(void)PyArg_ParseTuple(pRes, "is", task_id, task_result);
|
||||
(void)PyArg_ParseTuple(pRes, "iss", task_id, task_result, pre_build_result);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Remembers which node a submitted pre-build task belongs to, keyed by task id.
// An entry already stored under the same task id is overwritten.
void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) {
  MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id;
  auto &node_slot = pre_task_map_[task_id];
  node_slot = anf_node;
}
|
||||
|
||||
void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node,
|
||||
const std::string &json_name, const std::vector<size_t> &input_size_list,
|
||||
const std::vector<size_t> &output_size_list, int32_t scope_id) {
|
||||
|
@ -150,11 +190,24 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
|
|||
task_map_[task_id] = task_info;
|
||||
}
|
||||
|
||||
// Logs how many pre-build tasks are still recorded and reports whether none are left.
bool ParallelBuildManager::IsAllPreTaskFinish() const {
  const auto pending_pre_tasks = pre_task_map_.size();
  MS_LOG(INFO) << "wait pre build process task_num: " << pending_pre_tasks;
  return pending_pre_tasks == 0;
}
|
||||
|
||||
// Logs how many build tasks are still recorded and reports whether none are left.
bool ParallelBuildManager::IsAllTaskFinish() const {
  const auto pending_tasks = task_map_.size();
  MS_LOG(INFO) << "wait process task_num: " << pending_tasks;
  return pending_tasks == 0;
}
|
||||
|
||||
void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) {
|
||||
auto task_iter = pre_task_map_.find(task_id);
|
||||
if (task_iter == pre_task_map_.end()) {
|
||||
MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id;
|
||||
}
|
||||
(void)pre_task_map_.erase(task_iter);
|
||||
}
|
||||
|
||||
std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) {
|
||||
auto task_iter = task_map_.find(task_id);
|
||||
if (task_iter == task_map_.end()) {
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <nlohmann/json.hpp>
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes);
|
||||
bool TbeOpParallelBuild(std::vector<AnfNodePtr> anf_nodes);
|
||||
|
||||
struct KernelBuildTaskInfo {
|
||||
|
@ -42,6 +43,7 @@ class ParallelBuildManager {
|
|||
ParallelBuildManager();
|
||||
~ParallelBuildManager();
|
||||
int32_t StartCompileOp(const nlohmann::json &kernel_json) const;
|
||||
void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node);
|
||||
void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name,
|
||||
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
|
||||
int32_t scope_id = 0);
|
||||
|
@ -52,8 +54,10 @@ class ParallelBuildManager {
|
|||
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
|
||||
AnfNode *node) const;
|
||||
|
||||
bool WaitOne(int *task_id, char **task_result) const;
|
||||
bool WaitOne(int *task_id, char **task_result, char **pre_build_result) const;
|
||||
bool IsAllPreTaskFinish() const;
|
||||
bool IsAllTaskFinish() const;
|
||||
void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
|
||||
std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
|
||||
KernelModPtr GenKernelMod(const string &json_name, const string &processor,
|
||||
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
|
||||
|
@ -62,6 +66,7 @@ class ParallelBuildManager {
|
|||
|
||||
private:
|
||||
PyObject *tbe_parallel_compiler_;
|
||||
std::map<int32_t, AnfNodePtr> pre_task_map_;
|
||||
std::map<int32_t, KernelBuildTaskInfo> task_map_;
|
||||
std::vector<KernelBuildTaskInfo> same_op_list_;
|
||||
};
|
||||
|
|
|
@ -550,6 +550,7 @@ void AscendSession::InitRuntimeResource() {
|
|||
|
||||
void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
||||
MS_LOG(INFO) << "HardwareOptimize start!";
|
||||
device::ascend::KernelPreBuild(kernel_graph.get());
|
||||
opt::AscendBackendOptimization(kernel_graph);
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
kernel_graph->SetExecOrderByDefault();
|
||||
|
|
Loading…
Reference in New Issue