!1840 Add op pre compiler process

Merge pull request !1840 from wangcong/master
mindspore-ci-bot 2020-06-11 14:58:58 +08:00 committed by Gitee
commit 8de8289cfd
9 changed files with 121 additions and 20 deletions

View File

@@ -28,7 +28,8 @@ build_in_impl_path = get_build_in_impl_path()
# op function list
op_build = "compile"
op_pre_build = "pre_build"
fusion_type_map = {'Convolution': 0, 'ElemWise': 1, 'CommReduce': 2,
'Segment': 3, 'Opaque': 4}
def _initialize(impl_path):
"""Initialize"""
@@ -108,7 +109,7 @@ def build_op(build_type, json_str):
# pre build
if build_type == op_pre_build:
op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name)
op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
# disable only pattern configuration
op_build_cfg_en()
return get_op_pattern()
@@ -159,11 +160,16 @@ def compile_with_json(json_str):
json_info = json.loads(json_str)
if "fusion_op" in json_info:
ret = compile_fusion_op(json_str)
elif "compile_type" in json_info:
ret = build_op(op_pre_build, json_str)
else:
ret = build_op(op_build, json_str)
return ret
if __name__ == "__main__":
in_args = sys.stdin.readline()
compile_with_json(in_args)
result = compile_with_json(in_args)
if result in fusion_type_map:
exit(fusion_type_map[result])
else:
exit(100)
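
The exit code is the only channel the pre-build child process has back to its parent: compile_with_json returns the op pattern reported by get_op_pattern(), and the __main__ block maps that pattern through fusion_type_map to a process exit code, falling back to 100 when the result is not a known pattern. A minimal sketch of how a caller could decode that code again, assuming an inverse lookup table that is not part of this patch:

# Hypothetical helper on the parent side: map the child's exit code back to
# the fusion pattern name encoded by compiler.py (100 means "no known pattern").
FUSION_TYPE_MAP = {'Convolution': 0, 'ElemWise': 1, 'CommReduce': 2,
                   'Segment': 3, 'Opaque': 4}
CODE_TO_PATTERN = {code: name for name, code in FUSION_TYPE_MAP.items()}

def decode_pre_build_code(returncode):
    """Return the fusion pattern name for a pre-build exit code, or None."""
    return CODE_TO_PATTERN.get(returncode)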

View File

@@ -75,7 +75,6 @@ def check_supported(op_json: str):
return ret
def run_compiler(op_json):
"""
run compiler to compile op with subprocess
@@ -88,15 +87,16 @@
"""
try:
tbe_compiler = os.path.join(os.path.split(os.path.realpath(__file__))[0], "compiler.py")
subprocess.run([sys.executable, tbe_compiler], input=op_json, timeout=300,
text=True, capture_output=True, check=True)
return "Success", "Success"
completed_object = subprocess.run([sys.executable, tbe_compiler], input=op_json, timeout=300,
text=True, capture_output=True, check=False)
if completed_object:
code = completed_object.returncode
return "Success", str(code)
except subprocess.TimeoutExpired:
tb = traceback.format_exc()
return "TBEException", "CompileTimeOut: " + tb + "\ninput_args: " + op_json
return "TBEException", "PreCompileTimeOut: " + tb + "\ninput_args: " + op_json
except subprocess.CalledProcessError as e:
return "TBEException", "CompileProcessFailed:\n" + e.stdout + "\n" + e.stderr + "\ninput_args: " + op_json
return "TBEException", "PreCompileProcessFailed:\n" + e.stdout + "\n" + e.stderr + "\ninput_args: " + op_json
class CompilerPool:
"""compiler pool"""
@@ -154,11 +154,11 @@
task_id, task_future = self.__running_tasks.pop(0)
ret_type, result = task_future.get(330)
if ret_type == "Success":
ret = task_id, "Success"
ret = task_id, "Success", result
elif ret_type in ("Exception", "TBEException"):
ret = task_id, ret_type + ":" + result
ret = task_id, ret_type + ":" + result, "_"
else:
ret = task_id, "Exception: Not support return type:" + str(ret_type)
ret = task_id, "Exception: Not support return type:" + str(ret_type), "_"
return ret
def reset_task_info(self):
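
With this change, the wait path hands back a three-element tuple: the task id, a status string, and the pre-build result (the stringified child return code from run_compiler on success, or the placeholder "_" on failure). On the C++ side the same tuple is unpacked with the PyArg_ParseTuple format "iss" further down in this patch. A rough sketch of how a hypothetical caller might drain such a pool, assuming a compiler_pool object that exposes a wait_one() method with exactly this return shape:

# Illustrative only: compiler_pool and wait_one() are assumed to follow the
# (task_id, status, pre_build_result) contract introduced above.
def drain_pre_build_tasks(compiler_pool, task_count):
    codes = {}
    for _ in range(task_count):
        task_id, status, pre_build_result = compiler_pool.wait_one()
        if status != "Success":
            raise RuntimeError("pre compile failed, task %d: %s" % (task_id, status))
        # pre_build_result carries the child's exit code as a string,
        # e.g. "1" for an ElemWise pattern or "100" for no known pattern.
        codes[task_id] = int(pre_build_result)
    return codes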

View File

@@ -62,6 +62,31 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) {
return kernel_mod_ptr;
}
static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
std::vector<AnfNodePtr> tbe_nodes;
for (const auto &anf_node : kernel_graph_ptr->execution_order()) {
MS_EXCEPTION_IF_NULL(anf_node);
if (!AnfAlgo::IsRealKernel(anf_node)) {
continue;
}
KernelType kernel_type = AnfAlgo::GetKernelType(anf_node);
switch (kernel_type) {
case KernelType::TBE_KERNEL: {
if (AnfAlgo::GetKernelMod(anf_node) == nullptr) {
tbe_nodes.push_back(anf_node);
}
break;
}
default: {
break;
}
}
}
bool ret = kernel::TbeOpParallelPreBuild(tbe_nodes);
return ret;
}
static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
std::vector<AnfNodePtr> tbe_nodes;
@@ -188,6 +213,12 @@ bool IsAtomicNode(const CNodePtr &kernel_node) {
return atomic_flag;
}
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
bool ret = device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr);
return ret;
}
bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
TbeUtils::LoadCache();

View File

@@ -22,6 +22,10 @@
namespace mindspore {
namespace device {
namespace ascend {
/**
* @brief kernel pre build for ascend.
*/
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr);
/**
* @brief kernel build for ascend.
*/

View File

@@ -102,7 +102,8 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
while (!build_manger->IsAllTaskFinish()) {
int task_id = -1;
char *task_result = nullptr;
auto ret = build_manger->WaitOne(&task_id, &task_result);
char *pre_build_result = nullptr;
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
if (!ret) {
MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id;
}

View File

@@ -27,7 +27,7 @@
// the TBE back-end operator implementation difference
namespace mindspore {
namespace kernel {
enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED };
enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
namespace tbe {
using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
nlohmann::json *attrs_json);

View File

@@ -42,6 +42,40 @@ constexpr auto kStartCompileOp = "start_compile_op";
constexpr auto kWaitOne = "wait_one";
constexpr auto kResetTaskInfo = "reset_task_info";
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) {
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
for (const auto &anf_node : anf_nodes) {
// gen kernel json
nlohmann::json kernel_json;
TbeKernelJsonCreator creator(OP_PRE_COMPILE);
if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) {
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
return false;
}
kernel_json["compile_type"] = "pre_build";
// op build
auto task_id = build_manger->StartCompileOp(kernel_json);
build_manger->SavePreTaskInfo(task_id, anf_node);
}
while (!build_manger->IsAllPreTaskFinish()) {
int task_id = -1;
char *task_result = nullptr;
char *pre_build_result = nullptr;
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
if (!ret) {
MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id;
}
if ((task_result != nullptr) && (strcmp(task_result, "Success") != 0)) {
MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result;
}
build_manger->PreTaskFinishProcess(task_id, pre_build_result);
}
return true;
}
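
TbeOpParallelPreBuild tags each generated kernel JSON with "compile_type": "pre_build", which is exactly the key compile_with_json checks on the Python side before routing the request to build_op(op_pre_build, ...). A minimal sketch of the request that crosses this boundary; apart from compile_type, the field names are placeholders, since the real structure comes from GenTbeSingleKernelJson:

# Sketch of a pre-build request as compiler.py would receive it on stdin.
# Only "compile_type" is guaranteed by this patch; the rest is illustrative.
import json

pre_build_request = {
    "compile_type": "pre_build",                    # routes to build_op(op_pre_build, ...)
    "op_info": {"kernel_name": "example_kernel"},   # placeholder payload
}
json_str = json.dumps(pre_build_request)
# For a well-formed request, compile_with_json(json_str) returns the op
# pattern (e.g. 'ElemWise'), which the __main__ block turns into an exit code.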
bool TbeOpParallelBuild(std::vector<AnfNodePtr> anf_nodes) {
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
@@ -82,7 +116,8 @@ bool TbeOpParallelBuild(std::vector<AnfNodePtr> anf_nodes) {
while (!build_manger->IsAllTaskFinish()) {
int task_id = -1;
char *task_result = nullptr;
auto ret = build_manger->WaitOne(&task_id, &task_result);
char *pre_build_result = nullptr;
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
if (!ret) {
MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id;
}
@@ -116,7 +151,7 @@ int32_t ParallelBuildManager::StartCompileOp(const nlohmann::json &kernel_json)
return task_id;
}
bool ParallelBuildManager::WaitOne(int *task_id, char **task_result) const {
bool ParallelBuildManager::WaitOne(int *task_id, char **task_result, char **pre_build_result) const {
MS_LOG(INFO) << "wait task start.";
MS_EXCEPTION_IF_NULL(task_id);
MS_EXCEPTION_IF_NULL(task_result);
@@ -128,10 +163,15 @@ bool ParallelBuildManager::WaitOne(int *task_id, char **task_result) const {
MS_EXCEPTION(ArgumentError) << "Failed to call function wait_one";
return false;
}
(void)PyArg_ParseTuple(pRes, "is", task_id, task_result);
(void)PyArg_ParseTuple(pRes, "iss", task_id, task_result, pre_build_result);
return true;
}
void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) {
MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id;
pre_task_map_[task_id] = anf_node;
}
void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node,
const std::string &json_name, const std::vector<size_t> &input_size_list,
const std::vector<size_t> &output_size_list, int32_t scope_id) {
@@ -150,11 +190,24 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
task_map_[task_id] = task_info;
}
bool ParallelBuildManager::IsAllPreTaskFinish() const {
MS_LOG(INFO) << "wait pre build process task_num: " << pre_task_map_.size();
return pre_task_map_.empty();
}
bool ParallelBuildManager::IsAllTaskFinish() const {
MS_LOG(INFO) << "wait process task_num: " << task_map_.size();
return task_map_.empty();
}
void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) {
auto task_iter = pre_task_map_.find(task_id);
if (task_iter == pre_task_map_.end()) {
MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id;
}
(void)pre_task_map_.erase(task_iter);
}
std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) {
auto task_iter = task_map_.find(task_id);
if (task_iter == task_map_.end()) {

View File

@@ -26,6 +26,7 @@
#include <nlohmann/json.hpp>
namespace mindspore {
namespace kernel {
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes);
bool TbeOpParallelBuild(std::vector<AnfNodePtr> anf_nodes);
struct KernelBuildTaskInfo {
@@ -42,6 +43,7 @@ class ParallelBuildManager {
ParallelBuildManager();
~ParallelBuildManager();
int32_t StartCompileOp(const nlohmann::json &kernel_json) const;
void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node);
void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
int32_t scope_id = 0);
@@ -52,8 +54,10 @@
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
AnfNode *node) const;
bool WaitOne(int *task_id, char **task_result) const;
bool WaitOne(int *task_id, char **task_result, char **pre_build_result) const;
bool IsAllPreTaskFinish() const;
bool IsAllTaskFinish() const;
void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
KernelModPtr GenKernelMod(const string &json_name, const string &processor,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
@@ -62,6 +66,7 @@
private:
PyObject *tbe_parallel_compiler_;
std::map<int32_t, AnfNodePtr> pre_task_map_;
std::map<int32_t, KernelBuildTaskInfo> task_map_;
std::vector<KernelBuildTaskInfo> same_op_list_;
};

View File

@@ -550,6 +550,7 @@ void AscendSession::InitRuntimeResource() {
void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const {
MS_LOG(INFO) << "HardwareOptimize start!";
device::ascend::KernelPreBuild(kernel_graph.get());
opt::AscendBackendOptimization(kernel_graph);
MS_EXCEPTION_IF_NULL(kernel_graph);
kernel_graph->SetExecOrderByDefault();