!22731 GraphKernel supports CPU

Merge pull request !22731 from DeshiChen/0901_graphkernel_cpu
This commit is contained in:
i-robot 2021-09-30 09:36:35 +00:00 committed by Gitee
commit 06b0beced7
35 changed files with 608 additions and 89 deletions

View File

@ -47,7 +47,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(PYBIND11_CPP_STANDARD -std=c++17)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPTION_CXX_FLAGS}")
if(ENABLE_AKG AND (ENABLE_D OR ENABLE_GPU))
if(ENABLE_AKG AND CMAKE_SYSTEM_NAME MATCHES "Linux")
add_subdirectory("${CMAKE_SOURCE_DIR}/akg")
endif()

View File

@ -45,7 +45,7 @@ update_submodule()
cd "${BASEPATH}/graphengine"
git submodule update --init metadef
cd "${BASEPATH}"
if [[ "X$ENABLE_AKG" = "Xon" ]] && [[ "X$ENABLE_D" = "Xon" || "X$ENABLE_GPU" = "Xon" ]]; then
if [[ "X$ENABLE_AKG" = "Xon" ]]; then
git submodule update --init --recursive akg
fi
}
@ -57,7 +57,6 @@ build_exit()
exit 1
}
make_clean()
{
echo "enable make clean"

View File

@ -151,3 +151,7 @@ endif()
if(ENABLE_CPU AND NOT WIN32)
add_compile_definitions(ENABLE_ARMOUR)
endif()
if(ENABLE_AKG AND CMAKE_SYSTEM_NAME MATCHES "Linux")
add_compile_definitions(ENABLE_AKG)
endif()

View File

@ -291,7 +291,7 @@ install(
COMPONENT mindspore
)
if((ENABLE_D OR ENABLE_GPU) AND ENABLE_AKG)
if(ENABLE_AKG AND CMAKE_SYSTEM_NAME MATCHES "Linux")
set (AKG_PATH ${BUILD_PATH}/mindspore/akg)
file(REMOVE_RECURSE ${AKG_PATH}/_akg)
file(MAKE_DIRECTORY ${AKG_PATH}/_akg)

View File

@ -187,20 +187,6 @@ install(
COMPONENT mindspore
)
if((ENABLE_D OR ENABLE_GPU) AND ENABLE_AKG)
set (AKG_PATH ${CMAKE_SOURCE_DIR}/build/mindspore/akg)
file(REMOVE_RECURSE ${AKG_PATH}/_akg)
file(MAKE_DIRECTORY ${AKG_PATH}/_akg)
file(TOUCH ${AKG_PATH}/_akg/__init__.py)
install(DIRECTORY "${AKG_PATH}/akg" DESTINATION "${AKG_PATH}/_akg")
install(
DIRECTORY
${AKG_PATH}/_akg
DESTINATION ${INSTALL_PY_DIR}/
COMPONENT mindspore
)
endif()
if(EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset)
install(
DIRECTORY ${CMAKE_SOURCE_DIR}/mindspore/dataset

View File

@ -1180,11 +1180,131 @@ class GraphSplitAscend(GraphSplitByPattern):
_fuse_once(fuse_func)
# CPU-specific graph splitter: supplies the fusion rules used to merge
# primitive ops into GraphKernel composite areas when the target is CPU.
# The generic fusion driver comes from GraphSplitByPattern.
class GraphSplitCpu(GraphSplitByPattern):
"""Graph splitter for the CPU target."""
# Upper bounds on the op count of an area before it may take part in
# broadcast/reduce fusion.
# NOTE(review): "BORADCAST" looks like a typo for "BROADCAST", but the name
# is used consistently below, so it is left unchanged here.
BORADCAST_FUSE_DEPTH = 20
REDUCE_FUSE_DEPTH = 20
def get_default_mode(self, op):
"""Get default mode in CPU: RESHAPE ops stay basic, all others composite."""
pattern = PrimLib.iter_type(op)
return self.Area.MODE_BASIC if pattern == PrimLib.RESHAPE else self.Area.MODE_COMPOSITE
def pattern_fuse(self, fuse_func=None):
"""fuse Areas by pattern

Each _xxx rule below returns None when it does not apply, otherwise a pair
(areas_to_fuse, flag). The flag is True for rules scanning in_relations
(input-side candidates) and False for out_relations — presumably the fusion
direction expected by GraphSplitByPattern.fuse; confirm against that class.
"""
# Fuse a RESHAPE area with its cheapest-pattern acyclic neighbour; output-side
# neighbours are scanned first, input-side producers qualify only when the
# reshape's input tensor has a single consumer and the producer is not an output.
def _reshape(dom):
if dom.pattern != PrimLib.RESHAPE:
return None
min_area, forward_fuse = None, False
for a, _ in dom.out_relations.items():
if a.pattern <= PrimLib.BROADCAST and dom.check_acyclic(a) and \
(min_area is None or a.pattern < min_area.pattern):
min_area = a
for a, _ in dom.in_relations.items():
if a.pattern <= PrimLib.BROADCAST and a.check_acyclic(dom) and \
len(dom.ops[0].inputs[0].to_ops) == 1 and not a.is_output and \
(min_area is None or a.pattern < min_area.pattern):
min_area, forward_fuse = a, True
return ([min_area], forward_fuse) if min_area else None
# Depth fusion: pull in the single producer (elementwise relation, matching
# output shape) that feeds only this area.
def _elemwise_depth(dom):
if dom.pattern not in (PrimLib.ELEMWISE, PrimLib.BROADCAST) or len(dom.in_relations) != 1:
return None
a, r = list(dom.in_relations.items())[0]
if a.pattern > PrimLib.BROADCAST or len(a.out_relations) != 1 or r != PrimLib.ELEMWISE or \
a.dom_op().output.shape != dom.dom_op().output.shape:
return None
return [a], True
# Width fusion: pull in every acyclic elementwise producer whose output
# shape matches this area's.
def _elemwise_width(dom):
if dom.pattern not in (PrimLib.ELEMWISE, PrimLib.BROADCAST):
return None
fused = []
for a, r in dom.in_relations.items():
if a.pattern <= PrimLib.BROADCAST and r == PrimLib.ELEMWISE and a.check_acyclic(dom) and \
a.dom_op().output.shape == dom.dom_op().output.shape:
fused.append(a)
return fused, True
# Shared predicate: True when consumer `a` (relation `r`) must NOT be fused
# with `dom` under the broadcast rules.
def _broadcast_pat_exclude(dom, a, r):
if a.pattern == PrimLib.REDUCE:
return dom.pattern > PrimLib.ELEMWISE or r > PrimLib.ELEMWISE
return a.pattern > PrimLib.REDUCE or r > PrimLib.BROADCAST
# Depth fusion toward the single consumer (which must itself have a single
# producer); bounded by BORADCAST_FUSE_DEPTH, and dom must not be an output.
def _broadcast_depth(dom):
if dom.pattern not in (PrimLib.ELEMWISE, PrimLib.BROADCAST) or len(dom.out_relations) != 1 or \
dom.is_output or len(dom.ops) > self.BORADCAST_FUSE_DEPTH:
return None
a, r = list(dom.out_relations.items())[0]
if _broadcast_pat_exclude(dom, a, r) or len(a.in_relations) != 1:
return None
return [a], False
# Width fusion over all consumers; bails out entirely (returns None) if any
# consumer is excluded or the consumers disagree on output shape.
def _broadcast_width(dom):
if dom.pattern not in (PrimLib.ELEMWISE, PrimLib.BROADCAST) or \
dom.is_output or len(dom.ops) > self.BORADCAST_FUSE_DEPTH:
return None
fused = []
for a, r in dom.out_relations.items():
if _broadcast_pat_exclude(dom, a, r) or not dom.check_acyclic(a) or \
(fused and fused[0].dom_op().output.shape != a.dom_op().output.shape):
return None
fused.append(a)
return fused, False
# Shared predicate for the reduce rules, bounded by REDUCE_FUSE_DEPTH.
def _reduce_pat_exclude(_, a, r):
if len(a.ops) > self.REDUCE_FUSE_DEPTH:
return True
return a.pattern > PrimLib.ELEMWISE or r > PrimLib.REDUCE or r == PrimLib.BROADCAST
# Depth fusion: the single producer feeding a REDUCE area, when that
# producer has no other consumers.
def _reduce_depth(dom):
if dom.pattern != PrimLib.REDUCE or len(dom.in_relations) != 1:
return None
a, r = list(dom.in_relations.items())[0]
if _reduce_pat_exclude(dom, a, r) or len(a.out_relations) != 1:
return None
return [a], True
# Width fusion: every eligible acyclic producer of a REDUCE area.
def _reduce_width(dom):
if dom.pattern != PrimLib.REDUCE:
return None
fused = []
for a, r in dom.in_relations.items():
if not _reduce_pat_exclude(dom, a, r) and a.check_acyclic(dom):
fused.append(a)
return fused, True
# Apply all rules repeatedly until no rule changes the graph (fixed point).
def _fuse_loop():
changed = True
while changed:
changed = False
changed = self.fuse(_reshape) or changed
changed = self.fuse(_elemwise_depth) or changed
changed = self.fuse(_elemwise_width) or changed
changed = self.fuse(_reduce_depth) or changed
changed = self.fuse(_reduce_width) or changed
changed = self.fuse(_broadcast_depth) or changed
changed = self.fuse(_broadcast_width) or changed
# Apply the rules once, short-circuiting at the first rule that fuses.
def _fuse_once(fuse_func):
if fuse_func(_reshape) or fuse_func(_elemwise_depth) or fuse_func(_elemwise_width) or \
fuse_func(_reduce_depth) or fuse_func(_reduce_width) or fuse_func(_broadcast_depth) or \
fuse_func(_broadcast_width):
return
if fuse_func is None:
_fuse_loop()
else:
_fuse_once(fuse_func)
def split(graph, target, flags):
"""Split graph"""
result = None
if target == "cuda":
result = GraphSplitGpu(graph, flags).split()
else:
elif target == "aicore":
result = GraphSplitAscend(graph, flags).split()
else:
result = GraphSplitCpu(graph, flags).split()
return result

View File

@ -132,7 +132,7 @@ class CompositeGraph:
return dict()
attr = {}
for a in op['attr']:
if a['name'] == 'axis' and op['name'] in ('ReduceSum', 'ReduceMax', 'ReduceMin'):
if a['name'] == 'axis' and op['name'] in ('ReduceSum', 'ReduceMax', 'ReduceMin', 'Argmax', 'Argmin'):
attr['reduce_axis'] = a['value']
else:
attr[a['name']] = a['value']

View File

@ -33,7 +33,7 @@ def copy_json(pid_path, ppid_path):
shutil.move(os.path.join(pid_path, json_file), ppid_path)
def _compile_akg_task_gpu(json_strs, attrs):
def _compile_akg_task_default(json_strs, attrs):
"""
compile func called in single process
@ -110,16 +110,14 @@ class AkgProcess:
if self.argc == 0:
raise ValueError("json must be not null")
args = [(arg, attrs) for arg in self.args]
if self.platform == "GPU":
with Pool(processes=self.process_num) as pool:
res = pool.starmap_async(_compile_akg_task_gpu, args)
res.get(timeout=self.wait_time)
elif self.platform == "ASCEND":
if self.platform == "ASCEND":
with Pool(processes=self.process_num) as pool:
res = pool.starmap_async(_compile_akg_task_ascend, args)
res.get(timeout=self.wait_time)
else:
raise ValueError("The value of 'platform' must be 'GPU' or 'ASCEND'.")
with Pool(processes=self.process_num) as pool:
res = pool.starmap_async(_compile_akg_task_default, args)
res.get(timeout=self.wait_time)
return True
def accept_json(self, json):

View File

@ -12,22 +12,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""kernel build server for gpu"""
"""kernel build server for akg kernels"""
import sys
import warnings
from mindspore._extends.remote.kernel_build_server import Messager, get_logger, AkgBuilder
class GpuMessager(Messager):
class AkgMessager(Messager):
'''
GPU Messager
Default Messager for akg kernels.
It works as a server, communicating with c++ client.
'''
def __init__(self, fdin, fdout):
super().__init__(fdin, fdout)
get_logger().info("[TRACE] GPU Messager init...")
self.akg_builder = AkgBuilder("GPU")
get_logger().info("[TRACE] Akg Messager init...")
self.akg_builder = AkgBuilder("default")
def handle(self):
"""
@ -42,7 +42,7 @@ class GpuMessager(Messager):
self.exit()
def exit(self):
get_logger().info("[TRACE] GPU Messager Exit...")
get_logger().info("[TRACE] Akg Messager Exit...")
exit()
@ -51,5 +51,5 @@ if __name__ == '__main__':
if len(sys.argv) != 3:
raise Exception('Incorrect argv: {}'.format(sys.argv))
get_logger().debug(f"[TRACE] argv: {str(sys.argv)}")
messager = GpuMessager(int(sys.argv[1]), int(sys.argv[2]))
messager = AkgMessager(int(sys.argv[1]), int(sys.argv[2]))
messager.run()

View File

@ -13,12 +13,6 @@ if(ENABLE_D)
file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"kernel_query.cc"
"kernel_fusion.cc"
"akg/akg_kernel_build.cc"
"akg/ascend/*.cc"
"akg/akg_kernel_json_generator.cc"
"akg/akg_kernel_json_decoder.cc"
"akg/akg_kernel_attrs_process.cc"
"akg/akg_kernel_metadata.cc"
"tbe/*.cc"
"host/*.cc"
"aicpu/*.cc"
@ -95,11 +89,6 @@ endif()
if(ENABLE_GPU)
file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"gpu/*.cu"
"akg/akg_kernel_build.cc"
"akg/gpu/*.cc"
"akg/akg_kernel_json_generator.cc"
"akg/akg_kernel_json_decoder.cc"
"akg/akg_kernel_attrs_process.cc"
)
file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc")
@ -122,7 +111,35 @@ if(ENABLE_GPU)
# add_library(_mindspore_kernel_cuda_obj OBJECT ${CUDA_SRC_LIST})
endif()
set_property(SOURCE ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}
if(ENABLE_AKG AND ${CMAKE_SYSTEM_NAME} MATCHES "Linux")
file(GLOB_RECURSE AKG_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"akg/akg_kernel_build.cc"
"akg/akg_kernel_json_generator.cc"
"akg/akg_kernel_json_decoder.cc"
"akg/akg_kernel_attrs_process.cc"
)
if(ENABLE_GPU)
file(GLOB_RECURSE AKG_GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"akg/gpu/*.cc"
)
list(APPEND AKG_SRC_LIST ${AKG_GPU_SRC_LIST})
endif()
if(ENABLE_D)
file(GLOB_RECURSE AKG_D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"akg/ascend/*.cc"
"akg/akg_kernel_metadata.cc"
)
list(APPEND AKG_SRC_LIST ${AKG_D_SRC_LIST})
endif()
if(ENABLE_CPU)
file(GLOB_RECURSE AKG_CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"akg/cpu/*.cc"
)
list(APPEND AKG_SRC_LIST ${AKG_CPU_SRC_LIST})
endif()
endif()
set_property(SOURCE ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST} ${AKG_SRC_LIST}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_KERNEL)
add_library(_mindspore_backend_kernel_compiler_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST}
${GPU_SRC_LIST} ${D_SRC_LIST} ${QUANTUM_SRC_LIST})
${GPU_SRC_LIST} ${D_SRC_LIST} ${AKG_SRC_LIST} ${QUANTUM_SRC_LIST})

View File

@ -16,6 +16,7 @@
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include <sys/shm.h>
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>

View File

@ -17,8 +17,6 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_
#include <sys/shm.h>
#include <string>
#include <utility>
#include <vector>

View File

@ -0,0 +1,49 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/akg/cpu/akg_cpu_kernel_build.h"
#include <Python.h>
#include <vector>
#include <memory>
#include <string>
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/akg/cpu/akg_cpu_kernel_mod.h"
#include "utils/ms_utils.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
// Look up an already-compiled kernel pack in the kernel cache, tagged with
// the CPU processor so it does not collide with GPU/Ascend entries.
KernelPackPtr AkgCpuKernelBuilder::AkgSearchCache(const std::string &kernel_name) {
  auto cached_pack = SearchCache(kernel_name, kProcessorCpu);
  return cached_pack;
}
// Register a freshly-built kernel in the kernel cache under the CPU
// processor tag and return the resulting kernel pack.
KernelPackPtr AkgCpuKernelBuilder::AkgInsertCache(const std::string &kernel_name) {
  auto inserted_pack = InsertCache(kernel_name, kProcessorCpu);
  return inserted_pack;
}
void AkgCpuKernelBuilder::AkgSetKernelMod(const KernelPackPtr &kernel_pack,
const AkgKernelJsonGenerator &json_generator, const AnfNodePtr &anf_node) {
auto kernel_mod_ptr = std::make_shared<CpuKernelMod>(kernel_pack);
kernel_mod_ptr->SetInputSizeList(json_generator.input_size_list());
kernel_mod_ptr->SetOutputSizeList(json_generator.output_size_list());
AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get());
}
// Persist the kernel's json description into the kernel-meta directory.
void AkgCpuKernelBuilder::AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) {
  const auto &meta_path = kernel::KernelMeta::GetInstance()->kernel_meta_path();
  kernel::SaveJsonInfo(kernel_name, kernel_json, meta_path);
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,39 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_BUILD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_BUILD_H_
#include <string>
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include "base/base.h"
namespace mindspore {
namespace kernel {
// AkgKernelBuilder specialization for the CPU backend: compiled kernels are
// cached under the "cpu" processor tag and executed through CpuKernelMod.
class AkgCpuKernelBuilder : public AkgKernelBuilder {
public:
AkgCpuKernelBuilder() = default;
~AkgCpuKernelBuilder() = default;
// Build requests go through the shared AkgKernelBuildClient.
kernel::KernelBuildClient *GetClient() override { return &(kernel::AkgKernelBuildClient::Instance()); }
// Search for / insert a compiled kernel pack in the kernel cache (cpu tag).
KernelPackPtr AkgSearchCache(const std::string &kernel_name) override;
KernelPackPtr AkgInsertCache(const std::string &kernel_name) override;
// Create a CpuKernelMod from kernel_pack and attach it to anf_node.
void AkgSetKernelMod(const KernelPackPtr &kernel_pack, const AkgKernelJsonGenerator &json_generator,
const AnfNodePtr &anf_node) override;
// Save the kernel json next to the kernel meta files.
void AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) override;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_BUILD_H_

View File

@ -0,0 +1,143 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/akg/cpu/akg_cpu_kernel_mod.h"
#include <dlfcn.h>
#include <algorithm>
#include <memory>
#include <utility>
#include "nlohmann/json.hpp"
#include "backend/kernel_compiler/common_utils.h"
#include "common/thread_pool.h"
#include "utils/ms_utils.h"
#include "mindspore/ccsrc/debug/common.h"
namespace mindspore {
namespace kernel {
namespace {
using AkgParallelLambda = int (*)(int task_id, int num_task, void *cdata);
int AkgLaunchFunc(AkgParallelLambda flambda, void *cdata, int num_task) {
size_t num_workers =
std::min(mindspore::common::ThreadPool::GetInstance().GetSyncRunThreadNum(), static_cast<size_t>(num_task));
std::vector<mindspore::common::Task> tasks;
size_t thread_index = 0;
while (thread_index < num_workers) {
auto block = [&, thread_index]() {
flambda(thread_index, num_workers, cdata);
return mindspore::common::SUCCESS;
};
tasks.emplace_back(block);
thread_index++;
}
mindspore::common::ThreadPool::GetInstance().SyncRun(tasks);
return 0;
}
// Callback table handed to AKG-generated CPU kernels: Launch() appends a
// pointer to one of these as the last runtime argument, and the generated
// code calls back through these raw function pointers for parallel launch
// and heap management.
// NOTE(review): the field order is presumably part of the ABI contract with
// the AKG code generator — confirm before reordering members.
struct AkgCallBack {
void *parallel_launch_func;
void *(*malloc_func)(size_t);
void (*free_func)(void *);
AkgCallBack() {
parallel_launch_func = reinterpret_cast<void *>(&AkgLaunchFunc);
malloc_func = &malloc;
free_func = &free;
}
~AkgCallBack() = default;
};
} // namespace
CpuKernelManagerPtr CpuKernelMod::kernelmanager_ = std::make_shared<CpuKernelManager>();
// Close every shared object that GetFunction() opened with dlopen.
// Each .so is cached (and therefore closed) exactly once per kernel name.
CpuKernelManager::~CpuKernelManager() {
  for (auto &entry : cpu_func_map_) {
    void *so_handle = entry.second.second;
    if (so_handle != nullptr) {
      (void)dlclose(so_handle);
    }
  }
}
// Unsynchronized cache lookup; both call sites (SearchFuncWithSharedLock and
// GetFunction) hold mutex_ in some mode before calling this.
void *CpuKernelManager::SearchFunc(const std::string &kernel_name) const {
  const auto entry = cpu_func_map_.find(kernel_name);
  return entry == cpu_func_map_.end() ? nullptr : entry->second.first;
}
// Reader-side lookup: many threads may probe the cache concurrently under a
// shared lock; writers (GetFunction) take the unique lock.
void *CpuKernelManager::SearchFuncWithSharedLock(const std::string &kernel_name) const {
  std::shared_lock<std::shared_mutex> read_guard(mutex_);
  return SearchFunc(kernel_name);
}
// Return the launch function for `kernel_name`, loading the kernel's shared
// object on first use. The function/handle pair is cached so each .so is
// dlopen'ed once and dlclose'd once (in the destructor).
// Returns nullptr (after logging) if the .so or the symbol cannot be found.
void *CpuKernelManager::GetFunction(const std::string &kernel_name) {
  // Fast path: shared-lock probe of the cache.
  if (auto func = SearchFuncWithSharedLock(kernel_name); func != nullptr) {
    return func;
  }
  std::unique_lock lock(mutex_);
  // Search cache again between setting unique lock and calling "dlopen", to make sure that
  // only one thread can call "dlopen" and insert handle to the cache for a new kernel_name.
  // To avoid that several nodes (with the same kernel_name) open the same "so" by dlopen,
  // but only cache it once, then the "dlclose" will be called only once, causing resource leak.
  if (auto func = SearchFunc(kernel_name); func != nullptr) {
    return func;
  }
  // The .so file is named after the kernel with any trailing "_kernel..." suffix removed.
  std::string fn;
  auto it = kernel_name.rfind("_kernel");
  if (it < kernel_name.size()) {
    fn = kernel_name.substr(0, it);
  } else {
    fn = kernel_name;
  }
  std::string fn_so = kCpuKernelMeta + fn + ".so";
  auto handle = dlopen(fn_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr) {
    MS_LOG(ERROR) << "Load " << fn_so << " failed. kernel: " << kernel_name;
    return nullptr;
  }
  auto launch_func = dlsym(handle, kernel_name.c_str());
  if (launch_func == nullptr) {
    MS_LOG(ERROR) << "Undefined symbol " << kernel_name << " in " << fn_so;
    // Fix: the handle was opened but will never be cached, so close it here
    // instead of leaking the shared object.
    (void)dlclose(handle);
    return nullptr;
  }
  cpu_func_map_[kernel_name] = std::make_pair(launch_func, handle);
  return launch_func;
}
// Execute the AKG-compiled CPU kernel for this node.
// The kernel symbol name is read from the cached kernel json, resolved via the
// shared CpuKernelManager, and invoked with a single argument: a pointer to an
// array of raw addresses laid out as [inputs..., outputs..., &AkgCallBack].
// NOTE(review): this layout is the ABI expected by the AKG-generated code —
// confirm against the AKG side before changing the order.
// stream_ptr is unused on CPU. Returns false (after logging) if the kernel
// function cannot be resolved.
bool CpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
// Parse the kernel json held in the kernel pack to recover the symbol name.
auto js = nlohmann::json::parse(kernel_pack_->GetJson()->contents,
kernel_pack_->GetJson()->contents + kernel_pack_->GetJson()->len);
std::string kernel_name = js["kernelName"];
auto launch_func = kernelmanager_->GetFunction(kernel_name);
if (launch_func == nullptr) {
MS_LOG(ERROR) << "GetFunction failed. kernel: " << kernel_name;
return false;
}
// Flatten input then output device addresses into one argument array.
std::vector<void *> runtimeargs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtimeargs),
[](const AddressPtr &input) -> void * { return input->addr; });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs),
[](const AddressPtr &output) -> void * { return output->addr; });
// The callback struct must outlive the kernel call; it is the last argument.
AkgCallBack akg_callback;
runtimeargs.emplace_back(reinterpret_cast<void *>(&akg_callback));
using AkgCpuKernelFunction = void (*)(void *);
reinterpret_cast<AkgCpuKernelFunction>(launch_func)(reinterpret_cast<void *>(runtimeargs.data()));
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,73 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_MOD_H_
#include <string>
#include <vector>
#include <memory>
#include <utility>
#include <unordered_map>
#include <mutex>
#include <shared_mutex>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
// Thread-safe cache of AKG CPU kernel entry points: maps a kernel name to the
// function pointer resolved from its dlopen'ed shared object. Lookups take a
// shared lock; first-time loads take a unique lock. The destructor closes all
// cached .so handles.
class CpuKernelManager {
public:
CpuKernelManager() = default;
~CpuKernelManager();
// Return the launch function for kernel_name, dlopen'ing its .so on first use;
// nullptr on failure.
void *GetFunction(const std::string &kernel_name);
private:
// Lock-free lookup; callers must already hold mutex_.
void *SearchFunc(const std::string &kernel_name) const;
// Lookup under a shared (reader) lock.
void *SearchFuncWithSharedLock(const std::string &kernel_name) const;
// cache the kernel function: kernel_name -> {kernel_func, so_handle}
std::unordered_map<std::string, std::pair<void *, void *>> cpu_func_map_;
mutable std::shared_mutex mutex_;
};
using CpuKernelManagerPtr = std::shared_ptr<CpuKernelManager>;
// KernelMod for AKG-compiled CPU kernels: holds the kernel pack (json + .so)
// and the I/O size lists, and dispatches Launch() through the process-wide
// CpuKernelManager so each kernel .so is loaded only once.
class CpuKernelMod : public KernelMod {
public:
explicit CpuKernelMod(const KernelPackPtr &kp) : kernel_pack_(kp) {}
~CpuKernelMod() = default;
void SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
void SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
void SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
// Shared across all CpuKernelMod instances: caches dlopen'ed kernel functions.
static CpuKernelManagerPtr kernelmanager_;
private:
KernelPackPtr kernel_pack_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_; // workspace is not used in cpu kernel.
};
using CpuKernelModPtr = std::shared_ptr<CpuKernelMod>;
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_MOD_H_

View File

@ -27,7 +27,7 @@ class AkgGpuKernelBuilder : public AkgKernelBuilder {
AkgGpuKernelBuilder() = default;
~AkgGpuKernelBuilder() = default;
kernel::KernelBuildClient *GetClient() override { return &(kernel::GpuKernelBuildClient::Instance()); }
kernel::KernelBuildClient *GetClient() override { return &(kernel::AkgKernelBuildClient::Instance()); }
KernelPackPtr AkgSearchCache(const std::string &kernel_name) override;
KernelPackPtr AkgInsertCache(const std::string &kernel_name) override;
void AkgSetKernelMod(const KernelPackPtr &kernel_pack, const AkgKernelJsonGenerator &json_generator,

View File

@ -157,16 +157,21 @@ FusionType GetFusionTypeByName(const std::string &name) {
}
void KernelMeta::Initialize() {
kernel_meta_path_ = std::string(kGpuKernelMeta) + "/";
if (GetStrProcessorFromContext() == kProcessorCpu) {
kernel_meta_path_ = std::string(kCpuKernelMeta);
} else {
kernel_meta_path_ = std::string(kGpuKernelMeta) + "/";
#if defined(_WIN32) || defined(_WIN64)
auto ret = mkdir(kernel_meta_path_.c_str());
auto ret = mkdir(kernel_meta_path_.c_str());
#else
auto ret = mkdir(kernel_meta_path_.c_str(), S_IRWXG | S_IRWXU);
auto ret = mkdir(kernel_meta_path_.c_str(), S_IRWXG | S_IRWXU);
#endif
if (ret != 0) {
MS_LOG(INFO) << "kernel dir [" << kernel_meta_path_ << "], will be created later";
if (ret != 0) {
MS_LOG(INFO) << "kernel dir [" << kernel_meta_path_ << "], will be created later";
}
}
initialized_ = true;
}
@ -238,6 +243,8 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &pro
std::string kernel_json;
if (processor == kProcessorAiCore || processor == kProcessorAiCpu) {
kernel_json = kCceKernelMeta;
} else if (processor == kProcessorCpu) {
kernel_json = kCpuKernelMeta;
} else {
kernel_json = bin_map->kernel_meta_path();
}
@ -872,6 +879,8 @@ Processor GetProcessorFromContext() {
processor = kernel::Processor::CUDA;
} else if (device_info == kAscendDevice) {
processor = kernel::Processor::AICORE;
} else if (device_info == kCPUDevice) {
processor = kernel::Processor::CPU;
}
return processor;
}
@ -883,6 +892,8 @@ std::string GetStrProcessorFromContext() {
str_processor = kernel::kProcessorCuda;
} else if (processor == kernel::Processor::AICORE) {
str_processor = kernel::kProcessorAiCore;
} else if (processor == kernel::Processor::CPU) {
str_processor = kernel::kProcessorCpu;
}
return str_processor;
}

View File

@ -34,10 +34,12 @@
namespace mindspore {
namespace kernel {
constexpr auto kCceKernelMeta = "./kernel_meta/";
constexpr auto kCpuKernelMeta = "./kernel_meta/";
constexpr auto kGpuKernelMeta = "./cuda_meta";
constexpr auto kProcessorAiCore = "aicore";
constexpr auto kProcessorAiCpu = "aicpu";
constexpr auto kProcessorCuda = "cuda";
constexpr auto kProcessorCpu = "cpu";
constexpr auto kProcessorUnknown = "unknown";
constexpr auto kJsonSuffix = ".json";
constexpr auto kInfoSuffix = ".info";

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -99,6 +99,14 @@ bool KernelPack::ReadFromJsonFile(const std::string &json_f, const std::string &
(void)kernel_json.seekg(0, std::ios::beg);
(void)kernel_json.read(json_->contents, SizeToLong(json_->len));
if (processor == kProcessorCpu) {
std::string bin_f = json_f.substr(0, json_f.length() - 5) + ".so";
if (!CheckHash(json_f, bin_f, js)) {
return false;
}
return true;
}
if (processor == kProcessorCuda) {
std::string bin_f = json_f.substr(0, json_f.length() - 5) + ".ptx";
std::ifstream kernelbin(bin_f);

View File

@ -107,6 +107,7 @@ enum Processor {
AICORE = 0,
AICPU,
CUDA,
CPU,
};
struct FlexArray {

View File

@ -13,8 +13,6 @@ endif()
if(ENABLE_D OR ENABLE_ACL)
file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"ascend/*.cc"
"graph_kernel/*.cc"
"graph_kernel/model/*.cc"
)
list(APPEND _PREACTIVATE_SRC_LIST ${_D_SRC_LIST})
endif()
@ -22,8 +20,6 @@ endif()
if(ENABLE_GPU)
file(GLOB_RECURSE _GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"gpu/*.cc"
"graph_kernel/*.cc"
"graph_kernel/model/*.cc"
)
list(APPEND _PREACTIVATE_SRC_LIST ${_GPU_SRC_LIST})
endif()
@ -43,6 +39,13 @@ if(ENABLE_CPU)
list(APPEND _PREACTIVATE_SRC_LIST ${_CPU_SRC_LIST})
endif()
if(ENABLE_AKG AND ${CMAKE_SYSTEM_NAME} MATCHES "Linux")
file(GLOB_RECURSE _GK_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"graph_kernel/*.cc"
)
list(APPEND _PREACTIVATE_SRC_LIST ${_GK_SRC_LIST})
endif()
set_property(SOURCE ${_PREACTIVATE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_PRE_ACT)
add_library(_mindspore_backend_optimizer_obj OBJECT ${_PREACTIVATE_SRC_LIST})

View File

@ -38,6 +38,8 @@
#include "runtime/device/ascend/kernel_select_ascend.h"
#elif ENABLE_GPU
#include "runtime/device/gpu/kernel_info_setter.h"
#elif ENABLE_CPU
#include "runtime/device/cpu/kernel_select_cpu.h"
#endif
namespace mindspore::graphkernel {
@ -608,6 +610,9 @@ void ResetKernelInfo(const AnfNodePtr &node, KernelType kernel_type) {
#elif ENABLE_GPU
cnode->set_kernel_info(std::make_shared<device::KernelInfo>());
device::gpu::SetKernelInfo(cnode, kernel_type);
#elif ENABLE_CPU
cnode->set_kernel_info(std::make_shared<device::KernelInfo>());
device::cpu::SetKernelInfo(cnode);
#endif
}

View File

@ -121,7 +121,7 @@ PassManagerPtr GraphKernelOptimizer::HighLevelOpt1() const {
pm->AddPass(std::make_shared<InsertPadOps>(), OptLevel_1, is_gpu);
// Universal arithmetic simplify
pm->AddPass(std::make_shared<ArithmeticSimplify>(), OptLevel_2, is_gpu);
pm->AddPass(std::make_shared<ArithmeticSimplify>(), OptLevel_2, is_gpu || is_cpu);
// Common subexpression elimination
pm->AddPass(std::make_shared<GraphKernelCSE>(), OptLevel_2);
@ -158,7 +158,7 @@ PassManagerPtr GraphKernelOptimizer::Split() const {
PassManagerPtr GraphKernelOptimizer::HighLevelOpt2() const {
auto pm = std::make_shared<GraphKernelPassManager>(4, "highlevelopt2");
// Enable atomic add
pm->AddPass(std::make_shared<AtomicCleanInsertter>(), OptLevel_2);
pm->AddPass(std::make_shared<AtomicCleanInsertter>(), OptLevel_2, is_gpu || is_ascend);
// Enable atomic add for stitch nodes.
auto level = GetPassLevelByFlag(context::GraphKernelFlags::GetInstance().enable_stitch_fusion);
@ -170,8 +170,8 @@ PassManagerPtr GraphKernelOptimizer::HighLevelOpt2() const {
pm->AddPass(std::make_shared<DecreaseComputePrecision>(), level_low_precision, is_ascend);
// Enable tsa and uss
pm->AddPass(std::make_shared<TsaAtomicAddToFirstTensor>(), OptLevel_1);
pm->AddPass(std::make_shared<UssAtomicAdd>(), OptLevel_1);
pm->AddPass(std::make_shared<TsaAtomicAddToFirstTensor>(), OptLevel_1, is_gpu);
pm->AddPass(std::make_shared<UssAtomicAdd>(), OptLevel_1, is_gpu);
return pm;
}
@ -204,6 +204,7 @@ void GraphKernelOptimizer::Run(const KernelGraphPtr &kernel_graph) {
MS_EXCEPTION_IF_NULL(context_ptr);
is_gpu = (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice);
is_ascend = (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice);
is_cpu = (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kCPUDevice);
auto optimizer = std::make_shared<GraphOptimizer>("graph_kernel_optimizer");
optimizer->AddPassManager(PreProcess());

View File

@ -46,6 +46,7 @@ class GraphKernelOptimizer {
bool is_gpu{false};
bool is_ascend{false};
bool is_cpu{false};
};
void GraphKernelOptimize(const KernelGraphPtr &kernel_graph);

View File

@ -21,14 +21,17 @@
#include "ir/anf.h"
#include "utils/ms_utils.h"
#include "utils/trace_base.h"
#include "utils/context/graph_kernel_flags.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/akg/cpu/akg_cpu_kernel_build.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
#include "runtime/device/cpu/kernel_select_cpu.h"
#include "backend/optimizer/common/optimizer.h"
#include "backend/optimizer/common/pass_manager.h"
#include "backend/optimizer/cpu/insert_cast_cpu.h"
#include "backend/optimizer/cpu/insert_format_transform_op.h"
#include "backend/optimizer/graph_kernel/graph_kernel_optimization.h"
#include "backend/optimizer/pass/replace_node_by_proxy.h"
#include "backend/optimizer/pass/erase_visit_attr.h"
#include "debug/anf_ir_dump.h"
@ -102,6 +105,16 @@ void CPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
kernel_graph->SetExecOrderByDefault();
}
void CPUSession::GraphKernelOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
#ifdef ENABLE_AKG
if (!context::GraphKernelFlags::GetInstance().IsEnableGraphKernel()) {
return;
}
graphkernel::GraphKernelOptimize(kernel_graph);
kernel_graph->SetExecOrderByDefault();
#endif
}
GraphId CPUSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) {
auto graph_id = graph_sum_;
auto graph = ConstructKernelGraph(lst, outputs);
@ -112,6 +125,7 @@ GraphId CPUSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtr
MS_LOG(INFO) << "Set kernel info end";
Optimize(graph);
FinalOptimize(graph);
GraphKernelOptimize(graph);
MS_LOG(INFO) << "Build kernel";
BuildKernel(graph.get());
// Remove reorder after PS feature finish adapting push/pull in auto_monad.
@ -352,10 +366,20 @@ void KernelNotSupportException(const AnfNodePtr &kernel_node) {
void CPUSession::BuildKernel(const KernelGraph *kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto &kernel_nodes = kernel_graph->execution_order();
kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
MS_EXCEPTION_IF_NULL(bin_map);
std::vector<AnfNodePtr> akg_nodes;
for (const auto &kernel_node : kernel_nodes) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
MS_LOG(INFO) << "Cpu building operator[" << kernel_name << "].";
if (session::AnfRuntimeAlgorithm::GetKernelType(kernel_node) == KernelType::AKG_KERNEL) {
if (!bin_map->initialized()) {
bin_map->Initialize();
}
akg_nodes.push_back(kernel_node);
continue;
}
std::shared_ptr<kernel::CPUKernel> cpu_kernel =
kernel::CPUKernelFactory::GetInstance().Create(kernel_name, kernel_node);
if (cpu_kernel == nullptr) {
@ -369,6 +393,10 @@ void CPUSession::BuildKernel(const KernelGraph *kernel_graph) {
AnfAlgo::SetKernelMod(cpu_kernel, kernel_node.get());
MS_LOG(INFO) << "Cpu build success operator[" << kernel_name << "].";
}
#ifdef ENABLE_AKG
kernel::AkgCpuKernelBuilder akg_cpu_kernel_builder;
(void)akg_cpu_kernel_builder.AkgKernelParallelBuild(akg_nodes);
#endif
}
} // namespace session
} // namespace mindspore

View File

@ -42,6 +42,7 @@ class CPUSession : public SessionBasic {
VectorRef *const outputs) override;
void ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph) override;
ParameterPtr CreateNewParameterFromParameter(const AnfNodePtr &anf, KernelGraph *graph) override;
void GraphKernelOptimize(const std::shared_ptr<KernelGraph> &kernel_graph);
void Optimize(const std::shared_ptr<KernelGraph> &kernel_graph);
KernelGraphPtr BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
const std::vector<tensor::TensorPtr> &input_tensors,

View File

@ -256,7 +256,7 @@ class AscendKernelBuildClient : public KernelBuildClient {
~AscendKernelBuildClient() override { Close(); }
};
class GpuKernelBuildClient : public KernelBuildClient {
class AkgKernelBuildClient : public KernelBuildClient {
public:
// Server configure
constexpr inline static auto kGetPathScript =
@ -264,15 +264,15 @@ class GpuKernelBuildClient : public KernelBuildClient {
"\""
"import pkgutil;"
"path = pkgutil"
".get_loader(\\\"mindspore._extends.remote.kernel_build_server_gpu\\\")" // Server module name
".get_loader(\\\"mindspore._extends.remote.kernel_build_server_akg\\\")" // Server module name
".get_filename();"
"print('[~]' + path)"
"\"";
constexpr inline static auto kServerScript = "kernel_build_server_gpu.py";
constexpr inline static auto kServerScript = "kernel_build_server_akg.py";
static GpuKernelBuildClient &Instance() {
static GpuKernelBuildClient instance;
static AkgKernelBuildClient &Instance() {
static AkgKernelBuildClient instance;
return instance;
}
@ -283,15 +283,15 @@ class GpuKernelBuildClient : public KernelBuildClient {
return GetScriptFilePath(env, kGetPathScript, kServerScript);
}
GpuKernelBuildClient(const GpuKernelBuildClient &) = delete;
GpuKernelBuildClient &operator=(const GpuKernelBuildClient &) = delete;
AkgKernelBuildClient(const AkgKernelBuildClient &) = delete;
AkgKernelBuildClient &operator=(const AkgKernelBuildClient &) = delete;
GpuKernelBuildClient(GpuKernelBuildClient &&) = delete;
GpuKernelBuildClient &operator=(GpuKernelBuildClient &&) = delete;
AkgKernelBuildClient(AkgKernelBuildClient &&) = delete;
AkgKernelBuildClient &operator=(AkgKernelBuildClient &&) = delete;
private:
GpuKernelBuildClient() { Open(); }
~GpuKernelBuildClient() override { Close(); }
AkgKernelBuildClient() { Open(); }
~AkgKernelBuildClient() override { Close(); }
};
} // namespace kernel
} // namespace mindspore

View File

@ -18,10 +18,12 @@
#include <string>
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/cpu_memory_manager.h"
#include "backend/kernel_compiler/akg/cpu/akg_cpu_kernel_build.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/kernel_build_info.h"
#include "runtime/device/cpu/kernel_select_cpu.h"
#include "utils/trace_base.h"
#include "utils/context/graph_kernel_flags.h"
#include "backend/optimizer/common/optimizer.h"
#include "backend/optimizer/common/pass_manager.h"
#include "backend/optimizer/common/common_backend_optimization.h"
@ -29,6 +31,8 @@
#include "backend/optimizer/cpu/insert_format_transform_op.h"
#include "backend/optimizer/pass/replace_node_by_proxy.h"
#include "backend/optimizer/pass/erase_visit_attr.h"
#include "backend/optimizer/graph_kernel/graph_kernel_optimization.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "profiler/device/cpu/cpu_profiling.h"
#ifndef ENABLE_SECURITY
#include "debug/data_dump/dump_json_parser.h"
@ -113,6 +117,14 @@ void CPUDeviceContext::OptimizeGraph(const KernelGraphPtr &graph) const {
// Run final optimization.
opt::CommonFinalOptimization(graph);
#ifdef ENABLE_AKG
// Run graph kernel fusion optimization
if (context::GraphKernelFlags::GetInstance().IsEnableGraphKernel()) {
graphkernel::GraphKernelOptimize(graph);
graph->SetExecOrderByDefault();
}
#endif
}
void CPUDeviceContext::OptimizeSingleOpGraph(const KernelGraphPtr &graph) const {
@ -173,11 +185,21 @@ void CPUDeviceContext::SetOperatorInfo(const std::vector<CNodePtr> &nodes) const
}
void CPUDeviceContext::CreateKernel(const std::vector<CNodePtr> &nodes) const {
kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
MS_EXCEPTION_IF_NULL(bin_map);
std::vector<AnfNodePtr> akg_nodes;
for (const auto &node : nodes) {
MS_EXCEPTION_IF_NULL(node);
if (AnfAlgo::IsControlOpExecInBackend(node)) {
continue;
}
if (session::AnfRuntimeAlgorithm::GetKernelType(node) == KernelType::AKG_KERNEL) {
if (!bin_map->initialized()) {
bin_map->Initialize();
}
akg_nodes.push_back(node);
continue;
}
std::string kernel_name = AnfAlgo::GetCNodeName(node);
std::shared_ptr<kernel::CPUKernel> cpu_kernel = kernel::CPUKernelFactory::GetInstance().Create(kernel_name, node);
if (!cpu_kernel) {
@ -195,6 +217,10 @@ void CPUDeviceContext::CreateKernel(const std::vector<CNodePtr> &nodes) const {
cpu_kernel->Init(node);
AnfAlgo::SetKernelMod(cpu_kernel, node.get());
}
#ifdef ENABLE_AKG
kernel::AkgCpuKernelBuilder akg_cpu_kernel_builder;
(void)akg_cpu_kernel_builder.AkgKernelParallelBuild(akg_nodes);
#endif
}
void CPUDeviceContext::PreprocessBeforeRunGraph(const KernelGraphPtr &graph) const {
@ -212,8 +238,6 @@ bool CPUDeviceContext::LaunchKernel(const CNodePtr &kernel, const std::vector<Ad
MS_LOG(DEBUG) << "Launch kernel: " << kernel->fullname_with_scope();
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
MS_EXCEPTION_IF_NULL(kernel_mod);
auto cpu_kernel_mod = dynamic_cast<kernel::CPUKernel *>(kernel_mod);
MS_EXCEPTION_IF_NULL(cpu_kernel_mod);
#ifdef PLATFORM_86
// Some CPU kernels need set the flush zero mode to improve performance.
@ -226,6 +250,8 @@ bool CPUDeviceContext::LaunchKernel(const CNodePtr &kernel, const std::vector<Ad
// Some CPU kernels can't initialize kernel and launch kernel in different thread, so reinitialize the kernels before
// launch.
if (kOpNotSupportMultiThreadExecList.find(AnfAlgo::GetCNodeName(kernel)) != kOpNotSupportMultiThreadExecList.end()) {
auto cpu_kernel_mod = dynamic_cast<kernel::CPUKernel *>(kernel_mod);
MS_EXCEPTION_IF_NULL(cpu_kernel_mod);
cpu_kernel_mod->InitKernel(kernel);
}
#ifndef ENABLE_SECURITY

View File

@ -172,7 +172,7 @@ void GraphKernelFlags::RegisterFlags(std::map<std::string, std::string> *flag_ma
FlagRegister reg(flag_map);
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
bool is_gpu = (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice);
bool is_ascend = (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice);
// Set opt_level first, some flags' default value depends on it.
// Default optimization level is level 2 when enable graphkernel
@ -192,7 +192,7 @@ void GraphKernelFlags::RegisterFlags(std::map<std::string, std::string> *flag_ma
// Integer flags
reg.AddFlag("online_tuning", &online_tuning);
reg.AddFlag("fusion_ops_level", &fusion_ops_level, is_gpu ? OpLevel_MAX : OpLevel_0);
reg.AddFlag("fusion_ops_level", &fusion_ops_level, is_ascend ? OpLevel_0 : OpLevel_MAX);
// String flags
reg.AddFlag("repository_path", &repository_path);

View File

@ -489,8 +489,8 @@ def _check_target_specific_cfgs(device, arg_key):
device_cfgs = {
'enable_dump': ['Ascend'],
'save_dump_path': ['Ascend'],
'enable_graph_kernel': ['Ascend', 'GPU'],
'graph_kernel_flags': ['Ascend', 'GPU'],
'enable_graph_kernel': ['Ascend', 'GPU', 'CPU'],
'graph_kernel_flags': ['Ascend', 'GPU', 'CPU'],
'enable_reduce_precision': ['Ascend'],
'enable_profiling': ['Ascend'],
'profiling_options': ['Ascend'],

View File

@ -79,8 +79,11 @@ build_mindspore()
if [[ "X$USE_GLOG" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DUSE_GLOG=ON"
fi
if [[ "X$ENABLE_AKG" = "Xon" ]] && [[ "X$ENABLE_D" = "Xon" || "X$ENABLE_GPU" = "Xon" ]]; then
if [[ "X$ENABLE_AKG" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_AKG=ON"
if [[ "X$ENABLE_CPU" = "Xon" && "X$ENABLE_D" != "Xon" && "X$ENABLE_GPU" != "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DUSE_LLVM=ON"
fi
fi
if [[ "X$ENABLE_ACL" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_ACL=ON"

View File

@ -44,7 +44,7 @@ init_default_options()
export LITE_PLATFORM=""
export LITE_ENABLE_AAR="off"
export USE_GLOG="on"
export ENABLE_AKG="on"
export ENABLE_AKG="off"
export ENABLE_ACL="off"
export ENABLE_D="off"
export ENABLE_DEBUGGER="on"

View File

@ -40,6 +40,7 @@ parse_device()
exit 1
fi
export CUDA_VERSION="$DEVICE_VERSION"
export ENABLE_AKG="on"
elif [[ "X$DEVICE" == "Xd" || "X$DEVICE" == "Xascend" ]]; then
# version default 910
if [[ "X$DEVICE_VERSION" == "X" ]]; then
@ -54,6 +55,7 @@ parse_device()
export ENABLE_ACL="on"
ENABLE_CPU="on"
export ENABLE_MPI="on"
export ENABLE_AKG="on"
else
echo "Invalid value ${DEVICE_VERSION} for option -V"
usage

View File

@ -21,7 +21,7 @@ usage()
echo "Usage:"
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t ut|st] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
echo " [-a on|off] [-p on|off] [-i] [-R] [-D on|off] [-j[n]] [-e gpu|ascend|cpu] \\"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 10.1|11.1|310|910] [-I arm64|arm32|x86_64] [-K] \\"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 10.1|11.1|310|910] [-I arm64|arm32|x86_64] [-K on|off] \\"
echo " [-B on|off] [-E] [-l on|off] [-n full|lite|off] [-H on|off] \\"
echo " [-A on|off] [-S on|off] [-k on|off] [-W sse|neon|avx|avx512|off] \\"
echo " [-L Tensor-RT path] [-y on|off] \\"
@ -52,7 +52,7 @@ usage()
echo " -V Specify the device version, if -e gpu, default CUDA 10.1, if -e ascend, default Ascend 910"
echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compilation"
echo " -A Enable compiling mindspore lite aar package, option: on/off, default: off"
echo " -K Compile with AKG, default on"
echo " -K Compile with AKG, default on if -e gpu or -e ascend, else default off"
echo " -B Enable debugger, default on"
echo " -E Enable IBVERBS for parameter server, default off"
echo " -l Compile with python dependency, default on"