diff --git a/.gitmodules b/.gitmodules index a241b6d69b9..df7212b083c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,6 @@ [submodule "graphengine"] path = graphengine url = https://gitee.com/mindspore/graphengine.git +[submodule "akg"] + path = akg + url = https://gitee.com/mindspore/akg.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 1051aeb96c7..34521d22d39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,10 +86,14 @@ if (ENABLE_GE OR ENABLE_D OR ENABLE_TESTCASES) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc/toolchain) endif() +if (ENABLE_AKG AND ENABLE_D) + add_subdirectory("${CMAKE_SOURCE_DIR}/akg") +endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") add_subdirectory(mindspore/ccsrc) if (ENABLE_TESTCASES) add_subdirectory(tests) endif() -include(cmake/package.cmake) \ No newline at end of file +include(cmake/package.cmake) diff --git a/akg b/akg new file mode 160000 index 00000000000..c460176523d --- /dev/null +++ b/akg @@ -0,0 +1 @@ +Subproject commit c460176523d039c8995f1d71089753725ebc0792 diff --git a/build.sh b/build.sh index dfed66aadf5..7676665be78 100755 --- a/build.sh +++ b/build.sh @@ -246,6 +246,9 @@ checkopts "$@" echo "---------------- mindspore: build start ----------------" mkdir -pv "${BUILD_PATH}/package/mindspore/lib" git submodule update --init graphengine +if [[ "X$ENABLE_AKG" = "Xon" ]] && [[ "X$ENABLE_D" = "Xon" ]]; then + git submodule update --init --recursive akg +fi build_exit() { @@ -308,7 +311,7 @@ build_mindspore() if [[ "X$USE_GLOG" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DUSE_GLOG=ON" fi - if [[ "X$ENABLE_AKG" = "Xon" ]]; then + if [[ "X$ENABLE_AKG" = "Xon" ]] && [[ "X$ENABLE_D" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_AKG=ON" fi echo "${CMAKE_ARGS}" diff --git a/cmake/package.cmake b/cmake/package.cmake index 01f7bdabd8c..1cff396ef1e 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -236,6 +236,16 @@ if (ENABLE_GPU) endif () endif () +if (ENABLE_D AND ENABLE_AKG) + set (AKG_PATH ${CMAKE_SOURCE_DIR}/build/mindspore/akg) + install( + DIRECTORY + ${AKG_PATH}/akg + DESTINATION ${INSTALL_PY_DIR}/.. + COMPONENT mindspore + ) +endif () + if (EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset) install( DIRECTORY ${CMAKE_SOURCE_DIR}/mindspore/dataset diff --git a/mindspore/_extends/parallel_compile/akg_compiler/__init__.py b/mindspore/_extends/parallel_compile/akg_compiler/__init__.py new file mode 100644 index 00000000000..e30774307ca --- /dev/null +++ b/mindspore/_extends/parallel_compile/akg_compiler/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ diff --git a/mindspore/_extends/parallel_compile/akg_compiler/compiler.py b/mindspore/_extends/parallel_compile/akg_compiler/compiler.py new file mode 100644 index 00000000000..de78aad7e49 --- /dev/null +++ b/mindspore/_extends/parallel_compile/akg_compiler/compiler.py @@ -0,0 +1,35 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Providing akg compile with json""" +import sys +def run_compiler(op_json): + """ + Run AKG compiler to compile op with subprocess, if this process of + compilation failed, an exception will be raised + + Args: + op_json (str): json string of the op + + Returns: + None + """ + p = __import__("akg", globals(), locals(), ['ms'], 0) + func = getattr(p.ms, "compilewithjson") + res = func(op_json) + if not res: + raise ValueError("Compile error") + +if __name__ == "__main__": + run_compiler(sys.argv[1]) diff --git a/mindspore/_extends/parallel_compile/akg_compiler/multi_process_compiler.py b/mindspore/_extends/parallel_compile/akg_compiler/multi_process_compiler.py new file mode 100644 index 00000000000..ffe9c85dc39 --- /dev/null +++ b/mindspore/_extends/parallel_compile/akg_compiler/multi_process_compiler.py @@ -0,0 +1,71 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Providing multi process compile with json""" +import os +import subprocess +import sys +from multiprocessing import Pool, cpu_count + + +def _compile_akg_task(*json_strs): + """ + compile func called in single process + + Parameters: + json_strs: list. List contains multiple kernel infos, suitable for json compile api. + """ + akg_compiler = os.path.join(os.path.split( + os.path.realpath(__file__))[0], "compiler.py") + for json_str in json_strs: + res = subprocess.run( + [sys.executable, akg_compiler, json_str], text=True) + if res.returncode != 0: + raise ValueError("Failed, args: {}!".format(json_str)) + + +def compile_akg_kernel_parallel(json_infos, process, waitime): + """ + compile kernel use multi processes + + Parameters: + json_infos: list. list contain kernel info(task id and json str) + process: int. processes num + waittime: int. max time the function blocked + + Returns: + True for all compile success, False for some failed. 
+ """ + if not isinstance(json_infos, list): + raise ValueError("json_infos must be a list") + if not isinstance(process, int): + raise ValueError("process must be a num") + if not isinstance(waitime, int): + raise ValueError("waittime must be a num") + + if process == 0 and json_infos: + process = 1 + + cpu_proc_num = cpu_count() + max_proc_num = 16 + process = min([cpu_proc_num, max_proc_num, process]) + + args = [[] for _ in range(process)] + for p, info in enumerate(json_infos): + args[p % process].append(info) + + with Pool(processes=process) as pool: + res = pool.starmap_async(_compile_akg_task, args) + res.get(timeout=waitime) + return True diff --git a/mindspore/_extends/parallel_compile/multi_compiler.py b/mindspore/_extends/parallel_compile/multi_compiler.py deleted file mode 100644 index 86e1b684d2a..00000000000 --- a/mindspore/_extends/parallel_compile/multi_compiler.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Providing multi process compile with json""" -import json -import math -import os -import subprocess -import sys -from multiprocessing import Pool - - -def _compiletask(platform, *jsons): - """ - compile func called in single process - - Parameters: - platform: str. AKG platform or TBE platform - *jsons: str. json str contain kernel info, suitable for json compile - api - - """ - if platform == "AKG": - p = __import__("_akg", globals(), locals(), ['ms'], 0) - func = getattr(p.ms, "compilewithjson") - for json_item in jsons: - res = func(json_item) - if not res: - raise ValueError("Compile error") - if platform == "TBE": - tbe_compiler = os.path.join(os.path.split(os.path.realpath(__file__))[0], "tbe_compiler", "compiler.py") - for json_item in jsons: - res = subprocess.run([sys.executable, tbe_compiler], input=json_item, text=True) - if res.returncode != 0: - raise ValueError("Tbe compile error") - - -def compilekernelparallel(jsons, process, waitime): - """ - compile kernel use multi processes - - Parameters: - jsons: list. json str list contain kernel info - process: int. processes num - waittime: int. 
max time the function blocked - """ - if not isinstance(jsons, list): - raise ValueError("jsons must be a list") - if not isinstance(process, int): - raise ValueError("process must be a num") - if not isinstance(waitime, int): - raise ValueError("waittime must be a num") - - jsons_akg = [] - jsons_tbe = [] - for json_ in jsons: - j = json.loads(json_) - if j["platform"] == "TBE": - jsons_tbe.append(json_) - continue - if j["platform"] == "AKG": - jsons_akg.append(json_) - continue - raise RuntimeError( - "not support this platform {0}".format(j["platform"])) - if jsons_akg: - process_akg = math.floor(len(jsons)/len(jsons_akg)*process) - else: - process_akg = 0 - - if process_akg == 0 and jsons_akg: - process_akg = 1 - process_tbe = process-process_akg - if process_tbe == 0 and jsons_tbe: - process_tbe = 1 - raise RuntimeWarning("we add a process for compile more operator") - - args = [[] for _ in range(process_akg+process_tbe)] - args_lens = len(args) - for p in range(args_lens): - if p < process_tbe: - args[p].append("TBE") - else: - args[p].append("AKG") - jsons_tbe_lens = len(jsons_tbe) - for p in range(jsons_tbe_lens): - args[p % process_tbe].append(jsons_tbe[p]) - jsons_akg_lens = len(jsons_akg) - for p in range(jsons_akg_lens): - args[process-p % process_akg-1].append(jsons_akg[p]) - for p in range(args_lens): - args[p] = tuple(args[p]) - with Pool(processes=process) as pool: - res = pool.starmap_async(_compiletask, args) - res.get(timeout=waitime) - return True diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 44f78d62164..c9e224080a5 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -39,7 +39,7 @@ if(ENABLE_GPU) "device/gpu/*.cu" "kernel/gpu/*.cu" "kernel/akg/gpu/*.cc" - "kernel/akg/akgkernelbuild.cc" + "kernel/akg/akg_kernel_build.cc" "kernel/akg/akg_kernel_attrs_process.cc" ) diff --git a/mindspore/ccsrc/common/trans.cc b/mindspore/ccsrc/common/trans.cc index a9ce32c8dfd..9cf6eb3a5a2 100644 --- a/mindspore/ccsrc/common/trans.cc +++ b/mindspore/ccsrc/common/trans.cc @@ -428,6 +428,10 @@ std::vector TransShapeToDevice(const std::vector &shape, const s auto temp_shape = shape; std::vector device_shape; if (format == kOpFormat_FRAC_NZ) { + if (shape.size() == 1 && (shape[0] == 1 || shape[0] % kCubeSize == 0)) { + // For [1] and [1024] shape we can trait it as NZ shape + return shape; + } if (shape.size() < 2) { MS_LOG(EXCEPTION) << "Format" << format << " is not support shape " << shape.size(); } else { diff --git a/mindspore/ccsrc/debug/anf_ir_dump.cc b/mindspore/ccsrc/debug/anf_ir_dump.cc index 1fd3096e7c5..fc32e0fb5fc 100644 --- a/mindspore/ccsrc/debug/anf_ir_dump.cc +++ b/mindspore/ccsrc/debug/anf_ir_dump.cc @@ -111,9 +111,15 @@ void DumpGlobalInfoEntry(const FuncGraphPtr &graph, std::ostringstream &buffer) } buffer << "#IR entry : @" << graph->ToString() << "." 
<< graph->debug_info()->get_id() << std::endl; - buffer << "#flags :" << std::endl; - for (const auto &flag : graph->flags()) { - buffer << flag.first << " : " << flag.second << std::endl; + buffer << "#attrs :" << std::endl; + for (const auto &attr : graph->attrs()) { + buffer << attr.first << " : "; + if (attr.second->isa()) { + buffer << GetValue(attr.second); + } else if (attr.second->isa()) { + buffer << GetValue(attr.second); + } + buffer << std::endl; } } @@ -417,10 +423,16 @@ void DumpSubgraph(const OrderedMap fout << std::endl; for (const auto &sg : *sub_graphs) { - fout << "subgraph flag:" << std::endl; + fout << "subgraph attr:" << std::endl; MS_EXCEPTION_IF_NULL(sg.first); - for (const auto &flag : sg.first->flags()) { - fout << flag.first << " : " << flag.second << std::endl; + for (const auto &attr : sg.first->attrs()) { + fout << attr.first << " : "; + if (attr.second->isa()) { + fout << GetValue(attr.second); + } else if (attr.second->isa()) { + fout << GetValue(attr.second); + } + fout << std::endl; } fout << "subgraph @" << sg.first->ToString() << "."; fout << sg.first->debug_info()->get_id() << "("; diff --git a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc index 125630fe22a..f0bad6b492e 100644 --- a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc +++ b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc @@ -548,9 +548,15 @@ void AscendStreamAssign::GetNeedActiveStreams(const shared_ptrGetAttr(kStreamNeedActivedFirst); + if (primitive != nullptr) { + value_ptr = primitive->GetAttr(kStreamNeedActivedFirst); + } else { + auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(cur_cnode_ptr); + MS_EXCEPTION_IF_NULL(func_graph); + value_ptr = func_graph->get_attr(kStreamNeedActivedFirst); + } if (value_ptr == nullptr) { continue; } diff --git a/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc index dcc4e6ace06..254c92afbfc 100644 --- a/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc +++ b/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc @@ -26,10 +26,12 @@ #include "kernel/kernel.h" #include "kernel/tbe/tbe_kernel_build.h" #include "kernel/tbe/tbe_kernel_parallel_build.h" +#include "kernel/akg/ascend/akg_ascend_kernel_build.h" #include "kernel/aicpu/aicpu_kernel_build.h" #include "kernel/hccl/hccl_kernel_build.h" #include "kernel/rts/rt_kernel_build.h" #include "kernel/tbe/tbe_utils.h" +#include "kernel/common_utils.h" #include "operator/ops.h" #include "session/anf_runtime_algorithm.h" #include "./common.h" @@ -91,6 +93,7 @@ static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { MS_EXCEPTION_IF_NULL(kernel_graph_ptr); std::vector tbe_nodes; + std::vector akg_nodes; std::vector other_nodes; for (const auto &anf_node : kernel_graph_ptr->execution_order()) { MS_EXCEPTION_IF_NULL(anf_node); @@ -105,19 +108,26 @@ static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *ke } break; } + case KernelType::AKG_KERNEL: { + akg_nodes.push_back(anf_node); + break; + } default: { other_nodes.push_back(anf_node); break; } } } - bool ret = kernel::TbeOpParallelBuild(tbe_nodes); + bool tbe_ret = kernel::TbeOpParallelBuild(tbe_nodes); + bool akg_ret = kernel::AkgAscendKernelParallelBuild(akg_nodes); + auto bin_map = kernel::tbe::KernelMeta::GetInstance(); + (void)bin_map->ReadIndex(kernel::kCceKernelMeta); for (const 
auto &anf_node : other_nodes) { kernel::KernelModPtr kernel_mod_ptr = SerialCompileImpl(anf_node); MS_EXCEPTION_IF_NULL(kernel_mod_ptr); AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); } - return ret; + return tbe_ret && akg_ret; } static std::vector CalCleanZerosSize(const CNodePtr &pre_node) { @@ -234,7 +244,7 @@ void KernelBuildPreprocess(mindspore::session::KernelGraph *kernel_graph) { for (const auto &anf_node : kernel_graph->execution_order()) { std::string apply_function_name = AnfAlgo::GetCNodeName(anf_node); if (apply_function_name == prim::kPrimMaxPoolGrad->name() && - AnfAlgo::GetKernelType(anf_node) == KernelType::AUTO_DIFF_KERNEL) { + AnfAlgo::GetKernelType(anf_node) == KernelType::AKG_KERNEL) { auto clear_zero_prim = std::make_shared(kClearZeroOpName); MS_EXCEPTION_IF_NULL(clear_zero_prim); auto new_value_node = NewValueNode(clear_zero_prim); diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc index 3951e1a1327..922f62329d4 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc +++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc @@ -15,16 +15,27 @@ */ #include "device/ascend/kernel_select_ascend.h" + #include #include #include #include +#include #include -#include "kernel/oplib/oplib.h" -#include "kernel/kernel_query.h" -#include "session/anf_runtime_algorithm.h" -#include "utils/context/ms_context.h" +#include +#include + +#include "common/utils.h" #include "debug/anf_ir_dump.h" +#include "operator/ops.h" +#include "ir/func_graph.h" +#include "utils/context/ms_context.h" +#include "session/anf_runtime_algorithm.h" +#include "device/kernel_info.h" +#include "kernel/common_utils.h" +#include "kernel/kernel_query.h" +#include "kernel/oplib/oplib.h" +#include "kernel/kernel_build_info.h" namespace mindspore { namespace device { @@ -121,12 +132,23 @@ void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, cons } auto pri_match_format = GetPriorityMatchFormat(kernel_node); for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { + auto input_anf_node = kernel_node->input(input_index + 1); + // we do not take ValueNode into consideration in graph kernel. + if (kernel_build_info.kernel_type() == KernelType::AKG_KERNEL) { + if (input_anf_node->isa() && AnfAlgo::GetOutputDeviceDataType(input_anf_node, 0) == kTypeUnknown) { + continue; + } + } auto base_score = AnfAlgo::IsFeatureMapInput(kernel_node, input_index) ? kFeatureMapBaseScore : kWegihtBaseScore; if (kernel_build_info.GetInputFormat(input_index) == AnfAlgo::GetPrevNodeOutputFormat(kernel_node, input_index)) { (*cur_kernelinfo_match_counts)[MATCH_FORMAT_COUNT] += base_score; } - if (kernel_build_info.GetInputDeviceType(input_index) == - AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, input_index)) { + // we match output fix precision first. 
+ auto prev_device_type = AnfAlgo::GetPrevNodeOutputPrecision(kernel_node, input_index); + if (prev_device_type == kTypeUnknown) { + prev_device_type = AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, input_index); + } + if (kernel_build_info.GetInputDeviceType(input_index) == prev_device_type) { (*cur_kernelinfo_match_counts)[MATCH_DTYPE_COUNT] += base_score; } if (kernel_build_info.GetInputFormat(input_index) == pri_match_format) { @@ -146,41 +168,6 @@ void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, cons } } -void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, const CNodePtr &kernel_node) { - MS_EXCEPTION_IF_NULL(kernel_node); - for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { - auto input_kernel_node = AnfAlgo::GetInputNode(kernel_node, input_index); - MS_EXCEPTION_IF_NULL(input_kernel_node); - auto input_with_index = AnfAlgo::VisitKernel(input_kernel_node, 0); - MS_EXCEPTION_IF_NULL(input_with_index.first); - auto real_input_node = input_with_index.first; - if (real_input_node->isa()) { - continue; - } - std::shared_ptr builder = - std::make_shared(); - bool is_ref = false; - auto op_info = mindspore::kernel::OpLib::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel::kTBE); - if (op_info != nullptr) { - is_ref = op_info->is_ref(); - } - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - if (ms_context->execution_mode() == kPynativeMode && - AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) != kTypeUnknown) { - continue; - } - // we set special device info of a input tensor. - if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) { - std::vector output_format = {selected_kernel_info.GetInputFormat(input_index)}; - builder->SetOutputsFormat(output_format); - std::vector output_type = {AnfAlgo::GetInputDeviceDataType(kernel_node, input_index)}; - builder->SetOutputsDeviceType(output_type); - AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); - } - } -} - void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector *support_index) { MS_EXCEPTION_IF_NULL(support_index); int index = kUnSupportMixedDataTypeIndex; @@ -467,6 +454,51 @@ std::vector> FilterRaisedOrReducePrecis } } // namespace +void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { + auto input_kernel_node = AnfAlgo::GetInputNode(kernel_node, input_index); + MS_EXCEPTION_IF_NULL(input_kernel_node); + auto input_with_index = AnfAlgo::VisitKernel(input_kernel_node, 0); + MS_EXCEPTION_IF_NULL(input_with_index.first); + auto real_input_node = input_with_index.first; + if (real_input_node->isa()) { + continue; + } + if (real_input_node->isa() && !AnfAlgo::IsParameterWeight(real_input_node->cast())) { + continue; + } + auto builder = std::make_shared(); + if (IsValueNode(input_kernel_node) && + AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0) == kTypeUnknown) { + std::vector output_format = {selected_kernel_info.GetInputFormat(input_index)}; + builder->SetOutputsFormat(output_format); + std::vector output_type = {selected_kernel_info.GetInputDeviceType(input_index)}; + builder->SetOutputsDeviceType(output_type); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get()); + continue; + } + // we set special device 
info of a input tensor. + bool is_ref = false; + auto op_info = kernel::OpLib::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel::kTBE); + if (op_info != nullptr) { + is_ref = op_info->is_ref(); + } + MS_EXCEPTION_IF_NULL(MsContext::GetInstance()); + if (MsContext::GetInstance()->execution_mode() == kPynativeMode && + AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) != kTypeUnknown) { + continue; + } + if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) { + std::vector output_format = {selected_kernel_info.GetInputFormat(input_index)}; + builder->SetOutputsFormat(output_format); + std::vector output_type = {selected_kernel_info.GetInputDeviceType(input_index)}; + builder->SetOutputsDeviceType(output_type); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); + } + } +} + KernelSelectStatus SetMatchedKernelInfo(const CNodePtr &kernel_node, const std::vector> &kernel_info_list) { MS_EXCEPTION_IF_NULL(kernel_node); @@ -498,11 +530,17 @@ KernelSelectStatus SetMatchedKernelInfo(const CNodePtr &kernel_node, return select_status; } -KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node) { +KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) { std::vector> kernel_info_list; std::vector> aicpu_kernel_info_list; MS_EXCEPTION_IF_NULL(kernel_node); - kernel::KernelQuery(kernel_node, &kernel_info_list); + if (AnfAlgo::IsGraphKernel(kernel_node)) { + auto func_graph = GetValueNode(kernel_node->input(kAnfPrimitiveIndex)); + MS_EXCEPTION_IF_NULL(func_graph); + SelectGraphKernelInfo(kernel_node, func_graph); + return kStatusAllMatched; + } + kernel::KernelQuery(kernel_node, &kernel_info_list, kernel_type); auto select_status = SetMatchedKernelInfo(kernel_node, kernel_info_list); // If aicore not find valid kernel info reloading aicpu kernel info list to find it if (select_status == kNoMatched) { diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h b/mindspore/ccsrc/device/ascend/kernel_select_ascend.h index c4c777c18a8..7b7a7b9fb9b 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h +++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.h @@ -27,7 +27,10 @@ enum KernelSelectStatus { kStatusReducePrecision = 1, kStatusRaisePrecision = 2, }; -KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node); +KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, + KernelType kernel_type = KernelType::UNKNOWN_KERNEL_TYPE); +void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, const CNodePtr &kernel_node); +void SelectGraphKernelInfo(const CNodePtr &kernel_node, const FuncGraphPtr &func_graph); } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc b/mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc new file mode 100644 index 00000000000..b57ed1cd1bb --- /dev/null +++ b/mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc @@ -0,0 +1,516 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/ascend/kernel_select_ascend.h" +#include "session/anf_runtime_algorithm.h" +#include "device/kernel_info.h" +#include "ir/func_graph.h" +#include "kernel/common_utils.h" +#include "kernel/kernel_query.h" +#include "kernel/kernel_build_info.h" + +namespace mindspore { +namespace device { +namespace ascend { + +TypeId GetPrimitivePrecision(const CNodePtr &cnode) { + auto primitive = AnfAlgo::GetCNodePrimitive(cnode); + MS_EXCEPTION_IF_NULL(primitive); + + TypeId except_type = kTypeUnknown; + if (primitive->GetAttr(kAttrFixPrecision) != nullptr) { + auto strExceptDtype = GetValue(primitive->GetAttr(kAttrFixPrecision)); + if (strExceptDtype == "float16") { + except_type = kNumberTypeFloat16; + } else if (strExceptDtype == "float32") { + except_type = kNumberTypeFloat32; + } else { + MS_LOG(EXCEPTION) << "The fix precision must be float16 or float32, but got" << strExceptDtype; + } + } + + return except_type; +} + +void ResetKernelBuildInfo(const CNodePtr &kernel_node) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + for (size_t input_index = 0; input_index < input_num; ++input_index) { + auto input_kernel_node = AnfAlgo::GetInputNode(kernel_node, input_index); + MS_EXCEPTION_IF_NULL(input_kernel_node); + auto kernel_with_index = AnfAlgo::VisitKernel(input_kernel_node, 0); + if (!kernel::IsWeightBoundary(kernel_with_index.first)) { + continue; + } + // reset format and dtype. + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + builder.SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); + builder.SetOutputsDeviceType(std::vector{kTypeUnknown}); + AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), input_kernel_node.get()); + } +} + +void UpdateKernelInfo(const std::vector &node_list) { + for (size_t i = 0; i < node_list.size(); ++i) { + // select nodes in subgraph. 
+ auto anf_node = node_list[i]; + MS_EXCEPTION_IF_NULL(anf_node); + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto fix_precision_type = GetPrimitivePrecision(cnode); + if (fix_precision_type != kTypeUnknown) { + std::vector> kernel_info_list; + kernel::KernelQuery(cnode, &kernel_info_list, KernelType::AKG_KERNEL); + + for (size_t index = 0; index < kernel_info_list.size(); ++index) + // only math the first input + if (kernel_info_list[index]->GetInputDeviceType(0) == fix_precision_type && + kernel_info_list[index]->GetInputFormat(0) == AnfAlgo::GetPrevNodeOutputFormat(cnode, 0) && + AnfAlgo::GetInputDeviceDataType(cnode, 0) != fix_precision_type) { + auto selected_kernel_info_ptr = kernel_info_list[index]; + ResetKernelBuildInfo(cnode); + AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info_ptr, cnode.get()); + SetTensorDeviceInfo(*selected_kernel_info_ptr, cnode); + break; + } + } + } +} + +bool CanConvertDefaultShapeToNZ(const std::vector &shape) { + for (size_t i = 1; i <= shape.size(); ++i) { + if (i > 2) { + break; + } + if (shape[shape.size() - i] != 1 && shape[shape.size() - i] % kCubeSize != 0) { + return false; + } + } + return true; +} + +std::vector DefaultToFracNZAxis(const std::vector &ori_shape, const std::vector &axis) { + std::vector frac_nz_axis = axis; + auto shape_len = ori_shape.size(); + for (size_t i = 0; i < axis.size(); ++i) { + auto axis_idx = (frac_nz_axis[i] + shape_len) % shape_len; + if (axis_idx == shape_len - 1) { + frac_nz_axis[i] = axis_idx - 1; + frac_nz_axis.push_back(axis_idx + 2); + } else if (axis_idx == shape_len - 2) { + frac_nz_axis[i] = axis_idx + 1; + frac_nz_axis.push_back(axis_idx + 2); + } else { + frac_nz_axis[i] = axis_idx; + } + } + return frac_nz_axis; +} + +std::vector GetReducedFracNZShape(const std::vector &ori_shape, const std::vector &axis, + bool keep_dims) { + std::vector result; + std::set positive_idx; + for (const auto &a : axis) { + positive_idx.insert(a >= 0 ? 
a : ori_shape.size() + a); + } + for (size_t i = 0; i < ori_shape.size(); ++i) { + if (positive_idx.count(i) == 0) { + result.push_back(ori_shape[i]); + } else if (keep_dims) { + result.push_back(1); + } + } + return result; +} + +void UpdateFracNZReduceOp(const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + auto input_format = AnfAlgo::GetPrevNodeOutputFormat(cnode, 0); + if (input_format == kOpFormat_FRAC_NZ) { + // Clone primitive to modify it + auto prim = GetCNodePrimitive(cnode); + auto new_prim = std::make_shared(*prim); + auto new_prim_node = NewValueNode(new_prim); + cnode->set_input(0, new_prim_node); + + auto axis_value = new_prim->GetAttr(kAttrAxis); + std::vector default_axis; + if (axis_value->isa()) { + auto value_list = dyn_cast(axis_value); + for (const auto &item : value_list->value()) { + if (item->isa()) { + default_axis.push_back(GetValue(item)); + } + } + } else if (axis_value->isa()) { + auto value_tuple = dyn_cast(axis_value); + for (const auto &item : value_tuple->value()) { + if (item->isa()) { + default_axis.push_back(GetValue(item)); + } + } + } else { + MS_LOG(ERROR) << "Axis attr type is not correct!"; + } + auto infer_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0); + std::vector frac_nz_axis = DefaultToFracNZAxis(infer_shape, default_axis); + AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue>(frac_nz_axis), cnode); + auto output_shape = AnfAlgo::GetOutputInferShape(cnode, 0); + if (output_shape.size() == 1) { + AnfAlgo::SetNodeAttr(kAttrOutputDefault, MakeValue(true), cnode); + } + } +} + +void GetDefaultFormat(const CNodePtr &kernel_node, std::string *default_format, bool *use_same_format) { + MS_EXCEPTION_IF_NULL(kernel_node); + MS_EXCEPTION_IF_NULL(default_format); + MS_EXCEPTION_IF_NULL(use_same_format); + std::unordered_map all_input_formats; + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + for (size_t i = 0; i < input_num; ++i) { + auto input_kernel_node = AnfAlgo::VisitKernel(kernel_node->input(i + 1), 0).first; + MS_EXCEPTION_IF_NULL(input_kernel_node); + if (!input_kernel_node->isa()) { + auto pre_format = AnfAlgo::GetPrevNodeOutputFormat(kernel_node, i); + ++all_input_formats[pre_format]; + continue; + } + auto para = input_kernel_node->cast(); + MS_EXCEPTION_IF_NULL(para); + if (AnfAlgo::GetOutputDeviceDataType(para, 0) != kTypeUnknown) { + auto pre_format = AnfAlgo::GetOutputFormat(para, 0); + ++all_input_formats[pre_format]; + continue; + } + *use_same_format = false; + } + + if (all_input_formats.empty()) { + // all inputs are parameter. 
+ *default_format = kOpFormat_NC1HWC0; + } else { + std::vector> pairs; + for (auto iter = all_input_formats.begin(); iter != all_input_formats.end(); ++iter) { + pairs.push_back(std::make_pair(iter->first, iter->second)); + } + auto cmp_func = [](const std::pair &a, const std::pair &b) { + if (a.second != b.second) { + return a.second > b.second; + } else if (a.first == kOpFormat_DEFAULT) { + return a.second + 1 > b.second; + } else if (b.first == kOpFormat_DEFAULT) { + return a.second > b.second + 1; + } + return a.second > b.second; + }; + std::sort(pairs.begin(), pairs.end(), cmp_func); + *default_format = pairs.begin()->first; + } + + for (size_t i = 0; i < input_num; ++i) { + auto input_kernel_node = AnfAlgo::VisitKernel(kernel_node->input(i + 1), 0).first; + MS_EXCEPTION_IF_NULL(input_kernel_node); + if (!input_kernel_node->isa() || + AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0) != kTypeUnknown) { + continue; + } + auto weight_infer_shape = AnfAlgo::GetOutputInferShape(input_kernel_node, 0); + if (weight_infer_shape.size() < 2 && *default_format == kOpFormat_FRAC_NZ) { + *default_format = kOpFormat_DEFAULT; + *use_same_format = true; + break; + } + } +} + +void UpdateGraphKernelInputsKernelInfo(const CNodePtr &kernel_node, const std::vector &input_list, + const std::string &default_format, bool use_same_format, + std::vector *graph_input_format, + std::vector *graph_input_type) { + MS_EXCEPTION_IF_NULL(graph_input_format); + MS_EXCEPTION_IF_NULL(graph_input_type); + // We set same format to all inputs of graph kernel subgraph, and process this latter. + // We set dtype to inputs of graph kernel subgraph same as infer dtypes. + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + for (size_t i = 0; i < input_num; ++i) { + auto input_kernel_node = AnfAlgo::VisitKernel(kernel_node->input(i + 1), 0).first; + MS_EXCEPTION_IF_NULL(input_kernel_node); + if (use_same_format) { + bool can_convert = true; + if (default_format == kOpFormat_FRAC_NZ) { + auto infer_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); + if (!CanConvertDefaultShapeToNZ(infer_shape)) { + MS_LOG(WARNING) << "Shape can't be converted to frac nz shape, so use default format instead"; + can_convert = false; + } + } + if (can_convert) { + graph_input_format->push_back(default_format); + } else { + graph_input_format->push_back(kOpFormat_DEFAULT); + } + graph_input_type->push_back(AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, i)); + continue; + } + + if (!input_kernel_node->isa()) { + // subgraph parameter from output of other nodes. + graph_input_format->push_back(AnfAlgo::GetPrevNodeOutputFormat(kernel_node, i)); + graph_input_type->push_back(AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, i)); + continue; + } + + auto para = input_kernel_node->cast(); + MS_EXCEPTION_IF_NULL(para); + if (AnfAlgo::GetOutputDeviceDataType(para, 0) != kTypeUnknown) { + // parameter already selected. + graph_input_format->push_back(AnfAlgo::GetOutputFormat(para, 0)); + graph_input_type->push_back(AnfAlgo::GetOutputDeviceDataType(para, 0)); + continue; + } + + // weight parameter. 
+ graph_input_format->push_back(default_format); + graph_input_type->push_back(AnfAlgo::GetOutputInferDataType(input_kernel_node, 0)); + } + + for (size_t i = 0; i < input_num; ++i) { + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + std::vector outputs_format = {(*graph_input_format)[i]}; + std::vector outputs_device_type = {(*graph_input_type)[i]}; + builder.SetOutputsFormat(outputs_format); + builder.SetOutputsDeviceType(outputs_device_type); + AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), input_list[i].get()); + } +} + +void UpdateEquivFormat(const std::vector> &output_index, + const std::vector &node_list, const FuncGraphPtr &func_graph, + const FuncGraphManagerPtr &mng) { + MS_EXCEPTION_IF_NULL(mng); + for (size_t i = 0; i < node_list.size(); ++i) { + // select nodes in subgraph. + auto anf_node = node_list[i]; + MS_EXCEPTION_IF_NULL(anf_node); + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + cnode->set_kernel_info(std::make_shared()); + SelectKernelInfo(cnode, KernelType::AKG_KERNEL); + // Update ReduceSum + if (!IsPrimitiveCNode(cnode, prim::kPrimReduceSum)) { + continue; + } + UpdateFracNZReduceOp(cnode); + // If ReduceSum's output is 1d and not Default format, convert it to Default format + auto out_format = AnfAlgo::GetOutputFormat(cnode, 0); + if (out_format == kOpFormat_DEFAULT || !AnfAlgo::HasNodeAttr(kAttrOutputDefault, cnode)) { + continue; + } + auto infer_shape = AnfAlgo::GetOutputInferShape(cnode, 0); + // Insert EquivFormat node, then select kernel info again + std::vector trans_inputs; + trans_inputs.push_back(NewValueNode(prim::kPrimEquivFormat)); + trans_inputs.push_back(cnode); + CNodePtr trans_node = func_graph->NewCNode(trans_inputs); + AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetPrevNodeOutputInferDataType(cnode, 0)}, + {AnfAlgo::GetOutputInferShape(cnode, 0)}, trans_node.get()); + AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue>({"x"}), trans_node); + + if (trans_node->kernel_info() == nullptr) { + trans_node->set_kernel_info(std::make_shared()); + } + SelectKernelInfo(trans_node, KernelType::AKG_KERNEL); + mng->Replace(cnode, trans_node); + } +} + +void UpdateFormatsAndDtypes(const CNodePtr &kernel_node, const std::vector &node_list, + const std::vector &input_list, const FuncGraphManagerPtr &mng, + const std::string &default_format, std::vector *graph_input_format, + std::vector *graph_input_type) { + MS_EXCEPTION_IF_NULL(kernel_node); + MS_EXCEPTION_IF_NULL(mng); + MS_EXCEPTION_IF_NULL(graph_input_format); + MS_EXCEPTION_IF_NULL(graph_input_type); + // update graph input format and dtype use inner ops. + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (graph_input_format->size() != input_num) { + MS_LOG(EXCEPTION) << "Graph input format size is not equal to input num of cnode[" << kernel_node->DebugString() + << "], [%" << graph_input_format->size() << "] != [%" << input_num << "]"; + } + std::vector need_update(input_num, false); + auto &node_users = mng->node_users(); + for (size_t i = 0; i < input_num; ++i) { + auto &input = input_list[i]; + auto iter = node_users.find(input); + if (iter == node_users.end() || iter->second.empty()) { + continue; + } + for (auto &node_user : iter->second) { + if (node_user.first->kernel_info() == nullptr || + node_user.first->kernel_info()->select_kernel_build_info() == nullptr) { + // maybe not a real kernel. 
+ continue; + } + auto user_format = AnfAlgo::GetInputFormat(node_user.first, IntToSize(node_user.second - 1)); + if (user_format != (*graph_input_format)[i]) { + MS_LOG(WARNING) << "Users of input: [" << i << "][" << input->DebugString(2) << " of [" + << kernel_node->DebugString() + << "] selected different format. we use defult: " << default_format; + (*graph_input_format)[i] = default_format; + need_update[i] = true; + } + + if (kernel_node->input(i + 1)->isa()) { + auto user_dtype = AnfAlgo::GetInputDeviceDataType(node_user.first, IntToSize(node_user.second - 1)); + if (user_dtype != (*graph_input_type)[i]) { + TypeId default_dtype = AnfAlgo::GetOutputInferDataType(input, 0); + MS_LOG(WARNING) << "Users of input: [" << i << "][" << input->DebugString(2) << " of [" + << kernel_node->DebugString() + << "] selected different dtype. we use default: " << TypeIdLabel(default_dtype); + (*graph_input_type)[i] = default_dtype; + need_update[i] = true; + } + } + } + } + + for (size_t i = 0; i < input_num; ++i) { + if (!need_update[i]) { + continue; + } + need_update[i] = false; + + MS_LOG(DEBUG) << "Update input format: " << i << " of: [" << kernel_node->DebugString() + << "] to: " << (*graph_input_format)[i]; + MS_LOG(DEBUG) << "Update input dtype: " << i << " of: [" << kernel_node->DebugString() + << "] to: " << TypeIdLabel((*graph_input_type)[i]); + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + std::vector outputs_format = {(*graph_input_format)[i]}; + std::vector outputs_device_type = {(*graph_input_type)[i]}; + builder.SetOutputsFormat(outputs_format); + builder.SetOutputsDeviceType(outputs_device_type); + AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), input_list[i].get()); + } + + ResetKernelBuildInfo(kernel_node); + // select nodes in subgraph again. + for (size_t i = 0; i < node_list.size(); ++i) { + auto anf_node = node_list[i]; + MS_EXCEPTION_IF_NULL(anf_node); + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + size_t cnode_input_num = AnfAlgo::GetInputTensorNum(cnode); + for (size_t j = 0; j < cnode_input_num; ++j) { + auto input_node = cnode->input(j + 1); + MS_EXCEPTION_IF_NULL(input_node); + if (!IsValueNode(input_node)) { + continue; + } + // reset format and dtype of const tensor. 
+ builder.SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); + builder.SetOutputsDeviceType(std::vector{kTypeUnknown}); + AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), input_node.get()); + } + SelectKernelInfo(node_list[i]->cast(), KernelType::AKG_KERNEL); + } +} + +void SetGraphKernelInfo(const CNodePtr &kernel_node, const std::vector> &output_index, + const std::vector &graph_input_format, + const std::vector &graph_input_type) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector graph_output_format; + std::vector graph_output_type; + for (size_t i = 0; i < output_index.size(); ++i) { + auto const &output = output_index[i]; + graph_output_format.push_back(AnfAlgo::GetOutputFormat(output.first, output.second)); + TypeId output_type(kTypeUnknown); + if (output.first->isa()) { + output_type = AnfAlgo::GetCNodeOutputPrecision(output.first); + } + if (output_type == kTypeUnknown) { + output_type = AnfAlgo::GetOutputDeviceDataType(output.first, output.second); + } + graph_output_type.push_back(output_type); + } + + kernel::KernelBuildInfo::KernelBuildInfoBuilder graph_info_builder; + graph_info_builder.SetInputsFormat(graph_input_format); + graph_info_builder.SetInputsDeviceType(graph_input_type); + graph_info_builder.SetOutputsFormat(graph_output_format); + graph_info_builder.SetOutputsDeviceType(graph_output_type); + graph_info_builder.SetProcessor(kernel::Processor::AICORE); + graph_info_builder.SetKernelType(KernelType::AKG_KERNEL); + graph_info_builder.SetFusionType(kernel::FusionType::OPAQUE); + auto graph_selected_info = graph_info_builder.Build(); + MS_EXCEPTION_IF_NULL(graph_selected_info); + AnfAlgo::SetSelectKernelBuildInfo(graph_selected_info, kernel_node.get()); + SetTensorDeviceInfo(*graph_selected_info, kernel_node); +} + +void SelectGraphKernelInfo(const CNodePtr &kernel_node, const FuncGraphPtr &func_graph) { + MS_EXCEPTION_IF_NULL(kernel_node); + MS_EXCEPTION_IF_NULL(func_graph); + + // collect input info of funcgraph + std::vector node_list; + std::vector input_list; + std::vector output_list; + kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); + if (input_list.size() != kernel_node->inputs().size() - 1) { + MS_EXCEPTION(ArgumentError) << "Input num of funcgraph[" << func_graph->ToString() << "] not equal input of cnode[" + << kernel_node->DebugString() << "], [%" << input_list.size() << "] != [" + << kernel_node->inputs().size() << "]"; + } + + std::string default_format; + bool use_same_format = true; + GetDefaultFormat(kernel_node, &default_format, &use_same_format); + MS_LOG(DEBUG) << "GraphKernel[" << func_graph->ToString() << "] use same input format[" << default_format + << "] for ParameterWeight."; + + std::vector graph_input_format; + std::vector graph_input_type; + UpdateGraphKernelInputsKernelInfo(kernel_node, input_list, default_format, use_same_format, &graph_input_format, + &graph_input_type); + + auto mng = func_graph->manager(); + if (mng == nullptr) { + mng = Manage(func_graph, true); + } + auto output_index = kernel::GetOutputIndex(node_list, input_list, output_list); + UpdateEquivFormat(output_index, node_list, func_graph, mng); + node_list.clear(); + input_list.clear(); + output_list.clear(); + kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); + + // update graph input format and dtype use inner ops. 
+ UpdateFormatsAndDtypes(kernel_node, node_list, input_list, mng, default_format, &graph_input_format, + &graph_input_type); + + // set fix_precision for kernel when the me prim has fix_precision attr + UpdateKernelInfo(node_list); + + output_index = kernel::GetOutputIndex(node_list, input_list, output_list); + SetGraphKernelInfo(kernel_node, output_index, graph_input_format, graph_input_type); +} +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc b/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc index 7f6b424f2e7..1f2d1570bb6 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc @@ -24,7 +24,7 @@ namespace device { namespace ascend { void GraphDescReporter::ReportData() { for (const auto &node : cnode_list_) { - if (AnfAlgo::GetKernelType(node) != TBE_KERNEL && AnfAlgo::GetKernelType(node) != AUTO_DIFF_KERNEL) { + if (AnfAlgo::GetKernelType(node) != TBE_KERNEL && AnfAlgo::GetKernelType(node) != AKG_KERNEL) { MS_LOG(WARNING) << "Skip non tbe kernel"; continue; } diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc b/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc index f05cb8bbdb8..0bd66e31efb 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc @@ -31,7 +31,7 @@ void TaskDescReporter::ReportData() { size_t task_index = 0; for (const auto &node : cnode_list_) { - if (AnfAlgo::GetKernelType(node) != TBE_KERNEL && AnfAlgo::GetKernelType(node) != AUTO_DIFF_KERNEL) { + if (AnfAlgo::GetKernelType(node) != TBE_KERNEL && AnfAlgo::GetKernelType(node) != AKG_KERNEL) { MS_LOG(WARNING) << "Skip non tbe kernel"; ++task_index; continue; diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc index 18da9665750..3281ba9b5fc 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc +++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc @@ -43,7 +43,37 @@ bool TaskGenerator::GenTasks(const std::vector &anf_node_list, std::ve void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) { MS_EXCEPTION_IF_NULL(anf_node_ptr); if (anf_node_ptr->inputs().size() != 2) { - MS_LOG(EXCEPTION) << "atomic Addr clean Node Input nodes not equal 2."; + // akg process + // set atomic clean addr + if (AnfAlgo::HasNodeAttr(kAttrAutomicOutputIndexs, anf_node_ptr)) { + auto clean_output_indexs = AnfAlgo::GetNodeAttr>(anf_node_ptr, kAttrAutomicOutputIndexs); + auto graph = anf_node_ptr->func_graph(); + MS_EXCEPTION_IF_NULL(graph); + auto manager = graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + auto node_users = manager->node_users(); + if (node_users[anf_node_ptr].empty()) { + MS_LOG(EXCEPTION) << "Node users of " << anf_node_ptr->ToString() << " is empty."; + } + auto depend_node = node_users[anf_node_ptr].pop().first; + if (!IsPrimitiveCNode(depend_node, prim::kPrimDepend)) { + MS_LOG(EXCEPTION) << "Checking Depend node failed"; + } + if (node_users[depend_node].empty()) { + MS_LOG(EXCEPTION) << "Node users of " << depend_node->ToString() << " is empty."; + } + auto post_node = node_users[depend_node].pop().first; + for (auto index : clean_output_indexs) { + auto device_address = 
AnfAlgo::GetOutputAddr(post_node, index); + kernel::AddressPtr input = std::make_shared(); + input->addr = device_address->ptr_; + MS_EXCEPTION_IF_NULL(input->addr); + input->size = device_address->size_; + kernel_inputs->push_back(input); + } + MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size(); + } + return; } MS_EXCEPTION_IF_NULL(anf_node_ptr->inputs()[1]); auto pre_node = (anf_node_ptr->inputs()[1])->cast(); @@ -59,7 +89,7 @@ void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressP input->size = device_address->size_; kernel_inputs->push_back(input); } - MS_LOG(INFO) << "AtomicAddClean clean output size:" << clean_output_indexs.size(); + MS_LOG(DEBUG) << "AtomicAddClean clean output size:" << clean_output_indexs.size(); } // set clean workspace address if (AnfAlgo::HasNodeAttr(kAttrAutomicWorkspaceSize, pre_node)) { diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc index b6bc22603fa..19d22845103 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc @@ -16,7 +16,7 @@ #include "device/gpu/gpu_kernel_build.h" #include #include "kernel/kernel.h" -#include "kernel/akg/akgkernelbuild.h" +#include "kernel/akg/akg_kernel_build.h" #include "kernel/akg/gpu/akg_gpu_kernel_build.h" #include "kernel/gpu/gpu_kernel_factory.h" #include "operator/ops.h" @@ -37,7 +37,7 @@ void GpuBuild(const KernelGraphPtr &kernel_graph) { continue; } - if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) == KernelType::AUTO_DIFF_KERNEL) { + if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) == KernelType::AKG_KERNEL) { auto gpu_kernel_ptr = kernel::AkgGpuKernelBuild(kernel); if (!gpu_kernel_ptr) { MS_LOG(EXCEPTION) << "Build akg kernel op[" << kernel_name << "] failed"; diff --git a/mindspore/ccsrc/device/gpu/kernel_info_setter.cc b/mindspore/ccsrc/device/gpu/kernel_info_setter.cc index 05d6679f768..42e76e2483c 100644 --- a/mindspore/ccsrc/device/gpu/kernel_info_setter.cc +++ b/mindspore/ccsrc/device/gpu/kernel_info_setter.cc @@ -184,7 +184,7 @@ void SetKernelInfo(const CNodePtr &kernel_node) { if (!result) { result = SelectAkgKernel(kernel_node, builder->Build()); - kernel_type = AUTO_DIFF_KERNEL; + kernel_type = AKG_KERNEL; } if (!result) { diff --git a/mindspore/ccsrc/ir/anf.cc b/mindspore/ccsrc/ir/anf.cc index 29a74b79ba0..3b2402172b9 100644 --- a/mindspore/ccsrc/ir/anf.cc +++ b/mindspore/ccsrc/ir/anf.cc @@ -26,6 +26,8 @@ #include "ir/func_graph.h" #include "ir/primitive_base.h" +#include "operator/ops.h" + namespace mindspore { // namespace to support intermediate representation definition CNode::CNode(const std::vector &inputs, const FuncGraphPtr &func_graph) @@ -106,10 +108,14 @@ std::string ValueNode::fullname_with_scope() { bool IsPrimitiveCNode(const AnfNodePtr &node, const PrimitivePtr &value) { MS_EXCEPTION_IF_NULL(node); auto cnode = node->cast(); - if (cnode != nullptr) { + if (cnode == nullptr) { + return false; + } + if (value != nullptr) { return cnode->IsApply(value); } - return false; + const auto &prim = GetValueNode(cnode->input(0)); + return prim != nullptr; } PrimitivePtr GetCNodePrimitive(const AnfNodePtr &node) { diff --git a/mindspore/ccsrc/ir/anf.h b/mindspore/ccsrc/ir/anf.h index c2db17aec5a..c2bd3ab2086 100644 --- a/mindspore/ccsrc/ir/anf.h +++ b/mindspore/ccsrc/ir/anf.h @@ -124,6 +124,7 @@ class AnfNode : public Base { const KernelInfoDevice *kernel_info() const { return kernel_info_.get(); } KernelInfoDevice 
*kernel_info() { return kernel_info_.get(); } + const KernelInfoDevicePtr &kernel_info_ptr() { return kernel_info_; } void set_kernel_info(const KernelInfoDevicePtr &kernel_info) { kernel_info_ = kernel_info; } AbstractBasePtr abstract() const { return abstract_; } @@ -395,9 +396,9 @@ static S GetValue(const ValuePtr &value) { std::string GetCNodeFuncName(CNodePtr cnode); // used to check whether an AnfNode is a cnode with a kind of Primitive as first input -bool IsPrimitiveCNode(const AnfNodePtr &node, const PrimitivePtr &value); +bool IsPrimitiveCNode(const AnfNodePtr &node, const PrimitivePtr &value = nullptr); -// used to check whether an AnfNode is a cnode with a Primitive as first input +// used to get PrimitivePtr from a cnode first input PrimitivePtr GetCNodePrimitive(const AnfNodePtr &node); // used to check whether an AnfNode is a valuenode having some Primitive value diff --git a/mindspore/ccsrc/ir/anf_extends.cc b/mindspore/ccsrc/ir/anf_extends.cc index 0345ad29f5f..432ffdb6060 100644 --- a/mindspore/ccsrc/ir/anf_extends.cc +++ b/mindspore/ccsrc/ir/anf_extends.cc @@ -70,7 +70,7 @@ std::string CNode::fullname_with_scope() { } fullname_with_scope_ = name; } else { - // cnode input 0 should be primitive ptr + // cnode input 0 should be primitive ptr or funcgraph ptr auto value_ptr = input(0)->cast(); if (value_ptr == nullptr) { MS_LOG(WARNING) << "Input 0 of cnode is not a value node, its type is " << input(0)->type_name() << "."; @@ -84,11 +84,23 @@ std::string CNode::fullname_with_scope() { return fullname_with_scope_; } - PrimitivePtr prim = GetValue(input_value); + auto prim = input_value->cast(); MS_EXCEPTION_IF_NULL(scope()); - MS_EXCEPTION_IF_NULL(prim); - fullname_with_scope_ = - scope()->name() + "/" + prim->name() + "-op" + id_generator::get_id(shared_from_base()); + fullname_with_scope_ = scope()->name() + "/"; + if (prim != nullptr) { + fullname_with_scope_ += prim->name(); + } else { + auto func_graph = input_value->cast(); + MS_EXCEPTION_IF_NULL(func_graph); + auto fg_flag = func_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); + if (fg_flag != nullptr) { + auto fg_name = GetValue(fg_flag); + fullname_with_scope_ += "GraphKernel_" + fg_name; + } else { + fullname_with_scope_ += func_graph->ToString(); + } + } + fullname_with_scope_ += "-op" + id_generator::get_id(shared_from_base()); } return fullname_with_scope_; diff --git a/mindspore/ccsrc/ir/dtype/number.h b/mindspore/ccsrc/ir/dtype/number.h index 3930f51d730..f8a746f8d68 100644 --- a/mindspore/ccsrc/ir/dtype/number.h +++ b/mindspore/ccsrc/ir/dtype/number.h @@ -77,9 +77,9 @@ class Bool : public Number { TypeId generic_type_id() const override { return kNumberTypeBool; } TypePtr DeepCopy() const override { return std::make_shared(); } - std::string ToString() const override { return "Bool_"; } - std::string ToReprString() const override { return "bool_"; } - std::string DumpText() const override { return "Bool_"; } + std::string ToString() const override { return "Bool"; } + std::string ToReprString() const override { return "bool"; } + std::string DumpText() const override { return "Bool"; } }; // Int diff --git a/mindspore/ccsrc/ir/func_graph.cc b/mindspore/ccsrc/ir/func_graph.cc index d5d80eb2f02..cdca98fc616 100644 --- a/mindspore/ccsrc/ir/func_graph.cc +++ b/mindspore/ccsrc/ir/func_graph.cc @@ -34,7 +34,7 @@ namespace mindspore { * Methods of Graph */ FuncGraph::FuncGraph() - : flags_(), + : attrs_(), transforms_(), parameter_default_value_(), seen_(0), @@ -95,13 +95,27 @@ ParameterPtr 
FuncGraph::AddWeightParameter(const std::string &name) { return p; } -bool FuncGraph::has_flag(const std::string &flag) { - if (flags_.count(flag)) { - return flags_[flag]; +bool FuncGraph::has_flag(const std::string &key) { + auto iter = attrs_.find(key); + if (iter != attrs_.cend()) { + if (iter->second->isa()) { + return GetValue(iter->second); + } + MS_LOG(WARNING) << "key " << key << " is not a flag, please use has_attr function."; } return false; } +bool FuncGraph::has_attr(const std::string &key) { + auto iter = attrs_.find(key); + return !(iter == attrs_.cend()); +} + +ValuePtr FuncGraph::get_attr(const std::string &key) { + auto iter = attrs_.find(key); + return iter == attrs_.cend() ? nullptr : iter->second; +} + CNodePtr FuncGraph::NewCNode(const std::vector &inputs) { CNodePtr cnode = std::make_shared(inputs, shared_from_base()); if (has_flag(GRAPH_FLAG_HAS_EFFECT)) { diff --git a/mindspore/ccsrc/ir/func_graph.h b/mindspore/ccsrc/ir/func_graph.h index c66fee2d13e..5f09dfe6b58 100644 --- a/mindspore/ccsrc/ir/func_graph.h +++ b/mindspore/ccsrc/ir/func_graph.h @@ -74,6 +74,7 @@ using FuncGraphMap = OrderedMap; const char FUNC_GRAPH_FLAG_IGNORE_VALUES[] = "ignore_values"; const char FUNC_GRAPH_FLAG_DEFER_INLINE[] = "defer_inline"; const char FUNC_GRAPH_FLAG_CORE[] = "core"; +const char FUNC_GRAPH_ATTR_GRAPH_KERNEL[] = "graph_kernel"; const char FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER[] = "spec_param"; namespace abstract { @@ -195,10 +196,19 @@ class FuncGraph : public FuncGraphBase { void set_is_generate(bool generated) { is_generated_ = generated; } bool is_generated() const { return is_generated_; } - bool has_flag(const std::string &flag); - std::unordered_map &flags() { return flags_; } - void set_flags(const std::unordered_map &flags) { flags_ = flags; } - void set_flags(const std::string &key, const bool value) { flags_[key] = value; } + std::unordered_map &attrs() { return attrs_; } + void set_attrs(const std::unordered_map &attrs) { + for (auto &attr : attrs) { + attrs_[attr.first] = attr.second; + } + } + bool has_flag(const std::string &key); + void set_flag(const std::string &key, bool flag) { attrs_[key] = MakeValue(flag); } + void erase_flag(const std::string &key) { (void)attrs_.erase(key); } + + bool has_attr(const std::string &key); + ValuePtr get_attr(const std::string &key); + void set_attr(const std::string &key, const ValuePtr &value) { attrs_[key] = value; } std::unordered_map &transforms() { return transforms_; } void set_transforms(const std::unordered_map &transforms) { @@ -317,7 +327,7 @@ class FuncGraph : public FuncGraphBase { std::unordered_map &make_ref_params() { return make_ref_params_; } - std::unordered_map flags_; + std::unordered_map attrs_; std::unordered_map transforms_; // parameter default value std::map parameter_default_value_; diff --git a/mindspore/ccsrc/ir/func_graph_cloner.cc b/mindspore/ccsrc/ir/func_graph_cloner.cc index 4622bf9ea2d..4a0c69d99a1 100644 --- a/mindspore/ccsrc/ir/func_graph_cloner.cc +++ b/mindspore/ccsrc/ir/func_graph_cloner.cc @@ -90,6 +90,7 @@ void Cloner::CloneCNode(const AnfNodePtr &node, const FuncGraphPtr &target) { new_node->set_abstract(old_node->abstract()); ScopePtr scope = (node->scope() != kDefaultScope) ? 
node->scope() : this->scope(); new_node->set_scope(scope); + new_node->set_kernel_info(old_node->kernel_info_ptr()); repl_node_[old_node] = new_node; nodes_.emplace_back(old_node, new_node); TraceManager::EndTrace(); @@ -211,7 +212,7 @@ void Cloner::SetFuncGraphInfo(const FuncGraphPtr &func_graph, FuncGraphPtr *cons MS_EXCEPTION_IF_NULL(target_func_graph); TraceManager::DebugTrace(func_graph->debug_info(), target_relation_); *target_func_graph = std::make_shared(); - (*target_func_graph)->set_flags(func_graph->flags()); + (*target_func_graph)->set_attrs(func_graph->attrs()); (*target_func_graph)->set_transforms(func_graph->transforms()); (*target_func_graph)->set_has_vararg(func_graph->has_vararg()); (*target_func_graph)->set_has_kwarg(func_graph->has_kwarg()); @@ -636,9 +637,14 @@ FuncGraphPtr TransformableClone(const FuncGraphPtr &func_graph, const TraceInfoP if (MsContext::GetInstance()->is_multi_graph_sink()) { if (func_graph->has_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES)) { - new_func_graph->set_flags(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); + new_func_graph->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); } } + + if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + new_func_graph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, func_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); + } + return new_func_graph; } } // namespace mindspore diff --git a/mindspore/ccsrc/ir/func_graph_extends.cc b/mindspore/ccsrc/ir/func_graph_extends.cc index 14998a1eaa0..ad7aa6ee0cb 100644 --- a/mindspore/ccsrc/ir/func_graph_extends.cc +++ b/mindspore/ccsrc/ir/func_graph_extends.cc @@ -399,8 +399,8 @@ void FuncGraph::ReleaseFullOrderToEffectOrder() { depend_inputs.push_back(*iter); } } - set_flags(GRAPH_FLAG_HAS_EFFECT, false); - set_flags(GRAPH_FLAG_EFFECT_PATIAL_ORDER, true); + set_flag(GRAPH_FLAG_HAS_EFFECT, false); + set_flag(GRAPH_FLAG_EFFECT_PATIAL_ORDER, true); if (!depend_inputs.empty()) { SetEffectDepends(depend_inputs); } diff --git a/mindspore/ccsrc/kernel/CMakeLists.txt b/mindspore/ccsrc/kernel/CMakeLists.txt index 01fc7faa791..ceea6b1a990 100644 --- a/mindspore/ccsrc/kernel/CMakeLists.txt +++ b/mindspore/ccsrc/kernel/CMakeLists.txt @@ -9,6 +9,10 @@ if (ENABLE_D) file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel_query.cc" "kernel_fusion.cc" + "akg/ascend/*.cc" + "akg/akg_kernel_build.cc" + "akg/akg_kernel_attrs_process.cc" + "akg/akg_kernel_metadata.cc" "tbe/*.cc" "aicpu/*.cc" "rts/*.cc" @@ -33,7 +37,7 @@ if (ENABLE_GPU) file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cu" "akg/gpu/*.cc" - "akg/akgkernelbuild.cc" + "akg/akg_kernel_build.cc" "akg/akg_kernel_attrs_process.cc" ) diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc index 1afe01bd6ae..c83994b5f22 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc @@ -24,7 +24,7 @@ #include #include "device/kernel_runtime.h" #include "kernel/aicpu/aicpu_kernel_mod.h" -#include "kernel/akg/akgkernelbuild.h" +#include "kernel/akg/akg_kernel_build.h" #include "proto/tensor.pb.h" #include "proto/tensor_shape.pb.h" #include "proto/attr.pb.h" diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc b/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc index c9ff41dc552..3a0cc3eb253 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc @@ -79,6 +79,10 @@ void SetAkgAttrsForCast(const AnfNodePtr 
&anf_node) { dst_type = "float32"; } else if (output_type == kFloat16->type_id()) { dst_type = "float16"; + } else if (output_type == kInt32->type_id()) { + dst_type = "int32"; + } else { + MS_LOG(WARNING) << "Unknown cast_to type: " << TypeIdToType(output_type)->ToString(); } AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node); } diff --git a/mindspore/ccsrc/kernel/akg/akgkernelbuild.cc b/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc similarity index 78% rename from mindspore/ccsrc/kernel/akg/akgkernelbuild.cc rename to mindspore/ccsrc/kernel/akg/akg_kernel_build.cc index c0759172a59..1f88bbb89a1 100644 --- a/mindspore/ccsrc/kernel/akg/akgkernelbuild.cc +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/akg/akgkernelbuild.h" +#include "kernel/akg/akg_kernel_build.h" #include #include #include @@ -43,7 +43,9 @@ namespace kernel { constexpr int ME_MAX_KERNEL_NAME_LENGTH = 200; constexpr int32_t ARGS_SIZE = 1; constexpr auto kCompileWithJsonFunc = "compilewithjson"; + // json key +constexpr auto kOpDesc = "op_desc"; constexpr auto kInputDesc = "input_desc"; constexpr auto kShape = "shape"; constexpr auto kDataType = "data_type"; @@ -51,13 +53,24 @@ constexpr auto kOutputDesc = "output_desc"; constexpr auto kName = "name"; constexpr auto kTensorName = "tensor_name"; constexpr auto kValue = "value"; -constexpr auto KInpputNames = "input_names"; +constexpr auto KDynInputSizes = "dyn_input_sizes"; +constexpr auto KInputNames = "input_names"; constexpr auto KInput = "input"; constexpr auto KDtype = "dtype"; -int AkgKernelBuild::op_cnt_ = 0; -std::mutex AkgKernelBuild::op_cnt_mtx_; +namespace { +template +std::string Vector2Str(const std::vector &inputs) { + if (!inputs.empty()) { + std::ostringstream oss; + (void)std::copy(inputs.begin(), inputs.end() - 1, std::ostream_iterator(oss, ", ")); + oss << inputs.back(); + return oss.str(); + } + return ""; +} +} // namespace -std::string PyObjectToStr(PyObject *const PyObj) { +std::string AkgKernelBuild::PyObjectToStr(PyObject *const PyObj) { char *pChar = nullptr; std::string str_res; if (PyObj == nullptr) { @@ -76,6 +89,72 @@ std::string PyObjectToStr(PyObject *const PyObj) { return str_res; } +std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag, + const std::pair &position) { + if (node_json.count(tag) == 0) { + MS_LOG(ERROR) << "Node [" << node_json.dump() << "] has no key [" << tag << "]."; + return ""; + } + + auto const &tag_desc = node_json[tag]; + nlohmann::json first_index; + if (tag == kOutputDesc) { + first_index = tag_desc; + } else if (!tag_desc.is_array() || tag_desc.size() <= position.first) { + MS_LOG(ERROR) << "Node [" << tag_desc.dump() << "] has no enough value [" << position.first << "]."; + return ""; + } else { + first_index = tag_desc[position.first]; + } + + if (!first_index.is_array() || first_index.size() <= position.second) { + MS_LOG(ERROR) << "Node [" << first_index.dump() << "] has no enough value [" << position.second << "]."; + return ""; + } + auto const &second_index = first_index[position.second]; + if (second_index.count(kTensorName) == 0) { + MS_LOG(ERROR) << "Node [" << second_index.dump() << "] has no key [" << kTensorName << "]."; + return ""; + } + + return second_index[kTensorName]; +} + +void SetTensorName(const std::string &tag, const std::string &new_name, const std::pair &position, + nlohmann::json *const node_json) { + MS_EXCEPTION_IF_NULL(node_json); + if (node_json->count(tag) == 0) { + 
MS_LOG(ERROR) << "Node [" << node_json->dump() << "] has no key [" << tag << "]."; + return; + } + + nlohmann::json *tag_desc = &((*node_json)[tag]); + nlohmann::json *first_index; + if (tag == kOutputDesc) { + first_index = tag_desc; + } else if (!tag_desc->is_array() || tag_desc->size() <= position.first) { + MS_LOG(ERROR) << "Node [" << tag_desc->dump() << "] has no enough value [" << position.first << "]."; + return; + } else { + first_index = &((*tag_desc)[position.first]); + } + + if (!first_index->is_array() || first_index->size() <= position.second) { + MS_LOG(ERROR) << "Node [" << first_index->dump() << "] has no enough value [" << position.second << "]."; + return; + } + nlohmann::json *second_index = &((*first_index)[position.second]); + if (second_index->count(kTensorName) == 0) { + MS_LOG(ERROR) << "Node [" << second_index->dump() << "] has no key [" << kTensorName << "]."; + return; + } + (*second_index)[kTensorName] = new_name; + return; +} + +int AkgKernelBuild::op_cnt_ = 0; +std::mutex AkgKernelBuild::op_cnt_mtx_; + std::string AkgKernelBuild::GetProcessor(const AnfNodePtr &anf_node) { MS_EXCEPTION_IF_NULL(anf_node); std::string device; @@ -187,10 +266,7 @@ bool AkgKernelBuild::CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::j for (size_t input_i = 0; input_i < input_tensor_num; input_i++) { // dtype : float16 auto type_id = AnfAlgo::GetInputDeviceDataType(anf_node, real_input_index); - TypePtr type_ptr = TypeIdToType(type_id); - MS_EXCEPTION_IF_NULL(type_ptr); - std::string dtype = type_ptr->ToString(); - dtype = Dtype2String(dtype); + std::string dtype = TypeId2String(type_id); if (dtype.empty()) { MS_LOG(ERROR) << "Op [" << op_name << "] input [" << input_i << "] data type is null. "; return false; @@ -198,13 +274,23 @@ bool AkgKernelBuild::CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::j nlohmann::json input_desc_json; input_desc_json[kDataType] = dtype; input_desc_json[kName] = op_input_name; - input_desc_json[kTensorName] = - op_input_name + "_" + std::to_string(real_input_index) + "_" + std::to_string(input_i); - input_desc_json[kShape] = AnfAlgo::GetInputDeviceShape(anf_node, real_input_index); + input_desc_json[kTensorName] = "input_" + std::to_string(GetInputTensorIdxInc(anf_node, real_input_index)); + auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, real_input_index); + if (GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) { + MS_LOG(WARNING) << "we take input[" << real_input_index << "] of [" << anf_node->DebugString(2) + << "] as const tensor, shape: [" << Vector2Str(input_shape) + << "], value: " << input_desc_json[kValue]; + + input_shape.clear(); + } + if (input_shape.empty()) { + input_shape.push_back(1); + } + input_desc_json[kShape] = input_shape; input_list.emplace_back(input_desc_json); + real_input_index++; } inputs_json->emplace_back(input_list); - real_input_index++; } return true; } @@ -220,10 +306,7 @@ bool AkgKernelBuild::CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann:: for (size_t i = 0; i < output_tensor_num; i++) { nlohmann::json output_json; auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, i); - TypePtr type_ptr = TypeIdToType(type_id); - MS_EXCEPTION_IF_NULL(type_ptr); - std::string dtype = type_ptr->ToString(); - dtype = Dtype2String(dtype); + std::string dtype = TypeId2String(type_id); if (dtype.empty()) { MS_LOG(ERROR) << "Op [" << op_name << "] output [" << i << "] data type is null. 
"; return false; @@ -232,7 +315,7 @@ bool AkgKernelBuild::CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann:: std::string output_name = outputs[i]->name(); output_json[kDataType] = dtype; output_json[kName] = output_name; - output_json[kTensorName] = output_name + "_" + std::to_string(i); + output_json[kTensorName] = "output_" + std::to_string(i) + "_" + std::to_string(GetOutputTensorIdxInc()); output_json[kShape] = AnfAlgo::GetOutputDeviceShape(anf_node, i); outputs_json->push_back(output_json); } @@ -358,15 +441,14 @@ bool AkgKernelBuild::GenerateSingleKernelJson(const AnfNodePtr &anf_node, const MS_EXCEPTION_IF_NULL(op_info_ptr); // get basic params from currentNodeOpDesc - (*node_json)["platform"] = "AKG"; (*node_json)[kName] = op_name; - (*node_json)["fusion_type"] = AnfAlgo::GetFusionType(anf_node); (*node_json)["impl_path"] = op_info_ptr->impl_path(); (*node_json)["process"] = AkgKernelBuild::GetProcessor(anf_node); + (*node_json)["composite"] = false; auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); - ValuePtr input_names_v = primitive->GetAttr(KInpputNames); + ValuePtr input_names_v = primitive->GetAttr(KInputNames); if (input_names_v == nullptr) { MS_LOG(ERROR) << "ApplyKernel has no input_names, op[" << op_name << "]."; return false; @@ -465,12 +547,12 @@ KernelPackPtr AkgKernelBuild::OpBuild(const std::string &node_json, const AnfNod (void)alarm(0); if (pRes == nullptr) { MS_LOG(ERROR) << "No ret got, failed to call function [" << kCompileWithJsonFunc << "], args:\n(" - << PyObjectToStr(pArg) << ")."; + << AkgKernelBuild::PyObjectToStr(pArg) << ")."; return nullptr; } if (PyObject_IsTrue(pRes) != 1) { MS_LOG(ERROR) << "Illegal ret, failed to call function [" << kCompileWithJsonFunc << "], args:\n(" - << PyObjectToStr(pArg) << ")."; + << AkgKernelBuild::PyObjectToStr(pArg) << ")."; return nullptr; } @@ -513,5 +595,29 @@ KernelPackPtr AkgKernelBuild::BuildByJson(const AnfNodePtr &anf_node, std::vecto << "]"; return kernel_pack; } + +size_t AkgKernelBuild::GetInputTensorIdxInc(const AnfNodePtr &anf_node, size_t input_idx) { + MS_EXCEPTION_IF_NULL(anf_node); + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (input_idx + 1 >= cnode->inputs().size()) { + MS_EXCEPTION(ArgumentError) << "input_idx [" << input_idx << "] is out of index of inputs of [" + << cnode->inputs().size() - 1 << "][" << cnode->DebugString() << "]"; + } + + auto input_node = cnode->input(input_idx + 1); + if (input_tensor_idx_.find(input_node) == input_tensor_idx_.end()) { + size_t index = input_tensor_idx_.size(); + input_tensor_idx_[input_node] = index; + } + + return input_tensor_idx_[input_node]; +} + +size_t AkgKernelBuild::GetOutputTensorIdxInc() { + size_t idx = output_tensor_idx_++; + return idx; +} + } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/akgkernelbuild.h b/mindspore/ccsrc/kernel/akg/akg_kernel_build.h similarity index 70% rename from mindspore/ccsrc/kernel/akg/akgkernelbuild.h rename to mindspore/ccsrc/kernel/akg/akg_kernel_build.h index f8127843bd6..d32bd48ce6e 100644 --- a/mindspore/ccsrc/kernel/akg/akgkernelbuild.h +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_build.h @@ -32,29 +32,45 @@ namespace mindspore { namespace kernel { class AkgKernelBuild { public: - AkgKernelBuild() = default; + AkgKernelBuild() { + input_tensor_idx_ = {}; + output_tensor_idx_ = 0; + } ~AkgKernelBuild() = default; KernelPackPtr BuildByJson(const AnfNodePtr &anf_node, std::vector *const input_size, std::vector 
*const output_size); + static std::string GetProcessor(const AnfNodePtr &anf_node); + static std::string PyObjectToStr(PyObject *const PyObj); - private: + protected: bool CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const inputs_json); bool CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const outputs_json); bool CreateAttrDescJson(const AnfNodePtr &anf_node, const std::string &op_name, const std::shared_ptr &op_info, nlohmann::json *const attrs_json); + KernelPackPtr OpBuild(const std::string &node_json, const AnfNodePtr &anf_node); + int GetOpCntInc(); + size_t GetInputTensorIdxInc(const AnfNodePtr &anf_node, size_t input_idx); + size_t GetOutputTensorIdxInc(); bool GenerateSingleKernelJson(const AnfNodePtr &anf_node, const std::string &op_name, nlohmann::json *const node_json); - KernelPackPtr OpBuild(const std::string &node_json, const AnfNodePtr &anf_node); - int GetOpCntInc(); - std::string GetProcessor(const AnfNodePtr &anf_node); static int op_cnt_; // lock for variable fusionOpCnt in singleton mode static std::mutex op_cnt_mtx_; std::string json_name_; std::string json_info_; + std::unordered_map input_tensor_idx_; + size_t output_tensor_idx_; }; + +bool GetIOSize(const nlohmann::json &node_json, std::vector *const input_size, + std::vector *const output_size); +void SetTensorName(const std::string &tag, const std::string &new_name, const std::pair &position, + nlohmann::json *const node_json); +std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag, + const std::pair &position); + } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc b/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc new file mode 100644 index 00000000000..3515add1e09 --- /dev/null +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/akg/akg_kernel_metadata.h" +#include +#include "session/anf_runtime_algorithm.h" +#include "kernel/oplib/oplib.h" +#include "kernel/common_utils.h" + +namespace mindspore { +namespace kernel { +void AkgMetadataInfo(const CNodePtr &kernel_node, + std::vector> *const kernel_info_list) { + MS_EXCEPTION_IF_NULL(kernel_node); + MS_EXCEPTION_IF_NULL(kernel_info_list); + + std::string op_name = AnfAlgo::GetCNodeName(kernel_node); + for (size_t i = 0; i < support_devices.size(); i++) { + auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG); + if (op_info_ptr == nullptr) { + continue; + } + + if (!ParseMetadata(kernel_node, op_info_ptr, Processor(i), kernel_info_list)) { + MS_LOG(WARNING) << "Akg failed to parse metadata of op[" << op_name << "], device[" << support_devices[i] << "]."; + } else { + MS_LOG(DEBUG) << "Akg parsed metadata of op[" << op_name << "], device[" << support_devices[i] << "]."; + break; + } + } + + if (kernel_info_list->empty()) { + MS_LOG(WARNING) << "Akg does not have metadata of op[" << op_name << "]."; + } +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h b/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h new file mode 100644 index 00000000000..5e329f0080b --- /dev/null +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h @@ -0,0 +1,31 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ +#define MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ + +#include +#include +#include +#include +#include "kernel/kernel_build_info.h" + +namespace mindspore { +namespace kernel { +void AkgMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list); +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc new file mode 100644 index 00000000000..454b8052ab9 --- /dev/null +++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc @@ -0,0 +1,385 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/akg/ascend/akg_ascend_kernel_build.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include "ir/dtype.h" +#include "ir/func_graph.h" +#include "kernel/kernel.h" +#include "kernel/common_utils.h" +#include "kernel/tbe/tbe_utils.h" +#include "kernel/akg/ascend/akg_ascend_kernel_mod.h" +#include "kernel/akg/akg_kernel_attrs_process.h" +#include "session/anf_runtime_algorithm.h" + +namespace mindspore { +namespace kernel { + +constexpr int32_t PARALLEL_ARGS_SIZE = 3; +constexpr int32_t PROCESS_NUM = 16; +constexpr int32_t TIME_OUT = 300; + +constexpr auto kOpDesc = "op_desc"; +constexpr auto kShape = "shape"; +constexpr auto kDataType = "data_type"; +constexpr auto kInputDesc = "input_desc"; +constexpr auto kOutputDesc = "output_desc"; +constexpr auto kTensorName = "tensor_name"; +constexpr auto kCompileAkgKernelParallelFunc = "compile_akg_kernel_parallel"; +constexpr auto kMultiProcModule = "mindspore._extends.parallel_compile.akg_compiler.multi_process_compiler"; + +bool AkgAscendKernelBuilder::CollectJson(const AnfNodePtr &anf_node) { + MS_EXCEPTION_IF_NULL(anf_node); + std::string op_name = AnfAlgo::GetCNodeName(anf_node); + MS_LOG(INFO) << "AKG start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; + auto it = kAkgKernelAttrsProcessMap.find(op_name); + if (it != kAkgKernelAttrsProcessMap.end()) { + it->second(anf_node); + } + MS_LOG(INFO) << "Akg start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; + nlohmann::json node_json; + if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { + MS_LOG(ERROR) << "Op[" << op_name << "] create single kernel json failed."; + } + + kernel_json_ = node_json.dump(); + + if (!GetIOSize(node_json, &input_size_list_, &output_size_list_)) { + MS_LOG(ERROR) << "Calculate memory size failed."; + return false; + } + + return true; +} + +bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector &anf_nodes, + const std::vector &input_list, + const std::vector &output_list) { + if (anf_nodes.empty() || input_list.empty()) { + MS_LOG(ERROR) << "Invalid input size, anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size() + << "]."; + return false; + } + MS_LOG(INFO) << "anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size() << "], output_list [" + << output_list.size() << "]."; + + std::map node_json_map; + + for (auto const &anf_node : anf_nodes) { + MS_EXCEPTION_IF_NULL(anf_node); + std::string op_name = AnfAlgo::GetCNodeName(anf_node); + if (!AnfAlgo::IsRealKernel(anf_node)) { + MS_LOG(ERROR) << "Invalid anf node to build [" << anf_node->fullname_with_scope() << "]."; + return false; + } + auto it = kAkgKernelAttrsProcessMap.find(op_name); + if (it != kAkgKernelAttrsProcessMap.end()) { + it->second(anf_node); + } + + nlohmann::json node_json; + if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { + MS_LOG(ERROR) << "Op [" << op_name << "] create single kernel json failed."; + return false; + } + // No need for composite op. 
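For orientation, the per-op JSON handled here keeps input_desc as a list of lists (one inner list per logical input, so dynamic inputs expand naturally) and output_desc as a flat list; GetTensorName/SetTensorName above simply index into that structure, and the fused builder below rewrites the names to stitch a producer's output to a consumer's input. A minimal self-contained sketch using only nlohmann::json, with invented tensor names rather than output from any real kernel:

#include <iostream>
#include "nlohmann/json.hpp"

int main() {
  nlohmann::json node;
  // input_desc[i][j]: the j-th tensor of the i-th logical input.
  node["input_desc"][0][0]["tensor_name"] = "input_0";
  node["input_desc"][1][0]["tensor_name"] = "input_1";
  // output_desc[k]: the k-th output tensor.
  node["output_desc"][0]["tensor_name"] = "output_0_0";

  // GetTensorName(node, "input_desc", {1, 0}) amounts to this lookup:
  std::cout << node["input_desc"][1][0]["tensor_name"] << std::endl;  // prints "input_1"

  // SetTensorName replaces an input name with the producing node's output name
  // (shown within one node here only for brevity).
  node["input_desc"][1][0]["tensor_name"] = node["output_desc"][0]["tensor_name"];
  std::cout << node.dump(2) << std::endl;
  return 0;
}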
+ node_json.erase("id"); + node_json.erase("op"); + node_json.erase("composite"); + + auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(primitive); + + if (primitive->GetAttr("fusion") != nullptr) { + node_json["fusion"] = primitive->GetAttr("fusion")->ToString(); + } + + node_json_map[anf_node] = node_json; + } + + for (auto const &anf_node : anf_nodes) { + std::vector dyn_input_sizes; + auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(primitive); + + if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) { + dyn_input_sizes = GetValue>(primitive->GetAttr(kAttrDynInputSizes)); + } + + bool is_dynamic_input = !dyn_input_sizes.empty(); + size_t input_num = is_dynamic_input ? dyn_input_sizes.size() : AnfAlgo::GetInputTensorNum(anf_node); + size_t real_input_index = 0; + for (size_t i = 0; i < input_num; ++i) { + size_t input_tensor_num = is_dynamic_input ? IntToSize(dyn_input_sizes[i]) : 1; + for (size_t j = 0; j < input_tensor_num; ++j) { + auto tmp_input = GetKernelInput(anf_node, real_input_index); + std::string tensor_name = GetTensorName(node_json_map[anf_node], kInputDesc, std::make_pair(i, j)); + if (node_json_map.find(tmp_input.first) != node_json_map.end()) { + std::string new_tensor_name = + GetTensorName(node_json_map[tmp_input.first], kOutputDesc, std::make_pair(0, tmp_input.second)); + SetTensorName(kInputDesc, new_tensor_name, std::make_pair(i, j), &(node_json_map[anf_node])); + MS_LOG(DEBUG) << "Update [" << real_input_index << "] input [" << tensor_name << "] of [" + << anf_node->fullname_with_scope() << "] to [" << tmp_input.second << "] output [" + << new_tensor_name << "] of [" << tmp_input.first->fullname_with_scope() << "]."; + } else { + MS_LOG(DEBUG) << "[" << real_input_index << "] input " << tensor_name << "] of [" + << anf_node->fullname_with_scope() << "] is out input."; + } + real_input_index++; + } + } + } + + nlohmann::json fused_node_json; + std::vector node_json_desc; + std::transform(anf_nodes.begin(), anf_nodes.end(), std::back_inserter(node_json_desc), + [&node_json_map](const AnfNodePtr &anf_node) { return node_json_map[anf_node]; }); + fused_node_json[kOpDesc] = node_json_desc; + + nlohmann::json inputs_json; + auto input_index = GetInputIndex(anf_nodes, input_list); + for (size_t i = 0; i < input_index.size(); ++i) { + auto tmp_input = input_index[i]; + auto type_id = AnfAlgo::GetInputDeviceDataType(tmp_input.first, tmp_input.second.first); + std::string dtype = TypeId2String(type_id); + nlohmann::json input_desc_json; + input_desc_json[kTensorName] = GetTensorName(node_json_map[tmp_input.first], kInputDesc, tmp_input.second); + input_desc_json[kDataType] = dtype; + input_desc_json[kShape] = AnfAlgo::GetInputDeviceShape(tmp_input.first, tmp_input.second.first); + inputs_json.emplace_back(std::vector{input_desc_json}); + } + fused_node_json[kInputDesc] = inputs_json; + + nlohmann::json outputs_json; + auto output_index = GetOutputIndex(anf_nodes, input_list, output_list); + for (size_t i = 0; i < output_index.size(); ++i) { + auto tmp_output = output_index[i]; + bool found = false; + nlohmann::json output_desc_json; + for (size_t input_i = 0; input_i < input_list.size(); ++input_i) { + if (tmp_output.first == input_list[input_i]) { + output_desc_json = inputs_json[input_i][0]; + found = true; + break; + } + } + if (!found) { + auto type_id = AnfAlgo::GetOutputDeviceDataType(tmp_output.first, tmp_output.second); + std::string dtype = TypeId2String(type_id); + output_desc_json[kTensorName] = + 
GetTensorName(node_json_map[tmp_output.first], kOutputDesc, std::make_pair(0, tmp_output.second)); + output_desc_json[kDataType] = dtype; + auto output_shape = AnfAlgo::GetOutputDeviceShape(tmp_output.first, tmp_output.second); + if (output_shape.empty()) { + output_shape.push_back(1); + } + output_desc_json[kShape] = output_shape; + } + outputs_json.emplace_back(output_desc_json); + } + fused_node_json[kOutputDesc] = outputs_json; + + size_t hash_id = std::hash()(fused_node_json.dump()); + json_name_ = "Fused_"; + auto fg = anf_nodes[0]->func_graph(); + MS_EXCEPTION_IF_NULL(fg); + auto attr_val = fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); + if (attr_val != nullptr) { + auto fg_attr = GetValue(attr_val); + (void)json_name_.append(fg_attr).append("_"); + } + (void)json_name_.append(std::to_string(hash_id)); + fused_node_json["composite_graph"] = fg->ToString(); + fused_node_json["op"] = json_name_; + fused_node_json["platform"] = "AKG"; + fused_node_json["process"] = "aicore"; + fused_node_json["composite"] = true; + + kernel_json_ = fused_node_json.dump(); + + if (!GetIOSize(fused_node_json, &input_size_list_, &output_size_list_)) { + MS_LOG(ERROR) << "Calculate memory size failed."; + return false; + } + + return true; +} + +void GenParallelCompileFuncArgs(const std::vector &kernel_jsons, PyObject **p_args) { + MS_EXCEPTION_IF_NULL(p_args); + *p_args = PyTuple_New(PARALLEL_ARGS_SIZE); + + PyObject *arg1 = PyList_New(kernel_jsons.size()); + for (int i = 0; i < PyList_Size(arg1); ++i) { + PyList_SetItem(arg1, i, Py_BuildValue("s", kernel_jsons[i].c_str())); + } + PyObject *arg2 = Py_BuildValue("i", PROCESS_NUM); + PyObject *arg3 = Py_BuildValue("i", TIME_OUT); + + (void)PyTuple_SetItem(*p_args, 0, arg1); + (void)PyTuple_SetItem(*p_args, 1, arg2); + (void)PyTuple_SetItem(*p_args, 2, arg3); +} + +bool AkgOpParallelBuild(const std::vector> &build_args) { + // Remove cached nodes, gather unique nodes, and collect repeated nodes which need postprocessing. + std::vector jsons; + std::unordered_set json_name_set; + std::vector> repeat_nodes; + for (const auto &[builder, anf_node] : build_args) { + MS_EXCEPTION_IF_NULL(anf_node); + auto json_name = builder.json_name(); + MS_LOG(DEBUG) << "Akg start compile op: " << json_name; + auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); + if (cached_kernel_pack != nullptr) { + MS_LOG(DEBUG) << "Use cached kernel, json_name_[" << json_name << "], fullname_with_scope[" + << anf_node->fullname_with_scope() << "]."; + auto kernel_mod_ptr = std::make_shared(cached_kernel_pack); + kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); + kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); + AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); + continue; + } + + if (json_name_set.count(json_name) != 0) { + repeat_nodes.push_back({builder, anf_node}); + continue; + } + json_name_set.insert(json_name); + auto node_json = builder.kernel_json(); + kernel::SaveJsonInfo(json_name, node_json); + jsons.push_back(node_json); + } + + // No nodes need to be compiled! + if (jsons.empty()) { + return true; + } + + // Try to call the python method to compile the nodes in parallel. 
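The composite kernel name assembled above also acts as the cache and de-duplication key in AkgOpParallelBuild: graph kernels whose fused JSON hashes to the same value are compiled once and reused. A rough, self-contained sketch of the naming scheme only (the attr value and the empty op_desc are invented for illustration):

#include <functional>
#include <iostream>
#include <string>
#include "nlohmann/json.hpp"

int main() {
  nlohmann::json fused_node_json;
  fused_node_json["op_desc"] = nlohmann::json::array();  // fused op descriptions would go here

  std::string json_name = "Fused_";
  std::string fg_attr = "GraphKernel_1";  // hypothetical value of the graph_kernel attr
  (void)json_name.append(fg_attr).append("_");

  // Identical fused JSON -> identical hash -> identical kernel name, so only one compile.
  size_t hash_id = std::hash<std::string>()(fused_node_json.dump());
  (void)json_name.append(std::to_string(hash_id));

  std::cout << json_name << std::endl;  // e.g. Fused_GraphKernel_1_<hash>
  return 0;
}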
+ PyObject *p_module = nullptr; + PyObject *p_func = nullptr; + PyObject *p_arg = nullptr; + PyObject *p_res = nullptr; + + p_module = PyImport_ImportModule(kMultiProcModule); + if (p_module == nullptr) { + MS_LOG(ERROR) << "Failed to import [" << kMultiProcModule << "]."; + return false; + } + + p_func = PyObject_GetAttrString(p_module, kCompileAkgKernelParallelFunc); + GenParallelCompileFuncArgs(jsons, &p_arg); + MS_LOG(DEBUG) << "Call function [" << kCompileAkgKernelParallelFunc << "], try to compile " << jsons.size() + << " Akg kernels parallelly."; + p_res = PyEval_CallObject(p_func, p_arg); + if (p_res == nullptr) { + PyErr_Print(); + MS_LOG(ERROR) << "No ret got, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n(" + << AkgKernelBuild::PyObjectToStr(p_arg) << ")."; + return false; + } + if (PyObject_IsTrue(p_res) != 1) { + PyErr_Print(); + MS_LOG(ERROR) << "Illegal ret, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n(" + << AkgKernelBuild::PyObjectToStr(p_arg) << ")."; + return false; + } + + // All unique done here, cache them and set kernel. + for (const auto &[builder, anf_node] : build_args) { + auto json_name = builder.json_name(); + auto new_kernel_pack = tbe::TbeUtils::InsertCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); + if (new_kernel_pack == nullptr) { + MS_LOG(ERROR) << "Insert to cache failed, json_name_[" << json_name << "], fullname_with_scope[" + << anf_node->fullname_with_scope() << "]."; + return false; + } + auto kernel_mod_ptr = std::make_shared(new_kernel_pack); + kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); + kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); + AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); + MS_LOG(DEBUG) << "Akg compile " << json_name << " kernel and insert cache successfully!"; + } + + // Handle repeated nodes. 
+ for (const auto &[builder, anf_node] : repeat_nodes) { + auto node_json = builder.kernel_json(); + auto json_name = builder.json_name(); + auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); + if (cached_kernel_pack == nullptr) return false; + MS_LOG(INFO) << "Use just compiled kernel, json_name_[" << json_name << "], fullname_with_scope[" + << anf_node->fullname_with_scope() << "]."; + auto kernel_mod_ptr = std::make_shared(cached_kernel_pack); + kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); + kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); + AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); + } + + return true; +} + +bool AkgAscendKernelParallelBuild(const std::vector &anf_nodes) { + std::vector> json_and_node; + for (const auto &anf_node : anf_nodes) { + MS_EXCEPTION_IF_NULL(anf_node); + AkgAscendKernelBuilder akg_cce_kernel_builder; + KernelPackPtr kernel_pack = nullptr; + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::IsGraphKernel(cnode)) { + auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(cnode); + auto mng = func_graph->manager(); + if (mng == nullptr) { + mng = Manage(func_graph, true); + func_graph->set_manager(mng); + } + MS_EXCEPTION_IF_NULL(func_graph); + std::vector node_list; + std::vector input_list; + std::vector output_list; + std::string op_name = AnfAlgo::GetCNodeName(anf_node); + MS_LOG(INFO) << "Akg start compile composite op[" << op_name << "]"; + GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); + if (!akg_cce_kernel_builder.CollectFusedJson(node_list, input_list, output_list)) { + MS_EXCEPTION(UnknownError) << "Akg build failed composite op[" << op_name << "]."; + } + } else { + if (!akg_cce_kernel_builder.CollectJson(anf_node)) { + MS_EXCEPTION(UnknownError) << "Akg build failed op[" << AnfAlgo::GetCNodeName(anf_node) << "]."; + } + } + json_and_node.push_back({akg_cce_kernel_builder, anf_node}); + } + + if (json_and_node.empty()) { + MS_LOG(DEBUG) << "There is no kernel needed to be compiled."; + return true; + } + + return AkgOpParallelBuild(json_and_node); +} + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h new file mode 100644 index 00000000000..619b583fdef --- /dev/null +++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ +#define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ + +#include +#include +#include +#include "ir/anf.h" +#include "kernel/kernel.h" +#include "kernel/akg/akg_kernel_build.h" + +namespace mindspore { +namespace kernel { +class AkgAscendKernelBuilder : public AkgKernelBuild { + public: + AkgAscendKernelBuilder() = default; + ~AkgAscendKernelBuilder() = default; + + bool CollectJson(const AnfNodePtr &anf_node); + bool CollectFusedJson(const std::vector &anf_nodes, const std::vector &input_list, + const std::vector &output_list); + std::string json_name() const { return json_name_; } + std::string kernel_json() const { return kernel_json_; } + const std::vector &input_size_list() const { return input_size_list_; } + const std::vector &output_size_list() const { return output_size_list_; } + + private: + std::string kernel_json_; + std::vector input_size_list_; + std::vector output_size_list_; +}; + +bool AkgAscendKernelParallelBuild(const std::vector &anf_nodes); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc new file mode 100644 index 00000000000..24324f70e04 --- /dev/null +++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc @@ -0,0 +1,181 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/akg/ascend/akg_ascend_kernel_mod.h" +#include +#include +#include +#include +#include +#include +#include +#include "nlohmann/json.hpp" +#include "runtime/rt.h" +#include "utils/log_adapter.h" +#include "utils/convert_utils.h" + +namespace mindspore { +namespace kernel { +using std::fstream; +using std::map; +using std::mutex; +using std::string; +using TbeTaskInfoPtr = std::shared_ptr; +using tbe::KernelManager; +constexpr uint32_t DEFAULT_BLOCK_DIM = 1; +/** + * @brief infotable contain func_stub\blockdim\kernel file buffer + */ +AkgKernelMod::AkgKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {} + +void AkgKernelMod::SetInputSizeList(const std::vector &size_list) { input_size_list_ = size_list; } + +void AkgKernelMod::SetOutputSizeList(const std::vector &size_list) { output_size_list_ = size_list; } + +void AkgKernelMod::SetWorkspaceSizeList(const std::vector &size_list) { workspace_size_list_ = size_list; } + +const std::vector &AkgKernelMod::GetInputSizeList() const { return input_size_list_; } + +const std::vector &AkgKernelMod::GetOutputSizeList() const { return output_size_list_; } + +const std::vector &AkgKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; } + +void DumpData(const std::vector &inputs, const std::vector &outputs) { + const char *dump_data = getenv("MS_KERNEL_DUMP_DATA"); + if (dump_data) { + int idx = 0; + for (const auto &x : inputs) { + std::vector buf(x->size); + if (RT_ERROR_NONE != rtMemcpy(buf.data(), buf.size(), reinterpret_cast(x->addr), x->size, + RT_MEMCPY_DEVICE_TO_HOST)) { + MS_LOG(WARNING) << "Call runtime rtMemcpy error."; + return; + } + + std::string file_name("input_"); + file_name += std::to_string(idx); + std::ofstream file(file_name, std::ios::binary); + if (file.is_open()) { + (void)file.write(buf.data(), SizeToLong(buf.size())); + file.close(); + idx++; + } else { + MS_LOG(ERROR) << "Open file failed."; + return; + } + } + idx = 0; + for (const auto &x : outputs) { + std::vector buf(x->size); + if (RT_ERROR_NONE != rtMemcpy(buf.data(), buf.size(), reinterpret_cast(x->addr), x->size, + RT_MEMCPY_DEVICE_TO_HOST)) { + MS_LOG(WARNING) << "Call runtime rtMemcpy error."; + return; + } + + std::string file_name("output_"); + file_name += std::to_string(idx); + std::ofstream file(file_name, std::ios::binary); + if (file.is_open()) { + (void)file.write(buf.data(), SizeToLong(buf.size())); + file.close(); + idx++; + } else { + MS_LOG(ERROR) << "Open file failed."; + return; + } + } + } +} + +bool AkgKernelMod::Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) { + if (stream_ptr == 0) { + MS_LOG(ERROR) << "stream_ptr should not be nullptr."; + return false; + } + + if (kernel_pack_ == nullptr) { + MS_LOG(ERROR) << "kernel pack should not be nullptr."; + return false; + } + + uint32_t block_dim = DEFAULT_BLOCK_DIM; // default blockdim equal to 1. + auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); + if (func_stub == 0) { + MS_LOG(ERROR) << "GenFuncStub failed."; + return false; + } + + // pack all addresses into a vector. 
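The address packing that follows (and the same step in GenTask further down) boils down to flattening the input and output device addresses into one contiguous argument table that the Ascend runtime receives. A stand-alone sketch of that step, where Address is a simplified stand-in for the framework's type rather than the real definition:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <memory>
#include <vector>

struct Address {  // simplified stand-in for the framework's Address/AddressPtr
  void *addr;
  std::size_t size;
};
using AddressPtr = std::shared_ptr<Address>;

int main() {
  float in_buf = 0.0f, out_buf = 0.0f;
  std::vector<AddressPtr> inputs{std::make_shared<Address>(Address{&in_buf, sizeof(in_buf)})};
  std::vector<AddressPtr> outputs{std::make_shared<Address>(Address{&out_buf, sizeof(out_buf)})};

  // Pack all addresses into a single flat vector, inputs first, then outputs;
  // its data() pointer and byte size are what the kernel launch call consumes.
  std::vector<void *> runtime_args;
  (void)std::transform(inputs.begin(), inputs.end(), std::back_inserter(runtime_args),
                       [](const AddressPtr &input) -> void * { return input->addr; });
  (void)std::transform(outputs.begin(), outputs.end(), std::back_inserter(runtime_args),
                       [](const AddressPtr &output) -> void * { return output->addr; });
  std::cout << runtime_args.size() << " args, " << sizeof(void *) * runtime_args.size() << " bytes" << std::endl;
  return 0;
}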
+ std::vector runtime_args; + (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtime_args), + [](const AddressPtr &input) -> void * { return input->addr; }); + (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtime_args), + [](const AddressPtr &output) -> void * { return output->addr; }); + + rtL2Ctrl_t *l2ctrl = nullptr; + auto stream = reinterpret_cast(stream_ptr); + if (RT_ERROR_NONE != rtKernelLaunch(reinterpret_cast(func_stub), block_dim, runtime_args.data(), + SizeToUint(sizeof(void *) * runtime_args.size()), l2ctrl, stream)) { + MS_LOG(ERROR) << "Call runtime rtKernelLaunch error."; + return false; + } + + DumpData(inputs, outputs); + + return true; +} + +std::vector AkgKernelMod::GenTask(const std::vector &inputs, const std::vector &, + const std::vector &outputs, uint32_t stream_id) { + if (kernel_pack_ == nullptr) { + MS_LOG(EXCEPTION) << "kernel pack should not be nullptr."; + } + + std::vector args; + uint32_t args_size = 0; + std::vector sm_desc; + void *binary = nullptr; + uint32_t binary_size = 0; + std::vector meta_data; + std::vector input_data_addrs; + std::vector output_data_addrs; + std::vector workspace_addrs; + + // pack all addresses into a vector. + (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs), + [](const AddressPtr &input) -> void * { return input->addr; }); + (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs), + [](const AddressPtr &output) -> void * { return output->addr; }); + + uint32_t block_dim = DEFAULT_BLOCK_DIM; // default blockdim equal to 1. + auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); + if (func_stub == 0) { + MS_LOG(EXCEPTION) << "GenFuncStub failed."; + } + + std::string stub_func = KernelManager::GetStubFuncName(kernel_pack_); + + MS_LOG(DEBUG) << "The block_dim is:" << block_dim; + + TbeTaskInfoPtr task_info_ptr = make_shared( + stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs, + output_data_addrs, workspace_addrs); + return {task_info_ptr}; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h new file mode 100644 index 00000000000..18d342f6299 --- /dev/null +++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ +#define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ +#include +#include +#include +#include "kernel/ascend_kernel_mod.h" +#include "kernel/tbe/tbe_utils.h" + +namespace mindspore { +namespace kernel { +class AkgKernelMod : public AscendKernelMod { + public: + explicit AkgKernelMod(const KernelPackPtr &kernel_pack); + ~AkgKernelMod() final {} + + void SetInputSizeList(const std::vector &size_list); + void SetOutputSizeList(const std::vector &size_list); + void SetWorkspaceSizeList(const std::vector &size_list); + const std::vector &GetInputSizeList() const override; + const std::vector &GetOutputSizeList() const override; + const std::vector &GetWorkspaceSizeList() const override; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + std::vector GenTask(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, uint32_t stream_id) override; + + private: + KernelPackPtr kernel_pack_; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; + +using AkgKernelModPtr = std::shared_ptr; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ diff --git a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc b/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc index 2bb2cfd2678..534e355802c 100644 --- a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc +++ b/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc @@ -18,7 +18,7 @@ #include #include #include "kernel/kernel.h" -#include "kernel/akg/akgkernelbuild.h" +#include "kernel/akg/akg_kernel_build.h" #include "kernel/akg/gpu/akg_gpu_kernel_mod.h" #include "common/utils.h" diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/kernel/common_utils.cc index e80037fa6ec..3de03069ed4 100644 --- a/mindspore/ccsrc/kernel/common_utils.cc +++ b/mindspore/ccsrc/kernel/common_utils.cc @@ -23,6 +23,11 @@ #include "nlohmann/json.hpp" #include "session/anf_runtime_algorithm.h" #include "common/utils.h" +#include "ir/manager.h" +#include "ir/meta_tensor.h" +#include "ir/func_graph.h" +#include "operator/ops.h" +#include "utils/graph_utils.h" namespace mindspore { namespace kernel { @@ -48,12 +53,6 @@ const std::map type_id_str_map = { {TypeId::kNumberTypeBool, "bool"}, }; -const std::map DATATYPE_STRING_MAP{ - {"Float32", "float32"}, {"Float16", "float16"}, {"Int8", "int8"}, {"Int16", "int16"}, - {"UInt16", "uint16"}, {"UInt8", "uint8"}, {"Int32", "int32"}, {"UInt32", "uint32"}, - {"Int64", "int64"}, {"UInt64", "uint64"}, {"Bool_", "bool"}, {"Float64", "double"}, -}; - const std::unordered_map dtype_shortdtype_map_ = { {"float16", "f16"}, {"float32", "f32"}, {"float64", "f64"}, {"int8", "i8"}, {"int16", "i16"}, {"int32", "i32"}, {"int64", "i64"}, {"uint8", "u8"}, {"uint16", "u16"}, {"uint32", "u32"}, {"uint64", "u64"}, {"bool", "bool"}, @@ -243,14 +242,6 @@ TypeId DtypeToTypeId(const std::string &dtypes) { } } -std::string Dtype2String(const std::string &dtypes) { - auto iter = DATATYPE_STRING_MAP.find(dtypes); - if (iter == DATATYPE_STRING_MAP.end()) { - MS_EXCEPTION(ArgumentError) << "Illegal input dtype:" << dtypes; - } - return iter->second; -} - std::string TypeId2String(TypeId type_id) { auto iter = type_id_str_map.find(type_id); if (iter == type_id_str_map.end()) { @@ -361,7 +352,7 @@ bool SetOutputKernelBuilderInfo(const 
std::vector> &ou output_num = 1; } else { if (output_idx < real_output_num) { - MS_LOG(INFO) << "Set output kernel builder info, output type is optional, output index is :" << output_idx; + MS_LOG(DEBUG) << "Set output kernel builder info, output type is optional, output index is :" << output_idx; output_num = 1; } } @@ -403,7 +394,7 @@ void SetKernelBuildInfo(const std::shared_ptrSetKernelType(AUTO_DIFF_KERNEL); + builder->SetKernelType(AKG_KERNEL); } else if (imply_type == kAICPU) { builder->SetKernelType(AICPU_KERNEL); } else { @@ -634,5 +625,256 @@ void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradie } unique_grad->indices_size_ = unique_indices_size + 1; } + +std::pair GetKernelInput(const AnfNodePtr &anf_node, size_t index) { + MS_EXCEPTION_IF_NULL(anf_node); + + if (index >= AnfAlgo::GetInputTensorNum(anf_node)) { + MS_EXCEPTION(ArgumentError) << "Index is out of the size of anf_node inputs."; + } + + auto cnode = anf_node->cast(); + if (cnode == nullptr) { + return AnfAlgo::VisitKernel(anf_node, 0); + } else { + return AnfAlgo::VisitKernel(anf_node->cast()->input(index + 1), 0); + } +} + +std::vector>> GetInputIndex(const std::vector &node_list, + const std::vector &input_list) { + std::vector>> input_index; + for (size_t i = 0; i < input_list.size(); ++i) { + auto const &input = input_list[i]; + MS_EXCEPTION_IF_NULL(input); + bool found = false; + // using NodeUsersMap = std::unordered_map>>; + auto mng = input->func_graph()->manager(); + MS_EXCEPTION_IF_NULL(mng); + const NodeUsersMap &users = mng->node_users(); + auto input_users = users.find(input); + if (input_users == users.end() || input_users->second.empty()) { + MS_EXCEPTION(ArgumentError) << "Input [" << i << "][" << input->DebugString(2) << "] of [" + << input->func_graph()->ToString() << "] has no users."; + } + + for (auto const &input_user : input_users->second) { + for (auto const &anf_node : node_list) { + if (anf_node != input_user.first) { + continue; + } + + std::vector dyn_input_sizes; + auto prim = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(prim); + if (prim->GetAttr(kAttrDynInputSizes) != nullptr) { + dyn_input_sizes = GetValue>(prim->GetAttr(kAttrDynInputSizes)); + } + + if (dyn_input_sizes.empty()) { + input_index.push_back(std::make_pair(anf_node, std::make_pair(IntToSize(input_user.second - 1), 0))); + found = true; + break; + } else { + int used_as_idx = input_user.second - 1; + int accum_idx = 0; + size_t dyn_i = 0; + for (; dyn_i < dyn_input_sizes.size(); ++dyn_i) { + accum_idx += dyn_input_sizes[dyn_i]; + if (used_as_idx < accum_idx) { + input_index.push_back(std::make_pair( + anf_node, std::make_pair(dyn_i, IntToSize(used_as_idx - (accum_idx - dyn_input_sizes[dyn_i]))))); + break; + } + } + if (dyn_i != dyn_input_sizes.size()) { + found = true; + break; + } + } + } + if (found) { + break; + } + } + + if (!found) { + MS_EXCEPTION(ArgumentError) << "Input [" << i << "][" << input->DebugString(2) << "] of [" + << input->func_graph()->ToString() << "] found no related kernel info."; + } + } + return input_index; +} + +std::vector> GetOutputIndex(const std::vector &node_list, + const std::vector &input_list, + const std::vector &output_list) { + std::vector> output_index; + for (size_t i = 0; i < output_list.size(); ++i) { + auto const &output = output_list[i]; + MS_EXCEPTION_IF_NULL(output); + bool found = false; + auto pree_node = AnfAlgo::VisitKernel(output, 0); + + auto pos = std::find(std::begin(node_list), std::end(node_list), pree_node.first); + if (pos 
!= std::end(node_list)) { + output_index.push_back(pree_node); + continue; + } + + auto ret = std::find(std::begin(input_list), std::end(input_list), pree_node.first); + if (ret != std::end(input_list)) { + output_index.push_back(std::make_pair(pree_node.first, 0)); + found = true; + } + + if (!found) { + MS_EXCEPTION(ArgumentError) << "Output [" << i << "][" << output->DebugString(2) << "] of [" + << output->func_graph()->ToString() << "] found no related kernel info."; + } + } + return output_index; +} + +void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector *node_list) { + MS_EXCEPTION_IF_NULL(node_list); + + MS_EXCEPTION_IF_NULL(func_graph); + + std::vector node_lists = TopoSort(func_graph->get_return()); + for (auto const &node : node_lists) { + if (!AnfAlgo::IsRealKernel(node) || !node->isa()) { + continue; + } + + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + + if (IsValueNode(cnode->input(kAnfPrimitiveIndex))) { + node_list->push_back(node); + } + } +} + +void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector *node_list, + std::vector *input_list, std::vector *output_list) { + MS_EXCEPTION_IF_NULL(node_list); + MS_EXCEPTION_IF_NULL(input_list); + MS_EXCEPTION_IF_NULL(output_list); + MS_EXCEPTION_IF_NULL(func_graph); + + GetValidKernelNodes(func_graph, node_list); + + auto parameters = func_graph->parameters(); + input_list->insert(input_list->begin(), parameters.begin(), parameters.end()); + + auto func_output = func_graph->output(); + MS_EXCEPTION_IF_NULL(func_output); + if (func_output->isa()) { + // multi output. + auto cnode = func_output->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto input0 = cnode->input(kAnfPrimitiveIndex); + MS_EXCEPTION_IF_NULL(input0); + if (IsPrimitive(input0, prim::kPrimMakeTuple)) { + for (size_t input_idx = 1; input_idx < cnode->inputs().size(); ++input_idx) { + auto input_node = cnode->input(input_idx); + MS_EXCEPTION_IF_NULL(input_node); + output_list->push_back(AnfAlgo::VisitKernel(input_node, 0).first); + } + } else { + // single output. + output_list->push_back(AnfAlgo::VisitKernel(func_output, 0).first); + } + } else { + // single output. + output_list->push_back(AnfAlgo::VisitKernel(func_output, 0).first); + } +} + +bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json) { + MS_EXCEPTION_IF_NULL(anf_node); + MS_EXCEPTION_IF_NULL(node_json); + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (input_idx + 1 >= cnode->size()) { + MS_EXCEPTION(ArgumentError) << "input_idx [" << input_idx << "] is out of index of inputs of [" + << cnode->inputs().size() << "][" << cnode->DebugString() << "]"; + } + + auto input_node = cnode->input(input_idx + 1); + if (!IsValueNode(input_node)) { + return false; + } + + auto tensor = GetValueNode(input_node); + if (tensor == nullptr) { + return false; + } + + auto type_id = tensor->data_type(); + auto *data = tensor->data_c(); + MS_EXCEPTION_IF_NULL(data); + if (tensor->DataDim() > 1 || tensor->DataSize() != 1) { + // not const tensor. 
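GetInputTensorValue, whose remaining branches follow, folds a scalar constant input directly into the kernel JSON: the element value is written into the input descriptor and, on the caller's side, the shape is cleared and then padded back to [1]. A self-contained sketch of the resulting descriptor, with made-up field values (not taken from a real op):

#include <iostream>
#include <vector>
#include "nlohmann/json.hpp"

int main() {
  // Hypothetical descriptor for an input that turns out to be a scalar constant.
  nlohmann::json input_desc;
  input_desc["data_type"] = "float32";
  input_desc["name"] = "x";
  input_desc["tensor_name"] = "input_1";

  bool folded = true;                // i.e. GetInputTensorValue returned true for this input
  std::vector<int> shape = {1, 16};  // device shape before folding (made up)
  if (folded) {
    input_desc["value"] = 0.5f;      // first (only) element of the constant tensor
    shape.clear();                   // the original shape is dropped ...
  }
  if (shape.empty()) {
    shape.push_back(1);              // ... and replaced by [1]
  }
  input_desc["shape"] = shape;
  std::cout << input_desc.dump(2) << std::endl;
  return 0;
}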
+ MS_LOG(WARNING) << "We take first value of tensor whose datasize != 1, [" << input_node->DebugString(2) << "]"; + } + + if (type_id == kFloat32->type_id()) { + float *val = static_cast(data); + MS_EXCEPTION_IF_NULL(val); + (*node_json)["value"] = val[0]; + MS_LOG(DEBUG) << "Value of tensor[" << cnode->DebugString() << "] is [float32][" << *val << "]."; + return true; + } else if (type_id == kFloat16->type_id()) { + float16 *val = static_cast(data); + MS_EXCEPTION_IF_NULL(val); + (*node_json)["value"] = static_cast(val[0]); + MS_LOG(INFO) << "Value of tensor[" << cnode->DebugString() << "] is [float16][" << *val << "]."; + return true; + } else if (type_id == kInt32->type_id()) { + int *val = static_cast(data); + MS_EXCEPTION_IF_NULL(val); + (*node_json)["value"] = val[0]; + MS_LOG(INFO) << "Value of tensor[" << cnode->DebugString() << "] is [int32][" << *val << "]."; + return true; + } + MS_LOG(ERROR) << "Unknown value type of tensor[" << cnode->DebugString() << "]"; + return false; +} + +void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector> *node_list) { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(node_list); + auto output = func_graph->output(); + MS_EXCEPTION_IF_NULL(output); + if (AnfAlgo::IsRealKernel(output)) { + // single output. + node_list->push_back(std::make_pair(output, 0)); + return; + } else if (IsPrimitiveCNode(output, prim::kPrimMakeTuple)) { + auto output_cnode = output->cast(); + MS_EXCEPTION_IF_NULL(output_cnode); + // multi output. + auto &inputs = output_cnode->inputs(); + for (size_t i = 1; i < inputs.size(); ++i) { + auto in_with_idx = AnfAlgo::VisitKernel(inputs[i], 0); + node_list->push_back(in_with_idx); + } + return; + } + MS_EXCEPTION(ArgumentError) << "Unknown output type: " << output->DebugString(2) + << " of graph: " << func_graph->ToString(); +} + +bool IsWeightBoundary(const AnfNodePtr &node) { + if (node->isa()) { + return true; + } + if (node->isa() && AnfAlgo::IsParameterWeight(node->cast())) { + return true; + } + return false; +} } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/common_utils.h b/mindspore/ccsrc/kernel/common_utils.h index c93c7d0ac92..244d8e4e9aa 100644 --- a/mindspore/ccsrc/kernel/common_utils.h +++ b/mindspore/ccsrc/kernel/common_utils.h @@ -20,9 +20,12 @@ #include #include #include +#include #include #include #include +#include +#include #include "kernel/kernel.h" #include "kernel/oplib/opinfo.h" #include "kernel/kernel_build_info.h" @@ -79,13 +82,11 @@ bool CheckCache(const std::string &kernel_name); KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor); KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor); TypeId DtypeToTypeId(const std::string &dtypes); -std::string Dtype2String(const std::string &dtypes); std::string Dtype2ShortType(const std::string &dtypes); std::string TypeId2String(TypeId type_id); size_t GetDtypeNbyte(const std::string &dtypes); bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr &op_info_ptr, Processor processor, std::vector> *const kernel_info_list); -bool IsAtomicNode(const CNodePtr &kernel_node); void SaveJsonInfo(const std::string &json_name, const std::string &info); std::string GetProcessor(const AnfNodePtr &anf_node); bool IsSameShape(const std::vector &shape_a, const std::vector &shape_b); @@ -94,6 +95,18 @@ void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGr size_t outer_dim); void ReduceSparseGradient(const 
SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, size_t outer_dim); +std::pair GetKernelInput(const AnfNodePtr &anf_node, size_t index); +std::vector>> GetInputIndex(const std::vector &node_list, + const std::vector &input_list); +std::vector> GetOutputIndex(const std::vector &node_list, + const std::vector &input_list, + const std::vector &output_list); +void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector *node_list, + std::vector *input_list, std::vector *output_list); +void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector *node_list); +bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json); +void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector> *node_list); +bool IsWeightBoundary(const AnfNodePtr &node); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/kash/kernel_pack.cc b/mindspore/ccsrc/kernel/kash/kernel_pack.cc index 31f81d5d022..79e2ab9dbb8 100644 --- a/mindspore/ccsrc/kernel/kash/kernel_pack.cc +++ b/mindspore/ccsrc/kernel/kash/kernel_pack.cc @@ -17,7 +17,7 @@ #include #include "mindspore/ccsrc/kernel/kernel.h" #include "kernel/kernel.h" -#include "kernel/akg/akgkernelbuild.h" +#include "kernel/akg/akg_kernel_build.h" #include "nlohmann/json.hpp" #include "securec/include/securec.h" #include "pipeline/parse/python_adapter.h" diff --git a/mindspore/ccsrc/kernel/kernel.h b/mindspore/ccsrc/kernel/kernel.h index 4adb3ea0250..7bccce49c3a 100644 --- a/mindspore/ccsrc/kernel/kernel.h +++ b/mindspore/ccsrc/kernel/kernel.h @@ -27,7 +27,7 @@ #include "utils/log_adapter.h" namespace mindspore { -enum KernelType : int { UNKNOWN_KERNEL_TYPE = 0, AUTO_DIFF_KERNEL, AICPU_KERNEL, RT_KERNEL, HCCL_KERNEL, TBE_KERNEL }; +enum KernelType : int { UNKNOWN_KERNEL_TYPE = 0, AKG_KERNEL, AICPU_KERNEL, RT_KERNEL, HCCL_KERNEL, TBE_KERNEL }; namespace kernel { diff --git a/mindspore/ccsrc/kernel/kernel_query.cc b/mindspore/ccsrc/kernel/kernel_query.cc index f96d0cbebf3..6538c28765c 100755 --- a/mindspore/ccsrc/kernel/kernel_query.cc +++ b/mindspore/ccsrc/kernel/kernel_query.cc @@ -21,6 +21,7 @@ #include "kernel/rts/rt_kernel_info.h" #include "kernel/hccl/hccl_kernel_metadata.h" #include "kernel/tbe/tbe_kernel_select/tbe_kernel_select.h" +#include "kernel/akg/akg_kernel_metadata.h" #include "session/anf_runtime_algorithm.h" namespace mindspore { @@ -59,10 +60,14 @@ void FilterInvalidKernelInfo(const CNodePtr &kernel_node, } } } // namespace -void KernelQuery(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { + +void KernelQueryAll(const CNodePtr &kernel_node, + std::vector> *kernel_info_list) { MS_EXCEPTION_IF_NULL(kernel_node); MS_EXCEPTION_IF_NULL(kernel_info_list); + TbeMetadataInfo(kernel_node, kernel_info_list); + if (kernel_info_list->empty()) { AicpuMetadataInfo(kernel_node, kernel_info_list); if (!kernel_info_list->empty()) { @@ -82,6 +87,28 @@ void KernelQuery(const CNodePtr &kernel_node, std::vectorempty()) { MS_LOG(EXCEPTION) << "Op " << kernel_node->DebugString() << "kernel query fail!"; } +} + +void KernelQuery(const CNodePtr &kernel_node, std::vector> *kernel_info_list, + KernelType kernel_type) { + MS_EXCEPTION_IF_NULL(kernel_node); + MS_EXCEPTION_IF_NULL(kernel_info_list); + + std::string op_name = AnfAlgo::GetCNodeName(kernel_node); + + switch (kernel_type) { + case KernelType::AKG_KERNEL: + AkgMetadataInfo(kernel_node, kernel_info_list); + break; + default: + KernelQueryAll(kernel_node, kernel_info_list); + break; + } 
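  // Editor's note (illustrative sketch, not part of the original patch): the new
  // kernel_type parameter defaults to UNKNOWN_KERNEL_TYPE, so existing callers keep
  // the TBE -> AICPU -> RT -> HCCL lookup performed by KernelQueryAll, while
  // graph-kernel nodes can request AKG build info explicitly. A hypothetical caller
  // (the IsGraphKernel helper name is an assumption):
  //
  //   std::vector<std::shared_ptr<kernel::KernelBuildInfo>> build_infos;
  //   if (AnfAlgo::IsGraphKernel(cnode)) {
  //     kernel::KernelQuery(cnode, &build_infos, KernelType::AKG_KERNEL);
  //   } else {
  //     kernel::KernelQuery(cnode, &build_infos);  // falls back to KernelQueryAll
  //   }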
+ + if (kernel_info_list->empty()) { + MS_EXCEPTION(NotExistsError) << "Op[" << kernel_node->DebugString() << "] kernel query fail!"; + } + // check output FilterInvalidKernelInfo(kernel_node, kernel_info_list); } diff --git a/mindspore/ccsrc/kernel/kernel_query.h b/mindspore/ccsrc/kernel/kernel_query.h index fe8696a9190..257b0cf0735 100644 --- a/mindspore/ccsrc/kernel/kernel_query.h +++ b/mindspore/ccsrc/kernel/kernel_query.h @@ -25,7 +25,8 @@ namespace mindspore { namespace kernel { -void KernelQuery(const CNodePtr &kernel_node, std::vector> *kernel_info_list); +void KernelQuery(const CNodePtr &kernel_node, std::vector> *kernel_info_list, + KernelType kernel_type = KernelType::UNKNOWN_KERNEL_TYPE); void AICPUQuery(const CNodePtr &kernel_node, std::vector> *kernel_info_list); bool IsSupportedByAICPU(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info); bool IsSupportedByAICore(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info); diff --git a/mindspore/ccsrc/kernel/oplib/oplib.cc b/mindspore/ccsrc/kernel/oplib/oplib.cc index 42ec534ae02..35bc4070260 100644 --- a/mindspore/ccsrc/kernel/oplib/oplib.cc +++ b/mindspore/ccsrc/kernel/oplib/oplib.cc @@ -272,8 +272,7 @@ std::shared_ptr OpLib::FindOp(const std::string &op_name, OpImplyType im auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); bool is_gpu = (context->device_target() == kGPUDevice); - if ((is_gpu && (imply_type == kTBE || imply_type == kAICPU)) || - (!is_gpu && (imply_type != kTBE && imply_type != kAICPU))) { + if (is_gpu && (imply_type == kTBE || imply_type == kAICPU)) { MS_LOG(ERROR) << "FindOp failed: opname: " << op_name << ", imply_type: " << ImplTypeToStr(imply_type) << ", current op num: " << op_info_.size(); return nullptr; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc index f1e827d6dd5..3007280a14e 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc @@ -347,7 +347,7 @@ static int TypeStrToDstType(const std::string &type_str) { ret = 4; } else if (type_str == "UInt64") { ret = 10; - } else if (type_str == "Bool_") { + } else if (type_str == "Bool") { ret = 12; } else { MS_LOG(INFO) << "Error type str is invailed: " << type_str; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc b/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc index ee9be221206..90c55572531 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc @@ -51,7 +51,7 @@ const std::map type_id_str_maps = { const std::map type_str_maps = { {"Float32", "float32"}, {"Float16", "float16"}, {"Int8", "int8"}, {"Int16", "int16"}, {"UInt16", "uint16"}, {"UInt8", "uint8"}, {"Int32", "int32"}, {"UInt32", "uint32"}, - {"Int64", "int64"}, {"UInt64", "uint64"}, {"Bool_", "int8"}, {"Float64", "float64"}, + {"Int64", "int64"}, {"UInt64", "uint64"}, {"Bool", "int8"}, {"Float64", "float64"}, }; const std::unordered_map type_nbyte_maps = { diff --git a/mindspore/ccsrc/operator/composite/composite.cc b/mindspore/ccsrc/operator/composite/composite.cc index 221d2b9aac8..75532b9fbdf 100644 --- a/mindspore/ccsrc/operator/composite/composite.cc +++ b/mindspore/ccsrc/operator/composite/composite.cc @@ -334,8 +334,8 @@ ArgsPairList HyperMap::Harmonize(const FuncGraphPtr &func_graph, const ArgsPairL FuncGraphPtr HyperMap::GenerateFromTypes(const TypePtrList &args_spec_list) { FuncGraphPtr ptrGraph = std::make_shared(); - 
ptrGraph->set_flags(FUNC_GRAPH_FLAG_CORE, true); - ptrGraph->set_flags(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER, true); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_CORE, true); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER, true); ptrGraph->debug_info()->set_name("hyper_map"); AnfNodePtr ptrFnArg = nullptr; @@ -389,7 +389,7 @@ FuncGraphPtr Tail::GenerateTupleFuncGraph(const abstract::AbstractTuplePtr &a_tu MS_EXCEPTION_IF_NULL(a_tuple); FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); ret->debug_info()->set_name("tail"); AnfNodePtr ptrTup = ret->add_parameter(); @@ -409,7 +409,7 @@ FuncGraphPtr Tail::GenerateListFuncGraph(const abstract::AbstractListPtr &a_list MS_EXCEPTION_IF_NULL(a_list); FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); ret->debug_info()->set_name("tail"); AnfNodePtr ptrList = ret->add_parameter(); @@ -481,10 +481,10 @@ FuncGraphPtr MakeTupleGradient::GenerateFuncGraph(const AbstractBasePtrList &arg grads.push_back(b->NewCNode({NewValueNode(prim::kPrimTupleGetItem), dout, NewValueNode(i)})); } - b->set_flags(FUNC_GRAPH_FLAG_CORE, true); + b->set_flag(FUNC_GRAPH_FLAG_CORE, true); b->set_output(b->NewCNode(grads)); - fg->set_flags(FUNC_GRAPH_FLAG_CORE, true); + fg->set_flag(FUNC_GRAPH_FLAG_CORE, true); fg->set_output(fg->NewCNode({NewValueNode(prim::kPrimMakeTuple), out, NewValueNode(b)})); (void)fg->transforms().emplace("primal", FuncGraphTransform(prim::kPrimMakeTuple)); return fg; @@ -504,7 +504,7 @@ FuncGraphPtr GradOperation::GetGrad(AnfNodePtr node, const AnfNodePtr &weights, const std::vector ¶ms_list, const std::vector &args, bool applyJ) { FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); auto weights_node = weights; if (weights == nullptr && !args.empty()) { @@ -625,7 +625,7 @@ FuncGraphPtr GradOperation::GenerateFuncGraph(const AbstractBasePtrList &args_sp std::ostringstream ss; ss << "grad{" << nparam << "}"; - dfBuilder->set_flags(FUNC_GRAPH_FLAG_CORE, true); + dfBuilder->set_flag(FUNC_GRAPH_FLAG_CORE, true); dfBuilder->debug_info()->set_name(ss.str()); ParameterPtr param_graph = dfBuilder->add_parameter(); @@ -671,7 +671,7 @@ FuncGraphPtr ListMap::GenerateFuncGraph(const AbstractBasePtrList &args_spec_lis } FuncGraphPtr fg_ptr = std::make_shared(); - fg_ptr->set_flags(FUNC_GRAPH_FLAG_CORE, true); + fg_ptr->set_flag(FUNC_GRAPH_FLAG_CORE, true); fg_ptr->debug_info()->set_name("list_map"); AnfNodePtr fn = fg_ptr->add_parameter(); @@ -741,7 +741,7 @@ void ListMap::MakeCond(const std::vector &lists, const FuncGraphPtr // cond = reduce(lambda a, b: g.apply(P.bool_and, a, b), hasnexts) FuncGraphPtr fgtrue_ptr = std::make_shared(); fgtrue_ptr->debug_info()->set_name("ftrue"); - fgtrue_ptr->set_flags(FUNC_GRAPH_FLAG_CORE, true); + fgtrue_ptr->set_flag(FUNC_GRAPH_FLAG_CORE, true); CNodePtr fgtrue_output_cnode = fgtrue_ptr->NewCNode({NewValueNode(fgnext_ptr), fn, resl}); auto inputs = fgtrue_output_cnode->inputs(); @@ -751,7 +751,7 @@ void ListMap::MakeCond(const std::vector &lists, const FuncGraphPtr FuncGraphPtr fgfalse_ptr = std::make_shared(); fgfalse_ptr->debug_info()->set_name("ffalse"); - fgfalse_ptr->set_flags(FUNC_GRAPH_FLAG_CORE, true); + fgfalse_ptr->set_flag(FUNC_GRAPH_FLAG_CORE, true); fgfalse_ptr->set_output(resl); AnfNodePtr output_cnode = fg_ptr->NewCNode({NewValueNode(prim::kPrimSwitch), 
NewValueNode(std::string("cond")), @@ -808,7 +808,7 @@ FuncGraphPtr TupleAdd::GenerateFuncGraph(const AbstractBasePtrList &args_spec_li } FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); AnfNodePtr p_tup_a = ret->add_parameter(); AnfNodePtr p_tup_b = ret->add_parameter(); @@ -912,7 +912,7 @@ FuncGraphPtr TupleSlice::GenerateFuncGraph(const AbstractBasePtrList &args_spec_ GenerateTupleSliceParameter(tuple, slice, &start_index, &stop_index, &step_value); FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); AnfNodePtr p_tuple = ret->add_parameter(); (void)ret->add_parameter(); @@ -941,7 +941,7 @@ FuncGraphPtr TupleGetItemTensor::GenerateFuncGraph(const AbstractBasePtrList &ar AbstractBasePtrList branches = branches_abs->elements(); if (branches.size() > 0 && branches[0] != nullptr && branches[0]->isa()) { FuncGraphPtr ret_graph = std::make_shared(); - ret_graph->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); AnfNodePtr functions = ret_graph->add_parameter(); auto index = ret_graph->add_parameter(); diff --git a/mindspore/ccsrc/operator/composite/do_signature.cc b/mindspore/ccsrc/operator/composite/do_signature.cc index 283afe5d5bc..3569662d292 100644 --- a/mindspore/ccsrc/operator/composite/do_signature.cc +++ b/mindspore/ccsrc/operator/composite/do_signature.cc @@ -304,7 +304,7 @@ FuncGraphPtr DoSignatureMetaFuncGraph::GenerateFuncGraph(const AbstractBasePtrLi } auto new_cnode = BuildNewCNode(func_graph, name_, function_, args_spec_list, func_graph->parameters()); func_graph->set_output(new_cnode); - func_graph->set_flags(FUNC_GRAPH_FLAG_CORE, true); + func_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); return func_graph; } } // namespace prim diff --git a/mindspore/ccsrc/operator/composite/list_append_operation.cc b/mindspore/ccsrc/operator/composite/list_append_operation.cc index b5a4fc626e9..236a5b7062a 100644 --- a/mindspore/ccsrc/operator/composite/list_append_operation.cc +++ b/mindspore/ccsrc/operator/composite/list_append_operation.cc @@ -35,7 +35,7 @@ FuncGraphPtr ListAppend::GenerateFuncGraph(const abstract::AbstractBasePtrList & MS_EXCEPTION_IF_NULL(arg0_list); FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); ret->debug_info()->set_name("append"); AnfNodePtr arg0_node = ret->add_parameter(); diff --git a/mindspore/ccsrc/operator/composite/map.cc b/mindspore/ccsrc/operator/composite/map.cc index 6752cfe0789..a054da5f4da 100644 --- a/mindspore/ccsrc/operator/composite/map.cc +++ b/mindspore/ccsrc/operator/composite/map.cc @@ -51,8 +51,8 @@ AnfNodePtr Map::FullMakeLeaf(const FuncGraphPtr &func_graph, const AnfNodePtr &f FuncGraphPtr Map::GenerateLeafFunc(const size_t &args_size) { // Generate func for leaf nodes FuncGraphPtr ptrGraph = std::make_shared(); - ptrGraph->set_flags(FUNC_GRAPH_FLAG_CORE, true); - ptrGraph->set_flags(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER, true); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_CORE, true); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER, true); ptrGraph->debug_info()->set_name("map"); AnfNodePtr ptrFnArg = nullptr; if (fn_leaf_ == nullptr) { @@ -237,8 +237,8 @@ AnfNodePtr Map::Make(const FuncGraphPtr &func_graph, const AnfNodePtr &fn_arg, c FuncGraphPtr Map::GenerateFromTypes(const TypePtrList &args_spec_list) { FuncGraphPtr ptrGraph = std::make_shared(); - 
ptrGraph->set_flags(FUNC_GRAPH_FLAG_CORE, true); - ptrGraph->set_flags(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER, true); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_CORE, true); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER, true); ptrGraph->debug_info()->set_name("map"); AnfNodePtr ptrFnArg = nullptr; diff --git a/mindspore/ccsrc/operator/composite/unpack_call.cc b/mindspore/ccsrc/operator/composite/unpack_call.cc index 6363d495c54..3993d415973 100644 --- a/mindspore/ccsrc/operator/composite/unpack_call.cc +++ b/mindspore/ccsrc/operator/composite/unpack_call.cc @@ -51,7 +51,7 @@ FuncGraphPtr UnpackCall::GenerateFuncGraph(const AbstractBasePtrList &args_spec_ (void)abstract::CheckArg(op_name, args_spec_list, 0); auto ret_graph = std::make_shared(); - ret_graph->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); AnfNodePtr fnNode = ret_graph->add_parameter(); std::vector elems; diff --git a/mindspore/ccsrc/operator/composite/zip_operation.cc b/mindspore/ccsrc/operator/composite/zip_operation.cc index 4d34163f28e..33e21da044a 100644 --- a/mindspore/ccsrc/operator/composite/zip_operation.cc +++ b/mindspore/ccsrc/operator/composite/zip_operation.cc @@ -57,7 +57,7 @@ FuncGraphPtr ZipOperation::GenerateFuncGraph(const AbstractBasePtrList &args_spe return (x->cast()->size() < y->cast()->size()); }); FuncGraphPtr ret_graph = std::make_shared(); - ret_graph->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); for (size_t idx = 0; idx < args_spec_list.size(); idx++) { (void)ret_graph->add_parameter(); } diff --git a/mindspore/ccsrc/operator/ops.cc b/mindspore/ccsrc/operator/ops.cc index cae61f64d0b..f86cbd7fd2a 100755 --- a/mindspore/ccsrc/operator/ops.cc +++ b/mindspore/ccsrc/operator/ops.cc @@ -50,6 +50,12 @@ const PrimitivePtr kPrimBoolNot = std::make_shared("bool_not"); const PrimitivePtr kPrimBoolAnd = std::make_shared("bool_and"); const PrimitivePtr kPrimBoolOr = std::make_shared("bool_or"); const PrimitivePtr kPrimBoolEq = std::make_shared("bool_eq"); +const PrimitivePtr kPrimGreater = std::make_shared("Greater"); +const PrimitivePtr kPrimGreaterEqual = std::make_shared("GreaterEqual"); +const PrimitivePtr kPrimLess = std::make_shared("Less"); +const PrimitivePtr kPrimLessEqual = std::make_shared("LessEqual"); +const PrimitivePtr kPrimEqual = std::make_shared("Equal"); +const PrimitivePtr kPrimNotEqual = std::make_shared("NotEqual"); // Type introspection const PrimitivePtr kPrimTypeOf = std::make_shared("typeof"); @@ -166,17 +172,20 @@ const PrimitivePtr kPrimMul = std::make_shared("Mul"); const PrimitivePtr kPrimMinimum = std::make_shared("Minimum"); const PrimitivePtr kPrimMaximum = std::make_shared("Maximum"); const PrimitivePtr kPrimSquare = std::make_shared("Square"); -const PrimitivePtr kPrimEqual = std::make_shared("Equal"); -const PrimitivePtr kPrimLess = std::make_shared("Less"); -const PrimitivePtr kPrimLessEqual = std::make_shared("LessEqual"); const PrimitivePtr kPrimCumSum = std::make_shared("CumSum"); const PrimitivePtr kPrimCumProd = std::make_shared("CumProd"); const PrimitivePtr kPrimSubscalar = std::make_shared("Subscalar"); const PrimitivePtr kPrimInplaceAdd = std::make_shared("InplaceAdd"); const PrimitivePtr kPrimInplaceSub = std::make_shared("InplaceSub"); +const PrimitivePtr kPrimPow = std::make_shared("Pow"); +const PrimitivePtr kPrimRealDiv = std::make_shared("RealDiv"); +const PrimitivePtr kPrimSqrt = std::make_shared("Sqrt"); +const PrimitivePtr kPrimReciprocal = 
std::make_shared("Reciprocal"); +const PrimitivePtr kPrimExpandDims = std::make_shared("ExpandDims"); // NN const PrimitivePtr kPrimFlatten = std::make_shared("Flatten"); +const PrimitivePtr kPrimSoftmax = std::make_shared("Softmax"); const PrimitivePtr kPrimLogSoftmax = std::make_shared("LogSoftmax"); const PrimitivePtr kPrimLogSoftmaxGrad = std::make_shared("LogSoftmaxGrad"); const PrimitivePtr kPrimTanh = std::make_shared("Tanh"); @@ -253,6 +262,7 @@ const PrimitivePtr kPrimInDict = std::make_shared("in_dict"); const PrimitivePtr kPrimNotInDict = std::make_shared("not_in_dict"); const PrimitivePtr kPrimMixedPrecisionCast = std::make_shared("mixed_precision_cast"); const PrimitivePtr kPrimIsConsant = std::make_shared("is_constant"); +const PrimitivePtr kPrimEquivFormat = std::make_shared("EquivFormat"); // Comm ops const PrimitivePtr kPrimMirror = std::make_shared("_MirrorOperator"); diff --git a/mindspore/ccsrc/operator/ops.h b/mindspore/ccsrc/operator/ops.h index 3b9ac01089d..65327cf407d 100755 --- a/mindspore/ccsrc/operator/ops.h +++ b/mindspore/ccsrc/operator/ops.h @@ -59,6 +59,12 @@ extern const PrimitivePtr kPrimBoolNot; extern const PrimitivePtr kPrimBoolAnd; extern const PrimitivePtr kPrimBoolOr; extern const PrimitivePtr kPrimBoolEq; +extern const PrimitivePtr kPrimGreater; +extern const PrimitivePtr kPrimGreaterEqual; +extern const PrimitivePtr kPrimLess; +extern const PrimitivePtr kPrimLessEqual; +extern const PrimitivePtr kPrimEqual; +extern const PrimitivePtr kPrimNotEqual; // Type introspection extern const PrimitivePtr kPrimTypeOf; @@ -157,6 +163,10 @@ extern const PrimitivePtr KPrimTransData; extern const PrimitivePtr kPrimNMSWithMask; extern const PrimitivePtr kPrimPad; extern const PrimitivePtr kPrimArgMaxWithValue; +extern const PrimitivePtr kPrimRealDiv; +extern const PrimitivePtr kPrimSqrt; +extern const PrimitivePtr kPrimReciprocal; +extern const PrimitivePtr kPrimExpandDims; // Maths extern const PrimitivePtr kPrimTensorAdd; @@ -183,9 +193,11 @@ extern const PrimitivePtr kPrimCumProd; extern const PrimitivePtr kPrimSubscalar; extern const PrimitivePtr kPrimInplaceAdd; extern const PrimitivePtr kPrimInplaceSub; +extern const PrimitivePtr kPrimPow; // NN extern const PrimitivePtr kPrimFlatten; +extern const PrimitivePtr kPrimSoftmax; extern const PrimitivePtr kPrimLogSoftmax; extern const PrimitivePtr kPrimLogSoftmaxGrad; extern const PrimitivePtr kPrimApplyCenteredRMSProp; @@ -263,6 +275,7 @@ extern const PrimitivePtr kPrimInDict; extern const PrimitivePtr kPrimNotInDict; extern const PrimitivePtr kPrimMixedPrecisionCast; extern const PrimitivePtr kPrimIsConsant; +extern const PrimitivePtr kPrimEquivFormat; // Comm ops extern const PrimitivePtr kPrimAllReduce; diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.cc b/mindspore/ccsrc/optimizer/ad/dfunctor.cc index cde90db3467..e192f3912ec 100644 --- a/mindspore/ccsrc/optimizer/ad/dfunctor.cc +++ b/mindspore/ccsrc/optimizer/ad/dfunctor.cc @@ -45,10 +45,19 @@ DFunctor::DFunctor(const FuncGraphPtr &primal_graph, const pipeline::ResourceBas : primal_graph_(primal_graph), resources_(resources), need_cut_(false), is_top_(false) { TraceManager::DebugTrace(std::make_shared(primal_graph->debug_info())); k_graph_ = std::make_shared(); + if (primal_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + std::string grad_op_name = GetValue(primal_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); + k_graph_->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, MakeValue(grad_op_name)); + } TraceManager::EndTrace(); 
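  // Editor's note (illustrative, not part of the original patch): together with the
  // block added just below for tape_, this copies FUNC_GRAPH_ATTR_GRAPH_KERNEL from
  // the primal graph onto both graphs produced by autodiff: k_graph_ keeps the
  // original name and the backward tape gets a "_Grad" suffix, so a "Softmax" graph
  // kernel differentiates into a "Softmax" forward part and a "Softmax_Grad" tape,
  // and both stay visible to the graph-kernel passes. A downstream consumer might
  // read the attribute back as:
  //
  //   if (tape_->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) {
  //     auto op_name = GetValue<std::string>(tape_->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL));
  //   }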
TraceManager::DebugTrace(std::make_shared(primal_graph->debug_info())); tape_ = std::make_shared(); + // Add "_Grad" postfix + if (primal_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + std::string grad_op_name = GetValue(primal_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) + "_Grad"; + tape_->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, MakeValue(grad_op_name)); + } TraceManager::EndTrace(); dout_ = tape_->add_parameter(); @@ -368,7 +377,7 @@ FuncGraphPtr DFunctor::KUserDefined(const FuncGraphPtr &primal) { (void)primal->transforms().insert(std::make_pair("grad", FuncGraphTransform(fg))); (void)fg->transforms().insert(std::make_pair("primal", FuncGraphTransform(primal))); // Reset defer_inline to enable successive inlining - primal->set_flags(FUNC_GRAPH_FLAG_DEFER_INLINE, false); + primal->set_flag(FUNC_GRAPH_FLAG_DEFER_INLINE, false); auto functor = std::make_shared(primal, resources_); functor->Init(); diff --git a/mindspore/ccsrc/optimizer/ad/grad.cc b/mindspore/ccsrc/optimizer/ad/grad.cc index 43d2a66ad2d..d141dc6eeae 100644 --- a/mindspore/ccsrc/optimizer/ad/grad.cc +++ b/mindspore/ccsrc/optimizer/ad/grad.cc @@ -37,7 +37,7 @@ FuncGraphPtr Grad(const FuncGraphPtr &func_graph, const pipeline::ResourceBasePt auto multi_graph_sink = [&func_graph](const FuncGraphPtr &f) { if (MsContext::GetInstance()->is_multi_graph_sink()) { if (func_graph->has_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES)) { - f->set_flags(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); + f->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); } } }; diff --git a/mindspore/ccsrc/optimizer/clean.cc b/mindspore/ccsrc/optimizer/clean.cc index fafe26e2ed0..6a545972829 100644 --- a/mindspore/ccsrc/optimizer/clean.cc +++ b/mindspore/ccsrc/optimizer/clean.cc @@ -78,7 +78,10 @@ AnfNodePtr ConvertGetAttrToTupleGetItem(const CNodePtr &node) { MS_EXCEPTION_IF_NULL(cons); auto dt = data->abstract(); - MS_EXCEPTION_IF_NULL(dt); + if (dt == nullptr) { + return nullptr; + } + if (!dt->isa()) { MS_LOG(EXCEPTION) << "First parameter of getattr is not AbstractClass, but " << dt->type_name() << "."; } diff --git a/mindspore/ccsrc/optimizer/graph_kernel_reuse.cc b/mindspore/ccsrc/optimizer/graph_kernel_reuse.cc new file mode 100644 index 00000000000..dc20ad925ec --- /dev/null +++ b/mindspore/ccsrc/optimizer/graph_kernel_reuse.cc @@ -0,0 +1,157 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "optimizer/graph_kernel_reuse.h" +#include +#include +#include +#include "./common.h" +#include "utils/graph_utils.h" + +namespace mindspore { +/* namespace to support opt */ +namespace opt { + +bool GraphKernelReuse::CompareNode(const AnfNodePtr a, const AnfNodePtr b) { + if (a->abstract() && b->abstract()) { + auto a_type = a->abstract()->GetTypeTrack(); + auto b_type = b->abstract()->GetTypeTrack(); + + if (a_type != b_type) { + return false; + } + + auto a_shape = a->abstract()->GetShapeTrack(); + auto b_shape = b->abstract()->GetShapeTrack(); + if (a_shape != nullptr && a_shape == b_shape) { + return true; + } + + if (a_shape != nullptr && b_shape != nullptr && a_shape->isa() && + b_shape->isa()) { + return a_shape->cast()->shape() == b_shape->cast()->shape(); + } + } + return false; +} + +bool GraphKernelReuse::DoReplace(const FuncGraphManagerPtr manager) { + bool changed = false; + auto fgs = manager->func_graphs(); + for (FuncGraphPtr &fg : fgs) { + if (!fg->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + continue; + } + std::string key = GetValue(fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); + if (graph_kernel_ops.find(key) != graph_kernel_ops.end()) { + if (find(graph_kernel_ops[key].begin(), graph_kernel_ops[key].end(), fg) == graph_kernel_ops[key].end()) { + FuncGraphPtr new_fg = nullptr; + for (auto &cfg : graph_kernel_ops[key]) { + // If two graphs have different size then continue + auto fg_topos = TopoSort(fg->get_return()); + auto cfg_topos = TopoSort(cfg->get_return()); + if (fg_topos.size() != cfg_topos.size()) { + continue; + } + + // Compare const tensor + bool has_same = true; + for (size_t i = 0; i < fg_topos.size(); ++i) { + if (IsValueNode(fg_topos[i])) { + if (!IsValueNode(cfg_topos[i])) { + has_same = false; + break; + } + + auto tensor1 = GetValueNode(fg_topos[i]); + auto tensor2 = GetValueNode(cfg_topos[i]); + if (!tensor1->ValueEqual(*tensor2)) { + has_same = false; + break; + } + } + } + + if (!has_same) { + continue; + } + + auto fg_input = fg->parameters(); + auto cfg_input = cfg->parameters(); + if (fg_input.size() != cfg_input.size()) { + continue; + } + // Compare input + for (size_t i = 0; i < fg_input.size(); ++i) { + if (!CompareNode(fg_input[i], cfg_input[i])) { + has_same = false; + break; + } + } + if (!has_same) { + continue; + } + + // Compare output + if (!CompareNode(fg->output(), cfg->output())) { + continue; + } + + // Find reusable fg + new_fg = cfg; + break; + } + + if (new_fg != nullptr) { + // Replace current fg with existing fg + auto users = fg->func_graph_cnodes_index(); + for (auto &iter : users) { + auto cnode = iter.first->first->cast(); + auto new_input = cnode->inputs(); + auto main_graph = cnode->func_graph(); + MS_EXCEPTION_IF_NULL(main_graph); + if (IsPrimitiveCNode(cnode, prim::kPrimPartial)) { + new_input[1] = NewValueNode(new_fg); + } else { + new_input[0] = NewValueNode(new_fg); + } + auto new_cnode = main_graph->NewCNode(new_input); + manager->Replace(iter.first->first, new_cnode); + changed = true; + } + + } else { + // Add current fg to map + graph_kernel_ops[key].push_back(fg); + } + } + } else { + graph_kernel_ops[key] = {fg}; + } + } + + return changed; +} + +bool GraphKernelReuse::ReuseGraphKernel(const FuncGraphPtr root, const FuncGraphManagerPtr manager) { + MS_EXCEPTION_IF_NULL(manager); + manager->AddFuncGraph(root); + + return DoReplace(manager); +} + +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/graph_kernel_reuse.h 
b/mindspore/ccsrc/optimizer/graph_kernel_reuse.h new file mode 100644 index 00000000000..ed5cc93d184 --- /dev/null +++ b/mindspore/ccsrc/optimizer/graph_kernel_reuse.h @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_OPTIMIZER_GRAPH_KERNEL_OP_REUSE_H +#define MINDSPORE_CCSRC_OPTIMIZER_GRAPH_KERNEL_OP_REUSE_H + +#include +#include +#include +#include + +#include "optimizer/optimizer.h" + +namespace mindspore { +namespace opt { + +// Common subexpression elimination. +class GraphKernelReuse { + public: + GraphKernelReuse() : count(0) {} + virtual ~GraphKernelReuse() = default; + + bool operator()(const FuncGraphPtr &root, const OptimizerPtr &optimizer) { + bool chg = ReuseGraphKernel(root, optimizer->resource()->manager()); + return chg; + } + + bool CompareNode(const AnfNodePtr a, const AnfNodePtr other); + bool DoReplace(const FuncGraphManagerPtr manager); + + bool ReuseGraphKernel(const FuncGraphPtr root, const FuncGraphManagerPtr manager); + + private: + std::unordered_map> graph_kernel_ops; + int count; +}; + +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_OPTIMIZER_GRAPH_KERNEL_OP_REUSE_H diff --git a/mindspore/ccsrc/optimizer/irpass.cc b/mindspore/ccsrc/optimizer/irpass.cc index 5daf080492f..72177ccb06d 100644 --- a/mindspore/ccsrc/optimizer/irpass.cc +++ b/mindspore/ccsrc/optimizer/irpass.cc @@ -41,6 +41,8 @@ #include "optimizer/irpass/incorporate_call.h" #include "optimizer/irpass/grad_var_prepare.h" #include "optimizer/irpass/param_replace.h" +#include "optimizer/irpass/mark_interface_fusion.h" +#include "optimizer/opt.h" namespace mindspore { namespace opt { @@ -48,7 +50,7 @@ namespace irpass { OptimizeIRPassLib::OptimizeIRPassLib() { arithmetic_simplify_ = MakeSubstitution(ArithmeticSimplify(), "arithmetic_simplify", {prim::kPrimScalarAdd, prim::kPrimScalarMul, prim::kPrimTensorAdd, - prim::kPrimIdentity, prim::kPrimMomentum, prim::kPrimMul}); + prim::kPrimIdentity, prim::kPrimMomentum, prim::kPrimMul, prim::kPrimPow}); special_op_eliminate_ = MakeSubstitution(SpecialOpEliminater(), "special_op_eliminate", {prim::kPrimInsertGradientOf, prim::kPrimStopGradient, prim::kPrimHookBackward, @@ -90,7 +92,6 @@ OptimizeIRPassLib::OptimizeIRPassLib() { replace_refkey_by_param_ = MakeSubstitution(ReplaceRefkeyByParam(), "replace_refkey_by_param", IsValueNode, opt::FORCE_RENORM); replace_old_param_ = MakeSubstitution(ReplaceOldParam(), "replace_old_param", IsParam); - // Gradient transforms expand_jprim_ = MakeSubstitution(ExpandJPrim(), "expand_jprim", prim::kPrimJ); minmaximum_grad_ = MakeSubstitution(MinMaximumGrad(), "minmaximum_grad", prim::kPrimTupleGetItem); @@ -115,6 +116,8 @@ OptimizeIRPassLib::OptimizeIRPassLib() { // Incorporation incorporate_getitem_set_ = MakeSubstitution(IncorporateGetitemSet(), "incorporate_getitem_set", prim::kPrimTupleGetItem); + incorporate_getitem_from_param_ = + MakeSubstitution(IncorporateGetitemFromParam(), 
"incorporate_getitem_from_param", IsCNodeGraphKernel); incorporate_call_ = MakeSubstitution(IncorporateCall(), "incorporate_call", IsCNodeDup); incorporate_call_switch_ = MakeSubstitution(IncorporateCallSwitch(), "incorporate_call_switch", IsCNodeDup); @@ -124,6 +127,17 @@ OptimizeIRPassLib::OptimizeIRPassLib() { // Convert print_tuple_wrapper_ = MakeSubstitution(PrintTupleWrapper(), "print_tuple_wrapper", prim::kPrimPrint); + + // Unused parameter eliminate + unused_parameter_eliminate_ = + MakeSubstitution(UnusedParasEliminater(), "unused_parameter_eliminate", IsCNodeGraphKernel); + unused_output_eliminate_ = MakeSubstitution(UnusedOutputEliminater(), "unused_output_eliminate", IsCNodeGraphKernel); + + // AddN eliminate + addn_eliminate_ = MakeSubstitution(AddNEliminater(), "addn_eliminate", IsCNodeGraphKernel); + + // Mark interface fusion + mark_interface_fusion_ = MakeSubstitution(MarkInterfaceFusion(), "mark_interface_fusion", prim::kPrimSelect); } ResolveIRPassLib::ResolveIRPassLib() { diff --git a/mindspore/ccsrc/optimizer/irpass.h b/mindspore/ccsrc/optimizer/irpass.h index ac0c6eda6f8..5e1550c883a 100644 --- a/mindspore/ccsrc/optimizer/irpass.h +++ b/mindspore/ccsrc/optimizer/irpass.h @@ -84,6 +84,7 @@ class OptimizeIRPassLib { // Incorporation SubstitutionPtr incorporate_getitem_set_; + SubstitutionPtr incorporate_getitem_from_param_; SubstitutionPtr incorporate_call_; SubstitutionPtr incorporate_call_switch_; @@ -92,6 +93,16 @@ class OptimizeIRPassLib { // Convert SubstitutionPtr print_tuple_wrapper_; + + // Unused parameter eliminate + SubstitutionPtr unused_parameter_eliminate_; + SubstitutionPtr unused_output_eliminate_; + + // AddN eliminate + SubstitutionPtr addn_eliminate_; + + // Fusion + SubstitutionPtr mark_interface_fusion_; }; // the collection of irpass for resolve action @@ -145,6 +156,23 @@ inline bool IsCNodeGraph(const AnfNodePtr &node) { return IsValueNode(inp0); } +// Check if CNode Input 0 is Func Graph of graph kernel. 
+inline bool IsCNodeGraphKernel(const AnfNodePtr &node) { + if (node == nullptr || !node->isa()) { + return false; + } + + auto inp0 = node->cast()->input(0); + if (IsValueNode(inp0)) { + auto fg = GetValueNode(inp0); + if (fg == nullptr) { + return false; + } + return fg->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); + } + return false; +} + // Check if CNode Input 0 is CNode inline bool IsCNodeDup(const AnfNodePtr &node) { if (node == nullptr || !node->isa()) { diff --git a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h index 1a2ba871658..1836a88dbcd 100644 --- a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h +++ b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h @@ -83,6 +83,216 @@ class MultiplyByZeroOrOne : public AnfVisitor { AnfNodePtr x_{nullptr}; }; +// Support class used for checking if all values of a Tensor are equal `check_value_` +// Supported data types: double, float/float32, int/int32 +class CheckTensorConstant { + public: + explicit CheckTensorConstant(int _check_value = 0) : check_value_(_check_value) {} + ~CheckTensorConstant() = default; + bool IsTensorConstant(const ValuePtr &value) { + if (!value->isa()) { + return false; + } + auto tensor_ptr = dyn_cast(value); + TypeId tensor_type = tensor_ptr->Dtype()->type_id(); + if ((tensor_type == TypeId::kNumberTypeFloat32) || (tensor_type == TypeId::kNumberTypeFloat)) { + float *data2 = reinterpret_cast(tensor_ptr->data_c()); + for (int i = 0; i < tensor_ptr->DataSize(); i++) { + if (fabs(data2[i] - check_value_) > FLT_EPSILON) { + return false; + } + } + return true; + } else if (tensor_type == TypeId::kNumberTypeFloat64) { + double *data2 = reinterpret_cast(tensor_ptr->data_c()); + for (int i = 0; i < tensor_ptr->DataSize(); i++) { + if (fabs(data2[i] - check_value_) > DBL_EPSILON) { + return false; + } + } + return true; + } else if ((tensor_type == TypeId::kNumberTypeInt32) || (tensor_type == TypeId::kNumberTypeInt)) { + int *data2 = reinterpret_cast(tensor_ptr->data_c()); + for (int i = 0; i < tensor_ptr->DataSize(); i++) { + if (data2[i] != check_value_) { + return false; + } + } + return true; + } + // Un-support Data Types + return false; + } + + bool IsTensorScalarConstant(const ValuePtr &value) { + if (!value->isa()) { + return false; + } + auto tensor_ptr = dyn_cast(value); + if ((tensor_ptr->DataSize() > 1) || (tensor_ptr->DataDim() > 0)) { + return false; + } + return IsTensorConstant(value); + } + + private: + int check_value_; +}; + +// {prim::kPrimMul, 0, X}, {prim::kPrimMul, X, 0} +// {prim::kPrimMul, 1, X}, {prim::kPrimMul, X, 1} +class TensorMultiplyByZeroOrOne : public AnfVisitor { + public: + TensorMultiplyByZeroOrOne() : zero_(MakeValue(0)) {} + ~TensorMultiplyByZeroOrOne() override = default; + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + Reset(); + AnfVisitor::Match(prim::kPrimMul)(node); + + if (is_zero_) { + if (x_->func_graph() != node->func_graph()) { + return nullptr; + } + return NewTensorFilledWithData(node); + } + if (is_one_) { + return NewTensorFilledWithData(node, x_); + } + return nullptr; + } + + void Visit(const AnfNodePtr &node) override { + if (is_zero_ || is_one_) { + x_ = node; + return; + } + + if (IsParam(node)) { + x_ = node; + return; + } + + if (IsCNode(node)) { + CNodePtr cnode = node->cast(); + if (IsPrimitive(cnode->input(0), prim::kPrimZerosLike)) { + is_zero_ = true; + return; + } + x_ = node; + return; + } + auto value = node->cast()->value(); + if 
(CheckTensorConstant(0).IsTensorConstant(value)) { + is_zero_ = true; + return; + } else if (CheckTensorConstant(1).IsTensorConstant(value)) { + is_one_ = true; + return; + } + x_ = node; + } + + void Visit(const ValueNodePtr &vnode) override { + auto value = vnode->value(); + if (CheckTensorConstant(0).IsTensorConstant(value)) { + is_zero_ = true; + return; + } else if (CheckTensorConstant(1).IsTensorConstant(value)) { + is_one_ = true; + return; + } + x_ = vnode; + } + void Reset() { + x_ = nullptr; + is_one_ = false; + is_zero_ = false; + } + + void *GetPointerToTensorData(const AnfNodePtr &node, bool writable = false) { + if (!node->isa()) { + return nullptr; + } + + auto value = node->cast()->value(); + + if (!value->isa()) { + return nullptr; + } + + tensor::TensorPtr tensor_ptr = dyn_cast(value); + return tensor_ptr->data_c(writable); + } + + // Make a new tensor (when possible) with the same shape as of `node` + // If x is nullptr then fill new tensor will "0" + // If x is a tensor with empty shape then fill new tensor with the single value of x + // If x is a tensor with same shape as `node` then return x as result + AnfNodePtr NewTensorFilledWithData(const AnfNodePtr &node, const AnfNodePtr &x = nullptr) { + if ((node->abstract() == nullptr) || !node->abstract()->isa()) { + return nullptr; + } + + auto tensor_abstract = node->abstract()->cast(); + TypePtr tensor_type_ptr = tensor_abstract->element()->BuildType(); + std::vector tensor_shape = tensor_abstract->shape()->shape(); + + auto new_tensor_ptr = std::make_shared(tensor_type_ptr->type_id(), tensor_shape); + size_t mem_size = GetTypeByte(tensor_type_ptr) * IntToSize(new_tensor_ptr->ElementsNum()); + char *data = reinterpret_cast(new_tensor_ptr->data_c(true)); + + if (x == nullptr) { + std::memset(data, 0, mem_size); + auto new_vnode = NewValueNode(new_tensor_ptr); + new_vnode->set_abstract(new_tensor_ptr->ToAbstract()); + return new_vnode; + } + // x is not nullptr + if (x->isa()) { + if ((x->abstract() == nullptr) || !x->abstract()->isa()) { + return nullptr; + } + auto x_abstract = x->abstract()->cast(); + std::vector x_shape = x_abstract->shape()->shape(); + + if (x_shape != tensor_shape) { + return nullptr; + } + return x; + } + + if (!x->isa()) { + return nullptr; + } + auto x_value = x->cast()->value(); + if (!x_value->isa()) { + return nullptr; + } + + auto x_tensor_ptr = dyn_cast(x_value); + + if ((x_tensor_ptr->DataSize() > 1) && (x_tensor_ptr->DataSize() != new_tensor_ptr->DataSize())) { + return nullptr; + } + char *source_data = reinterpret_cast(GetPointerToTensorData(x)); + if (x_tensor_ptr->DataSize() == 1) { + for (int i = 0; i < new_tensor_ptr->ElementsNum(); i++) { + memcpy(source_data, data + i * GetTypeByte(tensor_type_ptr), GetTypeByte(tensor_type_ptr)); + } + } else { + memcpy(source_data, data, mem_size); + } + auto new_vnode = NewValueNode(new_tensor_ptr); + new_vnode->set_abstract(new_tensor_ptr->ToAbstract()); + return new_vnode; + } + + private: + bool is_zero_{false}, is_one_{false}; + ValuePtr zero_; + AnfNodePtr x_{nullptr}; +}; + // {prim::kPrimScalarAdd, X, 0} // {prim::kPrimScalarAdd, 0, X} class AddByZero : public AnfVisitor { @@ -101,7 +311,8 @@ class AddByZero : public AnfVisitor { } void Visit(const AnfNodePtr &node) override { - if (node->isa() && *GetValueNode(node) == *zero_) { + if (node->isa() && + ((*GetValueNode(node) == *zero_) || CheckTensorConstant(0).IsTensorScalarConstant(GetValueNode(node)))) { is_zero_ = true; return; } @@ -139,10 +350,22 @@ class TensorAddByZero : public 
AnfVisitor { is_zero_ = true; return; } + if (node->isa() && CheckTensorConstant(0).IsTensorScalarConstant(GetValueNode(node))) { + is_zero_ = true; + return; + } x_ = node; } + void Visit(const ValueNodePtr &vnode) override { + auto value = vnode->value(); + if (CheckTensorConstant(0).IsTensorConstant(value)) { + is_zero_ = true; + return; + } + } + void Reset() { x_ = nullptr; is_zero_ = false; @@ -183,29 +406,143 @@ class OptUpdateZeroTensor : public AnfVisitor { // {prim::kPrimMul, {...}, {prim::kPrimMul, Tensor1, Tensor2}} class ConstantDuplicateMul : public AnfVisitor { public: + // Support function to multiply two constant tensors: partially support broadcasting shapes + template + void Multiply(void *in_data_1, int in_data_1_size, void *in_data_2, int in_data_2_size, void **out_data, + int out_data_size) { + T *data_1 = reinterpret_cast(in_data_1); + T *data_2 = reinterpret_cast(in_data_2); + T *data_out = new T[out_data_size]; + + if (in_data_1_size == 1) { + for (int i = 0; i < out_data_size; i++) { + data_out[i] = data_1[0]; + } + } else { + for (int i = 0; i < out_data_size; i++) { + data_out[i] = data_1[i]; + } + } + if (in_data_2_size == 1) { + for (int i = 0; i < out_data_size; i++) { + data_out[i] *= data_2[0]; + } + } else { + for (int i = 0; i < out_data_size; i++) { + data_out[i] *= data_2[i]; + } + } + *out_data = reinterpret_cast(data_out); + return; + } + + AnfNodePtr MulConstantTensors(const AnfNodePtr &vnode_1, const AnfNodePtr &vnode_2, const AnfNodePtr &node_3) { + if (!vnode_1->isa() || !vnode_2->isa() || (vnode_1->abstract() == nullptr) || + (vnode_2->abstract() == nullptr) || (node_3->abstract() == nullptr)) { + return nullptr; + } + + auto value_1 = GetValueNode(vnode_1); + auto value_2 = GetValueNode(vnode_2); + + if (!value_1->isa() || !value_2->isa()) { + return nullptr; + } + + auto tensor_ptr_1 = dyn_cast(value_1); + auto tensor_ptr_2 = dyn_cast(value_2); + + auto tensor_1_abstract = vnode_1->abstract()->cast(); + auto tensor_2_abstract = vnode_1->abstract()->cast(); + auto tensor_3_abstract = node_3->abstract()->cast(); + + TypePtr tensor_1_type_ptr = tensor_1_abstract->element()->BuildType(); + TypePtr tensor_2_type_ptr = tensor_2_abstract->element()->BuildType(); + TypePtr tensor_3_type_ptr = tensor_3_abstract->element()->BuildType(); + + if ((tensor_1_type_ptr->type_id() != tensor_3_type_ptr->type_id()) || + (tensor_2_type_ptr->type_id() != tensor_3_type_ptr->type_id())) { + return nullptr; + } + + std::vector tensor_out_shape = tensor_3_abstract->shape()->shape(); + + int data_out_size = 1; + for (auto it : tensor_out_shape) { + data_out_size *= it; + } + if ((tensor_ptr_1->DataSize() > 1) && (tensor_ptr_1->DataSize() != data_out_size)) { + return nullptr; + } + if ((tensor_ptr_2->DataSize() > 1) && (tensor_ptr_2->DataSize() != data_out_size)) { + return nullptr; + } + + void *data_out; + + if ((tensor_3_type_ptr->type_id() == TypeId::kNumberTypeFloat32) || + (tensor_3_type_ptr->type_id() == TypeId::kNumberTypeFloat)) { + Multiply(tensor_ptr_1->data_c(), tensor_ptr_1->DataSize(), tensor_ptr_2->data_c(), + tensor_ptr_2->DataSize(), &data_out, data_out_size); + } else { + if (tensor_3_type_ptr->type_id() == TypeId::kNumberTypeFloat64) { + Multiply(tensor_ptr_1->data_c(), tensor_ptr_1->DataSize(), tensor_ptr_2->data_c(), + tensor_ptr_2->DataSize(), &data_out, data_out_size); + } else { + if ((tensor_3_type_ptr->type_id() == TypeId::kNumberTypeInt32) || + (tensor_3_type_ptr->type_id() == TypeId::kNumberTypeInt)) { + Multiply(tensor_ptr_1->data_c(), 
tensor_ptr_1->DataSize(), tensor_ptr_2->data_c(), + tensor_ptr_2->DataSize(), &data_out, data_out_size); + } else { + // Un-support data types + return nullptr; + } + } + } + + auto new_tensor_ptr = std::make_shared(tensor_3_type_ptr->type_id(), tensor_out_shape); + size_t mem_size = GetTypeByte(tensor_3_type_ptr) * IntToSize(new_tensor_ptr->ElementsNum()); + char *data = reinterpret_cast(new_tensor_ptr->data_c(true)); + memcpy(data, data_out, mem_size); + + auto new_vnode = NewValueNode(new_tensor_ptr); + new_vnode->set_abstract(new_tensor_ptr->ToAbstract()); + return new_vnode; + } + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { Reset(); // {prim::kPrimMul, Tensor1, {...}} AnfVisitor::Match(prim::kPrimMul, {IsNode, IsNode})(node); - if (vnode_ == nullptr || cnode_ == nullptr) { + if (vnode_ == nullptr || c_p_node_ == nullptr) { return nullptr; } + + if (!IsCNode(c_p_node_)) { + return nullptr; + } + auto tensor1 = vnode_; - auto mul = cnode_; + auto mul = c_p_node_->cast(); Reset(); // {prim::kPrimMul, Tensor2, {...}} AnfVisitor::Match(prim::kPrimMul, {IsNode, IsNode})(mul); - if (vnode_ == nullptr || cnode_ == nullptr) { + if (vnode_ == nullptr || c_p_node_ == nullptr) { return nullptr; } auto tensor2 = vnode_; - auto cnode = cnode_; + auto c_p_node = c_p_node_; auto PrimMul = GetValueNode(mul->input(0)); auto fg = node->func_graph(); - auto ttmul = NewCNode({NewValueNode(PrimMul), tensor1, tensor2}, fg); - return NewCNode({NewValueNode(PrimMul), cnode, ttmul}, fg); + + auto new_mul_tensor = MulConstantTensors(tensor1, tensor2, c_p_node); + if (new_mul_tensor == nullptr) { + auto ttmul = NewCNode({NewValueNode(PrimMul), tensor1, tensor2}, fg); + return NewCNode({NewValueNode(PrimMul), c_p_node, ttmul}, fg); + } + return NewCNode({NewValueNode(PrimMul), c_p_node, new_mul_tensor}, fg); } void Visit(const AnfNodePtr &node) override { @@ -213,19 +550,40 @@ class ConstantDuplicateMul : public AnfVisitor { vnode_ = node; } - if (IsCNode(node)) { - cnode_ = node->cast(); + if (IsCNode(node) || IsParam(node)) { + c_p_node_ = node; } } void Reset() { vnode_ = nullptr; - cnode_ = nullptr; + c_p_node_ = nullptr; } private: AnfNodePtr vnode_; - CNodePtr cnode_; + AnfNodePtr c_p_node_; +}; + +class PowerOneEliminate : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (!IsPrimitiveCNode(node, prim::kPrimPow) || node->func_graph() == nullptr) { + return nullptr; + } + + auto &inputs = node->cast()->inputs(); + if (!IsValueNode(inputs[2])) { + return nullptr; + } + auto scalar = GetValueNode(inputs[2]); + if (scalar->isa() && GetValue(scalar) == 1.0) { + return inputs[1]; + } else if (scalar->isa() && GetValue(scalar) == 1) { + return inputs[1]; + } + return nullptr; + } }; // grad = AllReduce(grad) / worker_number @@ -341,17 +699,21 @@ class ArithmeticSimplify { public: ArithmeticSimplify() : multiply_by_zero_or_one_(), + tensor_multiply_by_zero_or_one_(), add_by_zero_(), tensor_add_by_zero_(), identity_(prim::kPrimIdentity), opt_update_zero_tensor_(), - constant_duplicate_mul_() { + constant_duplicate_mul_(), + power_one_() { eliminaters_.emplace_back(multiply_by_zero_or_one_); + eliminaters_.emplace_back(tensor_multiply_by_zero_or_one_); eliminaters_.emplace_back(add_by_zero_); eliminaters_.emplace_back(tensor_add_by_zero_); eliminaters_.emplace_back(identity_); eliminaters_.emplace_back(opt_update_zero_tensor_); eliminaters_.emplace_back(constant_duplicate_mul_); + 
eliminaters_.emplace_back(power_one_); } ~ArithmeticSimplify() = default; @@ -368,11 +730,13 @@ class ArithmeticSimplify { private: MultiplyByZeroOrOne multiply_by_zero_or_one_; + TensorMultiplyByZeroOrOne tensor_multiply_by_zero_or_one_; AddByZero add_by_zero_; TensorAddByZero tensor_add_by_zero_; PrimEliminater identity_; OptUpdateZeroTensor opt_update_zero_tensor_; ConstantDuplicateMul constant_duplicate_mul_; + PowerOneEliminate power_one_; std::vector eliminaters_{}; }; } // namespace irpass diff --git a/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h b/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h index 5b973dc3346..5afee45e95f 100644 --- a/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h +++ b/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h @@ -21,6 +21,7 @@ #include #include #include +#include #include "optimizer/irpass.h" #include "optimizer/optimizer.h" @@ -28,7 +29,6 @@ #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" #include "operator/ops.h" - namespace mindspore { namespace opt { namespace irpass { @@ -81,13 +81,32 @@ class IncorporateGetitem : public AnfVisitor { AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { Reset(); AnfVisitor::Match(prim::kPrimTupleGetItem, {IsCNode, IsValueNode})(node); - - if (node->func_graph() != nullptr && idx_ >= 0 && fg_ != nullptr) { - auto new_fg = getitem_transform_(fg_, idx_); - (void)args_.insert(args_.begin(), NewValueNode(new_fg)); - return node->func_graph()->NewCNode(args_); + if (node->func_graph() == nullptr || idx_ == -1 || fg_ == nullptr) { + return nullptr; } - return nullptr; + + if (fg_->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + // If graph kernel has muti output, do not split. + // some graph kernel output has EnvInstance node or DeadCode node should split. + auto output = fg_->output(); + if (IsPrimitiveCNode(output, prim::kPrimMakeTuple)) { + auto output_cnode = output->cast(); + auto outputs = output_cnode->inputs(); + int real_output_cnt = 0; + for (size_t i = 1; i < outputs.size(); ++i) { + if (IsCNode(outputs[i]) || IsValueNode(outputs[i]) || IsParam(outputs[i])) { + real_output_cnt++; + if (real_output_cnt > 1) { + return nullptr; + } + } + } + } + } + + auto new_fg = getitem_transform_(fg_, idx_); + (void)args_.insert(args_.begin(), NewValueNode(new_fg)); + return node->func_graph()->NewCNode(args_); } void Visit(const CNodePtr &cnode) override { @@ -115,6 +134,172 @@ class IncorporateGetitem : public AnfVisitor { internal::GetitemTransform getitem_transform_; }; +class IncorporateGetitemFromParam : public AnfVisitor { + public: + void Process(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const AnfNodePtr ¶m, size_t input_idx) { + auto mng = func_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + auto &node_users = mng->node_users(); + if (node_users.find(param) == node_users.end() || node_users[param].empty()) { + args_.push_back(cnode->input(input_idx + 1)); + return; + } + + for (auto &user : node_users[param]) { + if (!IsPrimitiveCNode(user.first, prim::kPrimTupleGetItem)) { + // we do not process this case. + args_.push_back(cnode->input(input_idx + 1)); + return; + } + } + + // update new args. 
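  // Editor's note (descriptive comment, not part of the original patch): "case 1"
  // below is the argument being a MakeTuple CNode, whose elements are flattened
  // directly into the new argument list; "case 2" is the argument being a call to
  // another graph kernel whose output is a MakeTuple, in which case one TupleGetItem
  // per element is created so the tuple-typed parameter can be split into one flat
  // parameter per element.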
+ if (IsPrimitiveCNode(cnode->input(input_idx + 1), prim::kPrimMakeTuple)) { + // case 1 + replace_parameters_[input_idx] = true; + need_update_ = true; + auto make_tuple_cnode = cnode->input(input_idx + 1)->cast(); + auto &make_tuple_cnode_inputs = make_tuple_cnode->inputs(); + inputs_num_[input_idx] = make_tuple_cnode_inputs.size() - 1; + args_.insert(args_.end(), make_tuple_cnode_inputs.begin() + 1, make_tuple_cnode_inputs.end()); + } else { + // case 2 + auto prev_cnode = cnode->input(input_idx + 1)->cast(); + auto prev_fg = GetValueNode(prev_cnode->input(0)); + auto fg_output = prev_fg->output(); + if (!IsPrimitiveCNode(fg_output, prim::kPrimMakeTuple)) { + MS_LOG(ERROR) << "The return of: " << prev_fg->ToString() + << " should be a make tuple, but got: " << fg_output->DebugString(); + return; + } + replace_parameters_[input_idx] = true; + need_update_ = true; + auto make_tuple_cnode = fg_output->cast(); + inputs_num_[input_idx] = make_tuple_cnode->inputs().size() - 1; + for (size_t output_i = 0; output_i < inputs_num_[input_idx]; ++output_i) { + auto new_getitem = + func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), prev_cnode, NewValueNode(SizeToInt(output_i))}); + auto aptr = std::make_shared(std::make_shared(SizeToInt(output_i))); + new_getitem->input(2)->set_abstract(aptr); + new_getitem->set_abstract(make_tuple_cnode->input(output_i + 1)->abstract()); + args_.push_back(new_getitem); + } + } + } + + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (node->func_graph() == nullptr) { + return nullptr; + } + + Reset(); + + auto cnode = node->cast(); + if (cnode == nullptr) { + return nullptr; + } + auto &inputs = cnode->inputs(); + auto fg = GetValueNode(inputs[0]); + if (fg == nullptr) { + return nullptr; + } + auto mng = fg->manager(); + MS_EXCEPTION_IF_NULL(mng); + auto parameters = fg->parameters(); + if (parameters.size() != inputs.size() - 1) { + return nullptr; + } + replace_parameters_ = std::vector(parameters.size(), false); + inputs_num_ = std::vector(parameters.size(), 1); + auto node_fg = node->func_graph(); + + for (size_t i = 1; i < inputs.size(); ++i) { + if (IsPrimitiveCNode(inputs[i], prim::kPrimMakeTuple) || IsCNodeGraphKernel(inputs[i])) { + Process(node_fg, cnode, parameters[i - 1], i - 1); + } else { + args_.push_back(inputs[i]); + } + } + + if (!need_update_) { + return nullptr; + } + + FuncGraphPtr new_fg = TransformableClone(fg, std::make_shared("sp")); + mng->AddFuncGraph(new_fg); + + auto node_users = mng->node_users(); + std::vector new_fg_parameters = new_fg->parameters(); + std::vector new_parameters; + size_t curr_input_idx{0}; + for (size_t param_i = 0; param_i < new_fg_parameters.size(); ++param_i) { + if (!replace_parameters_[param_i]) { + if (parameters[param_i]->abstract() != nullptr) { + new_fg_parameters[param_i]->set_abstract(parameters[param_i]->abstract()); + } + new_parameters.push_back(new_fg_parameters[param_i]); + curr_input_idx++; + continue; + } + + // make a new parameter. + for (size_t input_i = 0; input_i < inputs_num_[param_i]; ++input_i) { + auto new_param = std::make_shared(new_fg); + new_param->set_abstract(args_.at(curr_input_idx)->abstract()); + + // update users of new parameter. 
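  // Editor's note (descriptive comment, not part of the original patch): every user
  // of the old tuple-typed parameter is expected to be a TupleGetItem; for the element
  // index matching this new flat parameter, the users of that TupleGetItem are
  // re-pointed to new_param, which leaves the original TupleGetItem and the tuple
  // parameter dead.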
+ for (auto &user : node_users[new_fg_parameters[param_i]]) { + idx_ = -1; + AnfVisitor::Match(prim::kPrimTupleGetItem, {IsParam, IsValueNode})(user.first); + if (idx_ == -1) { + MS_LOG(ERROR) << "User of: " << new_fg_parameters[param_i]->DebugString() + << " must be tuple getitem here, but got: " << user.first->DebugString(); + return nullptr; + } + + if (input_i == IntToSize(idx_)) { + for (auto &sub_user : node_users[user.first]) { + auto sub_user_cnode = sub_user.first->cast(); + MS_EXCEPTION_IF_NULL(sub_user_cnode); + sub_user_cnode->set_input(sub_user.second, new_param); + (void)mng->Replace(sub_user.first, sub_user_cnode); + } + } + } + + // (void)mng->Replace(new_fg_parameters[param_i], new_param); + new_parameters.push_back(new_param); + curr_input_idx++; + } + } + + mng->SetParameters(new_fg, new_parameters); + (void)args_.insert(args_.begin(), NewValueNode(new_fg)); + auto new_call = node_fg->NewCNode(args_); + new_call->set_abstract(node->abstract()); + return new_call; + } + + void Visit(const ValueNodePtr &vnode) override { idx_ = GetValue(vnode->value()); } + + void Visit(const CNodePtr &cnode) override {} + + void Reset() { + replace_parameters_.clear(); + args_.clear(); + inputs_num_.clear(); + need_update_ = false; + idx_ = -1; + } + + private: + std::vector replace_parameters_{}; + std::vector args_{}; + std::vector inputs_num_{}; + bool need_update_{false}; + int idx_{-1}; +}; + // {prim::kPrimTupleGetItem, {{prim::kPrimSwitch, X, G1, G2}, Xs}, C} class IncorporateGetitemSwitch : public AnfVisitor { public: diff --git a/mindspore/ccsrc/optimizer/irpass/inline.h b/mindspore/ccsrc/optimizer/irpass/inline.h index 854b568453e..64f192347cd 100644 --- a/mindspore/ccsrc/optimizer/irpass/inline.h +++ b/mindspore/ccsrc/optimizer/irpass/inline.h @@ -86,20 +86,10 @@ bool IsUniqueUse(const FuncGraphPtr &fg, AnfNodePtr) { bool IsInside(FuncGraphPtr, const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node->func_graph()); - auto &flags = node->func_graph()->flags(); - if (flags.find("inline_inside") != flags.end()) { - return flags["inline_inside"]; - } - return false; + return node->func_graph()->has_flag("inline_inside"); } -bool IsCore(const FuncGraphPtr &fg, AnfNodePtr) { - auto &flags = fg->flags(); - if (flags.find("core") != flags.end()) { - return flags["core"]; - } - return false; -} +bool IsCore(const FuncGraphPtr &fg, AnfNodePtr) { return fg->has_flag("core"); } bool NoCriterion(FuncGraphPtr, AnfNodePtr) { return true; } @@ -123,6 +113,13 @@ class InlinerBase : public AnfVisitor { if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE)) { return nullptr; } + // Do not inline GraphKernel to Cell. + if (fg->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) && !node->func_graph()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + // If the GraphKernel only contains a return node, we make it inlined. + if (fg->nodes().size() - fg->parameters().size() > 1) { + return nullptr; + } + } Reset(); bool is_match = false; diff --git a/mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h b/mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h new file mode 100644 index 00000000000..6f2bcc187fb --- /dev/null +++ b/mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h @@ -0,0 +1,86 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_OPTIMIZER_IRPASS_MARK_INTERFACE_FUSION_H +#define MINDSPORE_CCSRC_OPTIMIZER_IRPASS_MARK_INTERFACE_FUSION_H + +#include +#include +#include + +#include "session/anf_runtime_algorithm.h" +#include "optimizer/optimizer.h" +#include "optimizer/irpass.h" +#include "ir/visitor.h" +#include "operator/ops.h" +#include "utils/graph_utils.h" +#include "operator/composite/composite.h" + +namespace mindspore { +namespace opt { +namespace irpass { + +static int count = 0; + +std::string GetFusionNumber() { + std::stringstream ss; + ss << std::setw(4) << std::setfill('0') << count; + std::string num = ss.str(); + ++count; + + return "_" + num; +} + +// Mark CNodes which can be merged in kernel build +class MarkInterfaceFusion : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (node->func_graph()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) && IsPrimitiveCNode(node, prim::kPrimSelect)) { + auto cnode = node->cast(); + auto condition = cnode->input(1); + std::string cmp; + std::unordered_map cmp_list = {{"GreaterEqual", "GE"}, {"Greater", "GT"}, + {"LessEqual", "LE"}, {"Less", "LT"}, + {"Equal", "EQ"}, {"NotEqual", "NE"}}; + if (IsPrimitiveCNode(condition)) { + auto prim_name = GetCNodeFuncName(condition->cast()); + if (cmp_list.count(prim_name) != 0) { + // Mark Select and compare node + cmp = cmp_list[prim_name]; + auto cnt = GetFusionNumber(); + AnfAlgo::SetNodeAttr("fusion", MakeValue("Select" + cmp + cnt), condition); + AnfAlgo::SetNodeAttr("fusion", MakeValue("Select" + cmp + cnt + "_end"), node); + for (size_t i = 1; i < cnode->inputs().size(); ++i) { + if (IsPrimitiveCNode(cnode->input(i), prim::kPrimZerosLike)) { + AnfAlgo::SetNodeAttr("fusion", MakeValue("Select" + cmp + cnt), cnode->input(i)); + } + } + } + } + } + return nullptr; + } + + void Visit(const AnfNodePtr &) override {} + + private: + AnfNodePtr y_{nullptr}; +}; + +} // namespace irpass +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_OPTIMIZER_IRPASS_MARK_INTERFACE_FUSION_H diff --git a/mindspore/ccsrc/optimizer/irpass/merge_addn.h b/mindspore/ccsrc/optimizer/irpass/merge_addn.h index 35162ce4fe7..94f9e26c5b4 100644 --- a/mindspore/ccsrc/optimizer/irpass/merge_addn.h +++ b/mindspore/ccsrc/optimizer/irpass/merge_addn.h @@ -19,6 +19,7 @@ #include #include +#include #include "optimizer/irpass.h" #include "optimizer/optimizer.h" @@ -196,6 +197,131 @@ class AddNZeroFilter : public AnfVisitor { std::vector filtered_Xs_{}, Xs_{}; bool has_zero_like_{false}; }; + +// {PrimAddN, {kPrimMakeTuple, Xs}} +// Akg don't support AddN(ValueNode, Tensor, ...), converted to TensorAdd. 
+// case0: AddN(inputs)(inputs size < 2) -> error +// case1: AddN(inputs)(all inputs is ValueNode) -> error +// case2: AddN(inputs)(inputs size = 2) -> TensorAdd(Tensor, Tensor) +// case3: AddN(ValueNode, Tensor, Tensor, ...)(has one ValueNode input) +// -> TensorAdd(ValueNode, AddN(Tensor, Tensor, ...)) +class AddNEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (!node->isa() || node->func_graph() == nullptr) { + return nullptr; + } + + auto &inputs = node->cast()->inputs(); + auto fg = GetValueNode(inputs[0]); + MS_EXCEPTION_IF_NULL(fg); + auto mng = fg->manager(); + MS_EXCEPTION_IF_NULL(mng); + if (fg->recursive()) { + return nullptr; + } + + auto new_fg = TransformableClone(fg, std::make_shared("fg")); + mng->AddFuncGraph(new_fg); + need_update_ = false; + bool changed = false; + do { + changed = false; + changed |= Process(new_fg); + } while (changed); + + if (!need_update_) { + return nullptr; + } else { + auto new_sx = inputs; + new_sx[0] = NewValueNode(new_fg); + return node->func_graph()->NewCNode(new_sx); + } + } + + bool Process(const FuncGraphPtr &func_graph) { + auto mng = func_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + auto nodes = TopoSort(func_graph->output()); + bool changed = false; + + for (size_t i = 0; i < nodes.size(); ++i) { + auto node = nodes[i]; + if (!IsPrimitiveCNode(node, prim::kPrimAddN)) { + continue; + } + + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto &tuple_input = cnode->input(1); + MS_EXCEPTION_IF_NULL(tuple_input); + auto tuple_input_cnode = tuple_input->cast(); + MS_EXCEPTION_IF_NULL(tuple_input_cnode); + auto &tuple_inputs = tuple_input_cnode->inputs(); + if (tuple_inputs.size() < 3) { + // case0: inputs size < 2, error + MS_EXCEPTION(ArgumentError) << "Inputs size of AddN less than 2. " << cnode->DebugString(2); + } + + int valuenode_num = + std::accumulate(tuple_inputs.begin() + 1, tuple_inputs.end(), 0, [](int accumulator, const AnfNodePtr &node) { + if (IsValueNode(node)) { + return accumulator + 1; + } else { + return accumulator; + } + }); + if (IntToSize(valuenode_num) == tuple_inputs.size()) { + // case1: all inputs is ValueNode, error + MS_EXCEPTION(ArgumentError) << "All inputs of AddN is ValueNode. " << cnode->DebugString(2); + } + + if (tuple_inputs.size() == 3) { + // case2: inputs size = 2, -> TensorAdd(Tensor, Tensor) + MS_LOG(DEBUG) << "Replace AddN with two inputs with TensorAdd. " << cnode->DebugString(2); + ValuePtr prim_tensoradd = prim::GetPythonOps("TensorAdd", "mindspore.ops.operations"); + std::vector new_xs{func_graph->NewCNode({NewValueNode(prim_tensoradd)}), tuple_inputs[1], + tuple_inputs[2]}; + mng->Replace(node, func_graph->NewCNode(new_xs)); + changed = true; + continue; + } + + auto first_valuenode = std::find_if(tuple_inputs.begin() + 1, tuple_inputs.end(), + [](const AnfNodePtr &node) { return IsValueNode(node); }); + if (first_valuenode == tuple_inputs.end()) { + // no ValueNode input found. 
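Before the case3 branch that follows, here is a minimal Python sketch of the four AddN cases listed above, over plain operand names with a parallel is-constant mask; the function name eliminate_addn and the tuple encoding are illustrative assumptions, not the pass itself.

def eliminate_addn(addends, is_const):
    if len(addends) < 2:
        raise ValueError('AddN needs at least two inputs')            # case 0
    if all(is_const):
        raise ValueError('AddN over constants only is unexpected')    # case 1
    if len(addends) == 2:
        return ('TensorAdd', addends[0], addends[1])                  # case 2
    for i, const in enumerate(is_const):
        if const:                                                     # case 3
            rest = addends[:i] + addends[i + 1:]
            return ('TensorAdd', addends[i], ('AddN',) + tuple(rest))
    return ('AddN',) + tuple(addends)                                 # nothing to rewrite

print(eliminate_addn(['c0', 'x', 'y'], [True, False, False]))
# ('TensorAdd', 'c0', ('AddN', 'x', 'y'))

The intent is simply to keep ValueNode operands out of AKG's AddN while preserving the sum.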
+ continue; + } else { + // case3: has one ValueNode input -> TensorAdd(ValueNode, AddN(Tensor, Tensor, ...)) + std::vector make_tuple_new_xs{ + NewValueNode(prim::kPrimMakeTuple), + }; + std::for_each(tuple_inputs.begin() + 1, tuple_inputs.end(), + [&make_tuple_new_xs, &first_valuenode](const AnfNodePtr &node) { + if (node != *first_valuenode) { + make_tuple_new_xs.push_back(node); + } + }); + ValuePtr prim_addn = prim::GetPythonOps("AddN", "mindspore.ops.operations"); + auto new_addn = func_graph->NewCNode( + {func_graph->NewCNode({NewValueNode(prim_addn)}), func_graph->NewCNode(make_tuple_new_xs)}); + ValuePtr prim_tensoradd = prim::GetPythonOps("TensorAdd", "mindspore.ops.operations"); + auto new_add = + func_graph->NewCNode({func_graph->NewCNode({NewValueNode(prim_tensoradd)}), *first_valuenode, new_addn}); + (void)mng->Replace(node, new_add); + changed = true; + continue; + } + } + + need_update_ |= changed; + return changed; + } + + private: + bool need_update_{false}; +}; } // namespace irpass } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h b/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h index 73dbc152e53..d2e1d15f913 100644 --- a/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h +++ b/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h @@ -79,7 +79,7 @@ class ReduceOneEliminater : public AnfVisitor { } void Visit(const AnfNodePtr &node) override { - if (x_ == nullptr) { + if (!IsVNode(node) && x_ == nullptr) { if (IsValueNode(node)) { is_tensor_ = true; } diff --git a/mindspore/ccsrc/optimizer/irpass/ref_eliminate.h b/mindspore/ccsrc/optimizer/irpass/ref_eliminate.h index ab4f9bc32ea..8d700ec7f8c 100644 --- a/mindspore/ccsrc/optimizer/irpass/ref_eliminate.h +++ b/mindspore/ccsrc/optimizer/irpass/ref_eliminate.h @@ -23,6 +23,8 @@ #include "optimizer/irpass.h" #include "ir/visitor.h" #include "operator/ops.h" +#include "utils/graph_utils.h" +#include "operator/composite/composite.h" namespace mindspore { namespace opt { @@ -36,6 +38,7 @@ class MakeRefEliminater : public AnfVisitor { this->y_ = node; return true; }; + AnfVisitor::Match(prim::kPrimMakeRef, {IsNode, gety, IsNode})(node); return y_; } diff --git a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h b/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h index ed4ac241487..1dc8fbb344d 100644 --- a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h +++ b/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h @@ -142,7 +142,7 @@ class ResetDeferInline : public AnfVisitor { AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { if (IsValueNode(node)) { auto fg = GetValueNode(node); - fg->set_flags(FUNC_GRAPH_FLAG_DEFER_INLINE, false); + fg->set_flag(FUNC_GRAPH_FLAG_DEFER_INLINE, false); } return nullptr; } diff --git a/mindspore/ccsrc/optimizer/irpass/specialize_transform.h b/mindspore/ccsrc/optimizer/irpass/specialize_transform.h index 905479df776..6ac4e40f5ef 100644 --- a/mindspore/ccsrc/optimizer/irpass/specialize_transform.h +++ b/mindspore/ccsrc/optimizer/irpass/specialize_transform.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "optimizer/irpass.h" #include "optimizer/optimizer.h" @@ -41,7 +42,7 @@ class SpecializeTransform { ~SpecializeTransform() = default; FuncGraphPtr operator()(const FuncGraphPtr &func_graph, std::vector graph_args, - std::vector prim_args) { + std::vector prim_args, std::vector value_args) { if (cache_.count(func_graph) == 0) { cache_[func_graph] = {}; } @@ -69,6 +70,13 @@ 
class SpecializeTransform { (void)mng->Replace(params[i], arg); continue; } + if (value_args[i] != nullptr) { + auto const_tensor = *value_args[i]; + auto const_tensor_ptr = std::make_shared(const_tensor); + AnfNodePtr arg = NewValueNode(const_tensor_ptr); + (void)mng->Replace(params[i], arg); + continue; + } new_params.push_back(params[i]); } @@ -108,6 +116,7 @@ class SpecializeOnGraphArguments : public AnfVisitor { std::vector graph_args; std::vector prim_args; + std::vector value_node_args; std::vector new_xs; bool hasVNode = false; for (size_t i = 1; i < inputs.size(); i++) { @@ -115,15 +124,24 @@ class SpecializeOnGraphArguments : public AnfVisitor { auto fg_vnode = GetValueNode(inputs[i]); graph_args.push_back(fg_vnode); prim_args.emplace_back(nullptr); + value_node_args.emplace_back(nullptr); hasVNode = true; } else if (IsValueNode(inputs[i])) { auto p_vnode = GetValueNode(inputs[i]); graph_args.emplace_back(nullptr); prim_args.push_back(p_vnode); + value_node_args.emplace_back(nullptr); + hasVNode = true; + } else if (IsValueNode(inputs[i])) { + tensor::TensorPtr t_vnode = GetValueNode(inputs[i]); + graph_args.emplace_back(nullptr); + prim_args.emplace_back(nullptr); + value_node_args.emplace_back(t_vnode); hasVNode = true; } else { graph_args.emplace_back(nullptr); prim_args.emplace_back(nullptr); + value_node_args.emplace_back(nullptr); new_xs.push_back(inputs[i]); } } @@ -132,7 +150,7 @@ class SpecializeOnGraphArguments : public AnfVisitor { return nullptr; } - auto new_fg = specialize_transform_(inp0_fg, graph_args, prim_args); + auto new_fg = specialize_transform_(inp0_fg, graph_args, prim_args, value_node_args); (void)new_xs.insert(new_xs.begin(), NewValueNode(new_fg)); return node->func_graph()->NewCNode(new_xs); @@ -141,6 +159,146 @@ class SpecializeOnGraphArguments : public AnfVisitor { private: internal::SpecializeTransform specialize_transform_; }; + +// Eliminate unused parameters. 
+// {G, Xs} +class UnusedParasEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (!node->isa() || node->func_graph() == nullptr) { + return nullptr; + } + + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto &inputs = cnode->inputs(); + auto fg = GetValueNode(inputs[0]); + MS_EXCEPTION_IF_NULL(fg); + + std::vector parameters = fg->parameters(); + size_t size = parameters.size(); + if (size != inputs.size() - 1) { + return nullptr; + } + + std::vector new_xs; + std::vector keep_parameters; + auto mng = fg->manager(); + MS_EXCEPTION_IF_NULL(mng); + auto &node_users = mng->node_users(); + bool has_unused_para = false; + for (size_t i = 0; i < size; ++i) { + auto iter = node_users.find(parameters[i]); + if (iter != node_users.end() && !iter->second.empty()) { + keep_parameters.push_back(true); + new_xs.push_back(inputs[i + 1]); + continue; + } + keep_parameters.push_back(false); + has_unused_para = true; + } + + if (!has_unused_para) { + return nullptr; + } + FuncGraphPtr new_fg = TransformableClone(fg, std::make_shared("sp")); + mng->AddFuncGraph(new_fg); + + std::vector new_fg_parameters = new_fg->parameters(); + std::vector new_parameters; + for (size_t i = 0; i < size; i++) { + if (keep_parameters[i]) { + if (parameters[i]->abstract() != nullptr) { + new_fg_parameters[i]->set_abstract(parameters[i]->abstract()); + } + new_parameters.push_back(new_fg_parameters[i]); + } + } + mng->SetParameters(new_fg, new_parameters); + + (void)new_xs.insert(new_xs.begin(), NewValueNode(new_fg)); + return node->func_graph()->NewCNode(new_xs); + } +}; + +// Eliminate unused outputs. +// {G, Xs} +class UnusedOutputEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (!node->isa() || node->func_graph() == nullptr) { + return nullptr; + } + + auto &inputs = node->cast()->inputs(); + auto fg = GetValueNode(inputs[0]); + MS_EXCEPTION_IF_NULL(fg); + auto mng = fg->manager(); + MS_EXCEPTION_IF_NULL(mng); + if (fg->recursive()) { + return nullptr; + } + + auto new_fg = TransformableClone(fg, std::make_shared("fg")); + mng->AddFuncGraph(new_fg); + auto new_fg_output = new_fg->output(); + if (!IsPrimitiveCNode(new_fg_output, prim::kPrimMakeTuple)) { + return nullptr; + } + + auto output_cnode = new_fg_output->cast(); + auto &node_users = mng->node_users(); + if (node_users.count(node) == 0 || node_users[node].empty()) { + return nullptr; + } + std::unordered_set used_output_idx; + std::vector> all_users; + for (auto &node_user : node_users[node]) { + if (!IsPrimitiveCNode(node_user.first, prim::kPrimTupleGetItem)) { + return nullptr; + } + auto user_cnode = node_user.first->cast(); + size_t used_idx = GetValue(user_cnode->input(2)->cast()->value()); + used_output_idx.insert(used_idx); + all_users.push_back(std::make_pair(node_user.first, used_idx)); + } + + if (used_output_idx.size() >= output_cnode->inputs().size() - 1) { + // all output has users. + return nullptr; + } + + if (used_output_idx.empty()) { + // we do not process this case. + return nullptr; + } else if (used_output_idx.size() == 1) { + // after eliminate, only one output left. + new_fg->set_output(output_cnode->input(*used_output_idx.begin() + 1)); + // update users. + for (auto &ret_user : all_users) { + (void)mng->Replace(ret_user.first, node); + } + } else { + // after eliminate, create new multi output. 
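The index remapping that follows in the multi-output branch can be summarized with a short Python sketch: keep only the outputs that some TupleGetItem user reads, and translate the surviving users' old indices to positions in the rebuilt tuple. The helper name and list encoding are assumptions for illustration; note that in the single-survivor case the real pass sets that element directly as the graph output instead of building a one-element tuple.

def eliminate_unused_outputs(outputs, used_indices):
    used = sorted(set(used_indices))
    if not used or len(used) >= len(outputs):
        return outputs, {i: i for i in used}          # nothing to eliminate
    index_map = {old: new for new, old in enumerate(used)}
    return [outputs[i] for i in used], index_map      # new tuple and old->new indices

outs, remap = eliminate_unused_outputs(['o0', 'o1', 'o2', 'o3'], [3, 1])
print(outs)   # ['o1', 'o3']
print(remap)  # {1: 0, 3: 1}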
+ std::vector new_output_inputs{output_cnode->input(0)}; + std::unordered_map new_idx_map; + for (auto idx : used_output_idx) { + new_idx_map[idx] = SizeToInt(new_output_inputs.size() - 1); + new_output_inputs.push_back(output_cnode->input(idx + 1)); + } + new_fg->set_output(new_fg->NewCNode(new_output_inputs)); + // update users. + for (auto &ret_user : all_users) { + auto ret_user_cnode = ret_user.first->cast(); + ret_user_cnode->set_input(2, NewValueNode(new_idx_map[ret_user.second])); + } + } + + auto new_sx = inputs; + new_sx[0] = NewValueNode(new_fg); + return node->func_graph()->NewCNode(new_sx); + } +}; } // namespace irpass } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/optimizer.h b/mindspore/ccsrc/optimizer/optimizer.h index 805543f45b6..3e77edc1e9c 100644 --- a/mindspore/ccsrc/optimizer/optimizer.h +++ b/mindspore/ccsrc/optimizer/optimizer.h @@ -89,7 +89,7 @@ using OptPassGroupMap = std::vector>; class Optimizer : public std::enable_shared_from_this { public: Optimizer(const std::string &name, const pipeline::ResourceBasePtr &resource_ptr) - : name_(name), resource_(resource_ptr), run_only_once_(false), is_watch_renormalize_(false) {} + : name_(name), resource_(resource_ptr), run_only_once_(false), is_watch_renormalize_(false), is_enable_(true) {} virtual ~Optimizer() = default; void Init(const OptPassGroupMap &passes, bool run_only_once) { @@ -132,6 +132,9 @@ class Optimizer : public std::enable_shared_from_this { } FuncGraphPtr step(FuncGraphPtr func_graph, bool use_profile = true) { + if (!is_enable_) { + return func_graph; + } // Optimizer step counter; int counter = -1; bool changes = true; @@ -171,7 +174,7 @@ class Optimizer : public std::enable_shared_from_this { }; use_profile ? (WITH(MsProfile::GetProfile()->Step(pass_names_[i])) opt_func) : opt_func(); if (IS_OUTPUT_ON(mindspore::DEBUG) && MsContext::GetInstance()->save_graphs_flag()) { - MS_LOG(DEBUG) << name_ << " round " << counter << " OptPass " << pass_names_[i] << " end."; + MS_LOG(DEBUG) << "The opt " << name_ << " round " << counter << " OptPass " << pass_names_[i] << " end."; auto fg_name = "opt_substep_" + name_ + "_r" + std::to_string(counter) + "_" + std::to_string(i) + "_" + pass_names_[i]; func_graph->DumpFuncGraph(fg_name); @@ -211,6 +214,7 @@ class Optimizer : public std::enable_shared_from_this { void enable_watch_renormalize() { is_watch_renormalize_ = true; } void disable_watch_renormalize() { is_watch_renormalize_ = false; } bool is_watch_renormalize() { return is_watch_renormalize_; } + void set_enable(bool enable) { is_enable_ = enable; } private: const std::string name_; @@ -220,6 +224,7 @@ class Optimizer : public std::enable_shared_from_this { bool run_only_once_; std::vector untyped_nodes_; bool is_watch_renormalize_; + bool is_enable_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc b/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc index 687bc12f052..999c4a85a90 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc +++ b/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc @@ -64,7 +64,7 @@ bool StepAllreduceFusion(const FuncGraphPtr &root, const opt::OptimizerPtr &opti DumpGraph(root, std::string(ALLREDUCE_FUSION_END)); // allreduce fusion only run once - root->flags()[ALLREDUCE_FUSION_RUN_ONCE_ONLY] = true; + root->set_flag(ALLREDUCE_FUSION_RUN_ONCE_ONLY, true); res->results()[pipeline::kStepParallelGraph] = root; #if 
defined(_WIN32) || defined(_WIN64) auto end_time = std::chrono::steady_clock::now(); diff --git a/mindspore/ccsrc/parallel/context.cc b/mindspore/ccsrc/parallel/context.cc index 6802292cb46..8957dc842c4 100644 --- a/mindspore/ccsrc/parallel/context.cc +++ b/mindspore/ccsrc/parallel/context.cc @@ -158,8 +158,8 @@ void ParallelParameterContextRestoreInNoTraining(const FuncGraphPtr &func_graph, MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(param_node); MS_EXCEPTION_IF_NULL(ptr); - if (!func_graph->has_flag(AUTO_PARALLEL) || (func_graph->flags().count(TRAINING) == 0) || - func_graph->flags()[TRAINING]) { + if (!func_graph->has_flag(AUTO_PARALLEL) || (func_graph->attrs().count(TRAINING) == 0) || + func_graph->has_flag(TRAINING)) { return; } diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc index 429241c8b73..8b4f7e2dec2 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc @@ -107,7 +107,7 @@ bool StepAutoParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &) { time += static_cast(end_time.tv_usec - start_time.tv_usec); MS_LOG(INFO) << "Now leaving step auto parallel, used time: " << time << " us"; - root->flags()[AUTO_PARALLEL_RUN_ONCE_ONLY] = true; + root->set_flag(AUTO_PARALLEL_RUN_ONCE_ONLY, true); return changes; } diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc index a5e5dee9907..fc7b48d2679 100644 --- a/mindspore/ccsrc/parallel/step_parallel.cc +++ b/mindspore/ccsrc/parallel/step_parallel.cc @@ -2270,10 +2270,10 @@ bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) (root->has_flag(SEMI_AUTO_PARALLEL_RUN_ONCE_ONLY))) { if (!root->has_flag(CHECK_SET_STRATEGY_VALID_ONCE_ONLY)) { if (HasStrategy(root)) { - MS_LOG(INFO) << "strategies ignored in " << parallel_mode + MS_LOG(INFO) << "Strategies ignored in " << parallel_mode << ", set_strategy() only valid in [semi_]auto_parallel."; } - root->flags()[CHECK_SET_STRATEGY_VALID_ONCE_ONLY] = true; + root->set_flag(CHECK_SET_STRATEGY_VALID_ONCE_ONLY, true); } return changes; @@ -2330,11 +2330,11 @@ bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) DumpGraph(root, std::string(STEP_PARALLEL_END)); // step parallel only run once - root->flags()[SEMI_AUTO_PARALLEL_RUN_ONCE_ONLY] = true; + root->set_flag(SEMI_AUTO_PARALLEL_RUN_ONCE_ONLY, true); res->results()[pipeline::kStepParallelGraph] = root; // in auto parallel mode, no need to check if stategies set - root->flags()[CHECK_SET_STRATEGY_VALID_ONCE_ONLY] = true; + root->set_flag(CHECK_SET_STRATEGY_VALID_ONCE_ONLY, true); (void)gettimeofday(&end_time, nullptr); uint64_t time = kUSecondInSecond * static_cast(end_time.tv_sec - start_time.tv_sec); diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/init.cc index b8472de4094..7025447a29c 100644 --- a/mindspore/ccsrc/pipeline/init.cc +++ b/mindspore/ccsrc/pipeline/init.cc @@ -151,7 +151,10 @@ PYBIND11_MODULE(_c_expression, m) { .def("set_check_bprop_flag", &mindspore::MsContext::set_check_bprop_flag, "Set whether to check bprop.") .def("get_max_device_memory", &mindspore::MsContext::max_device_memory, "Get deivce memory max size.") .def("set_max_device_memory", &mindspore::MsContext::set_max_device_memory, "Set deivce memory max size.") - .def("set_print_file_path", &mindspore::MsContext::set_print_file_path, "Set path to print."); + .def("set_print_file_path", 
&mindspore::MsContext::set_print_file_path, "Set path to print.") + .def("set_enable_graph_kernel", &mindspore::MsContext::set_enable_graph_kernel, + "Set the GraphKernel switch to on or off.") + .def("get_enable_graph_kernel", &mindspore::MsContext::enable_graph_kernel, "Get the value of GraphKernel switch."); (void)py::class_>(m, "MpiConfig") .def_static("get_instance", &mindspore::MpiConfig::GetInstance, "Get mpi config instance.") diff --git a/mindspore/ccsrc/pipeline/parse/data_converter.cc b/mindspore/ccsrc/pipeline/parse/data_converter.cc index 20f7c0c9ced..330d03d11ca 100644 --- a/mindspore/ccsrc/pipeline/parse/data_converter.cc +++ b/mindspore/ccsrc/pipeline/parse/data_converter.cc @@ -278,7 +278,7 @@ bool ConvertCellObjToFuncGraph(py::object obj, ValuePtr *const data) { if (bprop_graph != nullptr) { (void)func_graph->transforms().insert(std::make_pair(CUSTOM_BPROP_NAME, FuncGraphTransform(bprop_graph))); (void)bprop_graph->transforms().insert(std::make_pair("primal", FuncGraphTransform(func_graph))); - func_graph->set_flags(FUNC_GRAPH_FLAG_DEFER_INLINE, true); + func_graph->set_flag(FUNC_GRAPH_FLAG_DEFER_INLINE, true); } } *data = func_graph; diff --git a/mindspore/ccsrc/pipeline/parse/parse.cc b/mindspore/ccsrc/pipeline/parse/parse.cc index 972f11230ee..6d5c28c98c7 100644 --- a/mindspore/ccsrc/pipeline/parse/parse.cc +++ b/mindspore/ccsrc/pipeline/parse/parse.cc @@ -1448,15 +1448,23 @@ bool ParseAst::UpdateFuncGraphFlags(const FuncGraphPtr &func_graph) { } py::dict flags = python_adapter::GetPyObjAttr(obj_, PYTHON_EXTERN_MINDSPORE_FLAG); for (auto &item : flags) { - if (!py::isinstance(item.first) || !py::isinstance(item.second)) { + if (!py::isinstance(item.first)) { MS_LOG(ERROR) << "Type error in flags dict convert"; return false; } auto name = py::cast(item.first); - auto value = py::cast(item.second); - MS_LOG(DEBUG) << "Flag name: " << name << ". Value: " << value; - - func_graph->set_flags(name, value); + if (py::isinstance(item.second)) { + auto value = py::cast(item.second); + MS_LOG(DEBUG) << "Flag name: " << name << ". Value: " << value; + func_graph->set_flag(name, value); + } else if (py::isinstance(item.second)) { + auto value = py::cast(item.second); + MS_LOG(DEBUG) << "Flag name: " << name << ". Value: " << value; + func_graph->set_attr(name, MakeValue(value)); + } else { + MS_LOG(ERROR) << "Type error in flags/attrs dict convert"; + return false; + } } return true; diff --git a/mindspore/ccsrc/pipeline/parse/parse.h b/mindspore/ccsrc/pipeline/parse/parse.h index 969effbd18a..0a56ccaed9a 100644 --- a/mindspore/ccsrc/pipeline/parse/parse.h +++ b/mindspore/ccsrc/pipeline/parse/parse.h @@ -223,8 +223,8 @@ class Parser { FunctionBlockPtr block = std::make_shared(parse); // In order to keep effect order in the sub-graphs which generated by control flow. // We copy the flags from the top graph to the sub-graphs. 
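The UpdateFuncGraphFlags change above splits the cell's flag dictionary by value type: booleans stay graph flags, strings become graph attributes, and anything else is rejected. A rough Python model of that dispatch follows (the names and the two-dict return are illustrative assumptions, not the parser's API):

def update_func_graph_flags(flag_dict):
    flags, attrs = {}, {}
    for name, value in flag_dict.items():
        if not isinstance(name, str):
            raise TypeError('flag name must be a string')
        if isinstance(value, bool):
            flags[name] = value       # boolean entries become graph flags
        elif isinstance(value, str):
            attrs[name] = value       # string entries become graph attributes
        else:
            raise TypeError('unsupported flag value for {!r}'.format(name))
    return flags, attrs

print(update_func_graph_flags({'defer_inline': True, 'graph_kernel': 'Fused'}))
# ({'defer_inline': True}, {'graph_kernel': 'Fused'})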
- if (func_graph_ && !func_graph_->flags().empty()) { - block->func_graph()->set_flags(func_graph_->flags()); + if (func_graph_ && !func_graph_->attrs().empty()) { + block->func_graph()->set_attrs(func_graph_->attrs()); } func_block_list_.push_back(block); return block; diff --git a/mindspore/ccsrc/pipeline/pass.cc b/mindspore/ccsrc/pipeline/pass.cc index 0ffaebac4cb..94063fb780d 100644 --- a/mindspore/ccsrc/pipeline/pass.cc +++ b/mindspore/ccsrc/pipeline/pass.cc @@ -25,12 +25,14 @@ #include #include "ir/func_graph_cloner.h" +#include "debug/anf_ir_utils.h" #include "pipeline/parse/parse_base.h" #include "pipeline/parse/data_converter.h" #include "pipeline/resource.h" #include "pipeline/validator.h" #include "optimizer/optimizer.h" #include "optimizer/cse.h" +#include "optimizer/graph_kernel_reuse.h" #include "optimizer/clean.h" #include "optimizer/irpass.h" #include "optimizer/control_depend.h" @@ -38,6 +40,7 @@ #include "parallel/step_auto_parallel.h" #include "parallel/allreduce_fusion/step_allreduce_fusion.h" #include "utils/any.h" +#include "utils/log_adapter.h" namespace mindspore { namespace pipeline { @@ -162,6 +165,40 @@ OptPassGroupMap GetOptPassesB(const opt::irpass::OptimizeIRPassLib &irpass) { return map; } +OptPassGroupMap GetOptPassesGraphKernelA(const opt::irpass::OptimizeIRPassLib &irpass) { + opt::OptPassConfig interface_fusion = opt::OptPassConfig({ + irpass.mark_interface_fusion_, + }); + OptPassGroupMap map({ + {"graph_kernel_reuse", opt::OptPassConfig(opt::GraphKernelReuse())}, + {"interface_fusion", interface_fusion}, + {"renormalize", opt::OptPassConfig::Renormalize()}, + {"cse", opt::OptPassConfig(opt::CSE(false))}, + }); + return map; +} + +OptPassGroupMap GetOptPassesGraphKernelB(const opt::irpass::OptimizeIRPassLib &irpass) { + opt::OptPassConfig elim_1 = opt::OptPassConfig({ + irpass.addn_eliminate_, + irpass.incorporate_getitem_from_param_, + }); + opt::OptPassConfig elim_2 = opt::OptPassConfig({ + irpass.unused_parameter_eliminate_, + irpass.unused_output_eliminate_, + }); + OptPassGroupMap map({ + {"elim_1", elim_1}, + {"renormalize", opt::OptPassConfig::Renormalize()}, + {"elim_2", elim_2}, + }); + return map; +} + +OptPassGroupMap GetOptPassesC(const opt::irpass::OptimizeIRPassLib &irpass) { + return OptPassGroupMap({{"renormalize", opt::OptPassConfig::Renormalize()}}); +} + OptPassGroupMap GetControlPhases(const opt::irpass::OptimizeIRPassLib &irpass) { opt::OptPassConfig control_group = opt::OptPassConfig({irpass.convert_switch_replacement_}, true); OptPassGroupMap map({ @@ -191,8 +228,19 @@ void InitOpt(const ResourcePtr &res) { opt::irpass::OptimizeIRPassLib irpass; g_pass_opts["opt_a"] = Optimizer::MakeOptimizer("opt_a", res, GetOptPassesA(irpass)); g_pass_opts["opt_b"] = Optimizer::MakeOptimizer("opt_b", res, GetOptPassesB(irpass), false, true); + g_pass_opts["opt_graph_kernel_a"] = + Optimizer::MakeOptimizer("opt_graph_kernel_a", res, GetOptPassesGraphKernelA(irpass), true); + g_pass_opts["opt_graph_kernel_b"] = + Optimizer::MakeOptimizer("opt_graph_kernel_b", res, GetOptPassesGraphKernelB(irpass), false); + g_pass_opts["renormal"] = Optimizer::MakeOptimizer("renormal", res, GetOptPassesC(irpass)); g_pass_opts["opt_control"] = Optimizer::MakeOptimizer("opt_control", res, GetControlPhases(irpass), false, true); g_pass_opts["opt_prepare"] = Optimizer::MakeOptimizer("opt_prepare", res, GetPreparePhases(irpass)); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->enable_graph_kernel())) { + 
g_pass_opts["opt_graph_kernel_a"]->set_enable(false); + g_pass_opts["opt_graph_kernel_b"]->set_enable(false); + } } } } // namespace @@ -224,9 +272,13 @@ bool OptPassGroup(const ResourcePtr &res, const std::string &name) { bool OptPassAGroup(const ResourcePtr &res) { return OptPassGroup(res, "opt_a"); } bool OptPassBGroup(const ResourcePtr &res) { return OptPassGroup(res, "opt_b"); } +bool OptPassGraphKernelGroupA(const ResourcePtr &res) { return OptPassGroup(res, "opt_graph_kernel_a"); } +bool OptPassGraphKernelGroupB(const ResourcePtr &res) { return OptPassGroup(res, "opt_graph_kernel_b"); } bool ControlGroup(const ResourcePtr &res) { return OptPassGroup(res, "opt_control"); } bool PrepareGroup(const ResourcePtr &res) { return OptPassGroup(res, "opt_prepare"); } +bool OptPassRNGroup(const ResourcePtr &res) { return OptPassGroup(res, "renormal"); } + bool AddControlDependPass(const ResourcePtr &res) { FuncGraphPtr func_graph = res->func_graph(); MS_EXCEPTION_IF_NULL(func_graph); @@ -270,8 +322,10 @@ bool InferenceOptPreparePass(const ResourcePtr &res) { std::vector kVmPasses = {{"simplify_data_structures", SimplifyDataStructuresPass}, {"opt_a", OptPassAGroup}, {"opt_b", OptPassBGroup}, - {"add_control_depend", AddControlDependPass}, - {"cconv", CconvPass}}; + {"cconv", CconvPass}, + {"opt_graph_kernel_a", OptPassGraphKernelGroupA}, + {"opt_graph_kernel_b", OptPassGraphKernelGroupB}, + {"add_control_depend", AddControlDependPass}}; std::vector kGePasses = {{"simplify_data_structures", SimplifyDataStructuresPass}, {"opt_a", OptPassAGroup}, diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc index 27761096553..ea0ca14c7a6 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.cc +++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc @@ -488,7 +488,7 @@ py::object ExecDFGraph(const std::map &info, const #ifdef ENABLE_INFER // Now don't use the graph because the exec ge function don't take effect MS_EXCEPTION_IF_NULL(info.at(phase)->func_graph); - if (ENABLE_TRAIN != info.at(phase)->func_graph->flags()["training"]) { + if (ENABLE_TRAIN != info.at(phase)->func_graph->has_flag("training")) { MS_LOG(ERROR) << "Graph training mode mismatch mode of libraries"; ConfigManager::GetInstance().ResetConfig(); return py::none(); diff --git a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc index 254fd43c0b1..c9b1ce4f937 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc @@ -165,7 +165,7 @@ AbstractBasePtrList FuncGraphEvaluator::BroadenUndeterminedArgs(const AbstractBa MS_LOG(DEBUG) << "Joined args: " << ::mindspore::ToString(joined_args_spec_list); // If there is loop variant, all arguments need to be broaden to avoid wrong constant propagation. if (!(joined_args_spec_list == args_spec_list)) { - func_graph_->set_flags(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); + func_graph_->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); } return joined_args_spec_list; } @@ -178,7 +178,7 @@ AbstractBasePtrList FuncGraphEvaluator::BroadenUndeterminedArgs(const AbstractBa // If there is loop variant, all arguments need to be broaden to avoid wrong constant propagation. 
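Stepping back to the pipeline change in pass.cc above: the two graph-kernel optimizer groups are registered unconditionally but carry an enable bit, and InitOpt clears that bit when the context switch enable_graph_kernel is off, so step() becomes a no-op for them. A compact Python model of that gating is shown below (the class and flag names are illustrative, not the C++ Optimizer API):

class Optimizer:
    def __init__(self, name, passes):
        self.name, self.passes, self.enabled = name, passes, True

    def step(self, graph):
        if not self.enabled:          # mirrors the early return added to Optimizer::step
            return graph
        for run_pass in self.passes:
            graph = run_pass(graph)
        return graph

def init_opt(enable_graph_kernel):
    opts = {
        'opt_a': Optimizer('opt_a', [lambda g: g + ['opt_a']]),
        'opt_graph_kernel_a': Optimizer('opt_graph_kernel_a', [lambda g: g + ['gk_a']]),
        'opt_graph_kernel_b': Optimizer('opt_graph_kernel_b', [lambda g: g + ['gk_b']]),
    }
    if not enable_graph_kernel:
        opts['opt_graph_kernel_a'].enabled = False
        opts['opt_graph_kernel_b'].enabled = False
    return opts

graph = []
for opt in init_opt(enable_graph_kernel=False).values():
    graph = opt.step(graph)
print(graph)  # ['opt_a'] -- the graph-kernel groups are skipped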
if (!(joined_args_spec_list == args_spec_list)) { trace_.push_back(joined_args_spec_list); - func_graph_->set_flags(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); + func_graph_->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); } MS_LOG(DEBUG) << "Joined eval args: " << ::mindspore::ToString(joined_args_spec_list); return joined_args_spec_list; diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc index 9299a020020..9da148d2a73 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc @@ -479,7 +479,7 @@ void AnalysisEngine::SetUndeterminedFlag(const EvaluatorPtr &evaluator) { if (undetermined_fgs) { auto fg_parent = fg->parent(); MS_EXCEPTION_IF_NULL(fg_parent); - fg_parent->set_flags(kFuncGraphFlagUndetermined, true); + fg_parent->set_flag(kFuncGraphFlagUndetermined, true); MS_LOG(DEBUG) << "Set graph undetermined: " << fg_parent->ToString(); } } diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc index f4f9d8da14b..981e2255f36 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc @@ -16,6 +16,7 @@ #include "pre_activate/ascend/ascend_backend_optimization.h" #include #include +#include #include "pre_activate/common/optimizer.h" #include "pre_activate/ascend/ir_fission/bn_split.h" #include "pre_activate/ascend/ir_fission/bn_grad_split.h" @@ -63,6 +64,9 @@ #include "pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.h" #include "pre_activate/pass/eliminate_redundant_op.h" #include "pre_activate/pass/common_subexpression_elimination.h" +#include "pre_activate/pass/fuse_graph_kernel.h" +#include "pre_activate/pass/fuse_basic.h" +#include "pre_activate/pass/add_atomic_clean.h" #include "pre_activate/ascend/format_type/merge_cast_to_op.h" #include "pre_activate/ascend/format_type/check_consistency.h" #include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" @@ -88,6 +92,8 @@ #include "pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h" #include "pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h" #include "pre_activate/ascend/ir_fission/split_fission.h" +#include "pre_activate/ascend/format_type/modify_ops_attrs.h" +#include "pre_activate/ascend/format_type/remove_no_use_reshape_op.h" #include "utils/context/ms_context.h" #include "utils/config_manager.h" #include "debug/anf_ir_dump.h" @@ -164,6 +170,19 @@ void RunOpAscendDataLayout(const std::shared_ptr &kernel_g kernel_graph->SetExecOrderByDefault(); } +void AscendGraphKernelCommonProcess(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto optimizer = std::make_shared(); + MS_EXCEPTION_IF_NULL(optimizer); + auto common_process = std::make_shared("graph_kernel_common_process"); + MS_EXCEPTION_IF_NULL(common_process); + common_process->AddPass(std::make_shared()); + common_process->AddPass(std::make_shared()); + optimizer->AddPassManager(common_process); + (void)optimizer->Optimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); +} + void AscendDataLayout(const std::shared_ptr &kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); auto optimizer = std::make_shared(); @@ -332,7 +351,94 @@ void AscendBackendOptimization(const std::shared_ptr &kern std::string file_path = save_graphs_path + "/" + "hwopt_d_end" + 
"_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; DumpIR(file_path, kernel_graph, true); - DumpIRProto(kernel_graph, "after_hwopt_" + std::to_string(kernel_graph->graph_id())); + DumpIRProto(kernel_graph, "after_hwopt"); + kernel_graph->DumpFuncGraph("hwopt_d_end"); + } +} + +void AscendBackendGraphKernelOpt(const std::shared_ptr &kernel_graph, + bool is_before_kernel_select) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->enable_graph_kernel())) { + return; + } + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_graph_kernel_opt_before_graph_" + + std::to_string(!is_before_kernel_select) + "_" + std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph); + } + + // Fuse graph kernels with basic ops + FuseGraphKernel(kernel_graph, is_before_kernel_select); + + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_graph_kernel_opt_end_graph_" + + std::to_string(!is_before_kernel_select) + "_" + std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph, true); + } +} + +void AscendBackendFuseBasicOpt(const std::shared_ptr &kernel_graph, + bool is_before_kernel_select) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->enable_graph_kernel())) { + return; + } + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_fuse_basic_opt_before_graph_" + + std::to_string(!is_before_kernel_select) + "_" + std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph, true); + } + + // Fuse basic ops with basic ops + FuseBasic(kernel_graph, is_before_kernel_select); + + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_fuse_basic_opt_end_graph_" + + std::to_string(!is_before_kernel_select) + "_" + std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph, true); + } +} + +void AscendBackendAddAtomicClean(const std::shared_ptr &kernel_graph) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->enable_graph_kernel())) { + return; + } + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_add_atomic_clean_before" + "_graph_" + + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph); + } + + AddAtomicClean(kernel_graph); + + if (save_graphs) { + std::string file_path = + save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph, true); } } diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h index 46d9f9bd1b1..222c4b90b5a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h @@ -24,6 
+24,12 @@ void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr &kernel_graph); void AscendMixPrecision(const std::shared_ptr &kernel_graph); void AscendBackendOptimization(const std::shared_ptr &kernel_graph); +void AscendGraphKernelCommonProcess(const std::shared_ptr &kernel_graph); +void AscendBackendGraphKernelOpt(const std::shared_ptr &kernel_graph, + bool is_before_kernel_select = false); +void AscendBackendFuseBasicOpt(const std::shared_ptr &kernel_graph, + bool is_before_kernel_select = false); +void AscendBackendAddAtomicClean(const std::shared_ptr &kernel_graph); void AscendBackendIRFusionOptimization(const std::shared_ptr &kernel_graph); void AscendBackendUBFusionOptimization(const std::shared_ptr &kernel_graph); } // namespace opt diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc index 8a14b438bb1..9c498bd7361 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc @@ -22,6 +22,7 @@ #include "utils/utils.h" #include "device/kernel_info.h" #include "kernel/oplib/oplib.h" +#include "kernel/common_utils.h" #include "operator/ops.h" #include "session/anf_runtime_algorithm.h" #include "session/kernel_graph.h" @@ -229,7 +230,7 @@ AnfNodePtr AddCastOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePtr if (kernel::OpLib::FindOp(prim::kPrimCast->name(), kernel::kTBE) != nullptr) { builder.SetKernelType(KernelType::TBE_KERNEL); } else { - builder.SetKernelType(KernelType::AUTO_DIFF_KERNEL); + builder.SetKernelType(KernelType::AKG_KERNEL); } // if kernel info is null , it remarks this function is running ut if (cast->kernel_info() == nullptr) { @@ -284,22 +285,17 @@ CNodePtr InsertCastForInput(const FuncGraphPtr &func_graph, const CNodePtr &cnod MS_EXCEPTION_IF_NULL(cnode); std::vector new_inputs = {AnfAlgo::GetCNodePrimitiveNode(cnode)}; for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); ++input_index) { - TypeId origin_type; + const auto infer_type = AnfAlgo::GetPrevNodeOutputInferDataType(cnode, input_index); + TypeId origin_type(kTypeUnknown); auto cur_input = AnfAlgo::GetInputNode(cnode, input_index); auto kernel_with_index = AnfAlgo::VisitKernel(cur_input, 0); - auto is_weight_boundary = [](const AnfNodePtr &node) -> bool { - if (node->isa()) { - return true; - } - if (node->isa() && AnfAlgo::IsParameterWeight(node->cast())) { - return true; - } - return false; - }; auto real_input_node = kernel_with_index.first; - if (is_weight_boundary(real_input_node)) { + if (kernel::IsWeightBoundary(real_input_node) || func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { // weight - origin_type = AnfAlgo::GetPrevNodeOutputDeviceDataType(cnode, input_index); + origin_type = AnfAlgo::GetPrevNodeOutputPrecision(cnode, input_index); + if (origin_type == kTypeUnknown) { + origin_type = AnfAlgo::GetPrevNodeOutputDeviceDataType(cnode, input_index); + } } else { // feature map origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(cnode, input_index); @@ -307,9 +303,13 @@ CNodePtr InsertCastForInput(const FuncGraphPtr &func_graph, const CNodePtr &cnod const std::string dev_fmt = AnfAlgo::GetInputFormat(cnode, input_index); const std::vector origin_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, input_index); const TypeId device_type = AnfAlgo::GetInputDeviceDataType(cnode, input_index); - if (origin_type != device_type) { + // In graph kernel, we check parameter, + // the eliminate pass will not eliminate this 
case, so we just do not insert the noused cast. + if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) && IsValueNode(cur_input)) { + new_inputs.push_back(cur_input); + } else if (origin_type != device_type) { auto cast = - AddCastOpNodeToGraph(func_graph, cur_input, dev_fmt, origin_type, device_type, origin_shape, origin_type); + AddCastOpNodeToGraph(func_graph, cur_input, dev_fmt, origin_type, device_type, origin_shape, infer_type); MS_EXCEPTION_IF_NULL(cast); cast->set_scope(cnode->scope()); AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), cast); diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc index d2557a4bb70..7c8fb70fda1 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc @@ -17,9 +17,12 @@ #include #include +#include #include "utils/utils.h" #include "session/anf_runtime_algorithm.h" +#include "common/utils.h" +#include "kernel/common_utils.h" namespace mindspore { namespace opt { @@ -74,11 +77,21 @@ const AnfNodePtr CheckConsistency::Process(const FuncGraphPtr &, const AnfNodePt if (node == nullptr || !node->isa() || !AnfAlgo::IsRealKernel(node)) { return nullptr; } - CNodePtr cnode = node->cast(); - for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(cnode); i++) { - if (!CheckFormatForConsistency(cnode, i) || !CheckDataTypeForConsistency(cnode, i)) { - MS_LOG(EXCEPTION) << "Found inconsistent format or data type! Op: " << AnfAlgo::GetCNodeName(node) << "[" - << node->DebugString() << "]"; + + std::vector todos = {node}; + if (AnfAlgo::IsGraphKernel(node)) { + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + kernel::GetValidKernelNodes(sub_graph, &todos); + } + + for (auto &t : todos) { + CNodePtr cnode = t->cast(); + for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(cnode); i++) { + if (!CheckFormatForConsistency(cnode, i) || !CheckDataTypeForConsistency(cnode, i)) { + MS_LOG(EXCEPTION) << "Found inconsistent format or data type! 
Op: " << AnfAlgo::GetCNodeName(cnode) << "[" + << cnode->DebugString() << "]"; + } } } return nullptr; diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc index 0fefab10d0c..3d09233d996 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include "device/kernel_info.h" #include "pre_activate/ascend/ascend_helper.h" @@ -27,34 +28,45 @@ #include "session/anf_runtime_algorithm.h" #include "session/kernel_graph.h" #include "utils/utils.h" +#include "kernel/common_utils.h" namespace mindspore { namespace opt { namespace { -AnfNodePtr InsertCastForMultipleOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { +AnfNodePtr InsertCastForMultipleOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, + const std::vector &need_insert_cast) { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(cnode); std::vector make_tuple_inputs; AbstractBasePtrList abstract_list; make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple)); for (size_t output_idx = 0; output_idx < AnfAlgo::GetOutputTensorNum(cnode); ++output_idx) { - const std::string dev_fmt = AnfAlgo::GetOutputFormat(cnode, output_idx); - const std::vector origin_shape = AnfAlgo::GetOutputInferShape(cnode, output_idx); - const TypeId origin_type = AnfAlgo::GetOutputInferDataType(cnode, output_idx); - const TypeId device_type = AnfAlgo::GetOutputDeviceDataType(cnode, output_idx); + AnfNodePtr replace_node = nullptr; + const auto origin_shape = AnfAlgo::GetOutputInferShape(cnode, output_idx); + const auto infer_type = AnfAlgo::GetOutputInferDataType(cnode, output_idx); auto idx = NewValueNode(SizeToInt(output_idx)); MS_EXCEPTION_IF_NULL(idx); auto imm = std::make_shared(output_idx); idx->set_abstract(std::make_shared(imm)); auto getitem = func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), cnode, idx}); - AnfAlgo::SetOutputInferTypeAndShape({origin_type}, {origin_shape}, getitem.get()); - AnfNodePtr replace_node = nullptr; - if (origin_type != device_type) { - replace_node = - AddCastOpNodeToGraph(func_graph, getitem, dev_fmt, device_type, origin_type, origin_shape, origin_type); - MS_EXCEPTION_IF_NULL(replace_node); - replace_node->set_scope(cnode->scope()); - AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), replace_node); + AnfAlgo::SetOutputInferTypeAndShape({infer_type}, {origin_shape}, getitem.get()); + if (need_insert_cast[output_idx]) { + const auto dev_fmt = AnfAlgo::GetOutputFormat(cnode, output_idx); + TypeId origin_type(kTypeUnknown); + if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + origin_type = AnfAlgo::GetCNodeOutputPrecision(cnode); + } + origin_type = origin_type == kTypeUnknown ? 
infer_type : origin_type; + const auto device_type = AnfAlgo::GetOutputDeviceDataType(cnode, output_idx); + if (origin_type != device_type) { + replace_node = + AddCastOpNodeToGraph(func_graph, getitem, dev_fmt, device_type, origin_type, origin_shape, infer_type); + MS_EXCEPTION_IF_NULL(replace_node); + replace_node->set_scope(cnode->scope()); + AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), replace_node); + } else { + replace_node = getitem; + } } else { replace_node = getitem; } @@ -65,9 +77,10 @@ AnfNodePtr InsertCastForMultipleOutput(const FuncGraphPtr &func_graph, const CNo MS_EXCEPTION_IF_NULL(make_tuple); make_tuple->set_abstract(std::make_shared(abstract_list)); return make_tuple; -} +} // namespace -AnfNodePtr InsertCastForOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { +AnfNodePtr InsertCastForOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, + const std::vector &need_insert_cast) { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(cnode); if (AnfAlgo::GetOutputTensorNum(cnode) == 0) { @@ -76,14 +89,23 @@ AnfNodePtr InsertCastForOutput(const FuncGraphPtr &func_graph, const CNodePtr &c MS_EXCEPTION_IF_NULL(cnode->Type()); // Single output if (!cnode->Type()->isa()) { + if (!need_insert_cast[0]) { + return cnode; + } + const std::string dev_fmt = AnfAlgo::GetOutputFormat(cnode, 0); std::vector origin_shape = AnfAlgo::GetOutputInferShape(cnode, 0); - const TypeId origin_type = AnfAlgo::GetOutputInferDataType(cnode, 0); + const auto infer_type = AnfAlgo::GetOutputInferDataType(cnode, 0); + TypeId origin_type(kTypeUnknown); + if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + origin_type = AnfAlgo::GetCNodeOutputPrecision(cnode); + } + origin_type = origin_type == kTypeUnknown ? infer_type : origin_type; const TypeId device_type = AnfAlgo::GetOutputDeviceDataType(cnode, 0); AnfNodePtr replace_node = cnode; if (origin_type != device_type) { replace_node = - AddCastOpNodeToGraph(func_graph, cnode, dev_fmt, device_type, origin_type, origin_shape, origin_type); + AddCastOpNodeToGraph(func_graph, cnode, dev_fmt, device_type, origin_type, origin_shape, infer_type); MS_EXCEPTION_IF_NULL(replace_node); replace_node->set_scope(cnode->scope()); AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), replace_node); @@ -91,7 +113,57 @@ AnfNodePtr InsertCastForOutput(const FuncGraphPtr &func_graph, const CNodePtr &c return replace_node; } // Multiple output - return InsertCastForMultipleOutput(func_graph, cnode); + return InsertCastForMultipleOutput(func_graph, cnode, need_insert_cast); +} + +AnfNodePtr ProcessGraphKernelOp(const FuncGraphPtr &func_graph, const AnfNodePtr &node) { + // insert cast for ops in graph kernel. 
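The single-output path of InsertCastForOutput above chooses the "origin" type as the node's recorded output precision when the graph is a graph kernel and that precision is known, falling back to the inferred type, and only inserts a Cast when that type differs from the device output type. A small Python summary of the decision (the type strings and helper name are illustrative assumptions):

def need_output_cast(infer_type, device_type, fixed_precision=None, in_graph_kernel=False):
    # prefer the fixed output precision inside a graph kernel, otherwise the inferred type
    origin_type = fixed_precision if (in_graph_kernel and fixed_precision) else infer_type
    return origin_type != device_type, origin_type

print(need_output_cast('float32', 'float16'))                   # (True, 'float32')
print(need_output_cast('float32', 'float16', 'float16', True))  # (False, 'float16')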
+ auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + auto mng = sub_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + std::vector todo; + std::vector> graph_rets; + kernel::GetValidKernelNodes(sub_graph, &todo); + kernel::GetGraphRealOutput(sub_graph, &graph_rets); + for (auto &t : todo) { + AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), t); + // process input + CNodePtr t_cnode = t->cast(); + MS_EXCEPTION_IF_NULL(t_cnode); + auto t_new_node = InsertCastForInput(sub_graph, t_cnode); + AnfNodePtr t_new_node_1 = nullptr; + std::vector need_insert_cast(AnfAlgo::GetOutputTensorNum(t), true); + // process output + auto iter = std::find_if(graph_rets.begin(), graph_rets.end(), + [&t](const std::pair &ret) { return ret.first == t; }); + if (iter != graph_rets.end()) { + auto t_fix_output_type = AnfAlgo::GetCNodeOutputPrecision(t); + auto t_output_type = AnfAlgo::GetOutputDeviceDataType(t, iter->second); + auto graph_output_type = AnfAlgo::GetOutputDeviceDataType(node, iter - graph_rets.begin()); + if (t_fix_output_type == kTypeUnknown && t_output_type == graph_output_type) { + need_insert_cast[iter->second] = false; + } else if (t_fix_output_type == t_output_type && t_output_type == graph_output_type) { + need_insert_cast[iter->second] = false; + } + t_new_node_1 = InsertCastForOutput(sub_graph, t_new_node, need_insert_cast); + } else { + t_new_node_1 = InsertCastForOutput(sub_graph, t_new_node, need_insert_cast); + } + + if (t_new_node_1 != nullptr && t_new_node_1 != t) { + (void)mng->Replace(t, t_new_node_1); + } + } + + // insert cast for graph kernel. + AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); + // process input + CNodePtr cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto new_node = InsertCastForInput(func_graph, cnode); + // process output + return InsertCastForOutput(func_graph, new_node, std::vector(AnfAlgo::GetOutputTensorNum(new_node), true)); } } // namespace @@ -106,13 +178,27 @@ const AnfNodePtr InsertCast::Process(const FuncGraphPtr &func_graph, const AnfNo if (!AnfAlgo::IsRealCNodeKernel(node) || func_graph == nullptr) { return nullptr; } + + if (AnfAlgo::IsGraphKernel(node)) { + return ProcessGraphKernelOp(func_graph, node); + } else { + // insert cast for single op. + AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); + // process input + CNodePtr cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto new_node = InsertCastForInput(func_graph, cnode); + // process output + return InsertCastForOutput(func_graph, new_node, std::vector(AnfAlgo::GetOutputTensorNum(new_node), true)); + } + // insert cast for single op. 
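For nodes inside a graph kernel, the loop above computes a per-output need_insert_cast mask: an output that is also a real output of the composite node skips the extra Cast when its device type already matches the composite output's device type and no fixed precision contradicts it; every other output keeps the default cast. A minimal Python restatement of that rule (the function and argument names are assumptions for illustration):

def outputs_needing_cast(num_outputs, graph_ret_index, node_out_type,
                         graph_out_type, fixed_precision=None):
    need = [True] * num_outputs
    if graph_ret_index is not None and node_out_type == graph_out_type:
        if fixed_precision is None or fixed_precision == node_out_type:
            need[graph_ret_index] = False   # types already agree, no cast needed
    return need

print(outputs_needing_cast(2, 1, 'float16', 'float16'))  # [True, False]
print(outputs_needing_cast(2, 1, 'float16', 'float32'))  # [True, True]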
AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); // process input CNodePtr cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); auto new_node = InsertCastForInput(func_graph, cnode); // process output - return InsertCastForOutput(func_graph, new_node); + return InsertCastForOutput(func_graph, new_node, std::vector(AnfAlgo::GetOutputTensorNum(new_node), true)); } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc index 8bb58c18a59..3c37e098e7a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc @@ -133,6 +133,9 @@ AnfNodePtr MergeCastToNextOp(const FuncGraphPtr &graph, const CNodePtr &node, co return nullptr; } auto next_cnode = next_node->cast(); + if (AnfAlgo::IsGraphKernel(next_node)) { + return nullptr; + } auto next_op_name = AnfAlgo::GetCNodeName(next_node); std::vector> kernel_info_list; kernel_query->Query(next_cnode, &kernel_info_list); @@ -206,6 +209,9 @@ AnfNodePtr MergeCastToPriorOp(const FuncGraphPtr &graph, const CNodePtr &cur_nod return nullptr; } MS_EXCEPTION_IF_NULL(prior_op); + if (AnfAlgo::IsGraphKernel(prior_op)) { + return nullptr; + } std::vector> kernel_info_list; kernel_query->Query(prior_op, &kernel_info_list); diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc new file mode 100644 index 00000000000..42061957b90 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc @@ -0,0 +1,99 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "pre_activate/ascend/format_type/modify_ops_attrs.h" +#include +#include +#include "utils/utils.h" +#include "pre_activate/common/helper.h" +#include "kernel/common_utils.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" + +namespace mindspore { +namespace opt { +namespace { +AnfNodePtr ModifyReduceOpsAttrs(const CNodePtr &cnode) { + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0); + auto input_format = AnfAlgo::GetInputFormat(cnode, 0); + if (input_shape.size() == 5 || input_format != kOpFormat_NC1HWC0) { + return nullptr; + } + if (!AnfAlgo::HasNodeAttr(kAttrKeepDims, cnode)) { + return nullptr; + } + + AnfAlgo::SetNodeAttr(kAttrKeepDims, MakeValue(true), cnode); + return cnode; +} + +AnfNodePtr ModifyTileOpAttrs(const CNodePtr &cnode) { + auto input_shape = AnfAlgo::GetInputDeviceShape(cnode, 0); + if (input_shape.size() != 5) { + return nullptr; + } + if (!AnfAlgo::HasNodeAttr(kAttrMultiples, cnode)) { + return nullptr; + } + + auto multiples = AnfAlgo::GetNodeAttr>(cnode, kAttrMultiples); + if (multiples.size() == 4 && multiples[1] == 1) { + multiples.push_back(1); + AnfAlgo::SetNodeAttr(kAttrMultiples, MakeValue(multiples), cnode); + } + + return cnode; +} + +AnfNodePtr ModifyAttrs(const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + auto op_name = AnfAlgo::GetCNodeName(cnode); + if (op_name == prim::kPrimTile->name()) { + return ModifyTileOpAttrs(cnode); + } else if (op_name == prim::kPrimReduceSum->name()) { + // kPrimReduceMean + // kPrimReduceSum + // kPrimReduceAll + // kPrimReduceMax + // kPrimReduceMin + return ModifyReduceOpsAttrs(cnode); + } + return nullptr; +} +} // namespace + +const AnfNodePtr ModifyOpAttrs::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + if (node == nullptr || !node->isa() || !AnfAlgo::IsGraphKernel(node)) { + return nullptr; + } + MS_LOG(DEBUG) << "====Process op: " << AnfAlgo::GetCNodeName(node); + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + auto manager = fg->manager(); + MS_EXCEPTION_IF_NULL(manager); + std::vector todos; + kernel::GetValidKernelNodes(fg, &todos); + for (auto &t : todos) { + auto new_node = ModifyAttrs(t->cast()); + if (new_node != nullptr && new_node != t) { + (void)manager->Replace(t, new_node); + } + } + return node; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h b/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h new file mode 100644 index 00000000000..25ec94b6b4f --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h @@ -0,0 +1,33 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MODIFY_OPS_ATTRS_H +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MODIFY_OPS_ATTRS_H + +#include "pre_activate/common/optimizer.h" + +namespace mindspore { +namespace opt { +class ModifyOpAttrs : public PatternProcessPass { + public: + explicit ModifyOpAttrs(bool multigraph = true) : PatternProcessPass("modify_ops_attrs", multigraph) {} + ~ModifyOpAttrs() override = default; + const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MODIFY_OPS_ATTRS_H diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc new file mode 100644 index 00000000000..dde40a50900 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc @@ -0,0 +1,66 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pre_activate/ascend/format_type/remove_no_use_reshape_op.h" +#include +#include +#include "pre_activate/common/helper.h" +#include "kernel/common_utils.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" + +namespace mindspore { +namespace opt { +namespace { +AnfNodePtr RemoveReshapeOp(const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + auto op_name = AnfAlgo::GetCNodeName(cnode); + if (op_name != prim::kPrimReshape->name()) { + return nullptr; + } + + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0); + auto input_format = AnfAlgo::GetPrevNodeOutputFormat(cnode, 0); + if (input_shape.size() != 1 || input_format != kOpFormat_NC1HWC0) { + return nullptr; + } + + return cnode->input(1); +} +} // namespace + +const AnfNodePtr RemoveNoUseReshapeOp::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + if (node == nullptr || !node->isa() || !AnfAlgo::IsGraphKernel(node)) { + return nullptr; + } + MS_LOG(DEBUG) << "====process op: " << AnfAlgo::GetCNodeName(node); + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + auto manager = fg->manager(); + MS_EXCEPTION_IF_NULL(manager); + std::vector todos; + kernel::GetValidKernelNodes(fg, &todos); + for (auto &t : todos) { + auto new_node = RemoveReshapeOp(t->cast()); + if (new_node != nullptr && new_node != t) { + (void)manager->Replace(t, new_node); + } + } + return node; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h b/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h new file mode 100644 index 00000000000..4942c2fc082 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h @@ -0,0 +1,33 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_REMOVE_NO_USE_RESHAPE_OP_H +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_REMOVE_NO_USE_RESHAPE_OP_H + +#include "pre_activate/common/optimizer.h" + +namespace mindspore { +namespace opt { +class RemoveNoUseReshapeOp : public PatternProcessPass { + public: + explicit RemoveNoUseReshapeOp(bool multigraph = true) : PatternProcessPass("remove_no_use_reshape_op", multigraph) {} + ~RemoveNoUseReshapeOp() override = default; + const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_REMOVE_NO_USE_RESHAPE_OP_H diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc index fba1ab40af9..b16387d8f1d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc @@ -121,6 +121,9 @@ const AnfNodePtr LayerNormBetaGammaBackpropFusion::Process(const FuncGraphPtr &f if (node == nullptr || !node->isa()) { return nullptr; } + if (AnfAlgo::IsGraphKernel(node)) { + return nullptr; + } auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); std::vector cast_nodes; diff --git a/mindspore/ccsrc/pre_activate/common/helper.cc b/mindspore/ccsrc/pre_activate/common/helper.cc index c59260564ab..1c2ade201c3 100644 --- a/mindspore/ccsrc/pre_activate/common/helper.cc +++ b/mindspore/ccsrc/pre_activate/common/helper.cc @@ -102,9 +102,12 @@ bool UnVisited(const BaseRef &n) { auto prim_py = value->cast(); MS_EXCEPTION_IF_NULL(prim_py); return !prim_py->HasAttr(kAttrVisited); - } else { - return false; + } else if (IsValueNode(in)) { + auto func_graph = GetValueNode(in); + MS_EXCEPTION_IF_NULL(func_graph); + return !func_graph->has_flag(kAttrVisited); } + return false; } return false; } @@ -188,9 +191,12 @@ bool Visited(const BaseRef &n) { auto prim_py = value->cast(); MS_EXCEPTION_IF_NULL(prim_py); return prim_py->HasAttr(kAttrVisited); - } else { - return false; + } else if (IsValueNode(in)) { + auto func_graph = GetValueNode(in); + MS_EXCEPTION_IF_NULL(func_graph); + return func_graph->has_flag(kAttrVisited); } + return false; } return false; } diff --git a/mindspore/ccsrc/pre_activate/common/node_pass.cc b/mindspore/ccsrc/pre_activate/common/node_pass.cc index a6e93d2f074..876da8667ba 100644 --- a/mindspore/ccsrc/pre_activate/common/node_pass.cc +++ b/mindspore/ccsrc/pre_activate/common/node_pass.cc @@ -22,6 +22,7 @@ #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/manager.h" +#include "session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { @@ -52,8 +53,13 @@ bool NodePass::Run(const FuncGraphPtr &func_graph) { if (new_node && IsValueNode(new_node)) { auto const_func_graph = 
GetValueNode<FuncGraphPtr>(new_node);
       MS_EXCEPTION_IF_NULL(const_func_graph);
-      todo.push_back(const_func_graph->output());
+      if (!const_func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) {
+        todo.push_back(const_func_graph->output());
+      }
     } else if (new_node && new_node->isa<CNode>()) {
+      if (AnfAlgo::IsGraphKernel(new_node)) {
+        todo.push_back(new_node);
+      }
       auto cnode = new_node->cast<CNodePtr>();
       MS_EXCEPTION_IF_NULL(cnode);
       auto inputs = cnode->inputs();
diff --git a/mindspore/ccsrc/pre_activate/common/optimizer.cc b/mindspore/ccsrc/pre_activate/common/optimizer.cc
index fa51a0bd8ce..71a523ea1d0 100644
--- a/mindspore/ccsrc/pre_activate/common/optimizer.cc
+++ b/mindspore/ccsrc/pre_activate/common/optimizer.cc
@@ -86,11 +86,8 @@ void GraphOptimizer::AddPassManager(const PassManagerPtr &pass_manager) {
 FuncGraphPtr GraphOptimizer::Optimize(const FuncGraphPtr &func_graph, bool run_only_once) {
   MS_EXCEPTION_IF_NULL(func_graph);
   run_only_once_ = (pass_managers_.size() == 1) ? true : run_only_once;
-  auto manager = func_graph->manager();
-  if (manager == nullptr) {
-    manager = Manage(func_graph, false);
-    func_graph->set_manager(manager);
-  }
+  // Performance risk by creating new manager each time
+  auto manager = Manage(func_graph, true);
 
   bool changed = true;
   while (changed) {
diff --git a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc b/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc
new file mode 100644
index 00000000000..0c2b22578f0
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc
@@ -0,0 +1,122 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "pre_activate/pass/add_atomic_clean.h" +#include +#include +#include +#include "operator/ops.h" +#include "utils/utils.h" +#include "utils/graph_utils.h" +#include "utils/log_adapter.h" +#include "session/anf_runtime_algorithm.h" +#include "session/kernel_graph.h" +#include "debug/anf_ir_dump.h" + +namespace mindspore { +namespace opt { +namespace { + +static std::vector g_output_idx; + +bool HasAtomic(const AnfNodePtr &input) { + if (IsPrimitiveCNode(input)) { + const auto &cnode = input->cast(); + const auto &prim = GetValueNode(cnode->input(0)); + return prim->HasAttr("atomic_add"); + } + return false; +} + +std::vector CalCleanSize(const CNodePtr &pre_node) { + MS_EXCEPTION_IF_NULL(pre_node); + std::vector clean_size_list; + // clean output + for (auto &index : g_output_idx) { + TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(pre_node, index); + size_t type_size = GetTypeByte(TypeIdToType(output_type_id)); + std::vector shape = AnfAlgo::GetOutputDeviceShape(pre_node, index); + auto size = std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies()); + clean_size_list.push_back((size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize); + } + MS_LOG(DEBUG) << "Clear output size: " << clean_size_list.size() << ", pre_node: " << pre_node->fullname_with_scope(); + return clean_size_list; +} + +CNodePtr CreateTbeAtomicCleanNode(const std::shared_ptr &kernel_graph, + const mindspore::CNodePtr &pre_node) { + MS_EXCEPTION_IF_NULL(kernel_graph); + MS_EXCEPTION_IF_NULL(pre_node); + auto clean_zero_prim = std::make_shared(kAtomicAddrCleanOpName); + auto new_value_node = NewValueNode(clean_zero_prim); + std::vector inputs = {new_value_node}; + CNodePtr clean_zero = kernel_graph->NewCNode(inputs); + AbstractBasePtr abstract = std::make_shared(); + clean_zero->set_abstract(abstract); + auto builder = std::make_shared(); + builder->SetKernelType(KernelType::TBE_KERNEL); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), clean_zero.get()); + auto clean_size = CalCleanSize(pre_node); + AnfAlgo::SetNodeAttr(kAttrAutomicAddMemSize, MakeValue(clean_size), clean_zero); + AnfAlgo::SetNodeAttr(kAttrAutomicOutputIndexs, MakeValue(g_output_idx), clean_zero); + AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(pre_node.get()), clean_zero.get()); + return clean_zero; +} +} // namespace + +void AddAtomicClean(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto mng = kernel_graph->manager(); + if (mng == nullptr) { + mng = Manage(kernel_graph, true); + kernel_graph->set_manager(mng); + } + auto &todos = kernel_graph->execution_order(); + for (auto iter = todos.cbegin(); iter != todos.end(); ++iter) { + auto node = *iter; + if (AnfAlgo::IsGraphKernel(node) && kernel_graph->nodes().contains(node)) { + auto fg = GetValueNode(node->input(kAnfPrimitiveIndex)); + MS_EXCEPTION_IF_NULL(fg); + auto input = fg->get_return()->input(1); + if (IsPrimitiveCNode(input, prim::kPrimMakeTuple)) { + const auto &cnode = input->cast(); + for (size_t i = 0; i < cnode->inputs().size(); ++i) { + if (HasAtomic(cnode->input(i))) { + g_output_idx.push_back(i - 1); + } + } + } else if (HasAtomic(input)) { + g_output_idx.push_back(0); + } + + if (!g_output_idx.empty()) { + auto zero_node = CreateTbeAtomicCleanNode(kernel_graph, node); + auto depend = kernel_graph->NewCNode({NewValueNode(prim::kPrimDepend), node->input(1), zero_node}); + std::vector new_input = node->inputs(); + new_input[1] = depend; + auto new_cnode = std::make_shared(new_input, 
kernel_graph); + // Set abstract + new_cnode->set_abstract(node->abstract()); + // Set kernel info + new_cnode->set_kernel_info(node->kernel_info_ptr()); + mng->Replace(node, new_cnode); + g_output_idx.clear(); + } + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h b/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h new file mode 100644 index 00000000000..bb1edb0e359 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ADD_ATOMIC_CLEAN_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ADD_ATOMIC_CLEAN_H_ + +#include +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +void AddAtomicClean(const std::shared_ptr &kernel_graph); +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ADD_ATOMIC_CLEAN_H diff --git a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc b/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc index f8604d7638c..9af50eac330 100644 --- a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc +++ b/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc @@ -45,6 +45,8 @@ bool BackendCSE::CheckReplace(const AnfNodePtr &main, const AnfNodePtr &node) co auto node_value = GetValueNode(node); if (main_value->isa() && node_value->isa()) { replace = false; + } else if (main_value->isa() && node_value->isa()) { + replace = (AbsOf(main) == AbsOf(node)) && CheckEqualKernelBuildInfo(main, node); } else { replace = (AbsOf(main) == AbsOf(node)) && (*main_value == *node_value); } diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc index 1f9e2712a6a..38d629c4154 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc +++ b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc @@ -26,6 +26,7 @@ #include "utils/context/ms_context.h" #include "operator/ops.h" #include "session/anf_runtime_algorithm.h" +#include "kernel/common_utils.h" namespace mindspore { namespace opt { @@ -34,14 +35,24 @@ const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const An if (node == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) { return nullptr; } - CNodePtr cnode = node->cast(); - - ConstInputToAttrInfoRegister reg; - if (!ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(AnfAlgo::GetCNodeName(cnode), ®)) { - return nullptr; + std::vector todos; + if (AnfAlgo::IsGraphKernel(node)) { + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + kernel::GetValidKernelNodes(sub_graph, &todos); + } else { + todos.push_back(node); } - ConstInputToAttr(cnode, reg.GetConstInputAttrInfo()); - return cnode; + + for (auto &t : todos) { + 
CNodePtr cnode = t->cast(); + ConstInputToAttrInfoRegister reg; + if (!ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(AnfAlgo::GetCNodeName(cnode), ®)) { + continue; + } + ConstInputToAttr(cnode, reg.GetConstInputAttrInfo()); + } + return node; } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc index 56be2e273d0..b4f98cc6d71 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc +++ b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc @@ -17,15 +17,39 @@ #include #include +#include #include "utils/graph_utils.h" #include "pre_activate/common/helper.h" #include "session/anf_runtime_algorithm.h" #include "session/kernel_graph.h" +#include "kernel/common_utils.h" +#include "device/kernel_info.h" namespace mindspore { namespace opt { namespace { +ValueNodePtr MakeValueNode(const ValueNodePtr &value_node) { + MS_EXCEPTION_IF_NULL(value_node); + ValueNodePtr new_value_node = std::make_shared(value_node->value()); + new_value_node->set_abstract(value_node->abstract()); + // create kernel_info fo new value node + auto kernel_info = std::make_shared(); + new_value_node->set_kernel_info(kernel_info); + // create kernel_build_info for new value node + auto kernel_build_info_builder = std::make_shared(); + // set the format of value_node to DEFAULT_FORMAT + kernel_build_info_builder->SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); + // set value node initial device data type = infer data type + std::vector types; + for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(value_node); ++index) { + types.push_back(kTypeUnknown); + } + kernel_build_info_builder->SetOutputsDeviceType(types); + AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), new_value_node.get()); + return new_value_node; +} + AnfNodePtr CreateTensorInput(const KernelGraphPtr &kernel_graph, const AnfNodePtr &input_node) { MS_EXCEPTION_IF_NULL(input_node); auto value_node = input_node->cast(); @@ -50,6 +74,8 @@ AnfNodePtr CreateTensorInput(const KernelGraphPtr &kernel_graph, const AnfNodePt if (kernel_graph != nullptr) { tensor_input = kernel_graph->NewValueNode(tensor_input); kernel_graph->AddValueNodeToGraph(tensor_input); + } else { + tensor_input = MakeValueNode(tensor_input); } tensor_input->set_scope(input_node->scope()); return tensor_input; @@ -89,6 +115,26 @@ AnfNodePtr ConstInputToTensorInput(const FuncGraphPtr &func_graph, const CNodePt } return nullptr; } + +AnfNodePtr ProcessGraphKernelOp(const AnfNodePtr &node) { + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + auto mng = sub_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + std::vector todo; + std::vector> graph_rets; + kernel::GetValidKernelNodes(sub_graph, &todo); + kernel::GetGraphRealOutput(sub_graph, &graph_rets); + + for (auto &t : todo) { + auto t_new_node = ConstInputToTensorInput(sub_graph, t->cast()); + if (t_new_node != nullptr && t_new_node != t) { + (void)mng->Replace(t, t_new_node); + } + } + + return node; +} } // namespace const AnfNodePtr ConvertConstInputToTensorInput::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, @@ -96,8 +142,11 @@ const AnfNodePtr ConvertConstInputToTensorInput::Process(const FuncGraphPtr &fun if (node == nullptr || func_graph == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) { return nullptr; } - CNodePtr cnode = node->cast(); - 
return ConstInputToTensorInput(func_graph, cnode); + if (AnfAlgo::IsGraphKernel(node)) { + return ProcessGraphKernelOp(node); + } else { + return ConstInputToTensorInput(func_graph, node->cast()); + } } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc b/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc index ab2395b1f5c..a03087c1a4a 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc +++ b/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc @@ -21,10 +21,37 @@ #include "session/anf_runtime_algorithm.h" #include "pre_activate/common/helper.h" #include "session/kernel_graph.h" +#include "kernel/common_utils.h" +#include "device/kernel_info.h" namespace mindspore { namespace opt { namespace { +bool MakeValueNode(const AnfNodePtr &node) { + auto value_node = node->cast(); + if (value_node == nullptr) { + return false; + } + + // create kernel_info fo new value node + auto kernel_info = std::make_shared(); + value_node->set_kernel_info(kernel_info); + // create kernel_build_info for new value node + auto kernel_build_info_builder = std::make_shared(); + // set the format of value_node to DEFAULT_FORMAT + kernel_build_info_builder->SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); + // set value node initial device data type = infer data type + TypeId infer_data_type; + if (AnfAlgo::GetOutputTensorNum(value_node) == 0) { + infer_data_type = kTypeUnknown; + } else { + infer_data_type = AnfAlgo::GetOutputInferDataType(value_node, 0); + } + kernel_build_info_builder->SetOutputsDeviceType(std::vector{infer_data_type}); + AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), value_node.get()); + return true; +} + void ConvertTupleOuputToPlantInputs(const FuncGraphPtr &graph, const AnfNodePtr &input_node, std::vector *plant_inputs, std::vector *dyn_input_sizes) { MS_EXCEPTION_IF_NULL(plant_inputs); @@ -50,12 +77,12 @@ void ConvertTupleOuputToPlantInputs(const FuncGraphPtr &graph, const AnfNodePtr (void)std::copy(convert_inputs.begin(), convert_inputs.end(), std::back_inserter(*plant_inputs)); } -CNodePtr ConvertMakeTupleInputToPlantInputs(const FuncGraphPtr &graph, const CNodePtr &cnode_ptr) { +void ConvertMakeTupleInputToPlantInputs(const FuncGraphPtr &graph, const CNodePtr &cnode_ptr) { MS_EXCEPTION_IF_NULL(cnode_ptr); MS_EXCEPTION_IF_NULL(graph); auto &ori_args = cnode_ptr->inputs(); if (ori_args.size() < 1) { - return nullptr; + return; } std::vector plant_inputs; std::vector dyn_input_sizes; @@ -68,7 +95,16 @@ CNodePtr ConvertMakeTupleInputToPlantInputs(const FuncGraphPtr &graph, const CNo auto cnode = input_node->cast(); MS_EXCEPTION_IF_NULL(cnode); auto inputs = cnode->inputs(); - (void)std::copy(inputs.begin() + 1, inputs.end(), std::back_inserter(plant_inputs)); + for (size_t j = 1; j < inputs.size(); ++j) { + MS_EXCEPTION_IF_NULL(inputs[j]); + if (IsValueNode(inputs[j])) { + auto success = MakeValueNode(inputs[j]); + if (!success) { + MS_LOG(WARNING) << "Make value node failed, " << inputs[j]->DebugString(); + } + } + plant_inputs.push_back(inputs[j]); + } } else if (input_node->Type() != nullptr && AnfAlgo::IsTupleOutput(input_node)) { ConvertTupleOuputToPlantInputs(graph, input_node, &plant_inputs, &dyn_input_sizes); } else { @@ -81,7 +117,6 @@ CNodePtr ConvertMakeTupleInputToPlantInputs(const FuncGraphPtr &graph, const CNo AnfAlgo::SetNodeAttr(kAttrDynInputSizes, MakeValue(dyn_input_sizes), cnode_ptr); 
cnode_ptr->set_inputs(plant_inputs); } - return cnode_ptr; } } // namespace @@ -96,7 +131,18 @@ const AnfNodePtr ConvertTupleInputToDynamicInput::Process(const FuncGraphPtr &fu if (node == nullptr || !node->isa() || !AnfAlgo::IsRealKernel(node)) { return nullptr; } - return ConvertMakeTupleInputToPlantInputs(func_graph, node->cast()); + if (AnfAlgo::IsGraphKernel(node)) { + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + std::vector todos; + kernel::GetValidKernelNodes(sub_graph, &todos); + for (auto &t : todos) { + ConvertMakeTupleInputToPlantInputs(sub_graph, t->cast()); + } + } else { + ConvertMakeTupleInputToPlantInputs(func_graph, node->cast()); + } + return node; } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc b/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc index 2fc971881d6..4d3dcfccc00 100644 --- a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc +++ b/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc @@ -18,10 +18,12 @@ #include #include #include +#include #include "session/anf_runtime_algorithm.h" #include "utils/utils.h" #include "pre_activate/common/helper.h" #include "operator/ops.h" +#include "kernel/common_utils.h" namespace mindspore { namespace opt { @@ -125,13 +127,7 @@ void EliminateRedundantOp::Init() { kTransDataOpName, std::pair(kTransDataOpName, TransDataOpEliminateCondition))); } -const AnfNodePtr EliminateRedundantOp::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, - const EquivPtr &) const { - MS_EXCEPTION_IF_NULL(node); - auto cnode = node->cast(); - if (cnode == nullptr || func_graph == nullptr) { - return nullptr; - } +const AnfNodePtr EliminateRedundantOp::DoEliminate(const FuncGraphPtr &func_graph, const CNodePtr &cnode) const { // match the first name auto name1 = AnfAlgo::GetCNodeName(cnode); auto it = redundant_process_map_.find(name1); @@ -160,5 +156,35 @@ const AnfNodePtr EliminateRedundantOp::Process(const FuncGraphPtr &func_graph, c return ProcessMatchedNodes(func_graph, cnode, prev_cnode, &pass_vector); } + +const AnfNodePtr EliminateRedundantOp::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + MS_EXCEPTION_IF_NULL(node); + auto cnode = node->cast(); + if (cnode == nullptr || func_graph == nullptr) { + return nullptr; + } + + if (AnfAlgo::IsGraphKernel(node)) { + // do eliminate for ops in graph kernel. + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + auto mng = sub_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + std::vector todo; + kernel::GetValidKernelNodes(sub_graph, &todo); + for (auto &t : todo) { + CNodePtr t_cnode = t->cast(); + MS_EXCEPTION_IF_NULL(t_cnode); + auto t_new_node = DoEliminate(sub_graph, t_cnode); + if (t_new_node != nullptr && t_new_node != t) { + (void)mng->Replace(t, t_new_node); + } + } + return node; + } + // do eliminate for single op. 
+ return DoEliminate(func_graph, cnode); +} } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h b/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h index 9e0dacecb18..c44190f6459 100644 --- a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h +++ b/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h @@ -40,6 +40,7 @@ class EliminateRedundantOp : public PatternProcessPass { private: void Init(); + const AnfNodePtr DoEliminate(const FuncGraphPtr &func_graph, const CNodePtr &cnode) const; std::unordered_map redundant_process_map_; }; } // namespace opt diff --git a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc b/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc index 4ea817df852..3b566b4f7cb 100644 --- a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc +++ b/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc @@ -16,6 +16,8 @@ #include "pre_activate/pass/erase_visit_attr.h" #include +#include +#include "kernel/common_utils.h" #include "session/anf_runtime_algorithm.h" #include "pre_activate/common/helper.h" @@ -28,7 +30,20 @@ const BaseRef EraseVisitAttr::DefinePattern() const { } const AnfNodePtr EraseVisitAttr::Process(const FuncGraphPtr &, const AnfNodePtr &node, const EquivPtr &) const { - AnfAlgo::EraseNodeAttr(kAttrVisited, node); + if (node != nullptr && AnfAlgo::IsRealCNodeKernel(node)) { + if (AnfAlgo::IsGraphKernel(node)) { + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + std::vector todos; + kernel::GetValidKernelNodes(fg, &todos); + for (auto &t : todos) { + AnfAlgo::EraseNodeAttr(kAttrVisited, t); + } + } + AnfAlgo::EraseNodeAttr(kAttrVisited, node); + } else { + AnfAlgo::EraseNodeAttr(kAttrVisited, node); + } return nullptr; } } // namespace opt diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_basic.cc b/mindspore/ccsrc/pre_activate/pass/fuse_basic.cc new file mode 100644 index 00000000000..84edd5c5e2f --- /dev/null +++ b/mindspore/ccsrc/pre_activate/pass/fuse_basic.cc @@ -0,0 +1,222 @@ + +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pre_activate/pass/fuse_basic.h" +#include "pre_activate/pass/fuse_graph_kernel.h" + +#include +#include +#include +#include +#include +#include + +#include "operator/ops.h" +#include "utils/utils.h" +#include "utils/graph_utils.h" +#include "pre_activate/common/helper.h" +#include "session/anf_runtime_algorithm.h" +#include "vm/segment_runner.h" +#include "debug/draw.h" +#include "debug/anf_ir_dump.h" +#include "ir/func_graph_cloner.h" + +namespace mindspore { +namespace opt { +namespace { +std::vector get_fusable_basic_ops(bool is_before_kernel_select) { + std::vector fusable_basic_ops = {prim::kPrimTensorAdd, prim::kPrimMul, prim::kPrimSub, + prim::kPrimExpandDims}; + if (!is_before_kernel_select) { + fusable_basic_ops.push_back(prim::kPrimCast); + } + return fusable_basic_ops; +} + +IncludeType IncludeFusedBasicOpForward(const AnfNodePtr &cur_node, const GraphKernelInfo &info, + const AnfNodePtr &node) { + if (cur_node == node) { + return FOLLOW; + } + if (!IsPrimitiveCNode(node)) { + return EXCLUDE; + } + + auto fusable_basic_ops = get_fusable_basic_ops(info.is_before_kernel_select); + bool is_fusable = std::any_of(fusable_basic_ops.begin(), fusable_basic_ops.end(), + [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); }); + + return is_fusable ? FOLLOW : EXCLUDE; +} + +std::vector FindFuseCNodes(const CNodePtr &cnode, bool is_before_kernel_select) { + GraphKernelInfo info; + info.is_before_kernel_select = is_before_kernel_select; + // Search fusable nodes according input direction. + auto include_func_forward = std::bind(IncludeFusedBasicOpForward, cnode, info, std::placeholders::_1); + auto used_nodes = DeepLinkedGraphSearch(cnode, include_func_forward); + if (used_nodes.size() > 1) { + used_nodes = RemoveCircle(used_nodes, false); + } + TopoSortForNodeList(&used_nodes); + return used_nodes; +} + +void RemoveControlDependOut(const FuncGraphPtr &fg, AnfNodePtrList *outputs, const FuncGraphManagerPtr &mng) { + AnfNodeSet outputs_set; + for (auto out : *outputs) { + outputs_set.insert(out); + } + + AnfNodePtrList vir_outputs; + std::unordered_map eqv; + auto fg_outputs = fg->output(); + if (IsPrimitiveCNode(fg_outputs, prim::kPrimMakeTuple)) { + auto cnode = fg_outputs->cast(); + for (size_t i = 1; i < cnode->size(); ++i) { + vir_outputs.push_back(cnode->input(i)); + } + } else { + vir_outputs.push_back(fg_outputs); + } + + if (vir_outputs.size() != outputs->size()) { + MS_LOG(EXCEPTION) << "The size of virtual output of the fg is not the same with the real output"; + } + bool has_erase_outs = false; + size_t index = -1; + for (auto it = outputs->begin(); it != outputs->end();) { + index++; + auto out = *it; + eqv[out] = vir_outputs[index]; + auto users = mng->node_users()[out]; + bool is_only_control_depend_use = true; + std::vector control_depend_use_index; + std::vector control_depend_nodes; + AnfNodePtr use_out = nullptr; + for (auto &user : users) { + auto use_node = user.first; + if (outputs_set.count(use_node) == 0 && !(IsPrimitiveCNode(use_node, prim::kPrimControlDepend))) { + is_only_control_depend_use = false; + continue; + } + if (outputs_set.count(use_node) != 0) { + use_out = use_node; + } + + if (IsPrimitiveCNode(use_node, prim::kPrimControlDepend)) { + control_depend_nodes.push_back(use_node->cast()); + control_depend_use_index.push_back(user.second); + } + } + + if (is_only_control_depend_use && !control_depend_nodes.empty()) { + MS_EXCEPTION_IF_NULL(use_out); + it = outputs->erase(it); + for (size_t i = 0; i < 
control_depend_nodes.size(); ++i) { + auto control_depend_node = control_depend_nodes[i]; + std::vector new_control_depend_inputs; + for (size_t j = 0; j < control_depend_node->size(); ++j) { + if (j == control_depend_use_index[i]) { + new_control_depend_inputs.push_back(use_out); + } else { + new_control_depend_inputs.push_back(control_depend_node->input(j)); + } + } + auto new_control_depend = control_depend_node->func_graph()->NewCNode(new_control_depend_inputs); + mng->Replace(control_depend_node, new_control_depend); + has_erase_outs = true; + } + } else { + it++; + } + } + + if (!has_erase_outs) { + return; + } + + AnfNodePtr fg_new_output; + if (outputs->size() > 1) { + std::vector output_args; + output_args.push_back(NewValueNode(prim::kPrimMakeTuple)); + (void)std::transform(std::begin(*outputs), std::end(*outputs), std::back_inserter(output_args), + [&eqv](const AnfNodePtr &o) -> AnfNodePtr { return eqv[o]; }); + // Set output for AnfGraph + fg_new_output = fg->NewCNode(output_args); + } else { + fg_new_output = eqv[(*outputs)[0]]; + } + fg->set_output(fg_new_output, true); +} + +void FuseBasic(const std::shared_ptr &kernel_graph, const std::vector &todos, + std::unordered_set *fused_ops, bool is_before_kernel_select) { + auto mng = kernel_graph->manager(); + for (auto iter = todos.cbegin(); iter != todos.cend(); ++iter) { + auto node = (*iter)->cast(); + if (node == nullptr) { + continue; + } + if (fused_ops->count(node)) { + continue; + } + auto fusable_basic_ops = get_fusable_basic_ops(is_before_kernel_select); + bool is_basic_op = std::any_of(fusable_basic_ops.begin(), fusable_basic_ops.end(), + [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); }); + if (!is_basic_op || !kernel_graph->nodes().contains(node)) { + continue; + } + + auto fuse_nodes = FindFuseCNodes(node, is_before_kernel_select); + if (fuse_nodes.size() <= 1) { + continue; + } + + FuncGraphPtr fg; + AnfNodePtrList inputs; + AnfNodePtrList outputs; + std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes); + RemoveControlDependOut(fg, &outputs, mng); + auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, outputs, is_before_kernel_select); + + ReplaceNewFuseCNode(kernel_graph, fuse_new_node, outputs); + + // Set graph kernel attr + std::string fuse_op_name = ""; + for (auto &fuse_node : fuse_nodes) { + fuse_op_name += AnfAlgo::GetCNodePrimitive(fuse_node)->name() + "_"; + } + fused_ops->insert(fuse_nodes.begin(), fuse_nodes.end()); + fg->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, MakeValue(fuse_op_name)); + } +} +} // namespace + +void FuseBasic(const std::shared_ptr &kernel_graph, bool is_before_kernel_select) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto mng = kernel_graph->manager(); + if (mng == nullptr) { + mng = Manage(kernel_graph, true); + kernel_graph->set_manager(mng); + } + std::unordered_set fused_ops; + auto todos = TopoSort(kernel_graph->get_return()); + std::reverse(todos.begin(), todos.end()); + FuseBasic(kernel_graph, todos, &fused_ops, is_before_kernel_select); +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_basic.h b/mindspore/ccsrc/pre_activate/pass/fuse_basic.h new file mode 100644 index 00000000000..fbbf5d99370 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/pass/fuse_basic.h @@ -0,0 +1,29 @@ + +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_BASIC_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_BASIC_H_ + +#include +#include "pre_activate/common/optimizer.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +void FuseBasic(const std::shared_ptr &kernel_graph, bool is_before_kernel_select); +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_BASIC_H_ diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc b/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc new file mode 100644 index 00000000000..591b2103353 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc @@ -0,0 +1,562 @@ + +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/pass/fuse_graph_kernel.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "operator/ops.h" +#include "utils/utils.h" +#include "utils/graph_utils.h" +#include "pre_activate/common/helper.h" +#include "session/anf_runtime_algorithm.h" +#include "vm/segment_runner.h" +#include "debug/draw.h" +#include "debug/anf_ir_dump.h" +#include "ir/func_graph_cloner.h" + +namespace mindspore { +namespace opt { +std::vector get_fusable_basic_ops(bool is_before_kernel_select) { + std::vector fusable_basic_ops = { + prim::kPrimAddN, prim::kPrimTensorAdd, prim::kPrimMul, prim::kPrimSub, prim::kPrimMaximum, + prim::kPrimMinimum, prim::kPrimNeg, prim::kPrimRealDiv, prim::kPrimPow, prim::kPrimSqrt, + prim::kPrimReciprocal, prim::kPrimExpandDims, prim::kPrimLessEqual}; + if (!is_before_kernel_select) { + fusable_basic_ops.push_back(prim::kPrimCast); + } + return fusable_basic_ops; +} + +std::vector get_fusable_basic_ops_with_reduce(bool is_before_kernel_select) { + std::vector fusable_basic_ops_with_reduce; + if (!is_before_kernel_select) { + fusable_basic_ops_with_reduce.push_back(prim::kPrimCast); + } + return fusable_basic_ops_with_reduce; +} + +std::vector get_reduce_ops() { + std::vector reduce_ops = {prim::kPrimReduceSum, prim::kPrimReduceMean, prim::kPrimReduceMin, + prim::kPrimReduceMax, prim::kPrimReduceAll}; + return reduce_ops; +} + +void GetGraphKernelInfo(const FuncGraphPtr fg, GraphKernelInfo *info) { + MS_EXCEPTION_IF_NULL(fg); + auto reduce_ops = get_reduce_ops(); + const auto &nodes = fg->nodes(); + info->op_type = ELEWISE; + info->cal_step = -1; + info->reduce_op_num = 0; + for (auto node : nodes) { + auto cnode = node->cast(); + if (cnode == nullptr) { + continue; + } + info->cal_step++; 
+ auto prim = GetValueNode(cnode->input(0)); + if (prim != nullptr) { + bool is_reudce = std::any_of(reduce_ops.begin(), reduce_ops.end(), [&prim](const PrimitivePtr &op) { + return op->hash() == prim->hash() && op->name() == prim->name(); + }); + if (is_reudce) { + info->op_type = REDUCE; + info->reduce_op_num++; + } + } + } +} + +bool IsFuse(const GraphKernelInfo &info, const AnfNodePtr &node) { + auto fusable_basic_ops = get_fusable_basic_ops(info.is_before_kernel_select); + auto fusable_basic_ops_with_reduce = get_fusable_basic_ops_with_reduce(info.is_before_kernel_select); + bool is_fusable = false; + if (info.op_type == REDUCE && + (info.cal_step >= MAX_REDUCE_OP_FUSION_CAL_STEP || info.reduce_op_num >= MAX_REDUCE_OP_FUSION_REDUCE_NUM)) { + is_fusable = std::any_of(fusable_basic_ops_with_reduce.begin(), fusable_basic_ops_with_reduce.end(), + [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); }); + } else { + is_fusable = std::any_of(fusable_basic_ops.begin(), fusable_basic_ops.end(), + [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); }); + } + + return is_fusable; +} + +IncludeType IncludeFusedBasicOpForward(const AnfNodePtr &cur_node, const GraphKernelInfo &info, + const AnfNodePtr &node) { + if (cur_node == node) { + return FOLLOW; + } + if (!IsPrimitiveCNode(node)) { + return EXCLUDE; + } + + bool is_fusable = IsFuse(info, node); + return is_fusable ? FOLLOW : EXCLUDE; +} + +IncludeType IncludeFusedBasicOpBackward(const AnfNodePtr &cur_node, const GraphKernelInfo &info, + const AnfNodePtr &node) { + if (cur_node == node) { + return FOLLOW; + } + if (AnfAlgo::IsGraphKernel(node)) { + auto cnode = node->cast(); + auto fg = GetValueNode(cnode->input(kAnfPrimitiveIndex)); + auto fg_attr_val = fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); + MS_EXCEPTION_IF_NULL(fg_attr_val); + auto fg_attr = GetValue(fg_attr_val); + if (fg_attr == kApplyMomentumOpName) { + return FOLLOW; + } + return EXCLUDE; + } + if (!IsPrimitiveCNode(node)) { + return EXCLUDE; + } + + bool is_fusable = IsFuse(info, node); + return is_fusable ? 
FOLLOW : EXCLUDE; +} + +bool CheckCircle(const std::set &fused_op_set, const AnfNodePtr &check_node, + std::set *cached_unconnected_set) { + if (!check_node->isa() || AnfAlgo::IsGraphKernel(check_node)) { + return false; + } + + auto cnode = check_node->cast(); + const auto &inputs = cnode->inputs(); + // there is a input not in fused_op_set, but the input depends on the fused_op_set + bool has_circle = false; + for (auto input : inputs) { + if (input->isa() && !fused_op_set.count(input)) { + std::set done; + std::vector todos = {input}; + while (!todos.empty()) { + auto node = todos.back(); + todos.pop_back(); + if (done.count(node) || cached_unconnected_set->count(node)) { + continue; + } + + done.insert(node); + if (fused_op_set.count(node)) { + has_circle = true; + break; + } + + if (node->isa()) { + auto cnode_ptr = node->cast(); + for (auto it : cnode_ptr->inputs()) { + if (it->isa()) { + todos.push_back(it); + } + } + } + } + + if (has_circle) { + return true; + } + cached_unconnected_set->insert(done.begin(), done.end()); + } + } + + return false; +} + +bool IsMakeTupleOut(const AnfNodePtr &out, AnfNodePtrList *real_outs) { + if (IsPrimitiveCNode(out, prim::kPrimMakeTuple)) { + auto &inputs = out->cast()->inputs(); + for (size_t i = 1; i < inputs.size(); ++i) { + real_outs->push_back(inputs[i]); + } + return true; + } + + if (AnfAlgo::GetCNodeFuncGraphPtr(out) != nullptr) { + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(out); + auto fg_out = fg->output(); + if (IsPrimitiveCNode(fg_out, prim::kPrimMakeTuple)) { + auto inputs = fg_out->cast()->inputs(); + for (size_t i = 1; i < inputs.size(); ++i) { + real_outs->push_back(inputs[i]); + } + return true; + } + } + return false; +} + +std::vector RemoveCircle(const std::vector &fused_op, bool is_backward) { + std::set cached_unconnected_set; + std::set fused_op_set(fused_op.begin(), fused_op.end()); + auto include = [&fused_op_set](const AnfNodePtr &node) { + if (fused_op_set.count(node)) { + return FOLLOW; + } + return EXCLUDE; + }; + for (auto iter = fused_op.rbegin(); iter != fused_op.rend(); ++iter) { + bool has_circle = CheckCircle(fused_op_set, *iter, &cached_unconnected_set); + // delete the circle node and the node which depend on the circle node in fused op + if (has_circle) { + auto mng = (*iter)->func_graph()->manager(); + std::vector erase_nodes; + if (is_backward) { + erase_nodes = DeepUsersSearch(*iter, include, mng); + } else { + erase_nodes = DeepLinkedGraphSearch(*iter, include); + } + for (auto erase_node : erase_nodes) { + fused_op_set.erase(erase_node); + } + } + } + + std::vector res; + for (auto node : fused_op) { + if (fused_op_set.count(node)) { + res.push_back(node); + } + } + return res; +} + +void TopoSortForNodeList(std::vector *lst) { + if (lst->size() < 2) { + return; + } + + std::vector res; + std::set node_sets(lst->begin(), lst->end()); + std::map> ins; + std::map> outs; + std::queue q; + for (auto node : *lst) { + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + for (auto input : cnode->inputs()) { + if (!node_sets.count(input)) { + continue; + } + // out_degree + outs[input].insert(node); + // in_degree + ins[node].insert(input); + } + if (!ins.count(node)) { + ins[node] = {}; + } + } + + for (auto p : ins) { + if (p.second.size() == 0) { + q.push(p.first); + } + } + + while (!q.empty()) { + auto node = q.front(); + q.pop(); + res.push_back(node); + if (!outs.count(node)) { + continue; + } + for (auto out : outs[node]) { + if (!ins.count(out)) { + continue; + } + ins[out].erase(node); + if 
(ins[out].size() == 0) { + q.push(out); + } + } + } + + lst->assign(res.begin(), res.end()); +} + +std::vector FindFuseCNodes(const CNodePtr &cnode, bool is_before_kernel_select) { + auto func_graph = cnode->func_graph(); + auto graph_kernel_g = GetValueNode(cnode->input(0)); + GraphKernelInfo info; + info.is_before_kernel_select = is_before_kernel_select; + GetGraphKernelInfo(graph_kernel_g, &info); + auto mng = func_graph->manager(); + // Search fusable nodes according input direction. + auto include_func_forward = std::bind(IncludeFusedBasicOpForward, cnode, info, std::placeholders::_1); + auto used_nodes = DeepLinkedGraphSearch(cnode, include_func_forward); + std::reverse(used_nodes.begin(), used_nodes.end()); + // Search fusable nodes according output direction. + auto include_func_backward = std::bind(IncludeFusedBasicOpBackward, cnode, info, std::placeholders::_1); + auto user_nodes = DeepUsersSearch(cnode, include_func_backward, mng); + + used_nodes.insert(used_nodes.end(), user_nodes.begin() + 1, user_nodes.end()); + if (used_nodes.size() > 1) { + used_nodes = RemoveCircle(used_nodes); + } + TopoSortForNodeList(&used_nodes); + return used_nodes; +} + +AbstractBasePtr GetOutputAbstract(const AnfNodePtr &node, size_t output_idx) { + auto out_spec = node->abstract(); + if (out_spec->isa()) { + return out_spec->cast()->elements()[output_idx]; + } + return out_spec; +} + +AnfNodePtr CreateNewFuseCNode(const std::shared_ptr &kernel_graph, const FuncGraphPtr &fg, + const AnfNodePtrList &inputs, const AnfNodePtrList &outputs, + bool is_before_kernel_select) { + auto func_node = NewValueNode(fg); + std::vector fn_inputs; + fn_inputs.push_back(func_node); + fn_inputs.insert(fn_inputs.end(), inputs.begin(), inputs.end()); + auto fuse_cnode = kernel_graph->NewCNode(fn_inputs); + // Set output abstract + if (outputs.size() > 1) { + std::vector out_specs; + for (size_t i = 0; i < outputs.size(); ++i) { + out_specs.push_back(outputs[i]->abstract()); + } + auto out_spec = std::make_shared(out_specs); + fuse_cnode->set_abstract(out_spec); + } else { + fuse_cnode->set_abstract(outputs[0]->abstract()); + } + // Set parameter abstract. + for (size_t i = 0; i < inputs.size(); ++i) { + auto kernel_with_index = AnfAlgo::VisitKernel(inputs[i], 0); + auto input_abs = GetOutputAbstract(kernel_with_index.first, kernel_with_index.second); + fg->parameters()[i]->set_abstract(input_abs); + if (is_before_kernel_select) { + fg->parameters()[i]->set_kernel_info(std::make_shared()); + } + } + // Set kernel info. 
+ if (!is_before_kernel_select) { + std::vector graph_input_format; + std::vector graph_input_type; + std::vector graph_output_format; + std::vector graph_output_type; + for (size_t i = 0; i < inputs.size(); ++i) { + auto kernel_with_index = AnfAlgo::VisitKernel(inputs[i], 0); + auto input_format = AnfAlgo::GetOutputFormat(kernel_with_index.first, kernel_with_index.second); + graph_input_format.push_back(input_format); + auto input_type = AnfAlgo::GetOutputDeviceDataType(kernel_with_index.first, kernel_with_index.second); + graph_input_type.push_back(input_type); + auto input_abs = GetOutputAbstract(kernel_with_index.first, kernel_with_index.second); + fg->parameters()[i]->set_abstract(input_abs); + } + auto new_outputs = outputs; + if (outputs.size() == 1 && AnfAlgo::IsGraphKernel(outputs[0])) { + std::vector real_outs; + if (IsMakeTupleOut(outputs[0], &real_outs)) { + new_outputs = real_outs; + } + } + for (size_t i = 0; i < new_outputs.size(); ++i) { + auto kernel_with_index = AnfAlgo::VisitKernel(new_outputs[i], 0); + auto output_format = AnfAlgo::GetOutputFormat(kernel_with_index.first, kernel_with_index.second); + auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel_with_index.first, kernel_with_index.second); + graph_output_format.push_back(output_format); + graph_output_type.push_back(output_type); + } + kernel::KernelBuildInfo::KernelBuildInfoBuilder graph_info_builder; + graph_info_builder.SetInputsFormat(graph_input_format); + graph_info_builder.SetInputsDeviceType(graph_input_type); + graph_info_builder.SetOutputsFormat(graph_output_format); + graph_info_builder.SetOutputsDeviceType(graph_output_type); + graph_info_builder.SetProcessor(kernel::Processor::AICORE); + graph_info_builder.SetKernelType(KernelType::AKG_KERNEL); + graph_info_builder.SetFusionType(kernel::FusionType::OPAQUE); + auto graph_selected_info = graph_info_builder.Build(); + AnfAlgo::SetSelectKernelBuildInfo(graph_selected_info, fuse_cnode.get()); + } + return fuse_cnode; +} + +void ReplaceNewFuseCNode(const std::shared_ptr &kernel_graph, const AnfNodePtr &new_fuse_cnode, + const AnfNodePtrList &outputs) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto mng = kernel_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + // single out + if (outputs.size() == 1) { + mng->Replace(outputs[0], new_fuse_cnode); + return; + } + + std::vector fn_inputs; + for (size_t out_idx = 0; out_idx < outputs.size(); out_idx++) { + AnfNodePtrList real_outs; + // not make tuple out, replace + if (!IsMakeTupleOut(outputs[out_idx], &real_outs)) { + fn_inputs.clear(); + fn_inputs.push_back(NewValueNode(prim::kPrimTupleGetItem)); + fn_inputs.push_back(new_fuse_cnode); + fn_inputs.push_back(NewValueNode(MakeValue(SizeToInt(out_idx)))); + auto new_out = kernel_graph->NewCNode(fn_inputs); + new_out->set_abstract(outputs[out_idx]->abstract()); + mng->Replace(outputs[out_idx], new_out); + continue; + } + + // the out is make tuple , modify the get_item node's value + auto users = mng->node_users()[outputs[out_idx]]; + for (auto &user : users) { + auto use_node = user.first; + if (use_node->isa() && (IsPrimitiveCNode(use_node, prim::kPrimTupleGetItem))) { + auto get_item_cnode = use_node->cast(); + auto value_input = get_item_cnode->input(kInputNodeOutputIndexInTupleGetItem); + MS_EXCEPTION_IF_NULL(value_input); + auto value_node = value_input->cast(); + MS_EXCEPTION_IF_NULL(value_node); + int item_idx = GetValue(value_node->value()); + int new_item_idx = SizeToInt(out_idx) + item_idx; + fn_inputs.clear(); + 
fn_inputs.push_back(NewValueNode(prim::kPrimTupleGetItem)); + fn_inputs.push_back(new_fuse_cnode); + fn_inputs.push_back(NewValueNode(new_item_idx)); + auto new_out = kernel_graph->NewCNode(fn_inputs); + new_out->set_abstract(get_item_cnode->abstract()); + mng->Replace(get_item_cnode, new_out); + } + } + } +} + +AnfNodePtrList EliminateMakeTuple(FuncGraphPtr *fg, FuncGraphManagerPtr *mng) { + AnfNodePtrList outs; + auto out_node = (*fg)->output(); + if (IsPrimitiveCNode(out_node, prim::kPrimMakeTuple)) { + std::vector output_args; + auto out_cnode = out_node->cast(); + for (auto out : out_cnode->inputs()) { + if (IsPrimitiveCNode(out, prim::kPrimMakeTuple)) { + auto inputs = out->cast()->inputs(); + for (size_t i = 1; i < inputs.size(); ++i) { + output_args.push_back(inputs[i]); + } + } else { + output_args.push_back(out); + } + } + if (output_args.size() != out_cnode->inputs().size()) { + auto new_out = (*fg)->NewCNode(output_args); + (*mng)->Replace(out_node, new_out); + } + + for (size_t i = 1; i < output_args.size(); ++i) { + outs.push_back(output_args[i]); + } + return outs; + } + + outs.push_back(out_node); + return outs; +} + +AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs) { + AnfNodePtrList res; + if (outs.size() <= 1) { + return outs; + } + + for (auto out : outs) { + AnfNodePtrList real_outs; + if (IsMakeTupleOut(out, &real_outs)) { + res.insert(res.end(), real_outs.begin(), real_outs.end()); + continue; + } + res.push_back(out); + } + return res; +} + +void FuseGraphKernel(const std::shared_ptr &kernel_graph, bool is_before_kernel_select) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto mng = kernel_graph->manager(); + if (mng == nullptr) { + mng = Manage(kernel_graph, true); + kernel_graph->set_manager(mng); + } + auto &todos = kernel_graph->execution_order(); + for (auto iter = todos.cbegin(); iter != todos.cend(); ++iter) { + auto node = *iter; + if (!AnfAlgo::IsGraphKernel(node) || !kernel_graph->nodes().contains(node)) { + continue; + } + + auto origin_fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + auto fg_attr = origin_fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); + if (fg_attr != nullptr) { + auto fg_name = GetValue(fg_attr); + if (graph_kernel_black_list.count(fg_name) != 0) { + continue; + } + } + + auto fuse_nodes = FindFuseCNodes(node, is_before_kernel_select); + if (fuse_nodes.size() <= 1) { + continue; + } + + FuncGraphPtr fg; + AnfNodePtrList inputs; + AnfNodePtrList outputs; + std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes); + + // Remove nest make tuple in outs + auto expand_out = GetExpandOuts(outputs); + auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, expand_out, is_before_kernel_select); + + ReplaceNewFuseCNode(kernel_graph, fuse_new_node, outputs); + + // Inline origin graphkernel + auto cnodes = fg->GetOrderedCnodes(); + for (const auto &n : cnodes) { + if (!AnfAlgo::IsGraphKernel(n)) { + continue; + } + auto graph_kernel_g = GetValueNode(n->input(0)); + AnfNodePtrList ins; + ins.insert(ins.end(), n->inputs().begin() + 1, n->inputs().end()); + auto out = InlineClone(graph_kernel_g, fg, ins, n->input(0)->scope()); + mng->Replace(n, out); + } + + EliminateMakeTuple(&fg, &mng); + // Set graphkernel flag + auto ori_fg = GetValueNode(node->input(kAnfPrimitiveIndex)); + fg->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, ori_fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h 
b/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h new file mode 100644 index 00000000000..a5a26765a3d --- /dev/null +++ b/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h @@ -0,0 +1,63 @@ + +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_GRAPH_KERNEL_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_GRAPH_KERNEL_H_ + +#include +#include +#include +#include +#include "pre_activate/common/optimizer.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +enum GraphKernelType { + ELEWISE = 0, // only contain elewise basic ops + REDUCE, // contain reduce ops + CUBE, // contain cube ops +}; +struct GraphKernelInfo { + GraphKernelType op_type = ELEWISE; + bool is_before_kernel_select = false; + int reduce_op_num = 0; + int cal_step = 0; +}; + +// when reduce graph kernel's cal step is greater than this number, not fuse +const int MAX_REDUCE_OP_FUSION_CAL_STEP = 5; +// when reduce graph kernel contain reduce op num is greater than this number, not fuse +const int MAX_REDUCE_OP_FUSION_REDUCE_NUM = 2; + +const std::set graph_kernel_black_list = {"BNTrainingUpdateSum", "ApplyMomentum", "LayerNormForward", + "LambNextMV", "LambUpdateWithLR"}; + +std::vector RemoveCircle(const std::vector &fused_op, bool is_backward = true); + +void TopoSortForNodeList(std::vector *lst); + +AnfNodePtr CreateNewFuseCNode(const std::shared_ptr &kernel_graph, const FuncGraphPtr &fg, + const AnfNodePtrList &inputs, const AnfNodePtrList &outputs, + bool is_before_kernel_select); + +void ReplaceNewFuseCNode(const std::shared_ptr &kernel_graph, const AnfNodePtr &new_fuse_cnode, + const AnfNodePtrList &outputs); + +void FuseGraphKernel(const std::shared_ptr &kernel_graph, bool is_before_kernel_select = false); +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_GRAPH_KERNEL_H_ diff --git a/mindspore/ccsrc/pynative/pynative_execute.cc b/mindspore/ccsrc/pynative/pynative_execute.cc index ceaf4bd43eb..d65807b9f6f 100644 --- a/mindspore/ccsrc/pynative/pynative_execute.cc +++ b/mindspore/ccsrc/pynative/pynative_execute.cc @@ -985,7 +985,7 @@ FuncGraphPtr PynativeExecutor::GradGraph(FuncGraphPtr g, const GradOperationPtr auto nparam = top_g_->parameters().size(); std::ostringstream ss; ss << "grad{" << nparam << "}"; - df_builder_->set_flags(FUNC_GRAPH_FLAG_CORE, true); + df_builder_->set_flag(FUNC_GRAPH_FLAG_CORE, true); df_builder_->debug_info()->set_name(ss.str()); auto df = grad_op->GetGrad(NewValueNode(g), nullptr, top_g_->parameters(), weights); diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/session/anf_runtime_algorithm.cc index 1ec11d50db3..5db7dbc324f 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/session/anf_runtime_algorithm.cc @@ -178,12 +178,29 @@ bool AnfRuntimeAlgorithm::CheckPrimitiveType(const AnfNodePtr &node, const Primi return 
IsPrimitive(cnode->input(kAnfPrimitiveIndex), primitive_type); } +FuncGraphPtr AnfRuntimeAlgorithm::GetCNodeFuncGraphPtr(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto attr_input = cnode->input(kAnfPrimitiveIndex); + MS_EXCEPTION_IF_NULL(attr_input); + auto value_node = attr_input->cast(); + MS_EXCEPTION_IF_NULL(value_node); + auto value = value_node->value(); + MS_EXCEPTION_IF_NULL(value); + return value->cast(); +} + std::string AnfRuntimeAlgorithm::GetCNodeName(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); if (node->isa()) { auto primitive = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(primitive); - return primitive->name(); + if (primitive != nullptr) { + return primitive->name(); + } + auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(func_graph); + return func_graph->ToString(); } MS_LOG(EXCEPTION) << "Unknown anf node type " << node->DebugString(); } @@ -198,9 +215,16 @@ void AnfRuntimeAlgorithm::SetNodeAttr(const std::string &key, const ValuePtr &va if (!node->isa()) { MS_LOG(EXCEPTION) << "Only cnode has attr, but this anf is " << node->DebugString(); } + // single op cnode. auto primitive = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(primitive); - primitive->set_attr(key, value); + if (primitive != nullptr) { + primitive->set_attr(key, value); + return; + } + // graph kernel cnode. + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + fg->set_attr(key, value); } void AnfRuntimeAlgorithm::CopyNodeAttr(const std::string &key, const AnfNodePtr &from, const AnfNodePtr &to) { @@ -241,16 +265,33 @@ void AnfRuntimeAlgorithm::EraseNodeAttr(const std::string &key, const AnfNodePtr if (!node->isa()) { MS_LOG(EXCEPTION) << "Only cnode has attr, but this anf is " << node->DebugString(); } + // single op cnode. auto primitive = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(primitive); - primitive->EraseAttr(key); + if (primitive != nullptr) { + primitive->EraseAttr(key); + return; + } + // graph kernel cnode. + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + fg->erase_flag(key); } bool AnfRuntimeAlgorithm::HasNodeAttr(const std::string &key, const CNodePtr &node) { MS_EXCEPTION_IF_NULL(node); + if (!node->isa()) { + MS_LOG(WARNING) << "Only cnode has attr, but this anf is " << node->DebugString(); + return false; + } + // single op cnode. auto primitive = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(primitive); - return primitive->HasAttr(key); + if (primitive != nullptr) { + return primitive->HasAttr(key); + } + // graph kernel cnode. + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + return fg->has_flag(key); } size_t AnfRuntimeAlgorithm::GetInputTensorNum(const AnfNodePtr &node) { @@ -782,6 +823,26 @@ bool AnfRuntimeAlgorithm::IsRealCNodeKernel(const AnfNodePtr &node) { return IsRealKernel(node); } +bool AnfRuntimeAlgorithm::IsGraphKernel(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + // graph kernel should be a real cnode kernel. + if (!IsRealCNodeKernel(node)) { + return false; + } + + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto input = cnode->input(kAnfPrimitiveIndex); + // graph kernel should has func_graph as first input. 
+ if (!IsValueNode(input)) { + return false; + } + + auto func_graph = GetValueNode(input); + MS_EXCEPTION_IF_NULL(func_graph); + return func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); +} + bool AnfRuntimeAlgorithm::IsParameterWeight(const ParameterPtr &node) { MS_EXCEPTION_IF_NULL(node); return node->has_default(); @@ -1014,5 +1075,44 @@ void AnfRuntimeAlgorithm::ReorderExecList(NotNull *> node_ std::copy(all_opt_list.begin(), all_opt_list.end(), std::back_inserter(*node_list)); } +TypeId AnfRuntimeAlgorithm::GetCNodeOutputPrecision(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + auto prim = AnfAlgo::GetCNodePrimitive(node); + if (prim == nullptr) { + return kTypeUnknown; + } + + TypeId except_type = kTypeUnknown; + if (prim->GetAttr(kAttrOutputPrecision) != nullptr) { + auto output_type_str = GetValue(prim->GetAttr(kAttrOutputPrecision)); + if (output_type_str == "float16") { + except_type = kNumberTypeFloat16; + } else if (output_type_str == "float32") { + except_type = kNumberTypeFloat32; + } else { + MS_LOG(EXCEPTION) << "The fix precision must be float16 or float32, but got " << output_type_str; + } + } + + return except_type; +} + +TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx) { + if (!node->isa()) { + MS_LOG(EXCEPTION) << node->DebugString() << ", input node is not CNode."; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (input_idx + 1 >= cnode->inputs().size()) { + MS_LOG(EXCEPTION) << "Input index " << input_idx << " is larger than input number " << GetInputTensorNum(cnode); + } + auto input_node = cnode->input(input_idx + 1); + MS_EXCEPTION_IF_NULL(input_node); + auto kernel_with_index = VisitKernel(input_node, 0); + if (!kernel_with_index.first->isa()) { + return kTypeUnknown; + } + return GetCNodeOutputPrecision(kernel_with_index.first); +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.h b/mindspore/ccsrc/session/anf_runtime_algorithm.h index cd14a8b20d5..c46f0b59556 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.h +++ b/mindspore/ccsrc/session/anf_runtime_algorithm.h @@ -54,6 +54,8 @@ class AnfRuntimeAlgorithm { static PrimitivePtr GetCNodePrimitive(const AnfNodePtr &node); // check whether anf node is a node of 'primitive_type',such as make_tuple is a cnode of kPrimMakeTuple static bool CheckPrimitiveType(const AnfNodePtr &node, const PrimitivePtr &primitive_type); + // get cnode primitive + static FuncGraphPtr GetCNodeFuncGraphPtr(const AnfNodePtr &node); // get kernel_name of anf node static std::string GetCNodeName(const AnfNodePtr &node); // get detail info of anf node @@ -161,6 +163,8 @@ class AnfRuntimeAlgorithm { static bool IsRealKernel(const AnfNodePtr &node); // checkout whether the anf node is a real kernel that is a cnode and can run on device static bool IsRealCNodeKernel(const AnfNodePtr &node); + // checkout whether the anf node is a graph kernel. 
+ static bool IsGraphKernel(const AnfNodePtr &node); // check parameter is weight or data static bool IsParameterWeight(const ParameterPtr &node); // set stream id of kernel,which will be set in stream assign and be used in stream generate @@ -190,6 +194,11 @@ class AnfRuntimeAlgorithm { static bool IsScalarInput(const CNodePtr &cnode, size_t index); static bool IsScalarOutput(const CNodePtr &cnode, size_t index); static void ReorderExecList(NotNull *> node_list); + static bool IsWhileTrueGraph(const KernelGraphPtr &child_graph); + // get fix output precision of cnode. + static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node); + // get fix output precision from prev node, input_idx is the input index of current node related to prev node. + static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx); }; } // namespace session using AnfAlgo = session::AnfRuntimeAlgorithm; diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc index 7173a26ed10..c69fa63bc89 100644 --- a/mindspore/ccsrc/session/ascend_session.cc +++ b/mindspore/ccsrc/session/ascend_session.cc @@ -37,6 +37,7 @@ #include "ir/scalar.h" #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" +#include "debug/draw.h" #include "common/utils.h" #include "pre_activate/common/helper.h" #include "device/kernel_runtime_manager.h" @@ -48,7 +49,7 @@ namespace mindspore { namespace session { const size_t kInvalidIndex = SIZE_MAX; namespace { -void DumpGraphExeOrder(const std::vector &execution_order) { +void DumpGraphExeOrder(const std::vector &execution_order, const std::string &tag = "") { MS_LOG(INFO) << "Dump execution_order size " << execution_order.size(); MS_LOG(INFO) << "[index][stream_label][graph_id][node string]"; int i = 0; @@ -60,6 +61,24 @@ void DumpGraphExeOrder(const std::vector &execution_order) { << "[" << cnode->DebugString() << "]"; i++; } + + std::stringstream buf; + buf << "================== execution order ==================\n"; + if (!tag.empty()) { + buf << tag << "\n"; + } + buf << "execution_order size: " << execution_order.size() << "\n"; + i = 0; + for (auto &cnode : execution_order) { + MS_EXCEPTION_IF_NULL(cnode); + buf << i << ":\n"; + buf << "\t" << cnode->DebugString() << "\n"; + buf << "\t" << AnfAlgo::GetStreamDistinctionLabel(cnode.get()) << "\n"; + buf << "\t" << AnfAlgo::GetGraphId(cnode.get()) << "\n"; + i++; + } + buf << "================== execution order ==================\n"; + // std::cout << buf.str() << std::endl; } void DumpGraphInputArgs(const VectorRef &args) { @@ -378,8 +397,28 @@ void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) { MS_EXCEPTION_IF_NULL(child_graph); MS_LOG(INFO) << "CompileChildGraph " << child_graph->ToString(); opt::AscendBackendIRFusionOptimization(child_graph); + opt::AscendBackendFuseBasicOpt(child_graph, true); + opt::AscendBackendGraphKernelOpt(child_graph, true); + child_graph->SetExecOrderByDefault(); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = + save_graphs_path + "/" + "select_kernel_before" + "_graph_" + std::to_string(child_graph->graph_id()) + ".ir"; + DumpIR(file_path, child_graph); + } // select kernel build info SelectKernel(*child_graph); + if (save_graphs) { + std::string file_path = + 
save_graphs_path + "/" + "select_kernel_after" + "_graph_" + std::to_string(child_graph->graph_id()) + ".ir"; + DumpIR(file_path, child_graph); + } // convert kernel Graph to model predictmodel::StepConvertGraph(child_graph); // optimize graph @@ -543,6 +582,9 @@ void AscendSession::HardwareOptimize(const std::shared_ptr &kernel_ device::ascend::KernelPreBuild(kernel_graph.get()); MS_LOG(INFO) << "HardwareOptimize start!"; opt::AscendBackendOptimization(kernel_graph); + opt::AscendGraphKernelCommonProcess(kernel_graph); + opt::AscendBackendFuseBasicOpt(kernel_graph, false); + opt::AscendBackendAddAtomicClean(kernel_graph); MS_EXCEPTION_IF_NULL(kernel_graph); kernel_graph->SetExecOrderByDefault(); MS_LOG(INFO) << "HardwareOptimize Finish!"; diff --git a/mindspore/ccsrc/session/kernel_graph.cc b/mindspore/ccsrc/session/kernel_graph.cc index 6bc0ec86775..7e9bb62aabe 100644 --- a/mindspore/ccsrc/session/kernel_graph.cc +++ b/mindspore/ccsrc/session/kernel_graph.cc @@ -24,6 +24,7 @@ #include "device/kernel_info.h" #include "kernel/kernel_build_info.h" #include "device/kernel_runtime_manager.h" +#include "kernel/common_utils.h" namespace mindspore { namespace session { @@ -75,6 +76,31 @@ std::vector GetCallRealOutputs(const AnfNodePtr &call_node) { } return real_inputs; } + +AnfNodePtr MakeValueNode(const AnfNodePtr &node) { + auto value_node = node->cast(); + if (value_node == nullptr) { + return nullptr; + } + + ValueNodePtr new_value_node = std::make_shared(value_node->value()); + new_value_node->set_abstract(value_node->abstract()); + // create kernel_info fo new value node + auto kernel_info = std::make_shared(); + new_value_node->set_kernel_info(kernel_info); + // create kernel_build_info for new value node + auto kernel_build_info_builder = std::make_shared(); + // set the format of value_node to DEFAULT_FORMAT + kernel_build_info_builder->SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); + // set value node initial device data type = infer data type + std::vector types; + for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(value_node); ++index) { + types.push_back(kTypeUnknown); + } + kernel_build_info_builder->SetOutputsDeviceType(types); + AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), new_value_node.get()); + return new_value_node; +} } // namespace std::vector KernelGraph::outputs() const { auto graph_output = output(); @@ -231,7 +257,8 @@ CNodePtr KernelGraph::NewCNode(const std::vector &inputs) { auto cnode = FuncGraph::NewCNode(inputs); MS_EXCEPTION_IF_NULL(cnode); cnode->set_abstract(std::make_shared()); - // create kernel_info from new parameter + CreateKernelInfoFromNewParameter(cnode); + auto kernel_info = std::make_shared(); std::vector feature_map_input_indexs; // if the node only has the primitive(such as getNext) or the node's input has a feature map input @@ -257,6 +284,41 @@ CNodePtr KernelGraph::NewCNode(const std::vector &inputs) { return cnode; } +void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) { + if (!AnfAlgo::IsGraphKernel(cnode)) { + return; + } + auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(cnode); + MS_EXCEPTION_IF_NULL(func_graph); + + std::vector node_list; + std::vector input_list; + std::vector output_list; + kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); + for (auto &anf_node : node_list) { + MS_EXCEPTION_IF_NULL(anf_node); + auto kernel_info = std::make_shared(); + anf_node->set_kernel_info(kernel_info); + auto anf_cnode = anf_node->cast(); + 
MS_EXCEPTION_IF_NULL(anf_cnode); + for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_cnode); ++i) { + auto input_node = anf_cnode->input(i + 1); + MS_EXCEPTION_IF_NULL(input_node); + if (IsValueNode(input_node)) { + auto new_input_node = MakeValueNode(input_node); + if (new_input_node != nullptr) { + anf_cnode->set_input(i + 1, new_input_node); + } + } + } + } + for (auto &anf_node : input_list) { + MS_EXCEPTION_IF_NULL(anf_node); + auto kernel_info = std::make_shared(); + anf_node->set_kernel_info(kernel_info); + } +} + CNodePtr KernelGraph::NewCNode(const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(cnode); auto new_cnode = std::make_shared(*cnode); @@ -352,21 +414,7 @@ std::vector KernelGraph::SplitTupleValueNodeToNodeList(const ValueNo ValueNodePtr KernelGraph::NewValueNode(const ValueNodePtr &value_node) { MS_EXCEPTION_IF_NULL(value_node); - ValueNodePtr new_value_node = std::make_shared(value_node->value()); - new_value_node->set_abstract(value_node->abstract()); - // create kernel_info fo new value node - auto kernel_info = std::make_shared(); - kernel_info->SetFeatureMapFlag(false); - new_value_node->set_kernel_info(kernel_info); - // create kernel_build_info for new value node - auto kernel_build_info_builder = std::make_shared(); - // set the format of value_node to DEFAULT_FORMAT - auto output_tensor_num = AnfAlgo::GetOutputTensorNum(value_node); - kernel_build_info_builder->SetOutputsFormat(std::vector(output_tensor_num, kOpFormat_DEFAULT)); - // set value node initial device data type = infer data type - std::vector types = std::vector(output_tensor_num, kTypeUnknown); - kernel_build_info_builder->SetOutputsDeviceType(types); - AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), new_value_node.get()); + auto new_value_node = MakeValueNode(value_node)->cast(); AnfAlgo::SetGraphId(graph_id_, new_value_node.get()); return new_value_node; } diff --git a/mindspore/ccsrc/session/kernel_graph.h b/mindspore/ccsrc/session/kernel_graph.h index 9954b5b1d0b..3009ab0ce9b 100644 --- a/mindspore/ccsrc/session/kernel_graph.h +++ b/mindspore/ccsrc/session/kernel_graph.h @@ -51,6 +51,7 @@ class KernelGraph : public FuncGraph { std::vector *MutableInputs() const { return inputs_.get(); } std::vector outputs() const; CNodePtr NewCNode(const std::vector &inputs) override; + void CreateKernelInfoFromNewParameter(const CNodePtr &cnode); CNodePtr NewCNode(const CNodePtr &cnode); ParameterPtr NewParameter(const ParameterPtr ¶meter = nullptr); ValueNodePtr NewValueNode(const ValueNodePtr &value_node = nullptr); diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc index e5e58045cdd..a78c2ac4d83 100644 --- a/mindspore/ccsrc/session/session_basic.cc +++ b/mindspore/ccsrc/session/session_basic.cc @@ -21,6 +21,7 @@ #include "pipeline/parse/data_converter.h" #include "ir/manager.h" #include "ir/param_value_py.h" +#include "kernel/common_utils.h" #include "operator/ops.h" #include "common/trans.h" #include "utils/context/ms_context.h" @@ -33,6 +34,7 @@ #include "common/utils.h" #include "ir/dtype.h" #include "ir/anf.h" +#include "ir/func_graph_cloner.h" namespace mindspore { namespace session { @@ -367,10 +369,17 @@ CNodePtr SessionBasic::CreateNewCNode(const CNodePtr &cnode, bool valid_input, K MS_EXCEPTION_IF_NULL(other_graph_cnode); *from_other_graph = false; // get primitive of old node + std::vector cnode_inputs; auto prim = AnfAlgo::GetCNodePrimitive(cnode); - MS_EXCEPTION_IF_NULL(prim); - // push attr to inputs[0] of new cnode - std::vector 
cnode_inputs = {std::make_shared(std::make_shared(*prim))}; + if (prim != nullptr) { + // push attr to inputs[0] of new cnode + cnode_inputs.push_back(std::make_shared(std::make_shared(*prim))); + } else { + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(cnode); + MS_EXCEPTION_IF_NULL(fg); + auto new_fg = BasicClone(fg); + cnode_inputs.push_back(std::make_shared(new_fg)); + } // if has multiple depends,only select first depend as parameter for (size_t input_idx = 1; input_idx < cnode->inputs().size(); input_idx++) { auto anf = cnode->inputs()[input_idx]; diff --git a/mindspore/ccsrc/transform/convert.h b/mindspore/ccsrc/transform/convert.h index 8a63f00c6c2..2f6c9bb0add 100644 --- a/mindspore/ccsrc/transform/convert.h +++ b/mindspore/ccsrc/transform/convert.h @@ -102,22 +102,15 @@ class DfGraphConvertor { explicit DfGraphConvertor(const AnfGraphPtr &anf_graph) : anf_graph_(anf_graph), df_graph_(std::make_shared(anf_graph_->ToString())) { #if (!defined ENABLE_GE) || (defined ENABLE_INFER) - auto it_training = anf_graph->flags().find("training"); - if (it_training != anf_graph->flags().end()) { - training_ = it_training->second; - } else { - training_ = false; - } + training_ = anf_graph->has_flag("training"); #else training_ = ENABLE_TRAIN; #endif - auto it_distribute = anf_graph->flags().find("broadcast_flag"); - if (it_distribute != anf_graph->flags().end()) { + distribute_ = anf_graph->has_flag("broadcast_flag"); + if (anf_graph->has_flag("broadcast_flag")) { ConfigManager::GetInstance().set_parallel_strategy(ParallelStrategy::DISTRIBUTION); - distribute_ = it_distribute->second; } else { ConfigManager::GetInstance().set_parallel_strategy(ParallelStrategy::ONE_DEVICE); - distribute_ = false; } MS_LOG(INFO) << "Create DfGraphConvertor with training: " << training_ << ", distribute: " << distribute_; diff --git a/mindspore/ccsrc/utils/context/ms_context.cc b/mindspore/ccsrc/utils/context/ms_context.cc index f9f5fa1ef12..9f283319a75 100644 --- a/mindspore/ccsrc/utils/context/ms_context.cc +++ b/mindspore/ccsrc/utils/context/ms_context.cc @@ -84,6 +84,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { check_bprop_flag_ = false; max_device_memory_ = kDefaultMaxDeviceMemory; print_file_path_ = ""; + enable_graph_kernel_ = false; } std::shared_ptr MsContext::GetInstance() { diff --git a/mindspore/ccsrc/utils/context/ms_context.h b/mindspore/ccsrc/utils/context/ms_context.h index a1ab728bc74..a5f936d65c9 100644 --- a/mindspore/ccsrc/utils/context/ms_context.h +++ b/mindspore/ccsrc/utils/context/ms_context.h @@ -157,6 +157,9 @@ class MsContext { float max_device_memory() const { return max_device_memory_; } void set_max_device_memory(float max_device_memory) { max_device_memory_ = max_device_memory; } + void set_enable_graph_kernel(bool enable_graph_kernel) { enable_graph_kernel_ = enable_graph_kernel; } + bool enable_graph_kernel() const { return enable_graph_kernel_; } + private: MsContext(const std::string &backend_policy, const std::string &target); void GetGeOptions(std::map *ge_options) const; @@ -199,6 +202,7 @@ class MsContext { bool check_bprop_flag_; float max_device_memory_; std::string print_file_path_; + bool enable_graph_kernel_; }; } // namespace mindspore diff --git a/mindspore/ccsrc/utils/graph_utils.h b/mindspore/ccsrc/utils/graph_utils.h index e2703a28774..93edda3e34f 100644 --- a/mindspore/ccsrc/utils/graph_utils.h +++ b/mindspore/ccsrc/utils/graph_utils.h @@ -62,6 +62,10 @@ std::vector DeepLinkedGraphSearch(const AnfNodePtr &root, const Incl 
std::vector DeepScopedGraphSearchWithFilter(const AnfNodePtr &root, const IncludeFunc &include, const FilterFunc &filter); +class FuncGraphManager; +using FuncGraphManagerPtr = std::shared_ptr; +std::vector DeepUsersSearch(const AnfNodePtr &root, const IncludeFunc &include, + const FuncGraphManagerPtr &mng); std::vector TopoSort(const AnfNodePtr &root, const SuccFunc &succ = SuccIncoming, const IncludeFunc &include = AlwaysInclude); diff --git a/mindspore/ccsrc/utils/graph_utils_extends.cc b/mindspore/ccsrc/utils/graph_utils_extends.cc index 85f9986a0d5..0740c242363 100644 --- a/mindspore/ccsrc/utils/graph_utils_extends.cc +++ b/mindspore/ccsrc/utils/graph_utils_extends.cc @@ -26,6 +26,7 @@ #include #include "ir/visitor.h" +#include "ir/manager.h" #include "ir/func_graph.h" #include "debug/label.h" #include "utils/log_adapter.h" @@ -161,6 +162,24 @@ class DeepLinkedGraphSearcher : public DeepFirstSearcher { void Visit(const ValueNodePtr &) override {} }; + +class DeepUsersSearcher : public DeepFirstSearcher { + public: + explicit DeepUsersSearcher(const IncludeFunc &include, const FuncGraphManagerPtr &mng) + : DeepFirstSearcher(include), mng_(mng) {} + ~DeepUsersSearcher() override = default; + + void Visit(const CNodePtr &cnode) override { + auto &users = mng_->node_users()[cnode]; + for (auto iter = users.begin(); iter != users.end(); ++iter) { + DeepFirstSearcher::Visit(iter->first); + } + } + void Visit(const ValueNodePtr &) override {} + + private: + FuncGraphManagerPtr mng_; +}; } // namespace // include for if expand the node the search, filter for if put the node to results. @@ -180,4 +199,9 @@ std::vector DeepUsedGraphSearch(const AnfNodePtr &root, const Includ std::vector DeepLinkedGraphSearch(const AnfNodePtr &root, const IncludeFunc &include) { return DeepLinkedGraphSearcher(include).Search(root); } + +std::vector DeepUsersSearch(const AnfNodePtr &root, const IncludeFunc &include, + const FuncGraphManagerPtr &mng) { + return DeepUsersSearcher(include, mng).Search(root); +} } // namespace mindspore diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h index 97ffd739bbf..83e3404d7b7 100644 --- a/mindspore/ccsrc/utils/utils.h +++ b/mindspore/ccsrc/utils/utils.h @@ -192,6 +192,9 @@ constexpr auto kAttrEventId = "event_id"; constexpr auto kAttrDynInput = "dynamic"; constexpr auto kAttrDynInputSizes = "dyn_input_sizes"; constexpr auto kAttrSrcFormat = "src_format"; +constexpr auto kAttrMultiples = "multiples"; +constexpr auto kAttrFixPrecision = "fix_precision"; +constexpr auto kAttrOutputPrecision = "output_precision"; constexpr auto kAttrOutputUsedNum = "output_used_num"; constexpr auto kAttrHasBias = "has_bias"; constexpr auto kAttrN = "n"; @@ -216,6 +219,7 @@ constexpr auto kAttrSplitDim = "split_dim"; constexpr auto kAttrNumSplit = "num_split"; constexpr auto kAttrOutputNum = "output_num"; constexpr auto kAttrSizeSplits = "size_splits"; +constexpr auto kAttrOutputDefault = "output_default"; // attr value constexpr auto kValueTargetSwitch = "target_switch"; diff --git a/mindspore/ccsrc/vm/segment_runner.cc b/mindspore/ccsrc/vm/segment_runner.cc index dcd62a548d9..9b2ee51b3fb 100644 --- a/mindspore/ccsrc/vm/segment_runner.cc +++ b/mindspore/ccsrc/vm/segment_runner.cc @@ -92,6 +92,8 @@ std::tuple TransformSegmentToAnfGr } else if (eqv.find(a) == eqv.end()) { inputs.push_back(a); eqv[a] = fg->add_parameter(); + eqv[a]->set_abstract(a->abstract()); + eqv[a]->set_kernel_info(a->kernel_info_ptr()); } return eqv[a]; @@ -107,15 +109,20 @@ std::tuple 
TransformSegmentToAnfGr if (inps.empty()) { MS_LOG(EXCEPTION) << "Input is empty"; } - if (!IsValueNode(inps[0])) { + if (!IsValueNode(inps[0]) && + !(IsValueNode(inps[0]) && + inps[0]->cast()->value()->cast()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL))) { MS_LOG(EXCEPTION) << "Input[0] Must be a Primitive valuenode"; } + auto fn = inps[0]; std::vector args{fn}; (void)std::transform(std::begin(inps) + 1, std::end(inps), std::back_inserter(args), ref); eqv[n] = fg->NewCNode(args); + eqv[n]->set_abstract(n->abstract()); + eqv[n]->set_kernel_info(n->kernel_info_ptr()); } std::vector eqv_keys; @@ -123,15 +130,18 @@ std::tuple TransformSegmentToAnfGr [](const std::pair &elem) -> AnfNodePtr { return elem.first; }); auto outputs = GetOutput(lst, lst[0]->func_graph()->manager()->node_users(), eqv_keys); - std::vector output_args; - output_args.push_back(NewValueNode(prim::kPrimMakeTuple)); - (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_args), - [&eqv](const AnfNodePtr &o) -> AnfNodePtr { return eqv[o]; }); - - // Set output for AnfGraph - auto fg_output = fg->NewCNode(output_args); + AnfNodePtr fg_output; + if (outputs.size() > 1) { + std::vector output_args; + output_args.push_back(NewValueNode(prim::kPrimMakeTuple)); + (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_args), + [&eqv](const AnfNodePtr &o) -> AnfNodePtr { return eqv[o]; }); + // Set output for AnfGraph + fg_output = fg->NewCNode(output_args); + } else { + fg_output = eqv[outputs[0]]; + } fg->set_output(fg_output); - return std::make_tuple(fg, inputs, outputs); } diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc index 91aa974cdf5..c1fba78be8d 100644 --- a/mindspore/ccsrc/vm/transform.cc +++ b/mindspore/ccsrc/vm/transform.cc @@ -33,6 +33,7 @@ #include "utils/graph_utils.h" #include "utils/context/ms_context.h" #include "debug/trace.h" +#include "debug/anf_ir_dump.h" namespace mindspore { namespace compile { @@ -269,6 +270,14 @@ bool CompileGraph::IsCut(const AnfNodePtr &node) { } AnfNodePtr fn = inputs[0]; + MS_EXCEPTION_IF_NULL(fn); + if (IsValueNode(fn)) { + auto fg = GetValueNode(fn); + if (fg->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + return false; + } + } + if (!IsValueNode(fn)) { return true; } @@ -316,7 +325,6 @@ VectorRef CompileGraph::SplitNodes(const FuncGraphPtr &graph) { for (auto &node : nodes) { MS_EXCEPTION_IF_NULL(node); if (IsCut(node)) { - MS_LOG(DEBUG) << "Cut node:" << node->DebugString(10) << ", size:" << split.size(); if (split.size() != 0) { splits.push_back(split); } @@ -330,10 +338,8 @@ VectorRef CompileGraph::SplitNodes(const FuncGraphPtr &graph) { } last_target = cur_target; split.push_back(node); - MS_LOG(DEBUG) << "Insert node:" << node->DebugString(10) << ", size:" << split.size(); } } - MS_LOG(DEBUG) << "Split node size :" << splits.size(); return splits; } @@ -567,7 +573,6 @@ InstSet CompileGraph::GenMultiGraphsSinkInst(const FuncGraphPtr &graph) { InstSet CompileGraph::Run(const FuncGraphPtr &graph) { MS_EXCEPTION_IF_NULL(graph); - MS_LOG(DEBUG) << "Compile start graph: " << graph->ToString(); Reset(); PushParameters(graph); @@ -793,16 +798,11 @@ CompileGraphs::CompileGraphs(const BackendPtr &backend, const std::vectormanager(); - MS_EXCEPTION_IF_NULL(graph_manager); - FuncGraphSet graphs = graph_manager->func_graphs(); - for (auto &g : graphs) { - mapping_[g] = static_cast(insts_.size()); - if (transform_ != nullptr) { - InstSet insts = transform_->Run(g); - if (!insts.empty()) { - 
(void)insts_.insert(insts_.end(), insts.begin(), insts.end()); - } + mapping_[graph] = static_cast(insts_.size()); + if (transform_ != nullptr) { + InstSet insts = transform_->Run(graph); + if (!insts.empty()) { + (void)insts_.insert(insts_.end(), insts.begin(), insts.end()); } } MS_LOG(DEBUG) << "End"; @@ -847,8 +847,15 @@ FinalVMPtr CompileGraphs::CompileAndLink(const FuncGraphPtr &graph) { Reset(); MS_LOG(DEBUG) << "Begin parameter:" << graph->parameters().size(); - (void)WrapPrimitives(graph); - Compile(graph); + FuncGraphPtr prim_graph = WrapPrimitives(graph); + Compile(prim_graph); + MS_EXCEPTION_IF_NULL(prim_graph); + FuncGraphSet graphs = prim_graph->manager()->func_graphs(); + for (auto g : graphs) { + if (g != graph && g != nullptr && !(g->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL))) { + Compile(g); + } + } FinalVMPtr rt = Link(graph); Reset(); diff --git a/mindspore/context.py b/mindspore/context.py index 6c4d616cf12..ad601f8fab9 100644 --- a/mindspore/context.py +++ b/mindspore/context.py @@ -56,7 +56,8 @@ def _make_directory(path): os.makedirs(path) real_path = path except PermissionError as e: - logger.error(f"No write permission on the directory `{path}, error = {e}") + logger.error( + f"No write permission on the directory `{path}, error = {e}") raise ValueError(f"No write permission on the directory `{path}`.") return real_path @@ -79,11 +80,13 @@ class _ThreadLocalInfo(threading.local): def reserve_class_name_in_scope(self, reserve_class_name_in_scope): """Sets whether to save the network class name in the scope.""" if not isinstance(reserve_class_name_in_scope, bool): - raise ValueError("Set reserve_class_name_in_scope value must be bool!") + raise ValueError( + "Set reserve_class_name_in_scope value must be bool!") self._reserve_class_name_in_scope = reserve_class_name_in_scope -_ContextRecord = namedtuple("_ContextRecord", ["is_pynative_mode", "switch_context_fn"]) +_ContextRecord = namedtuple( + "_ContextRecord", ["is_pynative_mode", "switch_context_fn"]) class _ContextSwitchInfo(threading.local): @@ -110,7 +113,8 @@ class _ContextSwitchInfo(threading.local): """ if isinstance(switch_context_fn, FunctionType): switch_context_fn() - self.context_stack.append(_ContextRecord(is_pynative, switch_context_fn)) + self.context_stack.append( + _ContextRecord(is_pynative, switch_context_fn)) def pop(self): self.context_stack.pop() @@ -194,7 +198,8 @@ class _Context: @save_graphs_path.setter def save_graphs_path(self, save_graphs_path): - self._context_handle.set_save_graphs_path(_make_directory(save_graphs_path)) + self._context_handle.set_save_graphs_path( + _make_directory(save_graphs_path)) @property def device_target(self): @@ -213,7 +218,8 @@ class _Context: @device_id.setter def device_id(self, device_id): if device_id < 0 or device_id > 4095: - raise ValueError("Device id must be in [0, 4095], but got {}".format(device_id)) + raise ValueError( + "Device id must be in [0, 4095], but got {}".format(device_id)) success = self._context_handle.set_device_id(device_id) if not success: raise RuntimeError("Device id set failed!!!") @@ -240,7 +246,8 @@ class _Context: @enable_auto_mixed_precision.setter def enable_auto_mixed_precision(self, enable_auto_mixed_precision): - self._context_handle.set_auto_mixed_precision_flag(enable_auto_mixed_precision) + self._context_handle.set_auto_mixed_precision_flag( + enable_auto_mixed_precision) @property def enable_reduce_precision(self): @@ -248,7 +255,8 @@ class _Context: @enable_reduce_precision.setter def enable_reduce_precision(self, 
enable_reduce_precision): - self._context_handle.set_enable_reduce_precision_flag(enable_reduce_precision) + self._context_handle.set_enable_reduce_precision_flag( + enable_reduce_precision) @property def enable_dump(self): @@ -280,12 +288,21 @@ class _Context: @profiling_options.setter def profiling_options(self, option): - options = ["training_trace", "task_trace", "task_trace:training_trace", "training_trace:task_trace", "op_trace"] + options = ["training_trace", "task_trace", + "task_trace:training_trace", "training_trace:task_trace", "op_trace"] if option not in options: raise ValueError("Profiling options must be in 'training_trace' 'task_trace' " "'task_trace:training_trace' 'training_trace:task_trace' or 'op_trace'.") self._context_handle.set_profiling_options(option) + @property + def enable_graph_kernel(self): + return self._context_handle.get_enable_graph_kernel() + + @enable_graph_kernel.setter + def enable_graph_kernel(self, graph_kernel_switch_): + self._context_handle.set_enable_graph_kernel(graph_kernel_switch_) + @property def reserve_class_name_in_scope(self): """Gets whether to save the network class name in the scope.""" @@ -303,13 +320,19 @@ class _Context: @variable_memory_max_size.setter def variable_memory_max_size(self, variable_memory_max_size): if not check_input_format(variable_memory_max_size): - raise ValueError("Context param variable_memory_max_size should be in correct format! Such as \"5GB\"") + raise ValueError( + "Context param variable_memory_max_size should be in correct format! Such as \"5GB\"") if int(variable_memory_max_size[:-2]) >= _DEVICE_APP_MEMORY_SIZE: - raise ValueError("Context param variable_memory_max_size should be less than 31GB.") - variable_memory_max_size_ = variable_memory_max_size[:-2] + " * 1024 * 1024 * 1024" - graph_memory_max_size = _DEVICE_APP_MEMORY_SIZE - int(variable_memory_max_size[:-2]) - graph_memory_max_size_ = str(graph_memory_max_size) + " * 1024 * 1024 * 1024" - self._context_handle.set_variable_memory_max_size(variable_memory_max_size_) + raise ValueError( + "Context param variable_memory_max_size should be less than 31GB.") + variable_memory_max_size_ = variable_memory_max_size[:- + 2] + " * 1024 * 1024 * 1024" + graph_memory_max_size = _DEVICE_APP_MEMORY_SIZE - \ + int(variable_memory_max_size[:-2]) + graph_memory_max_size_ = str( + graph_memory_max_size) + " * 1024 * 1024 * 1024" + self._context_handle.set_variable_memory_max_size( + variable_memory_max_size_) self._context_handle.set_graph_memory_max_size(graph_memory_max_size_) @property @@ -582,7 +605,8 @@ def get_context(attr_key): ValueError: If input key is not an attribute in context. """ if not hasattr(_context(), attr_key): - raise ValueError("Get context keyword %s is not recognized!" % attr_key) + raise ValueError( + "Get context keyword %s is not recognized!" % attr_key) return getattr(_context(), attr_key) @args_type_check(enable_mpi=bool) diff --git a/mindspore/nn/__init__.py b/mindspore/nn/__init__.py index f3f59edcbf5..8d5e7d3b0a1 100644 --- a/mindspore/nn/__init__.py +++ b/mindspore/nn/__init__.py @@ -18,14 +18,14 @@ Neural Networks Cells. Pre-defined building blocks or computing units to construct Neural Networks. """ from . 
import layer, loss, optim, metrics, wrap
-from .cell import Cell
+from .cell import Cell, GraphKernel
 from .layer import *
 from .loss import *
 from .optim import *
 from .metrics import *
 from .wrap import *
-__all__ = ["Cell"]
+__all__ = ["Cell", "GraphKernel"]
 __all__.extend(layer.__all__)
 __all__.extend(loss.__all__)
 __all__.extend(optim.__all__)
diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py
index 65c1ce95486..c046c2e1bf4 100755
--- a/mindspore/nn/cell.py
+++ b/mindspore/nn/cell.py
@@ -707,9 +707,6 @@ class Cell:
         return cells

     def add_flags(self, **flags):
-        for x in flags:
-            if not isinstance(flags[x], bool):
-                raise TypeError(f"Flags (f{x}) must be bool but {type(flags[x])}.")
         if not hasattr(self, "_mindspore_flags"):
             self._mindspore_flags = {}
         self._mindspore_flags.update({**flags})
@@ -820,3 +817,27 @@ class Cell:
         """
         self._backward_hook = HookBackward(fn, self.cls_name + "(" + str(id(self)) + ")")
         self.enable_hook = True
+
+class GraphKernel(Cell):
+    """
+    Base class for GraphKernel.
+
+    A `GraphKernel` is a composite of basic primitives and can be compiled into a fused kernel automatically when
+    context.set_context(enable_graph_kernel=True).
+
+    Examples:
+        >>> class Relu(GraphKernel):
+        >>>     def __init__(self):
+        >>>         super(Relu, self).__init__()
+        >>>         self.max = P.Maximum()
+        >>>
+        >>>     def construct(self, x):
+        >>>         return self.max(P.Fill()(P.DType()(x), P.Shape()(x), 0.0), x)
+    """
+    def __init__(self, auto_prefix=True, pips=None):
+        super(GraphKernel, self).__init__(auto_prefix, pips)
+        class_name = self.__class__.__name__
+        self.add_flags(graph_kernel=class_name)
+
+    def construct(self):
+        raise NotImplementedError
diff --git a/mindspore/nn/graph_kernels/__init__.py b/mindspore/nn/graph_kernels/__init__.py
new file mode 100644
index 00000000000..8128f2db609
--- /dev/null
+++ b/mindspore/nn/graph_kernels/__init__.py
@@ -0,0 +1,30 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+GraphKernel.
+
+GraphKernel provides a unified style to express graph and kernel for users.
+It breaks the boundary between graph and kernel and provides more opportunities to do compile optimization.
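A minimal usage sketch (the subclass name `MyRelu` and the sample input are hypothetical; `GraphKernel`, `enable_graph_kernel` and the primitives mirror the `Relu` example in `cell.py` above, and fusion is only expected to take effect on Ascend builds compiled with AKG support):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor, context
    from mindspore.ops import operations as P

    # opt in to automatic fusion of GraphKernel composites
    context.set_context(enable_graph_kernel=True)

    class MyRelu(nn.GraphKernel):
        def __init__(self):
            super(MyRelu, self).__init__()
            self.max = P.Maximum()

        def construct(self, x):
            # max(x, 0) expressed with basic primitives, so the whole composite can be fused
            return self.max(P.Fill()(P.DType()(x), P.Shape()(x), 0.0), x)

    out = MyRelu()(Tensor(np.array([-1.0, 2.0, -3.0], np.float32)))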
+""" +from .graph_kernels import MaximumGrad, MinimumGrad, AbsGrad, ApplyMomentum, BiasAdd, EqualCount, \ + ReduceMean, ReLU, SoftmaxCrossEntropyWithLogits, LayerNorm, LayerNormXBackprop, \ + LayerNormBetaGammaBackprop, LogSoftmax, Tanh, TanhGrad, Gelu, Softmax, BiasAddGrad, \ + LambUpdateWithLR, LambNextMV + +__all__ = ['MaximumGrad', 'MinimumGrad', 'AbsGrad', 'ApplyMomentum', 'BiasAdd', 'EqualCount', + 'ReduceMean', 'ReLU', 'SoftmaxCrossEntropyWithLogits', 'LayerNorm', + 'LayerNormXBackprop', 'LayerNormBetaGammaBackprop', 'LogSoftmax', 'Tanh', 'TanhGrad', + 'Gelu', 'Softmax', 'BiasAddGrad', 'LambUpdateWithLR', 'LambNextMV' + ] diff --git a/mindspore/nn/graph_kernels/graph_kernels.py b/mindspore/nn/graph_kernels/graph_kernels.py new file mode 100644 index 00000000000..21cc4f87109 --- /dev/null +++ b/mindspore/nn/graph_kernels/graph_kernels.py @@ -0,0 +1,1201 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Graph kernels. They are composites of basic primitives and can be compiled into +a fused kernel automaticly when context.set_context(enable_graph_kernel=True). +""" +from ...common import dtype as mstype +from ...ops import operations as P +from ...ops.primitive import PrimitiveWithInfer, prim_attr_register +from ...ops.composite import multitype_ops as C +from ...ops.operations import _grad_ops as G +from ..._checkparam import ParamValidator as validator +from ..cell import Cell, GraphKernel + + +class InplaceAssign(PrimitiveWithInfer): + """ + Inplace assign `Parameter` with a value. + + This primitive can only use in graph kernel. + + Inputs: + - **variable** (Parameter) - The `Parameter`. + - **value** (Tensor) - The value to assign. + - **depend** (Tensor) - The dependent tensor to keep this op connected in graph. + + Outputs: + Tensor, has the same type as original `variable`. + + Examples: + >>> def construct(self, x): + >>> val = x - 1.0 + >>> ret = x + 2.0 + >>> return InplaceAssign()(x, val, ret) + >>> x = Tensor([2.0], mindspore.float32) + >>> net = Net() + >>> net(x) + """ + @prim_attr_register + def __init__(self): + self.init_prim_io_names(inputs=['x', 'y', 'z'], outputs=['output']) + + def infer_shape(self, x, y, z): + return z + + def infer_dtype(self, x, y, z): + return z + + def get_bprop(self): + def bprop(x, y, z, out, dout): + return (x, C.zeros_like(y), dout) + return bprop + + +class MaximumGrad(GraphKernel): + """ + + Backprop function for Maximum operator. + + Inputs: + - **x** (Tensor) - The first input tensor of maximum. + - **y** (Tensor) - The second input tensor of maximum. + - **dout** (Tensor) - has the same shape as x and y, next operator's backprop output. 
+ + Outputs: + dx (Tensor): has the same shape as x and y, returns dout element if + `x >= y` returns true at the same position, or returns zero at that + position + dy (Tensor): has the same shape as x and y, dy = dout - dx + + Examples: + >>> layer = MaximumGrad() + >>> output = layer(Tensor([1,2,3], [3, 2, 1], [4, 5, 6])) + """ + + def __init__(self, grad_x=True, grad_y=True): + super(MaximumGrad, self).__init__() + self.grad_x = grad_x + self.grad_y = grad_y + self.select = P.Select() + self.greater_equal = P.GreaterEqual() + self.zeros_like = P.ZerosLike() + self.sub = P.Sub() + + def construct(self, x, y, dout): + cmp_result = self.greater_equal(x, y) + dx = self.select(cmp_result, dout, self.zeros_like(dout)) + dy = dout - dx + + return dx, dy + + +class MinimumGrad(GraphKernel): + """ + Backprop function for Minimum operator. + + Compares x and y elementwise, dout should has the same shape with x and y. + + Inputs: + - **x** (Tensor) - The first input + - **y** (Tensor) - x and y should have same shape + - **dout** (Tensor) - Has the same shape as x and y, next operator's backprop output + + Outputs: + - dx (Tensor) - Has the same shape as x and y, returns dout element if + `x <= y` returns true at the same position, or returns zero at that + position + - dy (Tensor) - Has the same shape as x and y, dy = dout - dx + + Examples: + >>> layer = MinimumGrad() + >>> output = layer(Tensor([1,2,3], [3, 2, 1], [4, 5, 6])) + """ + + def __init__(self, grad_x=True, grad_y=True): + super(MinimumGrad, self).__init__() + self.grad_x = grad_x + self.grad_y = grad_y + self.select = P.Select() + self.less_equal = P.LessEqual() + self.zeros_like = P.ZerosLike() + self.sub = P.Sub() + + def construct(self, x, y, dout): + cmp_result = self.less_equal(x, y) + dx = self.select(cmp_result, dout, self.zeros_like(dout)) + # dy = self.select(cmp_result, self.zeros_like(dout), dout) + dy = dout - dx + + return dx, dy + + +class AbsGrad(GraphKernel): + """ + Abs's backprop function. + + Inputs: + **input_x** (Tensor) - input data of this operator. + **dout** (Tensor) - output of the next operator's backprop function. + + Outputs: + Tensor, has the same shape as input_x. + + Examples: + >>> back = AbsGrad() + >>> output = back(Tensor([1, 2, 3]), Tensor([4, 5, 6])) + """ + + def __init__(self): + super(AbsGrad, self).__init__() + self.mul = P.Mul() + self.abs = P.Abs() + self.add = P.TensorAdd() + self.div = P.RealDiv() + self.round = P.Round() + + def construct(self, input_x, dout): + NUM_MAX = 32768 + mul_max = self.mul(input_x, P.Fill()(P.DType()(input_x), (1,), NUM_MAX)) + res_abs = self.abs(mul_max) + res_div = self.div(mul_max, res_abs) + res_round = self.round(res_div) + res = self.mul(res_round, dout) + return res + + +class ApplyMomentum(GraphKernel): + """ + Update parameter according to the ApplyMomentum algorithm. 
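Roughly, the update performed by `construct` below amounts to (a paraphrase of the code, writing `accum` for `accumulation` and `var` for `variable`):

    .. math::
        accum \leftarrow momentum \cdot accum + gradient \cdot gradient\_scale

    .. math::
        var \leftarrow var - learning\_rate \cdot accum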
+ + Inputs: + variable (Tensor): mutable tensor var + accumulation (Tensor): mutable tensor accum + learning_rate (float32): learning rate + gradient (float32): The gradient + momentum (float32): Momentum + + Outputs: updated accumulation and variable + """ + + def __init__(self, + use_nesterov=False, + use_locking=False, + gradient_scale=1.0): + super(ApplyMomentum, self).__init__() + self.gradient_scale = validator.check_type('gradient_scale', gradient_scale, [float]) + self.fake_output_assign_1 = InplaceAssign() + self.fake_output_assign_1.add_prim_attr("fake_output", True) + self.fake_output_assign_2 = InplaceAssign() + self.fake_output_assign_2.add_prim_attr("fake_output", True) + + def construct(self, variable, accumulation, learning_rate, gradient, momentum): + gradient = gradient * self.gradient_scale + momt_accumulation = accumulation * momentum + accumulation_inplace = momt_accumulation + gradient + + sum_gradient = accumulation_inplace * learning_rate + variable_inplace = variable - sum_gradient + + accumulation_inplace = self.fake_output_assign_1(accumulation, accumulation_inplace, accumulation_inplace) + variable_inplace = self.fake_output_assign_2(variable, variable_inplace, variable_inplace) + return accumulation_inplace, variable_inplace + + +class BiasAdd(GraphKernel): + """ + Return the sum of x and bias. + + Inputs: + x (Tensor): Tensor of input data. + bias (Tensor): The bias tensor. + + Output: + Tensor, the sum of x and bias. + + Example: + >>> layer = BiasGrad() + >>> output = BiasAdd(Tensor([1, 2, 3]), Tensor([1,])) + """ + + def __init__(self): + super(BiasAdd, self).__init__() + + def construct(self, x, bias): + shape = P.Shape()(x) + if len(shape) == 4: + bias_shape = (1, P.Shape()(bias)[0], 1, 1) # NCHW + else: + bias_shape = (1, P.Shape()(bias)[0]) + res = x + P.Reshape()(bias, bias_shape) + return res + +class BiasAddGrad(GraphKernel): + """ + Computes gradients of BiasAdd. + + Inputs: + x (Tensor): the gradients of bias add output. + + Output: + Tensor, the gradients of bias add input. + + Examples: + >>> dout = Tensor(np.ones(shape=[1, 2, 3, 4]), mindspore.float32) + >>> bias_add_grad = BiasAddGrad() + >>> dx = bias_add_grad(dout) + """ + def __init__(self): + super(BiasAddGrad, self).__init__() + + def construct(self, x): + shape_x = P.Shape()(x) + reduce_axis = [0] + for i in range(2, len(shape_x)): + reduce_axis.append(i) + + res = P.ReduceSum()(x, reduce_axis) + return res + + +class EqualCount(GraphKernel): + """ + Computes the number of the same elements of two tensors. + + The two input tensors should have same shape and data type. + + Inputs: + x (Tensor): the first input tensor. + y (Tensor): the second input tensor. + + Outputs: + Tensor, the type is same as input tensor and size as (1,). + + Examples: + >>> x = Tensor(np.array([1, 2, 3]), mindspore.int32) + >>> y = Tensor(np.array([1, 2, 4]), mindspore.int32) + >>> equal_count = EqualCount() + >>> equal_count(x, y) + """ + def __init__(self): + super(EqualCount, self).__init__() + + def construct(self, x, y): + equal_bool = P.Equal()(P.Cast()(x, mstype.float32), P.Cast()(y, mstype.float32)) + equal_count = P.Cast()(equal_bool, mstype.float16) + + axes = (0,) + res = P.ReduceSum()(equal_count, axes) + res = P.Cast()(res, P.DType()(x)) + return res + + +class ReduceMean(GraphKernel): + """ + Reduce a dimension of a tensor by averaging all elements in the dimension. + + The dtype of the tensor to be reduced is number. 
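Put roughly, the `construct` below realizes the mean as a sum scaled by the reciprocal of the number of reduced elements:

    .. math::
        output = \Big(\sum_{axis} x\Big) \cdot \frac{1}{\prod_{i \in axis} shape_x[i]}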
+ + Args: + keep_dims (bool): If True, keep these reduced dimensions and the length is 1. + If False, don't keep these dimensions. Default : False. + + Inputs: + - **input_x** (Tensor[Number]) - The input tensor. + - **axis** (Union[int, tuple(int), list(int)]) - The dimensions to reduce. Default: (), reduce all dimensions. + Only constant value is allowed. + + Outputs: + Tensor, has the same dtype as the 'input_x'. + + - If axis is (), and keep_dims is false, + the output is a 0-D tensor representing the sum of all elements in the input tensor. + - If axis is int, set as 2, and keep_dims is false, + the shape of output is :math:`(x_1, x_3, ..., x_R)`. + - If axis is tuple(int), set as (2, 3), and keep_dims is false, + the shape of output is :math:`(x_1, x_4, ..., x_R)`. + + Examples: + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> op = ReduceMean(keep_dims=True) + >>> output = op(input_x, 1) + """ + + def __init__(self, keep_dims=True): + super(ReduceMean, self).__init__() + self.keep_dims = validator.check_type('keep_dims', keep_dims, [bool]) + self.sum = P.ReduceSum(self.keep_dims) + + def construct(self, x, axis): + shape = P.Shape()(x) + value_num = 1 + for i in axis: + value_num *= shape[i] + + data_sum = self.sum(x, axis) + avg = 1.0 / P.Fill()(P.DType()(x), (1,), value_num) + res = data_sum * avg + return res + + +class ReLU(GraphKernel): + r""" + Computes ReLU(Rectified Linear Unit) of input tensor element-wise. + + It returns :math:`\max(x,\ 0)` element-wise. + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, with the same type and shape as the `input_x`. + + Examples: + >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32) + >>> relu = ReLU() + >>> result = relu(input_x) + [[0, 4.0, 0.0], [2.0, 0.0, 9.0]] + """ + def __init__(self): + super(ReLU, self).__init__() + self.max = P.Maximum() + + def construct(self, x): + return self.max(P.Fill()(P.DType()(x), P.Shape()(x), 0.0), x) + + +class SoftmaxCrossEntropyWithLogits(GraphKernel): + r""" + Gets the softmax cross-entropy value between logits and labels which shoule be one-hot encoding. + + Note: + Sets input logits as `X`, input label as `Y`, output as `loss`. Then, + + .. math:: + p_{ij} = softmax(X_{ij}) = \frac{exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)} + + .. math:: + loss_{ij} = -\sum_j{Y_{ij} * ln(p_{ij})} + + Inputs: + - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. + - **labels** (Tensor) - Ground truth labels, with shape :math:`(N, C)`. + + Outputs: + Tuple of 2 Tensor, the loss shape is `(N,)`, and the dlogits with the same shape as `logits`. 
+ + Examples: + >>> logits = Tensor([[2, 4, 1, 4, 5], [2, 1, 2, 4, 3]], mindspore.float32) + >>> labels = Tensor([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0]], mindspore.float32) + >>> softmax_cross = SoftmaxCrossEntropyWithLogits() + >>> loss, backprop = softmax_cross(logits, labels) + """ + + def __init__(self): + super(SoftmaxCrossEntropyWithLogits, self).__init__() + self.max = P.ReduceMax(keep_dims=True) + self.sum_keep_dims = P.ReduceSum(keep_dims=True) + + def construct(self, features, labels): + data_max = self.max(features, (1,)) + data_sub = features - data_max + data_exp = P.Exp()(data_sub) + data_sum = self.sum_keep_dims(data_exp, (1,)) + data_div = data_exp / data_sum + data_log_tmp = P.Log()(data_sum) + data_log = data_sub - data_log_tmp + data_mul = labels * data_log + data_muls = P.Neg()(data_mul) + loss = P.ReduceSum()(data_muls, (1,)) + backprop = data_div - labels + return loss, backprop + + def bprop(self, features, labels, out, dout): + grad = out[1] + grad = grad * P.ExpandDims()(dout[0], -1) + return grad, P.ZerosLike()(labels) + + +class LayerNormForward(GraphKernel): + """ Forward function of the LayerNorm operator. """ + def __init__(self, begin_norm_axis=1, begin_params_axis=1): + super(LayerNormForward, self).__init__() + self.begin_norm_axis = validator.check_type('begin_norm_axis', begin_norm_axis, [int]) + self.begin_params_axis = validator.check_type('begin_params_axis', begin_params_axis, [int]) + self.mul = P.Mul() + self.sum_keep_dims = P.ReduceSum(keep_dims=True) + self.sub = P.Sub() + self.add = P.TensorAdd() + self.log = P.Log() + self.exp = P.Exp() + self.eps = P.Eps() + + def construct(self, input_x, input_gamma, input_beta): + shape_x = P.Shape()(input_x) + + # Calculate the scaling ratio of the average + begin_norm_axis = self.begin_norm_axis + if begin_norm_axis < 0: + begin_norm_axis += len(shape_x) + reduce_axis = () + for i in range(len(shape_x)): + if i > begin_norm_axis or i == begin_norm_axis: + reduce_axis = reduce_axis + (i,) + + reduce_elts = 1.0 + for i in reduce_axis: + reduce_elts *= shape_x[i] + mean_cof = 1.0 / reduce_elts + + # Calculate mean + mean_muls = self.mul(input_x, mean_cof) + mean = self.sum_keep_dims(mean_muls, reduce_axis) + + # Calculate variance + variance_sub = self.sub(input_x, mean) + variance_mul = self.mul(variance_sub, variance_sub) + variance_muls = self.mul(variance_mul, mean_cof) + variance = self.sum_keep_dims(variance_muls, reduce_axis) + + # Calculate normalize + normalize_sub = self.sub(input_x, mean) + epsilon = self.eps(input_x) + normalize_add = self.add(variance, epsilon) + normalize_log = self.log(normalize_add) + normalize_log_mul = self.mul(normalize_log, -0.5) + normalize_exp = self.exp(normalize_log_mul) + normalize_mul = self.mul(normalize_sub, normalize_exp) + + # Calculate scale and translate + if self.begin_params_axis == 0: + scale_mul = self.mul(input_gamma, normalize_mul) + res = self.add(scale_mul, input_beta) + else: + scale_mul = self.mul(input_gamma, normalize_mul) + res = self.add(scale_mul, input_beta) + + return res, mean, variance + + +class LayerNormXBackprop(GraphKernel): + r""" + Together with LayerNormBetaGammaBackprop, to supply the backprop + functionality for LayerNorm. + + Note: + Sets input_x as :math:`x_i`, variance as :math:`\sigma^2`, mean as :math:`\mu`, + input_gamma as :math:`\gamma`. Then, + .. 
math:: + \begin{array}{ll} \\ + \hat{x_i} = \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}} \\ + \frac {\partial L} {\partial x_i} = + \frac{\gamma}{\sqrt{\sigma^2+\epsilon}} + ( \frac{\partial L}{\partial y_i} + - \frac{1}{m} \cdot \frac{\partial L}{\partial \beta} + - \frac{\hat{x_i}}{m} \cdot \frac{\partial L}{\partial \gamma}) + \end{array} + + Inputs: + - **dy**(Tensor) - The first item of the next operator's backprop's output. + - **input_x**(Tensor) - The first input of the forward function of LayerNorm. + - **variance**(Tensor) - The second input of the forward function of LayerNorm. + - **mean**(Tensor) - The third input of the forward function of LayerNorm. + - **input_gamma**(Tensor) - The fourth input of the forward function of LayerNorm. + + Outputs: + Tensor, the output of this operator, will be used as the first item of the result of + LayerNorm's backprop function, has the same shape and data type as 'input_x'. + + Examples: + >>> dy = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> variance = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> mean = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_gamma = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> op = LayerNormXBackprop(keep_dims=False) + >>> output = op(dy, input_x, variance, mean, input_gamma) + """ + + def __init__(self): + super(LayerNormXBackprop, self).__init__() + self.sum_keep_dims = P.ReduceSum(keep_dims=True) + self.log = P.Log() + self.exp = P.Exp() + self.eps = P.Eps() + + def construct(self, dy, input_x, variance, mean, input_gamma): + shape_x = P.Shape()(input_x) + shape_mean = P.Shape()(mean) + reduce_axis = () + flag = -1 + min_l = 0 + if len(shape_x) > len(shape_mean): + min_l = len(shape_x) + else: + min_l = len(shape_mean) + for i in range(min_l): + if (shape_x[i] != shape_mean[i]) and (flag == -1): + flag = i + if flag != -1: + for i in range(flag, len(shape_x)): + reduce_axis = reduce_axis + (i,) + else: + reduce_axis = reduce_axis + (len(shape_x) - 1,) + mean_num = 1.0 + for i in reduce_axis: + mean_num *= shape_x[i] + pd_xl = input_gamma * dy + epsilon = self.eps(input_x) + var_elta = variance + epsilon + var_elta_log = self.log(var_elta) + var_elta_mul = var_elta_log * -0.5 + var_elta_2 = P.Exp()(var_elta_mul) + pdvar1_mul = var_elta_2 * var_elta_2 + pd_var_1 = pdvar1_mul * var_elta_2 + sub_x_mean = input_x - mean + pdvar_mul1 = pd_xl * sub_x_mean + pdvar_sum = self.sum_keep_dims(pdvar_mul1, reduce_axis) + pdvar_mul3 = pdvar_sum * pd_var_1 + pd_var = pdvar_mul3 * -0.5 + pdmean1_sum = self.sum_keep_dims(pd_xl, reduce_axis) + pdmean1_mul = pdmean1_sum * var_elta_2 + pd_mean_1 = pdmean1_mul * -1.0 + pdmean2_mul1 = sub_x_mean * -2.0 + pdmean2_sum = self.sum_keep_dims(pdmean2_mul1, reduce_axis) + pdmean2_mul3 = pdmean2_sum * (1.0 / mean_num) + pd_mean_2 = pd_var * pdmean2_mul3 + pd_mean = pd_mean_2 + pd_mean_1 + pd_x_1 = var_elta_2 * pd_xl + pdx2_mul = pd_var * sub_x_mean + pd_x_2 = pdx2_mul * (2.0 * (1.0 / mean_num)) + pd_x_3 = pd_mean * (1.0 / mean_num) + pdx_add = pd_x_1 + pd_x_2 + pd_x = pdx_add + pd_x_3 + return pd_x + + +class LayerNormBetaGammaBackprop(GraphKernel): + r""" + Together with LayerNormXBackprop, to supply the backprop functionality for + LayerNorm. + Note: + Sets input_x as :math:`x_i`, variance as :math:`\sigma^2`, mean as :math:`\mu`, + input_gamma as :math:`\gamma`. Then, + .. 
math:: + \begin{array}{ll} \\ + \hat{x_i} = \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}} \\ + \frac {\partial L} {\partial \beta} = + \sum_{i=1}^m \\frac{\\partial L}{\partial y_i} \\ + \frac {\partial L} {\partial \gamma} = + \sum_{i=1}^m \\frac{\partial L}{\partial y_i} \cdot \hat{x_i} + \end{array} + + Inputs: + - **dy**(Tensor) - The first item of the next operator's backprop's output. + - **input_x**(Tensor) - The first input of the forward function of LayerNorm. + - **variance**(Tensor) - The second input of the forward function of LayerNorm. + - **mean**(Tensor) - The third input of the forward function of LayerNorm. + - **input_gamma**(Tensor) - The fourth input of the forward function of LayerNorm. + + Outputs: + Tuple of 2 Tensor, the backprop outputs. + + - **pd_beta**(Tensor) - The first item of return value of this operator, will be used as + the second item of the LayerNorm's backprop function. + - **pd_gamma**(Tensor) - The second item of return value of this operator, will be used as + the third item of the LayerNorm's backprop function. + + Examples: + >>> dy = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> variance = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> mean = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_gamma = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> op = LayerNormBetaGammaBackprop(keep_dims=False) + >>> pd_beta, pd_gamma = op(dy, input_x, variance, mean, input_gamma) + """ + def __init__(self): + super(LayerNormBetaGammaBackprop, self).__init__() + self.sum_not_keep_dims = P.ReduceSum(keep_dims=False) + self.log = P.Log() + self.exp = P.Exp() + self.eps = P.Eps() + + def construct(self, dy, input_x, variance, mean, shape_gamma): + shape_x = P.Shape()(input_x) + params_axis = () + + if len(shape_x) != len(shape_gamma): + sub = len(shape_x) - len(shape_gamma) + for i in range(sub): + params_axis = params_axis + (i,) + + pd_beta = self.sum_not_keep_dims(dy, params_axis) + epsilon = self.eps(input_x) + var_elta = variance + epsilon + var_elta_log = self.log(var_elta) + var_elta_mul = var_elta_log * -0.5 + var_elta_2 = P.Exp()(var_elta_mul) + sub_x_mean = input_x - mean + var_elta_2_cast = var_elta_2 + xl_mul = var_elta_2_cast * sub_x_mean + pdga_mul = dy * xl_mul + pd_gamma = self.sum_not_keep_dims(pdga_mul, params_axis) + return pd_beta, pd_gamma + + +class LogSoftmax(GraphKernel): + r""" + Log Softmax activation function. + + Applies the Log Softmax function to the input tensor on the specified axis. + Suppose a slice along the given aixs :math:`x` then for each element :math:`x_i` + the Log Softmax function is shown as follows: + + .. math:: + \text{output}(x_i) = \log \left(\frac{exp(x_i)} {\sum_{j = 0}^{N-1}\exp(x_j)}\right), + + where :math:`N` is the length of the Tensor. + + Args: + axis (int): The axis to do the Log softmax operation. Default: -1. + + Inputs: + logits (Tensor): The input of Log Softmax. + + Outputs: + Tensor, with the same type and shape as the logits. 
+ + Examples: + >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32) + >>> log_softmax = LogSoftmax() + >>> log_softmax(input_x) + [-4.4519143, -3.4519143, -2.4519143, -1.4519144, -0.4519144] + """ + + def __init__(self, axis=-1): + super(LogSoftmax, self).__init__() + self.axis = validator.check_type('axis', axis, [int]) + self.max_keep_dims = P.ReduceMax(keep_dims=True) + self.sub = P.Sub() + self.exp = P.Exp() + self.sum_keep_dims = P.ReduceSum(keep_dims=True) + self.log = P.Log() + self.mul = P.Mul() + + def construct(self, input_x): + data_max = self.max_keep_dims(input_x, (self.axis,)) + data_sub = self.sub(input_x, data_max) + + data_exp = self.exp(data_sub) + data_sum = self.sum_keep_dims(data_exp, (self.axis,)) + data_log = self.log(data_sum) + + res = self.sub(data_sub, data_log) + return res + + def bprop(self, input_x, out, dout): + input_x = out + input_dy = dout + + data_exp = self.exp(input_x) + data_sum = self.sum_keep_dims(input_dy, (self.axis,)) + data_softmax = self.mul(data_exp, data_sum) + + res = self.sub(input_dy, data_softmax) + return (res,) + + +class Tanh(GraphKernel): + r""" + Tanh activation function. + + Computes hyperbolic tangent of input element-wise. The Tanh function is defined as: + + .. math:: + tanh(x_i) = \frac{\exp(x_i) - \exp(-x_i)}{\exp(x_i) + \exp(-x_i)} = \frac{\exp(2x_i) - 1}{\exp(2x_i) + 1}, + + where :math:`x_i` is an element of the input Tensor. + + Inputs: + - **input_x** (Tensor) - The input of Tanh. + + Outputs: + Tensor, with the same type and shape as the input_x. + + Examples: + >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32) + >>> tanh = Tanh() + >>> tanh(input_x) + [0.7615941, 0.9640276, 0.9950548, 0.9993293, 0.99990916] + """ + def __init__(self): + super(Tanh, self).__init__() + self.abs = P.Abs() + self.add = P.TensorAdd() + self.div = P.RealDiv() + self.mul = P.Mul() + self.mul_fp16 = P.Mul() + self.mul_fp16.add_prim_attr("output_precision", "float16") + self.exp = P.Exp() + + def construct(self, input_x): + input_abs = self.abs(input_x) + sign_flag = self.div(input_x, input_abs) + sign_flag_neg = self.mul(sign_flag, -1.0) + + power_val = self.mul(input_abs, -2.0) + exp_val = self.exp(power_val) + up_val = self.add(exp_val, -1.0) + down_val = self.add(exp_val, 1.0) + + div_val = self.div(up_val, down_val) + res = self.mul(sign_flag_neg, div_val) + return res + + def bprop(self, input_x, out, dout): + input_y = out + input_dy = dout + + data_square = self.mul(input_y, input_y) + data_mul = self.mul(data_square, -1.0) + anuminate = self.add(data_mul, 1.0) + res = self.mul_fp16(anuminate, input_dy) + + return (res,) + +class TanhGrad(GraphKernel): + """ + Backprop function of Tanh + + Mathematical calculating: + result = Tanh(out) + result = 1 - result * result + result = result * dout + Inputs: + out (Tensor): Tanh's output + dout (Tensor): next layer's backward function's output, has same shape as out + + Outputs: + result (Tensor): result of (1 - tanh(out)^2) * dout + + Examples: + >>> x_np = np.random.randn(5, 3, 6).astype(np.float16) + >>> dy_np = np.random.randn(5, 3, 6).astype(np.float16) + >>> x_ms = Tensor(x_np) + >>> dy_ms = Tensor(dy_np) + >>> tanh_grad = TanhGrad() + >>> out = tanh_grad(x_np, dy_np) + """ + def __init__(self): + super(TanhGrad, self).__init__() + self.add = P.TensorAdd() + self.mul = P.Mul() + self.mul_fp16 = P.Mul() + self.mul_fp16.add_prim_attr("output_precision", "float16") + + def construct(self, out, dout): + input_y = out + input_dy = dout + + data_square = 
self.mul(input_y, input_y) + data_mul = self.mul(data_square, -1.0) + anuminate = self.add(data_mul, 1.0) + res = self.mul_fp16(anuminate, input_dy) + + return res + +class Gelu(GraphKernel): + r""" + Gaussian Error Linear Units activation function. + + GeLU is described in the paper `Gaussian Error Linear Units (GELUs) `_. + And also please refer to `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. + `_. + + Defined as follows: + + .. math:: + \text{output} = 0.5 * x * (1 + erf(x / \sqrt{2})), + + where :math:`erf` is the "Gauss error function" . + + Inputs: + - **input_x** (Tensor) - Input to compute the Gelu. + + Outputs: + Tensor, with the same type and shape as input. + + Examples: + >>> tensor = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32) + >>> gelu = Gelu() + >>> result = gelu(tensor) + """ + + def __init__(self): + super(Gelu, self).__init__() + self.add = P.TensorAdd() + self.abs = P.Abs() + self.exp = P.Exp() + self.neg = P.Neg() + self.minimum = P.Minimum() + self.div = P.RealDiv() + self.mul = P.Mul() + self.CSVALUE = 0.044715 + self.CSVALUE_A = 1.59576912 + self.CSVALUE_5 = 0.3989422804 + self.CSVALUE_3B = 0.2140644488 + + def construct(self, input_x): + def _tanh_parameter_compute(data_x): + """ + compute the parameter of tanh: + return: result equal (x+0.044715*tf.pow(x,3)) + """ + mul_0 = self.mul(data_x, data_x) + pow_0 = self.mul(mul_0, data_x) + mul_1 = self.mul(pow_0, self.CSVALUE) + result = self.add(data_x, mul_1) + + return result + + tanh_parameter = _tanh_parameter_compute(input_x) + mul_0 = self.mul(tanh_parameter, 1.5957691) + + mul_0_min = self.minimum(mul_0, 0.0) + right_mul = self.exp(mul_0_min) + + mul_0_abs = self.abs(mul_0) + mul_0_abs_neg = self.mul(mul_0_abs, -1.0) + mul_0_abs_neg_exp = self.exp(mul_0_abs_neg) + + mul_0_abs_neg_exp_add = self.add(mul_0_abs_neg_exp, 1.0) + left_mul = self.div(input_x, mul_0_abs_neg_exp_add) + + result = self.mul(left_mul, right_mul) + return result + + def bprop(self, input_x, out, dout): + """ register backprop function for Gelu """ + data_x = input_x + data_gelu = out + data_dy = dout + + def _math_four_compute(data_x): + """ + return: math_four equal 2*(np(sqrt(2 / np.pi)*(x + 0.044715*tf.pow(x, 3))) + """ + datax_pow = data_x * data_x * data_x + datax_muls_c = self.mul(datax_pow, self.CSVALUE) + datax_addx = self.add(datax_muls_c, data_x) + datax_muls_s = self.mul(datax_addx, self.CSVALUE_A) + + return datax_muls_s + + # common part + math_four = _math_four_compute(data_x) + math_four_abs = self.abs(math_four) + math_four_abs_neg = self.mul(math_four_abs, -1.0) + math_four_abs_neg_exp = self.exp(math_four_abs_neg) + math_four_min = self.minimum(math_four, 0.0) + + # dividend part + datax_pow = self.mul(data_x, data_x) + datax_pow_mul = self.mul(datax_pow, self.CSVALUE_3B) + datax_pow_mul_add = self.add(datax_pow_mul, self.CSVALUE_A) + data_gelu_mul = self.mul(data_gelu, datax_pow_mul_add) + math_four_min_2 = self.mul(math_four_min, 2.0) + div_right = self.mul(data_gelu_mul, math_four_abs_neg_exp) + div_left = self.exp(math_four_min_2) + dividend = self.add(div_left, div_right) + + # divisor part + div_0 = self.add(math_four_abs_neg_exp, 1.0) + div_1 = self.exp(math_four_min) + divisor = self.mul(div_1, div_0) + res_grad = self.div(dividend, divisor) + + result = self.mul(res_grad, data_dy) + return (result,) + + +class Softmax(GraphKernel): + """ + Operator Softmax + .. 
math:: \frac{\exp(x - \max(x))}{\sum \exp(x - \max(x))}
+
+    Args:
+        axis (int, tuple): Axis along which the softmax normalization is applied.
+
+    Inputs:
+        x (Tensor): Input data for softmax.
+
+    Outputs:
+        output (Tensor): A tensor with the same shape as the input.
+
+    Examples:
+        >>> layer = Softmax(1)
+        >>> x = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]]), mindspore.float32)
+        >>> output = layer(x)
+    """
+
+    def __init__(self, axis):
+        super(Softmax, self).__init__()
+        validator.check_type("axis", axis, [int, tuple])
+        if isinstance(axis, int):
+            self.axis = (axis,)
+        else:
+            self.axis = axis
+        for item in self.axis:
+            validator.check_type("item of axis", item, [int])
+        self.max = P.ReduceMax(keep_dims=True)
+        self.sub = P.Sub()
+        self.exp = P.Exp()
+        self.sum = P.ReduceSum(keep_dims=True)
+        self.mul = P.Mul()
+
+    def construct(self, x):
+        max_x = self.max(x, self.axis)
+        data_sub = self.sub(x, max_x)
+        data_exp = self.exp(data_sub)
+        data_expsum = self.sum(data_exp, self.axis)
+        output = data_exp / data_expsum
+        return output
+
+    def bprop(self, x, out, dout):
+        mul_res = self.mul(dout, out)
+        sum_res = self.sum(mul_res, self.axis)
+        sub_res = self.sub(dout, sum_res)
+        res = self.mul(sub_res, out)
+        return (res,)
+
+
+class LayerNorm(Cell):
+    r"""
+    Applies Layer Normalization over a mini-batch of inputs.
+
+    Layer normalization is widely used in recurrent neural networks. It applies
+    normalization over a mini-batch of inputs for each single training case as described
+    in the paper `Layer Normalization `_. Unlike batch
+    normalization, layer normalization performs exactly the same computation at training and
+    testing times. It is applied across all channels and pixels of a single sample, rather than
+    across the batch, and can be described by the following formula.
+
+    .. math::
+        y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
+
+    Args:
+        normalized_shape (Union[tuple[int], list[int]]): The normalization is performed over axes
+            `begin_norm_axis ... R - 1`.
+        begin_norm_axis (int): The first normalization dimension: normalization will be performed along dimensions
+            `begin_norm_axis: rank(inputs)`, the value should be in [-1, rank(input)). Default: -1.
+        begin_params_axis (int): The first parameter (beta, gamma) dimension: scale and centering parameters
+            will have dimensions `begin_params_axis: rank(inputs)` and will be broadcast with
+            the normalized inputs accordingly, the value should be in [-1, rank(input)). Default: -1.
+        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
+            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
+            'he_uniform', etc. Default: 'ones'.
+        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
+            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
+            'he_uniform', etc. Default: 'zeros'.
+
+    Inputs:
+        - **input_x** (Tensor) - The shape of 'input_x' is :math:`(x_1, x_2, ..., x_R)`,
+          and `input_shape[begin_norm_axis:]` is equal to `normalized_shape`.
+
+    Outputs:
+        Tensor, the normalized and scaled offset tensor, which has the same shape and data type as the `input_x`.
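As a plain reference for the formula above, the following standalone NumPy sketch normalizes over the trailing axes and then applies the affine parameters (the helper name, shapes, and epsilon value are illustrative only, not taken from this patch):

import numpy as np

def layer_norm_ref(x, gamma, beta, begin_norm_axis=1, eps=1e-7):
    # Normalize each sample over the axes from begin_norm_axis onward, then scale and shift.
    axes = tuple(range(begin_norm_axis, x.ndim))
    mean = x.mean(axis=axes, keepdims=True)
    var = x.var(axis=axes, keepdims=True)
    return (x - mean) / np.sqrt(var + eps) * gamma + beta

x = np.random.randn(2, 3, 4).astype(np.float32)
gamma = np.ones((3, 4), dtype=np.float32)   # parameters cover input_shape[begin_params_axis:]
beta = np.zeros((3, 4), dtype=np.float32)
y = layer_norm_ref(x, gamma, beta, begin_norm_axis=1)
# y has shape (2, 3, 4); each sample now has roughly zero mean and unit variance over axes 1..2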
+ + Examples: + >>> x = Tensor(np.ones([20, 5, 10, 10]), mindspore.float32) + >>> shape1 = x.shape()[1:] + >>> m = G.LayerNorm(shape1, begin_norm_axis=1, begin_params_axis=1) + >>> m(x) + """ + + def __init__(self, + begin_norm_axis=-1, + begin_params_axis=-1 + ): + super(LayerNorm, self).__init__() + self.begin_norm_axis = begin_norm_axis + self.begin_params_axis = begin_params_axis + self.layer_norm = LayerNormForward(begin_norm_axis, begin_params_axis) + self.layer_norm_x_grad = LayerNormXBackprop() + self.layer_norm_beta_gamma = LayerNormBetaGammaBackprop() + self.layer_norm_grad = G.LayerNormGrad(self.begin_norm_axis, self.begin_params_axis) + + def construct(self, input_x, input_gamma, input_beta): + return self.layer_norm(input_x, input_gamma, input_beta) + + # case 1 + def bprop(self, input_x, input_gamma, input_beta, out, dout): + dx, d_gamma, d_beta = self.layer_norm_grad(input_x, dout[0], out[2], dout[1], input_gamma) + return dx, d_gamma, d_beta + + +class LambUpdateWithLR(GraphKernel): + r""" + Part of Lamb optimizer. + + .. math:: + s_1 = select(i_1 \gt y_g, select(i_0 \gt y_g, \frac{i_1}{i_2}, se), se) + i_5 = i_5 - max(min(s_1, y_m), y_g) \times i_3 \times i_4 + + Inputs: + - **input0** (Tensor) - The first tensor to be computed. + - **input1** (Tensor) - The second tensor to be computed. + - **input2** (Tensor) - The third tensor to be computed. + - **input3** (Tensor) - The fourth tensor to be computed. + - **input4** (Tensor) - The fifth tensor to be computed. + - **input5** (Tensor) - The sixth tensor to be computed. It will be updated by result. + - **greater_y** (Tensor) - The seventh tensor to be computed. + - **select_e** (Tensor) - The eighth tensor to be computed. + - **minimum_y** (Tensor) - The ninth tensor to be computed. + + Outputs: + A fake output tensor. + + Examples: + >>> lamb_update = LambUpdateWithLR() + >>> i0 = np.random.normal(0, 1, [1, 16]).astype(np.float32) + >>> i1 = np.random.normal(0, 1, [1]).astype(np.float32) + >>> i2 = np.random.normal(0, 1, [1]).astype(np.float32) + >>> i3 = np.random.normal(0, 1, [1]).astype(np.float32) + >>> i4 = np.random.normal(0, 1, [1, 16]).astype(np.float32) + >>> i5 = np.random.normal(0, 1, [1, 16]).astype(np.float32) + >>> yg = np.random.normal(0, 1, [1]).astype(np.float32) + >>> se = np.random.normal(0, 1, [1]).astype(np.float32) + >>> ym = np.random.normal(0, 1, [1]).astype(np.float32) + >>> lamb_update(i0, i1, i2, i3, i4, i5, yg, se, ym) + + """ + + def __init__(self): + super(LambUpdateWithLR, self).__init__() + self.greater = P.Greater() + self.select = P.Select() + self.div = P.RealDiv() + self.min = P.Minimum() + self.max = P.Maximum() + self.mul = P.Mul() + self.sub = P.Sub() + self.fake_output_assign = InplaceAssign() + self.fake_output_assign.add_prim_attr("fake_output", True) + + def construct(self, input0, input1, input2, input3, input4, input5, greater_y, select_e, minimum_y): + greater0 = self.greater(input0, greater_y) + greater1 = self.greater(input1, greater_y) + real_div0 = self.div(input1, input2) + select0 = self.select(greater0, real_div0, select_e) + select1 = self.select(greater1, select0, select_e) + min0 = self.min(select1, minimum_y) + max0 = self.max(min0, greater_y) + mul0 = self.mul(max0, input3) + mul1 = self.mul(mul0, input4) + sub0 = self.sub(input5, mul1) + sub0 = self.fake_output_assign(input5, sub0, sub0) + return sub0 + +class LambNextMV(GraphKernel): + r""" + Part of Lamb optimizer. + + .. 
math:: + rd_0 = \frac{i_8 \times i_5 + i_9 \times i_4}{i6} + rd_1 = \frac{x_0 \times i_2 + x_1 \times i_1}{i3} + y_2 = \frac{rd_0}{\sqrt{rd_1 + x3}} + x_2 \times i_7 + y_3 = \frac{rd_0}{\sqrt{rd_1} + x3} + i5 = i_8 \times i_5 + i_9 \times i_4 + i2 = x_0 \times i_2 + x_1 \times i_1 + + Inputs: + - **inputs1** (Tensor) - The first input tensor to be computed. + - **inputs2** (Tensor) - The second input tensor to be computed. It will be updated by result. + - **inputs3** (Tensor) - The third input tensor to be computed. + - **inputs4** (Tensor) - The fourth input tensor to be computed. + - **inputs5** (Tensor) - The fifth input tensor to be computed. It will be updated by result. + - **inputs6** (Tensor) - The sixth input tensor to be computed. + - **inputs7** (Tensor) - The seventh input tensor to be computed. + - **inputs8** (Tensor) - The eighth input tensor to be computed. + - **inputs9** (Tensor) - The ninth input tensor to be computed. + - **inputsx0** (Tensor) - The tenth input tensor to be computed. + - **inputsx1** (Tensor) - The eleventh input tensor to be computed. + - **inputsx2** (Tensor) - The twelfth input tensor to be computed. + - **inputsx3** (Tensor) - The thirteenth input tensor to be computed. + + Outputs: + Tuple of 2 Tensor. + + - **add3** (Tensor) - The shape is same as the shape after broadcasting, and the data type is + the one with high precision or high digits among the inputs. + - **realdiv4** (Tensor) - The shape is same as the shape after broadcasting, and the data type is + the one with high precision or high digits among the inputs. + + Examples: + >>> lamb_next_mv = LambNextMV() + >>> i1 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i2 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i3 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i4 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i5 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i6 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i7 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i8 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i9 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> x0 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> x1 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> x2 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> x3 = Tensor(np.ones([1, 16]).astype(np.float32) * 1e-6) + >>> lamb_next_mv(i1, i2, i3, i4, i5, i6, i7, i8, i9, x0, x1, x2, x3) + + """ + + def __init__(self): + super(LambNextMV, self).__init__() + self.mul = P.Mul() + self.add = P.TensorAdd() + self.div = P.RealDiv() + self.sqrt = P.Sqrt() + self.rsqrt = P.Rsqrt() + self.fake_output_assign_1 = InplaceAssign() + self.fake_output_assign_1.add_prim_attr("fake_output", False) + self.fake_output_assign_2 = InplaceAssign() + self.fake_output_assign_2.add_prim_attr("fake_output", False) + + + def construct(self, input1, input2, input3, input4, input5, input6, input7, + input8, input9, inputx0, inputx1, inputx2, inputx3): + mul3 = self.mul(inputx1, input1) + mul2 = self.mul(inputx0, input2) + add1 = self.add(mul2, mul3) + realdiv1 = self.div(add1, input3) + add2 = self.add(realdiv1, inputx3) + sqrt0 = self.rsqrt(add2) + sqrt1 = self.sqrt(realdiv1) + add4 = self.add(sqrt1, inputx3) + mul1 = self.mul(input9, input4) + mul0 = self.mul(input8, input5) + add0 = self.add(mul0, mul1) + realdiv0 = self.div(add0, input6) 
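+        # Relating the intermediates above to the class docstring: add1 and add0 are the raw
+        # moment updates x_0*i_2 + x_1*i_1 and i_8*i_5 + i_9*i_4, while realdiv1 and realdiv0
+        # are rd_1 and rd_0 (their quotients by input3 and input6). The lines below assemble
+        # y_2 = rd_0 * rsqrt(rd_1 + x_3) + x_2*i_7 (add3) and y_3 = rd_0 / (sqrt(rd_1) + x_3) (realdiv4).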
+ realdiv2 = self.mul(realdiv0, sqrt0) + realdiv4 = self.div(realdiv0, add4) + mul4 = self.mul(inputx2, input7) + add3 = self.add(realdiv2, mul4) + + add3 = self.fake_output_assign_1(input5, add0, add3) + add3 = self.fake_output_assign_2(input2, add1, add3) + + return add3, realdiv4 diff --git a/mindspore/nn/layer/activation.py b/mindspore/nn/layer/activation.py index f20ad8692d3..9b62639bb1b 100644 --- a/mindspore/nn/layer/activation.py +++ b/mindspore/nn/layer/activation.py @@ -20,8 +20,10 @@ from mindspore.common.parameter import Parameter from mindspore.common.initializer import initializer from mindspore.common.tensor import Tensor from mindspore._extends import cell_attr_register +from mindspore.ops import _selected_ops from ..cell import Cell + __all__ = ['Softmax', 'LogSoftmax', 'ReLU', @@ -73,7 +75,7 @@ class Softmax(Cell): def __init__(self, axis=-1): super(Softmax, self).__init__() - self.softmax = P.Softmax(axis) + self.softmax = _selected_ops.Softmax(axis) def construct(self, x): return self.softmax(x) @@ -110,7 +112,7 @@ class LogSoftmax(Cell): def __init__(self, axis=-1): super(LogSoftmax, self).__init__() - self.log_softmax = P.LogSoftmax(axis) + self.log_softmax = _selected_ops.LogSoftmax(axis) def construct(self, x): return self.log_softmax(x) @@ -286,7 +288,7 @@ class Tanh(Cell): def __init__(self): super(Tanh, self).__init__() - self.tanh = P.Tanh() + self.tanh = _selected_ops.Tanh() def construct(self, x): return self.tanh(x) @@ -318,7 +320,7 @@ class GELU(Cell): def __init__(self): super(GELU, self).__init__() - self.gelu = P.Gelu() + self.gelu = _selected_ops.Gelu() def construct(self, x): return self.gelu(x) @@ -503,6 +505,7 @@ class LogSigmoid(Cell): [-3.1326166e-01, -1.2692806e-01, -4.8587345e-02] """ + def __init__(self): super(LogSigmoid, self).__init__() self.mul = P.Mul() diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py index c050d35f8f9..548fbcec1e0 100644 --- a/mindspore/nn/layer/basic.py +++ b/mindspore/nn/layer/basic.py @@ -27,10 +27,12 @@ from mindspore.common.parameter import Parameter from mindspore._extends import cell_attr_register from mindspore.common.api import ms_function from mindspore import context +from mindspore.ops import _selected_ops from ..cell import Cell from .activation import get_activation from ..._checkparam import Validator as validator + __all__ = ['Dropout', 'Flatten', 'Dense', 'ClipByNorm', 'Norm', 'OneHot', 'Pad', 'Unfold'] class Dropout(Cell): @@ -74,6 +76,7 @@ class Dropout(Cell): >>> net = nn.Dropout(keep_prob=0.8) >>> net(x) """ + def __init__(self, keep_prob=0.5, seed0=0, seed1=0, dtype=mstype.float32): super(Dropout, self).__init__() if keep_prob <= 0 or keep_prob > 1: @@ -137,6 +140,7 @@ class Flatten(Cell): [[1.2 1.2 2.1 2.1] [2.2 2.2 3.2 3.2]] """ + def __init__(self): super(Flatten, self).__init__() @@ -212,7 +216,7 @@ class Dense(Cell): self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias") self.matmul = P.MatMul(transpose_b=True) - self.bias_add = P.BiasAdd() + self.bias_add = _selected_ops.BiasAdd() self.activation = get_activation(activation) self.activation_flag = self.activation is not None @@ -271,6 +275,7 @@ class ClipByNorm(Cell): >>> net(input, clip_norm) """ + def __init__(self): super(ClipByNorm, self).__init__() self.reduce_sum = P.ReduceSum(keep_dims=True) @@ -302,6 +307,7 @@ class ClipByNorm(Cell): intermediate = x else: intermediate = x * clip_norm + max_norm = self.max_op(l2norm, clip_norm) values_clip = self.cast(intermediate, mstype.float32) / 
self.expand_dims(max_norm, -1) values_clip = self.reshape(values_clip, self.shape(x)) @@ -330,6 +336,7 @@ class Norm(Cell): >>> input = Tensor(np.random.randint(0, 10, [4, 16]), mindspore.float32) >>> net(input) """ + def __init__(self, axis=(), keep_dims=False): super(Norm, self).__init__() self.axis = axis @@ -392,6 +399,7 @@ class OneHot(Cell): [0. 1.] [0. 0.]]] """ + def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, dtype=mstype.float32): super(OneHot, self).__init__() self.onehot = P.OneHot(axis) @@ -506,6 +514,7 @@ class Unfold(Cell): Tensor ([[[[1, 1] [1, 1]] [[1, 1], [1, 1]] [[1, 1] [1, 1]], [[1, 1], [1, 1]]]], shape=(1, 4, 2, 2), dtype=mstype.float16) """ + def __init__(self, ksizes, strides, rates, padding="valid"): super(Unfold, self).__init__() self.extract_image_patches = inner.ExtractImagePatches(ksizes, strides, rates, padding) diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py index 744e87ece5d..4c7ea9d4d6b 100644 --- a/mindspore/nn/layer/normalization.py +++ b/mindspore/nn/layer/normalization.py @@ -24,9 +24,11 @@ from mindspore._extends import cell_attr_register from mindspore.communication.management import get_group_size, get_rank from mindspore.communication import management from mindspore._checkparam import check_int_positive +from mindspore.ops import _selected_ops from ..cell import Cell + __all__ = ['BatchNorm1d', 'BatchNorm2d', 'LayerNorm', 'GroupNorm', 'GlobalBatchNorm'] class _BatchNorm(Cell): @@ -116,12 +118,11 @@ class _BatchNorm(Cell): "local_rank_size is {}".format(group_size, get_group_size())) if len(world_rank) % group_size != 0: raise ValueError("please make your group size correct.") - world_rank_list = zip(*(iter(world_rank),) *group_size) + world_rank_list = zip(*(iter(world_rank),) * group_size) group_list = [list(i) for i in world_rank_list] return group_list - def _global_sync(self, x, axes, re_shape): """calculate global batch normalization output""" x_mean = self.reduce_mean(x, axes) @@ -188,15 +189,19 @@ class _BatchNorm(Cell): return 'num_features={}, eps={}, momentum={}, gamma={}, beta={}, moving_mean={}, moving_variance={}'.format( self.num_features, self.eps, self.momentum, self.gamma, self.beta, self.moving_mean, self.moving_variance) + @constexpr def _channel_check(channel, num_channel): if channel != num_channel: raise ValueError("the input channel is not equal with num_channel") + @constexpr def _shape_check(in_shape): if len(in_shape) != 4: raise ValueError("The input must has 4 dims") + + @constexpr def _shape_infer(x_shape, num_feature): """global batch normalization shape and axes infer""" @@ -208,6 +213,7 @@ def _shape_infer(x_shape, num_feature): re_shape = (1, num_feature) return axes, re_shape + class BatchNorm1d(_BatchNorm): r""" Batch normalization layer over a 2D input. 
@@ -257,6 +263,7 @@ class BatchNorm1d(_BatchNorm): >>> input = Tensor(np.random.randint(0, 255, [3, 16]), mindspore.float32) >>> net(input) """ + def __init__(self, num_features, eps=1e-5, @@ -276,6 +283,7 @@ class BatchNorm1d(_BatchNorm): moving_mean_init, moving_var_init, use_batch_statistics) + def _check_data_dim(self, x): if x.dim() != 2: pass @@ -330,6 +338,7 @@ class BatchNorm2d(_BatchNorm): >>> input = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mindspore.float32) >>> net(input) """ + def __init__(self, num_features, eps=1e-5, @@ -349,6 +358,7 @@ class BatchNorm2d(_BatchNorm): moving_mean_init, moving_var_init, use_batch_statistics) + def _check_data_dim(self, x): if x.dim() != 4: pass @@ -404,6 +414,7 @@ class GlobalBatchNorm(_BatchNorm): >>> input = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mindspore.float32) >>> global_bn_op(input) """ + def __init__(self, num_features, eps=1e-5, @@ -428,10 +439,12 @@ class GlobalBatchNorm(_BatchNorm): self.group = check_int_positive(device_num_each_group) if self.group <= 1: raise ValueError("the number of group must be greater than 1.") + def _check_data_dim(self, x): if x.dim == 0: pass + class LayerNorm(Cell): r""" Applies Layer Normalization over a mini-batch of inputs. @@ -475,6 +488,7 @@ class LayerNorm(Cell): >>> m = nn.LayerNorm(shape1, begin_norm_axis=1, begin_params_axis=1) >>> m(x) """ + def __init__(self, normalized_shape, begin_norm_axis=-1, @@ -495,8 +509,8 @@ class LayerNorm(Cell): gamma_init, normalized_shape), name="gamma") self.beta = Parameter(initializer( beta_init, normalized_shape), name="beta") - self.layer_norm = P.LayerNorm(begin_norm_axis=self.begin_norm_axis, begin_params_axis=self.begin_params_axis, - epsilon=self.epsilon) + self.layer_norm = _selected_ops.LayerNorm(begin_norm_axis=self.begin_norm_axis, + begin_params_axis=self.begin_params_axis) def construct(self, input_x): y, _, _ = self.layer_norm(input_x, self.gamma, self.beta) @@ -508,6 +522,7 @@ class LayerNorm(Cell): self.normalized_shape, self.begin_norm_axis, self.begin_params_axis, self.gamma, self.beta) return s + class GroupNorm(Cell): r""" Group Normalization over a mini-batch of inputs. 
@@ -544,6 +559,7 @@ class GroupNorm(Cell): >>> x = Tensor(np.ones([1, 64, 256, 256], np.float32)) >>> goup_norm_op(x) """ + def __init__(self, num_groups, num_channels, eps=1e-05, affine=True, gamma_init='ones', beta_init='zeros'): super(GroupNorm, self).__init__() self.num_groups = check_int_positive(num_groups) diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py index 426f111bb20..4639229c414 100644 --- a/mindspore/nn/loss/loss.py +++ b/mindspore/nn/loss/loss.py @@ -18,6 +18,7 @@ from mindspore.common.tensor import Tensor from mindspore.ops import operations as P from mindspore.ops import functional as F from mindspore.ops.primitive import constexpr +from mindspore.ops import _selected_ops from mindspore.nn.cell import Cell from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel @@ -44,7 +45,7 @@ class _Loss(Cell): if reduction == 'none': self.reduce = False - self.reduce_mean = P.ReduceMean() + self.reduce_mean = _selected_ops.ReduceMean() self.reduce_sum = P.ReduceSum() def get_axis(self, x): @@ -245,11 +246,11 @@ class SoftmaxCrossEntropyWithLogits(_Loss): super(SoftmaxCrossEntropyWithLogits, self).__init__(reduction) self.is_grad = is_grad self.sparse = sparse - validator.check_integer("num_classes", num_classes, 1, Rel.GT, self.cls_name) - validator.check_number_range("smooth_factor", smooth_factor, 0, 1, Rel.INC_BOTH, self.cls_name) + validator.check_number_range( + "smooth_factor", smooth_factor, 0, 1, Rel.INC_BOTH, self.cls_name) self.smooth_factor = smooth_factor self.num_classes = num_classes - self.softmax_cross_entropy = P.SoftmaxCrossEntropyWithLogits() + self.softmax_cross_entropy = _selected_ops.SoftmaxCrossEntropyWithLogits() self.one_hot = P.OneHot() self.on_value = Tensor(1.0 - self.smooth_factor, mstype.float32) self.off_value = Tensor(1.0 * self.smooth_factor / (self.num_classes - 1), mstype.float32) diff --git a/mindspore/nn/optim/lamb.py b/mindspore/nn/optim/lamb.py index ab914da08ea..832b35d66f1 100755 --- a/mindspore/nn/optim/lamb.py +++ b/mindspore/nn/optim/lamb.py @@ -14,6 +14,7 @@ # ============================================================================ """lamb""" import numpy as np +from mindspore import context from mindspore.common import dtype as mstype from mindspore.common.initializer import initializer from mindspore.ops import operations as P @@ -25,13 +26,15 @@ from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel from .optimizer import Optimizer from .. import layer +from .. 
import graph_kernels as G num_one = Tensor(np.ones([1]), mstype.float32) _lamb_opt = C.MultitypeFuncGraph("lamb_opt") -@_lamb_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", - "Tensor", "Bool") + +@_lamb_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", + "Tensor", "Tensor", "Tensor", "Tensor", "Bool") def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, param, m, v, gradient, decay_flag): """ @@ -72,9 +75,11 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para v_fp32 = op_cast(v, mstype.float32) gradient_fp32 = op_cast(gradient, mstype.float32) - next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta1, gradient_fp32) + next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(num_one, + mstype.float32) - beta1, gradient_fp32) - next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta2, op_square(gradient_fp32)) + next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(num_one, + mstype.float32) - beta2, op_square(gradient_fp32)) next_mm = next_m / (op_cast(num_one, mstype.float32) - op_pow(beta1, op_cast(global_step + num_one, mstype.float32))) @@ -83,7 +88,8 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para w_norm = op_norm(param_fp32) g_norm = op_norm(gradient_fp32) - g_norm_hat = op_norm(op_mul(next_mm, op_rsqrt(next_vv + eps)) + weight_decay_tensor * param_fp32) + g_norm_hat = op_norm(op_mul(next_mm, op_rsqrt( + next_vv + eps)) + weight_decay_tensor * param_fp32) zeros = F.zeros_like(w_norm) ones = op_fill(op_dtype(w_norm), op_shape(w_norm), 1.0) trust_ratio = op_select( @@ -108,6 +114,70 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para return next_v +lamb_opt_graph_kernel = C.MultitypeFuncGraph("lamb_opt_graph_kernel") + + +@lamb_opt_graph_kernel.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", + "Tensor", "Tensor", "Tensor", "Tensor", "Bool") +def _update_run_op_graph_kernel(beta1, beta2, eps, lr, weight_decay_tensor, + global_step, param, m, v, gradient, decay_flag): + """ + Update parameters. + + Args: + beta1 (Tensor): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0). + beta2 (Tensor): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0). + eps (Tensor): Term added to the denominator to improve numerical stability. Should be greater than 0. + lr (Tensor): Learning rate. + weight_decay_tensor (Tensor): Weight decay. Should be equal to or greater than 0. + global_step (Tensor): Global step. + param (Tensor): Parameters. + m (Tensor): m value of parameters. + v (Tensor): v value of parameters. + gradient (Tensor): Gradient of parameters. + decay_flag (bool): Specifies whether param update with weight decay. + + Returns: + Tensor, the new value of v after updating. 
+ """ + op_mul = P.Mul() + op_square = P.Square() + op_cast = P.Cast() + op_shape = P.Shape() + op_pow = P.Pow() + op_norm = layer.Norm() + op_fill = P.Fill() + op_dtype = P.DType() + + param_fp32 = op_cast(param, mstype.float32) + gradient_fp32 = op_cast(gradient, mstype.float32) + + i6_ex = op_cast(global_step + num_one, mstype.float32) + i9 = op_cast(num_one, mstype.float32) - beta1 + x1 = op_cast(num_one, mstype.float32) - beta2 + i6 = op_cast(num_one, mstype.float32) - op_pow(beta1, i6_ex) + i3 = op_cast(num_one, mstype.float32) - op_pow(beta2, i6_ex) + i1 = op_square(gradient_fp32) + add3, update = G.LambNextMV()(i1, v, i3, gradient, m, i6, param, beta1, + i9, beta2, x1, weight_decay_tensor, eps) + + if decay_flag: + update = update + op_mul(weight_decay_tensor, param_fp32) + + w_norm = op_norm(param_fp32) + g_norm = op_norm(gradient_fp32) + g_norm_hat = op_norm(add3) + + zeros = F.zeros_like(w_norm) + ones = op_fill(op_dtype(w_norm), op_shape(w_norm), 1.0) + tens = op_fill(op_dtype(w_norm), op_shape(w_norm), 10.0) + + next_param = G.LambUpdateWithLR()(g_norm, w_norm, g_norm_hat, lr, update, + param, zeros, ones, tens) + next_v = F.control_depend(add3, next_param) + return next_v + + def _check_param_value(decay_steps, warmup_steps, start_learning_rate, end_learning_rate, power, beta1, beta2, eps, weight_decay, prim_name): """Check the type of inputs.""" @@ -124,11 +194,16 @@ def _check_param_value(decay_steps, warmup_steps, start_learning_rate, validator.check_value_type("beta1", beta1, [float], prim_name) validator.check_value_type("beta2", beta2, [float], prim_name) validator.check_value_type("eps", eps, [float], prim_name) - validator.check_value_type("weight_dacay", weight_decay, [float], prim_name) - validator.check_number_range("beta1", beta1, 0.0, 1.0, Rel.INC_NEITHER, prim_name) - validator.check_number_range("beta2", beta2, 0.0, 1.0, Rel.INC_NEITHER, prim_name) - validator.check_number_range("eps", eps, 0.0, float("inf"), Rel.INC_NEITHER, prim_name) - validator.check_number_range("weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, prim_name) + validator.check_value_type( + "weight_dacay", weight_decay, [float], prim_name) + validator.check_number_range( + "beta1", beta1, 0.0, 1.0, Rel.INC_NEITHER, prim_name) + validator.check_number_range( + "beta2", beta2, 0.0, 1.0, Rel.INC_NEITHER, prim_name) + validator.check_number_range( + "eps", eps, 0.0, float("inf"), Rel.INC_NEITHER, prim_name) + validator.check_number_range( + "weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, prim_name) class Lamb(Optimizer): @@ -186,7 +261,8 @@ class Lamb(Optimizer): decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()): super(Lamb, self).__init__(0.0, params) if self.is_group: - raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.") + raise RuntimeError( + f"The {self.cls_name} optimizer cannot support group setting.") _check_param_value(decay_steps, warmup_steps, start_learning_rate, end_learning_rate, power, beta1, beta2, eps, weight_decay, self.cls_name) @@ -198,14 +274,18 @@ class Lamb(Optimizer): if warmup_steps > 0: self.warmup_flag = True self.decay_steps = Tensor(np.array([decay_steps]).astype(np.float32)) - self.start_learning_rate = Tensor(np.array([start_learning_rate]).astype(np.float32)) - self.end_learning_rate = Tensor(np.array([end_learning_rate]).astype(np.float32)) - self.diff_learning_rate = Tensor(np.array([start_learning_rate - end_learning_rate]).astype(np.float32)) + 
self.start_learning_rate = Tensor( + np.array([start_learning_rate]).astype(np.float32)) + self.end_learning_rate = Tensor( + np.array([end_learning_rate]).astype(np.float32)) + self.diff_learning_rate = Tensor( + np.array([start_learning_rate - end_learning_rate]).astype(np.float32)) self.power = power self.beta1 = Tensor(np.array([beta1]).astype(np.float32)) self.beta2 = Tensor(np.array([beta2]).astype(np.float32)) self.eps = Tensor(np.array([eps]).astype(np.float32)) - self.weight_decay_tensor = Tensor(np.array([weight_decay]).astype(np.float32)) + self.weight_decay_tensor = Tensor( + np.array([weight_decay]).astype(np.float32)) self.params = self.parameters self.moments1 = self.params.clone(prefix="lamb_m", init='zeros') self.moments2 = self.params.clone(prefix="lamb_v", init='zeros') @@ -217,19 +297,29 @@ class Lamb(Optimizer): self.greater = P.Greater() self.one = Tensor(np.array([1.0]).astype(np.float32)) self.cast = P.Cast() + self.enable_graph_kernel = context.get_context("enable_graph_kernel") def construct(self, gradients): step = self.min(self.global_step, self.decay_steps) p = step / self.decay_steps - lr = self.diff_learning_rate * self.pow(self.one - p, self.power) + self.end_learning_rate + lr = self.diff_learning_rate * \ + self.pow(self.one - p, self.power) + self.end_learning_rate if self.warmup_flag: warmup_percent = self.global_step / self.warmup_steps warmup_lr = self.start_learning_rate * warmup_percent - is_warmup = self.cast(self.greater(self.warmup_steps, self.global_step), mstype.float32) + is_warmup = self.cast(self.greater( + self.warmup_steps, self.global_step), mstype.float32) lr = (self.one - is_warmup) * lr + is_warmup * warmup_lr - updated_velocity = self.hyper_map(F.partial(_lamb_opt, self.beta1, self.beta2, self.eps, lr, - self.weight_decay_tensor, self.global_step), - self.params, self.moments1, self.moments2, gradients, self.decay_flag) + if self.enable_graph_kernel: + updated_velocity = self.hyper_map(F.partial(lamb_opt_graph_kernel, + self.beta1, self.beta2, self.eps, lr, + self.weight_decay_tensor, self.global_step), + self.params, self.moments1, self.moments2, gradients, self.decay_flag) + else: + updated_velocity = self.hyper_map(F.partial(_lamb_opt, + self.beta1, self.beta2, self.eps, lr, + self.weight_decay_tensor, self.global_step), + self.params, self.moments1, self.moments2, gradients, self.decay_flag) added_global_step = self.global_step + self.one F.control_depend(lr, added_global_step) diff --git a/mindspore/nn/optim/momentum.py b/mindspore/nn/optim/momentum.py index d93f38510dd..ebdc5d86bff 100755 --- a/mindspore/nn/optim/momentum.py +++ b/mindspore/nn/optim/momentum.py @@ -13,7 +13,8 @@ # limitations under the License. 
# ============================================================================ """momentum""" -from mindspore.ops import functional as F, composite as C, operations as P +from mindspore.ops import functional as F, composite as C +from mindspore.ops import _selected_ops from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor import mindspore.common.dtype as mstype @@ -120,7 +121,7 @@ class Momentum(Optimizer): self.use_nesterov = check_bool(use_nesterov) self.moments = self.params.clone(prefix="moments", init='zeros') self.hyper_map = C.HyperMap() - self.opt = P.ApplyMomentum(use_nesterov=self.use_nesterov) + self.opt = _selected_ops.ApplyMomentum(use_nesterov=self.use_nesterov) def construct(self, gradients): params = self.params diff --git a/mindspore/ops/_grad/grad_math_ops.py b/mindspore/ops/_grad/grad_math_ops.py index fdcda730c34..2a8a4fb03bc 100755 --- a/mindspore/ops/_grad/grad_math_ops.py +++ b/mindspore/ops/_grad/grad_math_ops.py @@ -17,6 +17,7 @@ from functools import reduce import numpy as np +from mindspore.ops import _selected_grad_ops as SG from .. import functional as F from .. import operations as P from ..operations import _grad_ops as G @@ -26,6 +27,7 @@ from .grad_base import bprop_getters from ..primitive import constexpr from ..composite.multitype_ops import _constexpr_utils as const_utils + shape_op = P.Shape() reduce_sum = P.ReduceSum() reshape = P.Reshape() @@ -468,7 +470,7 @@ def get_bprop_expm1(self): @bprop_getters.register(P.Minimum) def get_bprop_minimum(self): """Grad definition for `Minimum` operation.""" - input_grad = G.MinimumGrad() + input_grad = SG.MinimumGrad() def bprop(x, y, out, dout): dx, dy = input_grad(x, y, dout) @@ -480,7 +482,7 @@ def get_bprop_minimum(self): @bprop_getters.register(P.Maximum) def get_bprop_maximum(self): """Grad definition for `Maximum` operation.""" - input_grad = G.MaximumGrad() + input_grad = SG.MaximumGrad() def bprop(x, y, out, dout): dx, dy = input_grad(x, y, dout) @@ -910,7 +912,7 @@ def get_bprop_cosh(self): @bprop_getters.register(P.Abs) def get_bprop_abs(self): """Grad definition for `Abs` operation.""" - abs_grad = G.AbsGrad() + abs_grad = SG.AbsGrad() def bprop(x, out, dout): dx = abs_grad(x, dout) diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py index 036d7ddec86..13fb89b23fb 100755 --- a/mindspore/ops/_grad/grad_nn_ops.py +++ b/mindspore/ops/_grad/grad_nn_ops.py @@ -14,6 +14,7 @@ # ============================================================================ """Define the grad rules of neural network related operations.""" +from mindspore.ops import _selected_grad_ops as SG from .grad_base import bprop_getters from .. import functional as F from .. import operations as P @@ -23,10 +24,11 @@ from ..operations import _inner_ops as inner from ... 
import context + @bprop_getters.register(P.BiasAdd) def get_bprop_bias_add(self): """Grad definition for `BiasAdd` operation.""" - bias_grad = G.BiasAddGrad() + bias_grad = SG.BiasAddGrad() def bprop(x, w, out, dout): return dout, bias_grad(dout) @@ -303,7 +305,6 @@ def get_bprop_softmax(self): sub = P.Sub() mul = P.Mul() axis = self.axis - def bprop(x, out, dout): dx = mul(out, sub(dout, sum_func(mul(out, dout), axis))) return (dx,) @@ -338,10 +339,10 @@ def get_bprop_softplus(self): @bprop_getters.register(P.Tanh) def get_bprop_tanh(self): """Grad definition for `Tanh` operation.""" - logsoftmax_grad = G.TanhGrad() + tanh_grad = SG.TanhGrad() def bprop(x, out, dout): - dx = logsoftmax_grad(out, dout) + dx = tanh_grad(out, dout) return (dx,) return bprop @@ -404,7 +405,8 @@ def get_bprop_layer_norm(self): layer_norm_grad = G.LayerNormGrad(self.begin_norm_axis, self.begin_params_axis) def bprop(x, gamma, beta, out, dout): - dx, d_gamma, d_beta = layer_norm_grad(x, dout[0], out[2], out[1], gamma) + dx, d_gamma, d_beta = layer_norm_grad( + x, dout[0], out[2], out[1], gamma) return dx, d_gamma, d_beta return bprop diff --git a/mindspore/ops/_op_impl/akg/__init__.py b/mindspore/ops/_op_impl/akg/__init__.py index e69de29bb2d..f38b99f5e4f 100644 --- a/mindspore/ops/_op_impl/akg/__init__.py +++ b/mindspore/ops/_op_impl/akg/__init__.py @@ -0,0 +1,88 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""autodiff ops""" +from .abs import _abs_akg +from .add_n import _add_n_akg +from .add import _add_akg +from .apply_momentum import _apply_momentum_akg +from .assign import _assign_akg +from .inplace_assign import _inplace_assign_akg +from .assign_add import _assign_add_akg +from .bias_add_grad import _bias_add_grad_akg +from .bias_add import _bias_add_akg +from .cast import _cast_akg +from .clear_zero import _clear_zero_akg +from .conv_bn1 import _conv_bn1_akg +from .conv2d_backprop_filter import _conv2d_backprop_filter_akg +from .conv2d_backprop_input import _conv2d_backprop_input_akg +from .conv2d import _conv2d_akg +from .div import _div_akg +from .equal_count import _equal_count_akg +from .exp import _exp_akg +from .five2four import _five2four_akg +from .four2five import _four2five_akg +from .fused_batch_norm_grad import _fused_batch_norm_grad_akg +from .fused_batch_norm_infer import _fused_batch_norm_infer_akg +from .fused_batch_norm import _fused_batch_norm_akg +from .fused_bn1_grad import _bn1_grad_akg +from .fused_bn1 import _fused_bn1_akg +from .fused_bn2_grad import _bn2_grad_akg +from .fused_bn2 import _fused_bn2_akg +from .fused_bn3_grad import _bn3_grad_akg +from .fused_bn3 import _fused_bn3_akg +from .gather_v2 import _gather_v2_akg +from .less import _less_akg +from .log import _log_akg +from .matmul import _matmul_akg +from .max_pool_grad_with_argmax import _max_pool_grad_with_argmax_akg +from .max_pool_with_argmax import _max_pool_with_argmax_akg +from .max import _max_akg +from .maximum import _maximum_akg +from .mean_grad import _mean_grad_akg +from .mean import _mean_akg +from .minimum import _minimum_akg +from .mul import _mul_akg +from .neg import _neg_akg +from .one_hot import _one_hot_akg +from .pow import _power_akg +from .real_div import _real_div_akg +from .reciprocal import _reciprocal_akg +from .reduce_max import _reduce_max_akg +from .reduce_mean import _reduce_mean_akg +from .reduce_sum import _reduce_sum_akg +from .relu_grad import _relu_grad_akg +from .relu import _relu_akg +from .reshape import _reshape_akg +from .round import _round_akg +from .rsqrt import _rsqrt_akg +from .select import _select_akg +from .softmax import _softmax_akg +from .sparse_softmax_cross_entropy_with_logits import _sparse_softmax_cross_entropy_with_logits_akg +from .sqrt import _sqrt_akg +from .strided_slice import _strided_slice_akg +from .sub import _sub_akg +from .sum import _sum_akg +from .tile import _tile_akg +from .zeros_like import _zeros_like_akg +from .argmax import _argmax_akg +from .floordiv import _floor_div_akg +from .equal import _equal_akg +from .greater_equal import _greater_equal_akg +from .less_equal import _less_equal_akg +from .expand_dims import _expand_dims_akg +from .greater import _greater_akg +from .equiv_format import _equiv_format_akg +from . import gpu diff --git a/mindspore/ops/_op_impl/akg/abs.py b/mindspore/ops/_op_impl/akg/abs.py new file mode 100644 index 00000000000..8c08f405da4 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/abs.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Abs op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Abs", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _abs_akg(): + """Abs AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/add.py b/mindspore/ops/_op_impl/akg/add.py new file mode 100644 index 00000000000..60544ea1c75 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/add.py @@ -0,0 +1,72 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""TensorAdd op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "TensorAdd", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _add_akg(): + """TensorAdd AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/add_n.py b/mindspore/ops/_op_impl/akg/add_n.py new file mode 100644 index 00000000000..53320f752ee --- /dev/null +++ b/mindspore/ops/_op_impl/akg/add_n.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""AddN op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "AddN", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16","float32","float16","float32", "float16", "float32", + "float16","float32" + ], + "format": [ + "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0", "FracZ", "FracZ", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "dynamic", + "name": "inputs" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16","float32","float16","float32", "float16", "float32", + "float16","float32" + ], + "format": [ + "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0", "FracZ", "FracZ", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _add_n_akg(): + """AddN AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/apply_momentum.py b/mindspore/ops/_op_impl/akg/apply_momentum.py new file mode 100644 index 00000000000..71605718822 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/apply_momentum.py @@ -0,0 +1,103 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ApplyMomentum op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ApplyMomentum", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "use_nesterov", + "param_type": "optional", + "type": "bool" + }, + { + "name": "gradient_scale", + "param_type": "optional", + "type": "float" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","NC1HWC0","FracZ" + ], + "name": "variable" + }, + { + "index": 1, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","NC1HWC0","FracZ" + ], + "name": "accumulation" + }, + { + "index": 2, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","DefaultFormat","DefaultFormat" + ], + "name": "learning_rate" + }, + { + "index": 3, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","NC1HWC0","FracZ" + ], + "name": "gradient" + }, + { + "index": 4, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","DefaultFormat","DefaultFormat" + ], + "name": "momentum" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","NC1HWC0","FracZ" + ], + "name": "output" + } + ] +}""") +def _apply_momentum_akg(): + """ApplyMomentum AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/argmax.py b/mindspore/ops/_op_impl/akg/argmax.py new file mode 100644 index 00000000000..b04862cbeb1 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/argmax.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Argmax op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Argmax", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "axis", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "int32", "int32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _argmax_akg(): + """Argmax AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/assign.py b/mindspore/ops/_op_impl/akg/assign.py new file mode 100644 index 00000000000..e7c5a082bdf --- /dev/null +++ b/mindspore/ops/_op_impl/akg/assign.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Assign op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Assign", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "ref" + }, + { + "index": 1, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "value" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "output" + } + ] +}""") +def _assign_akg(): + """Assign AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/assign_add.py b/mindspore/ops/_op_impl/akg/assign_add.py new file mode 100644 index 00000000000..7d0d345764f --- /dev/null +++ b/mindspore/ops/_op_impl/akg/assign_add.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+
+"""AssignAdd op"""
+from mindspore.ops.op_info_register import op_info_register
+
+
+@op_info_register("""{
+    "op_name": "AssignAdd",
+    "imply_type": "AutoDiff",
+    "fusion_type": "ELEMWISE",
+    "attr": [
+
+    ],
+    "inputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "int32", "float16", "float32", "int32", "float16", "float32"
+            ],
+            "format": [
+                "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"
+            ],
+            "name": "ref"
+        },
+        {
+            "index": 1,
+            "dtype": [
+                "int32", "float16", "float32", "int32", "float16", "float32"
+            ],
+            "format": [
+                "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"
+            ],
+            "name": "value"
+        }
+    ],
+    "outputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "int32", "float16", "float32", "int32", "float16", "float32"
+            ],
+            "format": [
+                "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"
+            ],
+            "name": "output"
+        }
+    ]
+}""")
+def _assign_add_akg():
+    """AssignAdd AutoDiff register"""
+    return diff --git a/mindspore/ops/_op_impl/akg/bias_add.py b/mindspore/ops/_op_impl/akg/bias_add.py new file mode 100644 index 00000000000..74f2bf7bcf5 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/bias_add.py @@ -0,0 +1,68 @@ +# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""BiasAdd op"""
+
+from mindspore.ops.op_info_register import op_info_register
+
+@op_info_register("""{
+    "op_name": "BiasAdd",
+    "imply_type": "AutoDiff",
+    "fusion_type": "ELEMWISE",
+    "attr": [
+        {
+            "name": "data_format",
+            "param_type": "optional",
+            "type": "listStr"
+        }
+    ],
+    "inputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "float16","float32","float16","float32","float16","float32"
+            ],
+            "format": [
+                "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat"
+            ],
+            "name": "x"
+        },
+        {
+            "index": 1,
+            "dtype": [
+                "float16","float32","float16","float32","float16","float32"
+            ],
+            "format": [
+                "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat"
+            ],
+            "name": "b"
+        }
+    ],
+    "outputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "float16","float32","float16","float32","float16","float32"
+            ],
+            "format": [
+                "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat"
+            ],
+            "name": "output"
+        }
+    ]
+}""")
+def _bias_add_akg():
+    """BiasAdd AutoDiff register"""
+    return diff --git a/mindspore/ops/_op_impl/akg/bias_add_grad.py b/mindspore/ops/_op_impl/akg/bias_add_grad.py new file mode 100644 index 00000000000..7726af6692c --- /dev/null +++ b/mindspore/ops/_op_impl/akg/bias_add_grad.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BiasAddGrad op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "BiasAddGrad", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "data_format", + "param_type": "optional", + "type": "listStr" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16","float32","float16","float32","float16","float32" + ], + "format": [ + "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" + ], + "name": "dout" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16","float32","float16","float32","float16","float32" + ], + "format": [ + "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _bias_add_grad_akg(): + """BiasAddGrad AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/cast.py b/mindspore/ops/_op_impl/akg/cast.py new file mode 100644 index 00000000000..a78d4d87e4a --- /dev/null +++ b/mindspore/ops/_op_impl/akg/cast.py @@ -0,0 +1,74 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Cast op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Cast", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "dst_type", + "param_type": "required", + "type": "str" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "bool", "bool", + "float16", "float32", "int32", "int32", + "bool", + "float16", "float32", "bool", "bool", + "float16", "float32", "bool", "bool" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", + "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", + "DefaultFormat", + "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32", "float16", "int32", "float16", + "int32", "int32", "float16", "float32", + "float32", + "float32", "float16", "int32", "float32", + "float32", "float16", "int32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", + "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", + "DefaultFormat", + "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _cast_akg(): + """Cast AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/clear_zero.py b/mindspore/ops/_op_impl/akg/clear_zero.py new file mode 100644 index 00000000000..38bf35044f8 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/clear_zero.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+
+"""ClearZero op"""
+
+from mindspore.ops.op_info_register import op_info_register
+
+
+@op_info_register("""{
+    "op_name": "ClearZero",
+    "imply_type": "AutoDiff",
+    "fusion_type": "ELEMWISE",
+    "attr": [
+        {
+            "name": "pad_mod",
+            "param_type": "optional",
+            "type": "string"
+        },
+        {
+            "name": "window",
+            "param_type": "optional",
+            "type": "int"
+        },
+        {
+            "name": "pad",
+            "param_type": "optional",
+            "type": "int"
+        },
+        {
+            "name": "stride",
+            "param_type": "optional",
+            "type": "int"
+        }
+    ],
+    "inputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "int32", "float16", "float32", "int32", "float16", "float32"
+            ],
+            "format": [
+                "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"
+            ],
+            "name": "x"
+        }
+    ],
+    "outputs": [
+    ]
+}""")
+def _clear_zero_akg():
+    """ClearZero AutoDiff register"""
+    return diff --git a/mindspore/ops/_op_impl/akg/conv2d.py b/mindspore/ops/_op_impl/akg/conv2d.py new file mode 100644 index 00000000000..709aca70012 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/conv2d.py @@ -0,0 +1,88 @@ +# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Conv2D op"""
+from mindspore.ops.op_info_register import op_info_register
+
+
+@op_info_register("""{
+    "op_name": "Conv2D",
+    "imply_type": "AutoDiff",
+    "fusion_type": "CONVLUTION",
+    "attr": [
+        {
+            "name": "x_shape",
+            "param_type": "required",
+            "type": "listInt"
+        },
+        {
+            "name": "w_shape",
+            "param_type": "required",
+            "type": "listInt"
+        },
+        {
+            "name": "pad_list",
+            "param_type": "required",
+            "type": "listInt"
+        },
+        {
+            "name": "stride",
+            "param_type": "optional",
+            "type": "int"
+        },
+        {
+            "name": "dilation",
+            "param_type": "optional",
+            "type": "int"
+        }
+    ],
+    "inputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "float16"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "x"
+        },
+        {
+            "index": 1,
+            "dtype": [
+                "float16"
+            ],
+            "format": [
+                "FracZ"
+            ],
+            "name": "w"
+        }
+    ],
+    "outputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "float16"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "output"
+        }
+    ]
+}""")
+def _conv2d_akg():
+    """Conv2D AutoDiff register"""
+    return diff --git a/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py b/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py new file mode 100644 index 00000000000..1e4e4f1a1ef --- /dev/null +++ b/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py @@ -0,0 +1,88 @@ +# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Conv2DBackpropFilter op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Conv2DBackpropFilter", + "imply_type": "AutoDiff", + "fusion_type": "CONVLUTION", + "attr": [ + { + "name": "input_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "filter_sizes", + "param_type": "required", + "type": "listInt" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + }, + { + "name": "pad_list", + "param_type": "required", + "type": "listInt" + }, + { + "name": "dilation", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "out_backprop" + }, + { + "index": 1, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "input" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "FracZ" + ], + "name": "output" + } + ] +}""") +def _conv2d_backprop_filter_akg(): + """Conv2DBackpropFilter AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py b/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py new file mode 100644 index 00000000000..52c7f2e7b39 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py @@ -0,0 +1,88 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Conv2DBackpropInput op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Conv2DBackpropInput", + "imply_type": "AutoDiff", + "fusion_type": "CONVLUTION", + "attr": [ + { + "name": "input_sizes", + "param_type": "required", + "type": "listInt" + }, + { + "name": "filter_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + }, + { + "name": "pad_list", + "param_type": "required", + "type": "listInt" + }, + { + "name": "dilation", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "out_backprop" + }, + { + "index": 1, + "dtype": [ + "float16" + ], + "format": [ + "FracZ" + ], + "name": "filter" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _conv2d_backprop_input_akg(): + """Conv2DBackpropInput AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/conv_bn1.py b/mindspore/ops/_op_impl/akg/conv_bn1.py new file mode 100644 index 00000000000..118c94e6fcf --- /dev/null +++ b/mindspore/ops/_op_impl/akg/conv_bn1.py @@ -0,0 +1,108 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ConvBN1 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ConvBN1", + "imply_type": "AutoDiff", + "fusion_type": "CONVLUTION", + "attr": [ + { + "name": "x_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "w_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "pad_list", + "param_type": "required", + "type": "listInt" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + }, + { + "name": "dilation", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16" + ], + "format": [ + "FracZ" + ], + "name": "w" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "conv_res_16" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "var_part" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "mean" + } + ] +}""") +def _conv_bn1_akg(): + """ConvBN1 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/div.py b/mindspore/ops/_op_impl/akg/div.py new file mode 100644 index 00000000000..56cdcca8684 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/div.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Div op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Div", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _div_akg(): + """Div AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/equal.py b/mindspore/ops/_op_impl/akg/equal.py new file mode 100644 index 00000000000..35874c62bb2 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/equal.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Equal op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Equal", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "bool", "bool", "bool", "bool", "bool", "bool" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _equal_akg(): + """Equal AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/equal_count.py b/mindspore/ops/_op_impl/akg/equal_count.py new file mode 100644 index 00000000000..9c575db7b31 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/equal_count.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""EqualCount op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "EqualCount", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32" + ], + "format": [ + "DefaultFormat" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "int32" + ], + "format": [ + "DefaultFormat" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32" + ], + "format": [ + "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _equal_count_akg(): + """EqualCount AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/equiv_format.py b/mindspore/ops/_op_impl/akg/equiv_format.py new file mode 100644 index 00000000000..111451b15c5 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/equiv_format.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""EquivFormat op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "EquivFormat", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "FRACTAL_NZ", "FRACTAL_NZ", "DefaultFormat", "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _equiv_format_akg(): + """EquivFormat AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/exp.py b/mindspore/ops/_op_impl/akg/exp.py new file mode 100644 index 00000000000..273b3348a45 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/exp.py @@ -0,0 +1,59 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Exp op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Exp", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _exp_akg(): + """Exp AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/expand_dims.py b/mindspore/ops/_op_impl/akg/expand_dims.py new file mode 100644 index 00000000000..9e1b18153a7 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/expand_dims.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ExpandDims op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ExpandDims", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "y" + } + ] +}""") +def _expand_dims_akg(): + """ExpandDims AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/five2four.py b/mindspore/ops/_op_impl/akg/five2four.py new file mode 100644 index 00000000000..1dac2c3628a --- /dev/null +++ b/mindspore/ops/_op_impl/akg/five2four.py @@ -0,0 +1,68 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Five2Four op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Five2Four", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "shape4d", + "param_type": "required", + "type": "listInt" + }, + { + "name": "dstType", + "param_type": "required", + "type": "str" + }, + { + "name": "output_format", + "param_type": "required", + "type": "str" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16","float16","float16","float32","float16","float32" + ], + "format": [ + "NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16","float16","float32","float32","float32","float32" + ], + "format": [ + "DefaultFormat","NHWC","DefaultFormat","DefaultFormat","NHWC","NHWC" + ], + "name": "output" + } + ] +}""") +def _five2four_akg(): + """Five2Four AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/floordiv.py b/mindspore/ops/_op_impl/akg/floordiv.py new file mode 100644 index 00000000000..99e577b4be1 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/floordiv.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""FloorDiv op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FloorDiv", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "int32", "int32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _floor_div_akg(): + """FloorDiv AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/four2five.py b/mindspore/ops/_op_impl/akg/four2five.py new file mode 100644 index 00000000000..01b6f857151 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/four2five.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Four2Five op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Four2Five", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "data_format", + "param_type": "optional", + "type": "listStr" + }, + { + "name": "dst_type", + "param_type": "required", + "type": "str" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float32", "float16","float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NHWC", "NHWC", "NHWC" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float16", "float32", "float16", "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _four2five_akg(): + """Four2Five AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm.py b/mindspore/ops/_op_impl/akg/fused_batch_norm.py new file mode 100644 index 00000000000..5ce9839328e --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_batch_norm.py @@ -0,0 +1,149 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""FusedBatchNorm op""" + +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FusedBatchNorm", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "momentum", + "param_type": "optional", + "type": "float" + }, + { + "name": "epsilon", + "param_type": "optional", + "type": "float" + }, + { + "name": "data_format", + "param_type": "optional", + "type": "listStr" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "scale" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "b" + }, + { + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "mean" + }, + { + "index": 4, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "variance" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "y" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "running_mean" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "running_variance" + }, + { + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "save_mean" + }, + { + "index": 4, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "save_inv_variance" + } + ] +}""") +def _fused_batch_norm_akg(): + """FusedBatchNorm AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py b/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py new file mode 100644 index 00000000000..9191548f731 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py @@ -0,0 +1,119 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+
+"""FusedBatchNormGrad op"""
+
+from mindspore.ops.op_info_register import op_info_register
+
+
+@op_info_register("""{
+    "op_name": "FusedBatchNormGrad",
+    "imply_type": "AutoDiff",
+    "fusion_type": "OPAQUE",
+    "attr": [
+        {
+            "name": "data_format",
+            "param_type": "optional",
+            "type": "listStr"
+        }
+    ],
+    "inputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "dy"
+        },
+        {
+            "index": 1,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "x"
+        },
+        {
+            "index": 2,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "scale"
+        },
+        {
+            "index": 3,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "save_mean"
+        },
+        {
+            "index": 4,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "save_inv_variance"
+        }
+    ],
+    "outputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "dx"
+        },
+        {
+            "index": 1,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "bn_scale"
+        },
+        {
+            "index": 2,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "bn_bias"
+        }
+    ]
+}""")
+def _fused_batch_norm_grad_akg():
+    """FusedBatchNormGrad AutoDiff register"""
+    return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py b/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py new file mode 100644 index 00000000000..1e7743fa8f5 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py @@ -0,0 +1,109 @@ +# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ + +"""FusedBatchNormInfer op""" + +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FusedBatchNormInfer", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "momentum", + "param_type": "optional", + "type": "float" + }, + { + "name": "epsilon", + "param_type": "optional", + "type": "float" + }, + { + "name": "data_format", + "param_type": "optional", + "type": "listStr" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "scale" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "b" + }, + { + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "mean" + }, + { + "index": 4, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "variance" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "y" + } + ] +}""") +def _fused_batch_norm_infer_akg(): + """FusedBatchNormInfer AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_bn1.py b/mindspore/ops/_op_impl/akg/fused_bn1.py new file mode 100644 index 00000000000..fdaa673f257 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn1.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""FusedBN1 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FusedBN1", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "data" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + }, + { + "index": 1, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _fused_bn1_akg(): + """FusedBN1 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_bn1_grad.py b/mindspore/ops/_op_impl/akg/fused_bn1_grad.py new file mode 100644 index 00000000000..8de6796d6f2 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn1_grad.py @@ -0,0 +1,93 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BNGrad1 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "BNGrad1", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "dy" + }, + { + "index": 1, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "data" + },{ + "index": 2, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "mean" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + }, + { + "index": 1, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + }, + { + "index": 2, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _bn1_grad_akg(): + """BNGrad1 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_bn2.py b/mindspore/ops/_op_impl/akg/fused_bn2.py new file mode 100644 index 00000000000..e26a5ad8a06 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn2.py @@ -0,0 +1,108 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""FusedBN2 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FusedBN2", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "momentum", + "param_type": "optional", + "type": "float" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "mean" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "var_part" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "running_mean" + }, + { + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "running_var" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _fused_bn2_akg(): + """FusedBN2 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_bn2_grad.py b/mindspore/ops/_op_impl/akg/fused_bn2_grad.py new file mode 100644 index 00000000000..e29a9177b61 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn2_grad.py @@ -0,0 +1,132 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+
+"""BNGrad2 op"""
+from mindspore.ops.op_info_register import op_info_register
+
+
+@op_info_register("""{
+    "op_name": "BNGrad2",
+    "imply_type": "AutoDiff",
+    "fusion_type": "COMMREDUCE",
+    "attr": [
+        {
+            "name": "eps",
+            "param_type": "optional",
+            "type": "float"
+        },
+        {
+            "name": "data_shape",
+            "param_type": "optional",
+            "type": "listInt"
+        }
+    ],
+    "inputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "dgamma_red_hw"
+        },
+        {
+            "index": 1,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "dbeta_red_hw"
+        },{
+            "index": 2,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "variance"
+        },
+        {
+            "index": 3,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "gamma"
+        }
+    ],
+    "outputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "output"
+        },
+        {
+            "index": 1,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "output"
+        },
+        {
+            "index": 2,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "output"
+        },
+        {
+            "index": 3,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "output"
+        },
+        {
+            "index": 4,
+            "dtype": [
+                "float32"
+            ],
+            "format": [
+                "NC1HWC0"
+            ],
+            "name": "output"
+        }
+    ]
+}""")
+def _bn2_grad_akg():
+    """BNGrad2 AutoDiff register"""
+    return diff --git a/mindspore/ops/_op_impl/akg/fused_bn3.py b/mindspore/ops/_op_impl/akg/fused_bn3.py new file mode 100644 index 00000000000..74f3f652f36 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn3.py @@ -0,0 +1,95 @@ +# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ + +"""FusedBN3 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FusedBN3", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "eps", + "param_type": "optional", + "type": "float" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "data" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "mean" + },{ + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "variance" + },{ + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "gamma" + },{ + "index": 4, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "beta" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _fused_bn3_akg(): + """FusedBN3 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_bn3_grad.py b/mindspore/ops/_op_impl/akg/fused_bn3_grad.py new file mode 100644 index 00000000000..5ffc57a68e2 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn3_grad.py @@ -0,0 +1,93 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BNGrad3 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "BNGrad3", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "dy" + }, + { + "index": 1, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "rs" + },{ + "index": 2, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "dgamma_dx" + }, + { + "index": 3, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "dbeta_dx" + }, + { + "index": 4, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "data_minus_mean" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _bn3_grad_akg(): + """BNGrad3 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/gather_v2.py b/mindspore/ops/_op_impl/akg/gather_v2.py new file mode 100644 index 00000000000..84ab7eb6696 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/gather_v2.py @@ -0,0 +1,68 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""GatherV2 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "GatherV2", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "axis", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "params" + }, + { + "index": 1, + "dtype": [ + "int32", "int32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "indices" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _gather_v2_akg(): + """GatherV2 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/greater.py b/mindspore/ops/_op_impl/akg/greater.py new file mode 100644 index 00000000000..941946163a0 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/greater.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Greater op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Greater", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float16", "float32", "float32" + ], + "format": [ + "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float16", "float32", "float32" + ], + "format": [ + "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "bool", "bool", "bool", "bool" + ], + "format": [ + "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _greater_akg(): + """Greater AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/greater_equal.py b/mindspore/ops/_op_impl/akg/greater_equal.py new file mode 100644 index 00000000000..11642baa864 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/greater_equal.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""GreaterEqual op"""
+from mindspore.ops.op_info_register import op_info_register
+
+
+@op_info_register("""{
+    "op_name": "GreaterEqual",
+    "imply_type": "AutoDiff",
+    "fusion_type": "ELEMWISE",
+    "attr": [
+
+    ],
+    "inputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "int32", "float16", "float32", "int32", "float16", "float32"
+            ],
+            "format": [
+                "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"
+            ],
+            "name": "x"
+        },
+        {
+            "index": 1,
+            "dtype": [
+                "int32", "float16", "float32", "int32", "float16", "float32"
+            ],
+            "format": [
+                "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"
+            ],
+            "name": "y"
+        }
+    ],
+    "outputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "bool", "bool", "bool", "bool", "bool", "bool"
+            ],
+            "format": [
+                "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"
+            ],
+            "name": "output"
+        }
+    ]
+}""")
+def _greater_equal_akg():
+    """GreaterEqual AutoDiff register"""
+    return diff --git a/mindspore/ops/_op_impl/akg/inplace_assign.py b/mindspore/ops/_op_impl/akg/inplace_assign.py new file mode 100644 index 00000000000..1cc40abe9b5 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/inplace_assign.py @@ -0,0 +1,78 @@ +# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ + +"""InplaceAssign op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "InplaceAssign", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "fake_output", + "param_type": "optional", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "y" + }, + { + "index": 2, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "z" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "output" + } + ] +}""") +def _inplace_assign_akg(): + """InplaceAssign AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/less.py b/mindspore/ops/_op_impl/akg/less.py new file mode 100644 index 00000000000..499ed2e8fc0 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/less.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+
+"""Less op"""
+from mindspore.ops.op_info_register import op_info_register
+
+
+@op_info_register("""{
+    "op_name": "Less",
+    "imply_type": "AutoDiff",
+    "fusion_type": "ELEMWISE",
+    "attr": [
+
+    ],
+    "inputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "float16", "float16"
+            ],
+            "format": [
+                "DefaultFormat", "NC1HWC0"
+            ],
+            "name": "x"
+        },
+        {
+            "index": 1,
+            "dtype": [
+                "float16", "float16"
+            ],
+            "format": [
+                "DefaultFormat", "NC1HWC0"
+            ],
+            "name": "y"
+        }
+    ],
+    "outputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "bool", "bool"
+            ],
+            "format": [
+                "DefaultFormat", "NC1HWC0"
+            ],
+            "name": "output"
+        }
+    ]
+}""")
+def _less_akg():
+    """Less AutoDiff register"""
+    return
diff --git a/mindspore/ops/_op_impl/akg/less_equal.py b/mindspore/ops/_op_impl/akg/less_equal.py
new file mode 100644
index 00000000000..97fbdec0906
--- /dev/null
+++ b/mindspore/ops/_op_impl/akg/less_equal.py
@@ -0,0 +1,64 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""LessEqual op"""
+from mindspore.ops.op_info_register import op_info_register
+
+
+@op_info_register("""{
+    "op_name": "LessEqual",
+    "imply_type": "AutoDiff",
+    "fusion_type": "ELEMWISE",
+    "attr": [
+
+    ],
+    "inputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "int32", "float16", "float32", "int32", "float16", "float32"
+            ],
+            "format": [
+                "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"
+            ],
+            "name": "x"
+        },
+        {
+            "index": 1,
+            "dtype": [
+                "int32", "float16", "float32", "int32", "float16", "float32"
+            ],
+            "format": [
+                "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"
+            ],
+            "name": "y"
+        }
+    ],
+    "outputs": [
+        {
+            "index": 0,
+            "dtype": [
+                "bool", "bool", "bool", "bool", "bool", "bool"
+            ],
+            "format": [
+                "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"
+            ],
+            "name": "output"
+        }
+    ]
+}""")
+def _less_equal_akg():
+    """LessEqual AutoDiff register"""
+    return
diff --git a/mindspore/ops/_op_impl/akg/log.py b/mindspore/ops/_op_impl/akg/log.py
new file mode 100644
index 00000000000..526538d17d1
--- /dev/null
+++ b/mindspore/ops/_op_impl/akg/log.py
@@ -0,0 +1,55 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ + +"""Log op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Log", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _log_akg(): + """Log AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/matmul.py b/mindspore/ops/_op_impl/akg/matmul.py new file mode 100644 index 00000000000..084ba754fa9 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/matmul.py @@ -0,0 +1,73 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""MatMul op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "MatMul", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "transpose_a", + "param_type": "optional", + "type": "bool" + }, + { + "name": "transpose_b", + "param_type": "optional", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat" + ], + "name": "x1" + }, + { + "index": 1, + "dtype": [ + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat" + ], + "name": "x2" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _matmul_akg(): + """MatMul AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/max.py b/mindspore/ops/_op_impl/akg/max.py new file mode 100644 index 00000000000..21fd4ef9c46 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/max.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Max op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Max", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + }, + { + "name": "keep_dims", + "param_type": "required", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _max_akg(): + """Max AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py b/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py new file mode 100644 index 00000000000..4adad3eb883 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py @@ -0,0 +1,93 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""MaxPoolGradWithArgmax op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "MaxPoolGradWithArgmax", + "imply_type": "AutoDiff", + "fusion_type": "CONVLUTION", + "attr": [ + { + "name": "pad_mode", + "param_type": "optional", + "type": "str" + }, + { + "name": "window", + "param_type": "optional", + "type": "int" + }, + { + "name": "pad", + "param_type": "optional", + "type": "int" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float16" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat" + ], + "name": "argmax" + }, + { + "index": 2, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "grad" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _max_pool_grad_with_argmax_akg(): + """MaxPoolGradWithArgmax AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py b/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py new file mode 100644 index 00000000000..3ae36d47932 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py @@ -0,0 +1,83 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""MaxPoolWithArgmax op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "MaxPoolWithArgmax", + "imply_type": "AutoDiff", + "fusion_type": "CONVLUTION", + "attr": [ + { + "name": "pad_mode", + "param_type": "optional", + "type": "str" + }, + { + "name": "window", + "param_type": "optional", + "type": "int" + }, + { + "name": "pad", + "param_type": "optional", + "type": "int" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + }, + { + "index": 1, + "dtype": [ + "float16" + ], + "format": [ + "DefaultFormat" + ], + "name": "argmax" + } + ] +}""") +def _max_pool_with_argmax_akg(): + """MaxPoolWithArgmax AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/maximum.py b/mindspore/ops/_op_impl/akg/maximum.py new file mode 100644 index 00000000000..8d8de5270ac --- /dev/null +++ b/mindspore/ops/_op_impl/akg/maximum.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Maximum op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Maximum", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _maximum_akg(): + """Maximum AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/mean.py b/mindspore/ops/_op_impl/akg/mean.py new file mode 100644 index 00000000000..0b49e768653 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/mean.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""SimpleMean op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "SimpleMean", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _mean_akg(): + """SimpleMean AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/mean_grad.py b/mindspore/ops/_op_impl/akg/mean_grad.py new file mode 100644 index 00000000000..3b8379d1f0e --- /dev/null +++ b/mindspore/ops/_op_impl/akg/mean_grad.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""SimpleMeanGrad op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "SimpleMeanGrad", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "input_shape", + "param_type": "required", + "type": "listInt" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "HEAD" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _mean_grad_akg(): + """SimpleMeanGrad AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/minimum.py b/mindspore/ops/_op_impl/akg/minimum.py new file mode 100644 index 00000000000..759df2085fc --- /dev/null +++ b/mindspore/ops/_op_impl/akg/minimum.py @@ -0,0 +1,70 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Minimum op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Minimum", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32", + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32", + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32", + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _minimum_akg(): + """Minimum AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/mul.py b/mindspore/ops/_op_impl/akg/mul.py new file mode 100644 index 00000000000..ab02c2d89e9 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/mul.py @@ -0,0 +1,86 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Mul op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Mul", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "x_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "y_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "data_format", + "param_type": "required", + "type": "listStr" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _mul_akg(): + """Mul AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/neg.py b/mindspore/ops/_op_impl/akg/neg.py new file mode 100644 index 00000000000..bc00d60271e --- /dev/null +++ b/mindspore/ops/_op_impl/akg/neg.py @@ -0,0 +1,59 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Neg op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Neg", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32", + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32", + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _neg_akg(): + """Neg AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/one_hot.py b/mindspore/ops/_op_impl/akg/one_hot.py new file mode 100644 index 00000000000..c5034dbbd4b --- /dev/null +++ b/mindspore/ops/_op_impl/akg/one_hot.py @@ -0,0 +1,83 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""OneHot op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "OneHot", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "depth", + "param_type": "required", + "type": "int" + }, + { + "name": "axis", + "param_type": "required", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "int32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "indices" + }, + { + "index": 1, + "dtype": [ + "int32", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "on_value" + }, + { + "index": 2, + "dtype": [ + "int32", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "off_value" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _one_hot_akg(): + """OneHot AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/pow.py b/mindspore/ops/_op_impl/akg/pow.py new file mode 100644 index 00000000000..d782968c052 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/pow.py @@ -0,0 +1,65 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Pow op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Pow", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "param_type": "required", + "name": "power" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _power_akg(): + """Pow AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/real_div.py b/mindspore/ops/_op_impl/akg/real_div.py new file mode 100644 index 00000000000..9fa37a24e33 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/real_div.py @@ -0,0 +1,72 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""RealDiv op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "RealDiv", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _real_div_akg(): + """RealDiv AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/reciprocal.py b/mindspore/ops/_op_impl/akg/reciprocal.py new file mode 100644 index 00000000000..9fd7cc40b42 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/reciprocal.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Reciprocal op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Reciprocal", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _reciprocal_akg(): + """Reciprocal AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/reduce_max.py b/mindspore/ops/_op_impl/akg/reduce_max.py new file mode 100644 index 00000000000..b9db8ea83af --- /dev/null +++ b/mindspore/ops/_op_impl/akg/reduce_max.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ReduceMax op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ReduceMax", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + }, + { + "name": "keep_dims", + "param_type": "required", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float16" + ], + "format": [ + "DefaultFormat", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float16" + ], + "format": [ + "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _reduce_max_akg(): + """ReduceMax AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/reduce_mean.py b/mindspore/ops/_op_impl/akg/reduce_mean.py new file mode 100644 index 00000000000..0a4ffdf2216 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/reduce_mean.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ReduceMean op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ReduceMean", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + }, + { + "name": "keep_dims", + "param_type": "required", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _reduce_mean_akg(): + """ReduceMean AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/reduce_sum.py b/mindspore/ops/_op_impl/akg/reduce_sum.py new file mode 100644 index 00000000000..20d091ac76c --- /dev/null +++ b/mindspore/ops/_op_impl/akg/reduce_sum.py @@ -0,0 +1,73 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ReduceSum op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ReduceSum", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + }, + { + "name": "keep_dims", + "param_type": "required", + "type": "bool" + }, + { + "name": "atomic_add", + "param_type": "optional", + "type": "str" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _reduce_sum_akg(): + """ReduceSum AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/relu.py b/mindspore/ops/_op_impl/akg/relu.py new file mode 100644 index 00000000000..b32725f8859 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/relu.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ReLU op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ReLU", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _relu_akg(): + """ReLU AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/relu_grad.py b/mindspore/ops/_op_impl/akg/relu_grad.py new file mode 100644 index 00000000000..c785b750fe1 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/relu_grad.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ReluGrad op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ReluGrad", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0" + ], + "name": "y_backprop" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _relu_grad_akg(): + """ReluGrad AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/reshape.py b/mindspore/ops/_op_impl/akg/reshape.py new file mode 100644 index 00000000000..d200b66fa2e --- /dev/null +++ b/mindspore/ops/_op_impl/akg/reshape.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Reshape op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Reshape", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "shape", + "param_type": "required", + "type": "listInt" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "tensor" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _reshape_akg(): + """Reshape AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/round.py b/mindspore/ops/_op_impl/akg/round.py new file mode 100644 index 00000000000..0625c3ceda7 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/round.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Round op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Round", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _round_akg(): + """Round AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/rsqrt.py b/mindspore/ops/_op_impl/akg/rsqrt.py new file mode 100644 index 00000000000..9264864f914 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/rsqrt.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Rsqrt op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Rsqrt", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _rsqrt_akg(): + """Rsqrt AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/select.py b/mindspore/ops/_op_impl/akg/select.py new file mode 100644 index 00000000000..006c6a5444e --- /dev/null +++ b/mindspore/ops/_op_impl/akg/select.py @@ -0,0 +1,76 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Select op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Select", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "bool", "bool", "bool", "bool", "bool", "bool" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "param_type": "required", + "name": "condition" + }, + { + "index": 1, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 2, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _select_akg(): + """Select AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/softmax.py b/mindspore/ops/_op_impl/akg/softmax.py new file mode 100644 index 00000000000..a41c2aef368 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/softmax.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Softmax op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Softmax", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _softmax_akg(): + """Softmax AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py b/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py new file mode 100644 index 00000000000..e9e828f312e --- /dev/null +++ b/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py @@ -0,0 +1,73 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""SparseSoftmaxCrossEntropyWithLogits op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "SparseSoftmaxCrossEntropyWithLogits", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "is_grad", + "param_type": "optional", + "type": "bool" + }, + { + "name": "sens", + "param_type": "optional", + "type": "float" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "DefaultFormat" + ], + "name": "features" + }, + { + "index": 1, + "dtype": [ + "int32" + ], + "format": [ + "DefaultFormat" + ], + "name": "labels" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _sparse_softmax_cross_entropy_with_logits_akg(): + """SparseSoftmaxCrossEntropyWithLogits AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/sqrt.py b/mindspore/ops/_op_impl/akg/sqrt.py new file mode 100644 index 00000000000..fcaa84b3d41 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/sqrt.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Sqrt op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Sqrt", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _sqrt_akg(): + """Sqrt AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/strided_slice.py b/mindspore/ops/_op_impl/akg/strided_slice.py new file mode 100644 index 00000000000..bdbd8dfc2f1 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/strided_slice.py @@ -0,0 +1,93 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""StridedSlice op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "StridedSlice", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "begin", + "param_type": "required", + "type": "listInt" + }, + { + "name": "end", + "param_type": "required", + "type": "listInt" + }, + { + "name": "strides", + "param_type": "required", + "type": "listInt" + }, + { + "name": "begin_mask", + "param_type": "required", + "type": "int" + }, + { + "name": "end_mask", + "param_type": "required", + "type": "int" + }, + { + "name": "ellipsis_mask", + "param_type": "required", + "type": "int" + }, + { + "name": "new_axis_mask", + "param_type": "required", + "type": "int" + }, + { + "name": "shrink_axis_mask", + "param_type": "required", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _strided_slice_akg(): + """StridedSlice AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/sub.py b/mindspore/ops/_op_impl/akg/sub.py new file mode 100644 index 00000000000..846aa280bb0 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/sub.py @@ -0,0 +1,72 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Sub op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Sub", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _sub_akg(): + """Sub AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/sum.py b/mindspore/ops/_op_impl/akg/sum.py new file mode 100644 index 00000000000..501b387b250 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/sum.py @@ -0,0 +1,68 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Sum op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Sum", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + }, + { + "name": "keepdims", + "param_type": "required", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _sum_akg(): + """Sum AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/tile.py b/mindspore/ops/_op_impl/akg/tile.py new file mode 100644 index 00000000000..bd13978fe73 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/tile.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Tile op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Tile", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "multiples", + "param_type": "required", + "type": "listInt" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _tile_akg(): + """Tile AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/zeros_like.py b/mindspore/ops/_op_impl/akg/zeros_like.py new file mode 100644 index 00000000000..a02ece22d71 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/zeros_like.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ZerosLike op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ZerosLike", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _zeros_like_akg(): + """ZerosLike AutoDiff register""" + return diff --git a/mindspore/ops/_selected_grad_ops.py b/mindspore/ops/_selected_grad_ops.py new file mode 100644 index 00000000000..5da1d53abfb --- /dev/null +++ b/mindspore/ops/_selected_grad_ops.py @@ -0,0 +1,50 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +""" resolved grad ops """ +from mindspore.ops.op_selector import new_ops_selector + +op_selector = new_ops_selector( + "mindspore.ops.operations._grad_ops", "mindspore.nn.graph_kernels") + + +@op_selector +class MaximumGrad: + def __call__(self, *args): + pass + + +@op_selector +class MinimumGrad: + def __call__(self, *args): + pass + + +@op_selector +class AbsGrad: + def __call__(self, *args): + pass + + +@op_selector +class BiasAddGrad: + def __call__(self, *args): + pass + + +@op_selector +class TanhGrad: + def __call__(self, *args): + pass diff --git a/mindspore/ops/_selected_ops.py b/mindspore/ops/_selected_ops.py new file mode 100644 index 00000000000..5e125025c92 --- /dev/null +++ b/mindspore/ops/_selected_ops.py @@ -0,0 +1,108 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+
+""" resolve ops """
+from mindspore.ops.op_selector import new_ops_selector
+
+op_selector = new_ops_selector(
+    "mindspore.ops.operations", "mindspore.nn.graph_kernels")
+opt_selector = new_ops_selector(
+    "mindspore.nn.optim", "mindspore.nn.graph_kernels")
+nn_selector = new_ops_selector(
+    "mindspore.nn", "mindspore.nn.graph_kernels")
+
+
+@nn_selector
+class BatchNorm2d:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class ReLU:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class ReduceMean:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class BiasAdd:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class FusedBatchNorm:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class ApplyMomentum:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class SoftmaxCrossEntropyWithLogits:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class LogSoftmax:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class Tanh:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class Gelu:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class LayerNorm:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class Softmax:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class LambUpdateWithLR:
+    def __call__(self, *args):
+        pass
+
+
+@op_selector
+class LambNextMV:
+    def __call__(self, *args):
+        pass
diff --git a/mindspore/ops/op_selector.py b/mindspore/ops/op_selector.py new file mode 100644 index 00000000000..bdd00ac7f15 --- /dev/null +++ b/mindspore/ops/op_selector.py @@ -0,0 +1,120 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""
+A factory that creates op selector instances to configure a switch on a class,
+which controls whether an op resolves to its GraphKernel or Primitive implementation.
+"""
+import importlib
+import inspect
+from mindspore import context
+
+
+class _OpSelector:
+    """
+    A helper class used to choose between different types of operators.
+
+    When an instance of this class is called, the proper operator is returned
+    according to context['enable_graph_kernel'] and the `op_type` keyword
+    argument. The returned operator is either a GraphKernel op or a Primitive op.
+
+    Args:
+        op (class): an empty class whose name is the operator name
+        config_optype (str): operator type configured by the decorator, either
+            'GraphKernel' or 'Primitive'
+        primitive_pkg (str): package name of the Primitive operator
+        graph_kernel_pkg (str): package name of the GraphKernel operator
+
+    Examples:
+        >>> class A: pass
+        >>> selected_op = _OpSelector(A, "GraphKernel",
+        >>>                           "primitive.ops.pkg", "graph_kernel.ops.pkg")
+        >>> # with graph kernel enabled, selected_op() will call graph_kernel.ops.pkg.A()
+    """
+    GRAPH_KERNEL = "GraphKernel"
+    PRIMITIVE = "Primitive"
+    DEFAULT_OP_TYPE = PRIMITIVE
+    KW_STR = "op_type"
+
+    def __init__(self, op, config_optype, primitive_pkg, graph_kernel_pkg):
+        self.op_name = op.__name__
+        self.config_optype = config_optype
+        self.graph_kernel_pkg = graph_kernel_pkg
+        self.primitive_pkg = primitive_pkg
+
+    def __call__(self, *args, **kwargs):
+        _op_type = _OpSelector.DEFAULT_OP_TYPE
+        if context.get_context("enable_graph_kernel"):
+            if _OpSelector.KW_STR in kwargs:
+                _op_type = kwargs.get(_OpSelector.KW_STR)
+                kwargs.pop(_OpSelector.KW_STR, None)
+            elif self.config_optype is not None:
+                _op_type = self.config_optype
+        if _op_type == _OpSelector.GRAPH_KERNEL:
+            pkg = self.graph_kernel_pkg
+        else:
+            pkg = self.primitive_pkg
+        op = getattr(importlib.import_module(pkg, __package__), self.op_name)
+        return op(*args, **kwargs)
+
+
+def new_ops_selector(primitive_pkg, graph_kernel_pkg):
+    """
+    A factory method that returns an op selector.
+
+    When the GraphKernel switch is on, that is,
+    `context.get_context('enable_graph_kernel') == True`, the op type is resolved as follows:
+    (1). call the real op with an extra parameter `op_type='Primitive'` or `op_type='GraphKernel'`
+    (2). pass a parameter to the op selector, like `@op_selector('Primitive')` or
+         `@op_selector('GraphKernel')`
+    (3). otherwise, the default op type PRIMITIVE is used
+    The priority, from highest to lowest, is (1), (2), (3).
+    If the GraphKernel switch is off, then op_type will always be PRIMITIVE.
+
+    Args:
+        primitive_pkg (str): primitive op's package name
+        graph_kernel_pkg (str): graph kernel op's package name
+
+    Returns:
+        An op selector, which controls which operator is actually called.
+ + Examples: + >>> op_selector = new_ops_selector("primitive_pkg.some.path", + >>> "graph_kernel_pkg.some.path") + >>> @op_selector + >>> class ReduceSum: pass + """ + + def op_selector(cls_or_optype): + + _primitive_pkg = primitive_pkg + _graph_kernel_pkg = graph_kernel_pkg + + def direct_op_type(): + darg = None + if cls_or_optype is None: + pass + elif not inspect.isclass(cls_or_optype): + darg = cls_or_optype + return darg + + if direct_op_type() is not None: + def deco_cls(_real_cls): + return _OpSelector(_real_cls, direct_op_type(), _primitive_pkg, _graph_kernel_pkg) + return deco_cls + + return _OpSelector(cls_or_optype, direct_op_type(), _primitive_pkg, _graph_kernel_pkg) + + return op_selector diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 57ffd969c14..901db32c46b 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -51,7 +51,7 @@ from .math_ops import (Abs, ACos, Asin, Asinh, AddN, AssignAdd, AssignSub, Atan2 NPUGetFloatStatus, Pow, RealDiv, IsNan, IsInf, IsFinite, FloatStatus, Reciprocal, CumSum, HistogramFixedWidth, Sin, Sqrt, Rsqrt, BesselI0e, BesselI1e, - Square, Sub, TensorAdd, Sign, Round, SquareSumAll, Atan, Atanh, Cosh, Sinh) + Square, Sub, TensorAdd, Sign, Round, SquareSumAll, Atan, Atanh, Cosh, Sinh, Eps) from .random_ops import (RandomChoiceWithMask, Normal) from .nn_ops import (LSTM, SGD, Adam, SparseApplyAdam, SparseApplyLazyAdam, ApplyMomentum, BatchNorm, @@ -282,6 +282,7 @@ __all__ = [ "Sign", "LARSUpdate", "Round", + "Eps", "ApplyFtrl", "SpaceToBatch", "SparseApplyFtrl", diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py index e964ba272ce..24177bceebc 100644 --- a/mindspore/ops/operations/_grad_ops.py +++ b/mindspore/ops/operations/_grad_ops.py @@ -405,6 +405,33 @@ class FusedBatchNormGrad(Primitive): def __call__(self, dy, x, scale, save_mean, save_inv_variance): raise NotImplementedError +class BNTrainingReduceGrad(PrimitiveWithInfer): + """Gradients of FusedBatchNorm operation.""" + + @prim_attr_register + def __init__(self, epsilon=0.0001): + _inputs = ['grads', 'x', 'diff_scale', 'diff_offset', 'scale', 'batch_mean', 'batch_variance'] + self.init_prim_io_names(inputs=_inputs, outputs=['y']) + + def infer_shape(self, grads, x, diff_scale, diff_offset, scale, batch_mean, batch_variance): + return grads + + def infer_dtype(self, grads, x, diff_scale, diff_offset, scale, batch_mean, batch_variance): + return grads + +class BNTrainingUpdateGrad(PrimitiveWithInfer): + """Gradients of FusedBatchNorm operation.""" + + @prim_attr_register + def __init__(self, epsilon=0.0001): + self.init_prim_io_names(inputs=['grads', 'x', 'batch_mean', 'batch_variance'], + outputs=['diff_scale', 'diff_offset']) + + def infer_shape(self, grads, x, batch_mean, batch_variance): + return (batch_mean, batch_variance) + + def infer_dtype(self, grads, x, batch_mean, batch_variance): + return (batch_mean, batch_variance) class GeluGrad(PrimitiveWithInfer): """Gradients of Gelu operation.""" diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index d7298e2099c..395d3c509c2 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -83,12 +83,17 @@ class ExpandDims(PrimitiveWithInfer): axis_v = axis['value'] rank = len(x_shape) validator.check_int_range('axis', axis_v, -rank - 1, rank, Rel.INC_BOTH, self.name) + value = None + if x['value'] is not None: + value = x['value'].asnumpy() + 
value = np.expand_dims(value, axis_v) + value = Tensor(value) if axis_v < 0: axis_v = rank + 1 + axis_v x_shape.insert(axis_v, 1) out = {'shape': x_shape, 'dtype': x['dtype'], - 'value': None} + 'value': value} return out @@ -1661,6 +1666,7 @@ class Select(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init""" + self.init_prim_io_names(inputs=['condition', 'x', 'y'], outputs=['output']) def infer_shape(self, cond_shape, x_shape, y_shape): if cond_shape != x_shape or x_shape != y_shape: @@ -1676,6 +1682,16 @@ class Select(PrimitiveWithInfer): raise TypeError('\'%s\' the x_type %s must be the same as y_type %s.' % (self.name, x_type, y_type)) return x_type + def infer_value(self, cond, x, y): + if cond is not None and x is not None and y is not None: + cond = cond.asnumpy() + x = x.asnumpy() + y = y.asnumpy() + out = np.where(cond, x, y) + return Tensor(out) + return None + + class StridedSlice(PrimitiveWithInfer): r""" @@ -2472,8 +2488,7 @@ class SpaceToBatch(PrimitiveWithInfer): validator.check_integer('rank of input_x', len(x_shape), 4, Rel.EQ, self.name) out_shape = copy.deepcopy(x_shape) for i in range(2): - padded = out_shape[i + 2] + self.paddings[i][0] + \ - self.paddings[i][1] + padded = out_shape[i + 2] + self.paddings[i][0] + self.paddings[i][1] if padded % self.block_size != 0: raise ValueError(f'For \'{self.name}\' padded[{i}] {padded} should be divisible by ' f'block_size {self.block_size}') diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 8bba03f2518..f66bea0be26 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -15,6 +15,7 @@ """Operators for math.""" +import copy import numpy as np from ... import context from ..._c_expression import signature_rw as sig_rw @@ -142,6 +143,15 @@ class TensorAdd(_MathBinaryOp): [5,7,9] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = x + y + out = np.array(out, x.dtype) + return Tensor(out) + return None + class AssignAdd(PrimitiveWithInfer): """ @@ -255,6 +265,7 @@ class _Reduce(PrimitiveWithInfer): return output def do_infer(self, input_x, axis, valid_dtype=mstype.number_type): + """ return meta infos of input parameters """ axis_v = axis['value'] input_shp = input_x['shape'] args = {'input_x': input_x['dtype']} @@ -263,9 +274,26 @@ class _Reduce(PrimitiveWithInfer): if axis_v is None: raise ValueError(f"For {self.name}, axis must be const.") input_shp = _infer_shape_reduce(input_shp, axis_v, self.keep_dims, self.name) + value = None + if input_x['value'] is not None: + prim_map = { + 'ReduceSum': np.sum, + 'ReduceMax': np.max, + 'ReduceMin': np.min, + } + np_reduce_func = prim_map.get(self.name, None) + + if np_reduce_func is not None: + value = input_x['value'].asnumpy() + if not axis_v: + axis_v = [i for i in range(len(input_x['shape']))] + axis_v = tuple(axis_v) + value = np_reduce_func(value, axis_v, keepdims=self.keep_dims) + value = np.array(value) + value = Tensor(value) return {'shape': input_shp, 'dtype': input_x['dtype'], - 'value': None} + 'value': value} def __infer__(self, input_x, axis): return self.do_infer(input_x, axis) @@ -334,6 +362,12 @@ class ReduceSum(_Reduce): >>> output = op(input_x, 1) """ + @prim_attr_register + def __init__(self, keep_dims=False): + """init ReduceSum""" + super(ReduceSum, self).__init__(keep_dims) + self.__setattr_flag__ = True + class ReduceAll(_Reduce): """ @@ -403,6 +437,12 @@ class ReduceMax(_Reduce): >>> output = 
op(input_x, 1) """ + @prim_attr_register + def __init__(self, keep_dims=False): + """ReduceMax""" + super(ReduceMax, self).__init__(keep_dims) + self.__setattr_flag__ = True + class ReduceMin(_Reduce): """ @@ -743,6 +783,20 @@ class AddN(PrimitiveWithInfer): validator.check_tensor_type_same(args, mstype.number_type + (mstype.bool_,), cls_name) return inputs[0] + def infer_value(self, inputs): + if inputs is None: + return None + + for x in inputs: + if x is None: + return None + + added = copy.deepcopy(inputs[0].asnumpy()) + for x in inputs[1:]: + added += x.asnumpy() + out = np.array(added, inputs[0].asnumpy().dtype) + return Tensor(out) + class Neg(PrimitiveWithInfer): """ @@ -773,6 +827,13 @@ class Neg(PrimitiveWithInfer): validator.check_tensor_type_same({"input_x": input_x}, mstype.number_type, self.name) return input_x + def infer_value(self, input_x): + if input_x is not None: + input_x = input_x.asnumpy() + return Tensor(-input_x) + + return None + class InplaceAdd(PrimitiveWithInfer): """ @@ -920,6 +981,15 @@ class Sub(_MathBinaryOp): [-3, -3, -3] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = x - y + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Mul(_MathBinaryOp): """ @@ -978,6 +1048,7 @@ class Square(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init Square""" + self.init_prim_io_names(inputs=['input_x'], outputs=['output']) def infer_shape(self, x_shape): return x_shape @@ -986,6 +1057,14 @@ class Square(PrimitiveWithInfer): validator.check_tensor_type_same({"x": x_type}, mstype.number_type, self.name) return x_type + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = x * x + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Rsqrt(PrimitiveWithInfer): """ @@ -1007,6 +1086,7 @@ class Rsqrt(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init Rsqrt""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, x_shape): return x_shape @@ -1015,6 +1095,14 @@ class Rsqrt(PrimitiveWithInfer): validator.check_tensor_type_same({"x": x_type}, mstype.number_type, self.name) return x_type + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = 1.0 / np.sqrt(x) + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Sqrt(PrimitiveWithInfer): """ @@ -1036,6 +1124,7 @@ class Sqrt(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init Sqrt""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, x_shape): return x_shape @@ -1044,6 +1133,14 @@ class Sqrt(PrimitiveWithInfer): validator.check_tensor_type_same({"x": x_type}, mstype.number_type, self.name) return x_type + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = np.sqrt(x) + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Reciprocal(PrimitiveWithInfer): """ @@ -1074,6 +1171,14 @@ class Reciprocal(PrimitiveWithInfer): validator.check_subclass("x", x, mstype.tensor, self.name) return x + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = 1.0 / x + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Pow(_MathBinaryOp): """ @@ -1109,6 +1214,15 @@ class Pow(_MathBinaryOp): [1.0, 16.0, 64.0] """ + def infer_value(self, x, power): + if x is not None and power is not None: + x = x.asnumpy() + power = power.asnumpy() + out = np.power(x, power) + out = np.array(out, x.dtype) + return 
Tensor(out) + return None + class Exp(PrimitiveWithInfer): """ @@ -1139,6 +1253,14 @@ class Exp(PrimitiveWithInfer): validator.check_subclass("x", x_type, mstype.tensor, self.name) return x_type + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = np.exp(x) + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Expm1(PrimitiveWithInfer): """ @@ -1242,6 +1364,14 @@ class Log(PrimitiveWithInfer): validator.check_subclass("x", x, mstype.tensor, self.name) return x + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = np.log(x) + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Log1p(PrimitiveWithInfer): """ @@ -1360,6 +1490,15 @@ class Minimum(_MathBinaryOp): [1.0, 2.0, 3.0] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.minimum(x, y) + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Maximum(_MathBinaryOp): """ @@ -1389,6 +1528,14 @@ class Maximum(_MathBinaryOp): [4.0, 5.0, 6.0] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.maximum(x, y) + out = np.array(out, x.dtype) + return Tensor(out) + return None class RealDiv(_MathBinaryOp): """ @@ -1923,6 +2070,13 @@ class Greater(_LogicBinaryOp): >>> greater(input_x, input_y) [False, True, False] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.array(np.greater(x, y)) + return Tensor(out) + return None class GreaterEqual(_LogicBinaryOp): @@ -1951,6 +2105,13 @@ class GreaterEqual(_LogicBinaryOp): >>> greater_equal(input_x, input_y) [True, True, False] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.array(np.greater_equal(x, y)) + return Tensor(out) + return None class Less(_LogicBinaryOp): @@ -1979,6 +2140,13 @@ class Less(_LogicBinaryOp): >>> less(input_x, input_y) [False, False, True] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.array(np.less(x, y)) + return Tensor(out) + return None class LessEqual(_LogicBinaryOp): @@ -2007,6 +2175,13 @@ class LessEqual(_LogicBinaryOp): >>> less_equal(input_x, input_y) [True, False, True] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.array(np.less_equal(x, y)) + return Tensor(out) + return None class LogicalNot(PrimitiveWithInfer): @@ -2517,6 +2692,7 @@ class Abs(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init Abs""" + self.init_prim_io_names(inputs=['input_x'], outputs=['output']) def infer_shape(self, x_shape): return x_shape @@ -2587,7 +2763,8 @@ class Round(PrimitiveWithInfer): @prim_attr_register def __init__(self): - pass + """init Round""" + self.init_prim_io_names(inputs=['input_x'], outputs=['output']) def infer_shape(self, x_shape): return x_shape @@ -2679,7 +2856,6 @@ class Atan2(_MathBinaryOp): [[0. 
0.7853982]] """ - class SquareSumAll(PrimitiveWithInfer): """ Returns square sum all of a tensor element-wise @@ -2705,6 +2881,7 @@ class SquareSumAll(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init SquareSumAll""" + def infer_shape(self, x_shape, y_shape): validator.check("x1_shape", x_shape, "x2_shape", y_shape, Rel.EQ, self.name) return [], [] @@ -2891,3 +3068,41 @@ class Invert(PrimitiveWithInfer): def infer_dtype(self, x_dtype): validator.check_tensor_type_same({'x_dtype': x_dtype}, [mstype.int16, mstype.uint16], self.name) return x_dtype + + +class Eps(PrimitiveWithInfer): + """ + Creates a tensor filled with `input_x` dtype minimum val. + + Inputs: + - **input_x** (Tensor) - Input tensor. + + Outputs: + Tensor, has the same type and shape as `input_x`, but filled with `input_x` dtype minimum val. + + Examples: + >>> out = P.Eps()(input_x) + """ + + @prim_attr_register + def __init__(self): + """init Eps""" + self.init_prim_io_names(inputs=['input_x'], outputs=['y']) + + def __infer__(self, input_x): + valid_types = [mstype.float16, mstype.float32] + validator.check_tensor_type_same({'input_x': input_x['dtype']}, valid_types, self.name) + + x_nptype = mstype.dtype_to_nptype(input_x['dtype'].element_type()) + if x_nptype == np.float16: + min_val = 2 ** (-14) + else: + min_val = 2 ** (-16) + + res = np.full(input_x['shape'], min_val, x_nptype) + out = { + 'value': Tensor(res), + 'shape': input_x['shape'], + 'dtype': input_x['dtype'], + } + return out diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index a5c1684fce7..a1659e139da 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -830,9 +830,9 @@ class Conv2D(PrimitiveWithInfer): pad_top, pad_bottom, pad_left, pad_right = self.pad, self.pad, self.pad, self.pad h_out = 1 + (x_shape[2] + 2 * self.pad - kernel_size_h - (kernel_size_h - 1) * (dilation_h - 1)) \ - / stride_h + / stride_h w_out = 1 + (x_shape[3] + 2 * self.pad - kernel_size_w - (kernel_size_w - 1) * (dilation_w - 1)) \ - / stride_w + / stride_w h_out = math.floor(h_out) w_out = math.floor(w_out) @@ -953,9 +953,9 @@ class DepthwiseConv2dNative(PrimitiveWithInfer): pad_top, pad_bottom, pad_left, pad_right = self.pad, self.pad, self.pad, self.pad h_out = 1 + (x_shape[2] + 2 * self.pad - kernel_size_h - (kernel_size_h - 1) * (dilation_h - 1)) \ - / stride_h + / stride_h w_out = 1 + (x_shape[3] + 2 * self.pad - kernel_size_w - (kernel_size_w - 1) * (dilation_w - 1)) \ - / stride_w + / stride_w h_out = math.floor(h_out) w_out = math.floor(w_out) diff --git a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py index 1f22c4caacf..74c6080ab41 100644 --- a/mindspore/ops/operations/other_ops.py +++ b/mindspore/ops/operations/other_ops.py @@ -53,7 +53,7 @@ class Assign(PrimitiveWithInfer): ) @prim_attr_register def __init__(self): - pass + self.init_prim_io_names(inputs=['ref', 'value'], outputs=['output']) def infer_shape(self, variable, value): return variable diff --git a/model_zoo/bert/src/bert_for_pre_training.py b/model_zoo/bert/src/bert_for_pre_training.py index 976f1a3c43f..5e014f02ba5 100644 --- a/model_zoo/bert/src/bert_for_pre_training.py +++ b/model_zoo/bert/src/bert_for_pre_training.py @@ -27,6 +27,7 @@ from mindspore.nn.wrap.grad_reducer import DistributedGradReducer from mindspore.train.parallel_utils import ParallelMode from mindspore.communication.management import get_group_size from mindspore import context +from mindspore.ops import _selected_ops 
from .bert_model import BertModel GRADIENT_CLIP_TYPE = 1 @@ -130,7 +131,7 @@ class GetNextSentenceOutput(nn.Cell): """ def __init__(self, config): super(GetNextSentenceOutput, self).__init__() - self.log_softmax = P.LogSoftmax() + self.log_softmax = _selected_ops.LogSoftmax() self.weight_init = TruncatedNormal(config.initializer_range) self.dense = nn.Dense(config.hidden_size, 2, weight_init=self.weight_init, has_bias=True).to_float(config.compute_type) diff --git a/tests/st/ops/ascend/test_fused_batchnorm.py b/tests/st/ops/ascend/test_fused_batchnorm.py deleted file mode 100644 index 59e2df67deb..00000000000 --- a/tests/st/ops/ascend/test_fused_batchnorm.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import numpy as np - -import mindspore.context as context -import mindspore.nn as nn -from mindspore import Tensor -from mindspore.common.initializer import initializer -from mindspore.common.parameter import Parameter -from mindspore.ops import operations as P - -context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - - -class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.bn = P.FusedBatchNorm() - self.scale = Parameter(initializer('ones', [64]), name='scale') - self.b = Parameter(initializer('zeros', [64]), name='b') - self.mean = Parameter(initializer('ones', [64]), name='mean') - self.variance = Parameter(initializer('zeros', [64]), name='variance') - - def construct(self, x): - return self.bn(x, self.scale, self.b, self.mean, self.variance)[0] - - -def test_net(): - x = np.random.randn(1, 64, 112, 112).astype(np.float32) - # mean = np.random.randn(1,16,1,1).astype(np.float32) - # variance = np.random.randn(1,16,1,1).astype(np.float32) - fusedBn = Net() - output = fusedBn(Tensor(x)) - print("***********x*********") - print(x) - - print("***********output y*********") - print(output.asnumpy()) diff --git a/tests/st/tbe_networks/resnet_cifar.py b/tests/st/tbe_networks/resnet_cifar.py index 6b3b75a63c7..cf9eb594009 100644 --- a/tests/st/tbe_networks/resnet_cifar.py +++ b/tests/st/tbe_networks/resnet_cifar.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -19,20 +19,20 @@ import numpy as np from resnet import resnet50 import mindspore.common.dtype as mstype +import mindspore.ops.functional as F +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor +from mindspore.train.serialization import load_checkpoint, load_param_into_net import mindspore.dataset as ds import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.nn as nn -import mindspore.ops.functional as F from mindspore import Tensor from mindspore import context from mindspore.communication.management import init from mindspore.nn.optim.momentum import Momentum from mindspore.ops import operations as P from mindspore.parallel._auto_parallel_context import auto_parallel_context -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor from mindspore.train.model import Model, ParallelMode -from mindspore.train.serialization import load_checkpoint, load_param_into_net random.seed(1) np.random.seed(1) @@ -62,12 +62,12 @@ def create_dataset(repeat_num=1, training=True): data_dir = data_home + "/cifar-10-batches-bin" if not training: data_dir = data_home + "/cifar-10-verify-bin" - data_set = ds.Cifar10Dataset(data_dir) + data_set = ds.Cifar10Dataset(data_dir, num_samples=32) if args_opt.run_distribute: rank_id = int(os.getenv('RANK_ID')) rank_size = int(os.getenv('RANK_SIZE')) - data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id) + data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id, num_samples=32) resize_height = 224 resize_width = 224 @@ -140,8 +140,9 @@ if __name__ == '__main__': batch_num = dataset.get_dataset_size() config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 5, keep_checkpoint_max=10) ckpoint_cb = ModelCheckpoint(prefix="train_resnet_cifar10", directory="./", config=config_ck) + time_cb = TimeMonitor(data_size=batch_num) loss_cb = LossMonitor() - model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb]) + model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb, time_cb]) if args_opt.do_eval: if args_opt.checkpoint_path: diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index 840a66ad20c..13f961fa246 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -91,6 +91,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "../../../mindspore/ccsrc/device/kernel_info.cc" "../../../mindspore/ccsrc/device/ascend/profiling/*.cc" "../../../mindspore/ccsrc/device/ascend/kernel_select_ascend.cc" + "../../../mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc" "../../../mindspore/ccsrc/device/convert_tensor_utils.cc" "../../../mindspore/ccsrc/device/ascend/kernel_build_ascend.cc" "../../../mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc" diff --git a/tests/ut/cpp/optimizer/lib_test.cc b/tests/ut/cpp/optimizer/lib_test.cc index 037bcd75d16..ed4497f9a56 100644 --- a/tests/ut/cpp/optimizer/lib_test.cc +++ b/tests/ut/cpp/optimizer/lib_test.cc @@ -583,6 +583,5 @@ TEST_F(TestOptLib, test_adjust_allreduce_mul_add) { ASSERT_TRUE(CheckOpt(before2l, after2, patterns)); ASSERT_TRUE(CheckOpt(before2r, after2, patterns)); } - } // namespace opt } // namespace mindspore diff --git a/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc b/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc index 2da100af930..317eace6c6e 100644 --- 
a/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc @@ -60,7 +60,7 @@ TEST_F(TestHWInsertCast, test_insert_cast_op_for_single_output) { builder.SetOutputsDeviceType({kFloat16->type_id()}); builder.SetFusionType(kernel::FusionType::ELEMWISE); builder.SetProcessor(kernel::Processor::AICORE); - builder.SetKernelType(KernelType::AUTO_DIFF_KERNEL); + builder.SetKernelType(KernelType::AKG_KERNEL); kernel::KernelBuildInfo::KernelBuildInfoBuilder builder1; builder1.SetInputsFormat({"NC1HWC0"}); builder1.SetInputsDeviceType({kFloat32->type_id()}); @@ -68,7 +68,7 @@ TEST_F(TestHWInsertCast, test_insert_cast_op_for_single_output) { builder1.SetOutputsDeviceType({kFloat32->type_id()}); builder1.SetFusionType(kernel::FusionType::ELEMWISE); builder1.SetProcessor(kernel::Processor::AICORE); - builder1.SetKernelType(KernelType::AUTO_DIFF_KERNEL); + builder1.SetKernelType(KernelType::AKG_KERNEL); auto node_list = TopoSort(func_graph->get_return()); for (auto& node : node_list) { if (node == nullptr) { @@ -122,7 +122,7 @@ TEST_F(TestHWInsertCast, test_insert_cast_op_for_multiple_output) { builder1.SetOutputsDeviceType({kFloat32->type_id()}); builder1.SetFusionType(kernel::FusionType::ELEMWISE); builder1.SetProcessor(kernel::Processor::AICORE); - builder1.SetKernelType(KernelType::AUTO_DIFF_KERNEL); + builder1.SetKernelType(KernelType::AKG_KERNEL); auto node_list = TopoSort(func_graph->get_return()); for (auto& node : node_list) { if (node == nullptr) { diff --git a/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc b/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc index 077a9f07233..69a330614e9 100644 --- a/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc @@ -56,7 +56,7 @@ TEST_F(TestHWAllReduceFusion, test_fusion_all) { builder.SetOutputsDeviceType({kFloat32->type_id()}); builder.SetFusionType(kernel::FusionType::ELEMWISE); builder.SetProcessor(kernel::Processor::AICORE); - builder.SetKernelType(KernelType::AUTO_DIFF_KERNEL); + builder.SetKernelType(KernelType::AKG_KERNEL); auto node_list = TopoSort(func_graph->get_return()); for (auto &node : node_list) { if (node == nullptr) { @@ -97,7 +97,7 @@ TEST_F(TestHWAllReduceFusion, test_fusion_group) { builder.SetOutputsDeviceType({kFloat32->type_id()}); builder.SetFusionType(kernel::FusionType::ELEMWISE); builder.SetProcessor(kernel::Processor::AICORE); - builder.SetKernelType(KernelType::AUTO_DIFF_KERNEL); + builder.SetKernelType(KernelType::AKG_KERNEL); auto node_list = TopoSort(func_graph->get_return()); for (auto &node : node_list) { if (node == nullptr) { @@ -138,7 +138,7 @@ TEST_F(TestHWAllReduceFusion, test_fusion_op) { builder.SetOutputsDeviceType({kFloat32->type_id()}); builder.SetFusionType(kernel::FusionType::ELEMWISE); builder.SetProcessor(kernel::Processor::AICORE); - builder.SetKernelType(KernelType::AUTO_DIFF_KERNEL); + builder.SetKernelType(KernelType::AKG_KERNEL); auto node_list = TopoSort(func_graph->get_return()); int count = 0; for (auto &node : node_list) { @@ -195,7 +195,7 @@ TEST_F(TestHWAllReduceFusion, test_fusion_sorted) { builder.SetOutputsDeviceType({kFloat32->type_id()}); builder.SetFusionType(kernel::FusionType::ELEMWISE); builder.SetProcessor(kernel::Processor::AICORE); - builder.SetKernelType(KernelType::AUTO_DIFF_KERNEL); + builder.SetKernelType(KernelType::AKG_KERNEL); auto node_list = TopoSort(func_graph->get_return()); for (auto &node : node_list) { if (node == 
nullptr) { diff --git a/tests/ut/cpp/session/anf_runtime_algorithm_test.cc b/tests/ut/cpp/session/anf_runtime_algorithm_test.cc index 9ff81230045..2ea2453381c 100644 --- a/tests/ut/cpp/session/anf_runtime_algorithm_test.cc +++ b/tests/ut/cpp/session/anf_runtime_algorithm_test.cc @@ -645,9 +645,9 @@ TEST_F(AnfRuntimeAlgorithmTest, GetKernelType) { auto d_kernel_info = add->kernel_info(); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; - builder.SetKernelType(AUTO_DIFF_KERNEL); + builder.SetKernelType(AKG_KERNEL); d_kernel_info->set_select_kernel_build_info(builder.Build()); - EXPECT_EQ(AnfAlgo::GetKernelType(add), AUTO_DIFF_KERNEL); + EXPECT_EQ(AnfAlgo::GetKernelType(add), AKG_KERNEL); EXPECT_THROW(AnfAlgo::GetKernelType(nullptr), std::runtime_error); }
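
For reference, a minimal usage sketch of the op selector added in mindspore/ops/op_selector.py and mindspore/ops/_selected_ops.py, assuming a build where the graph kernel switch can be enabled (for this patch, an Ascend build with ENABLE_AKG=on) and that the GraphKernel counterparts exist in mindspore.nn.graph_kernels; only LogSoftmax, which the patch itself switches in bert_for_pre_training.py, is shown:

# Usage sketch, not part of the patch; behavior other than what the diff above
# defines (available context keys, graph kernel op coverage) is assumed.
from mindspore import context
from mindspore.ops import _selected_ops

# With the graph kernel switch off, the selector always resolves to the
# Primitive implementation, i.e. mindspore.ops.operations.LogSoftmax.
context.set_context(enable_graph_kernel=False)
log_softmax = _selected_ops.LogSoftmax()

# With the switch on, a call site can force either implementation through the
# reserved `op_type` keyword; without it, the type configured on the decorator
# (or the Primitive default) is used.
context.set_context(enable_graph_kernel=True)
gk_log_softmax = _selected_ops.LogSoftmax(op_type="GraphKernel")
prim_log_softmax = _selected_ops.LogSoftmax(op_type="Primitive")

Because the selector resolves the package with importlib only when the op is constructed, model code such as GetNextSentenceOutput above can stay unchanged while the backing operator is switched per context.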