commit
11f786c9bd
|
@ -60,11 +60,6 @@ if(ENABLE_GPU)
|
|||
add_compile_definitions(ENABLE_GPU)
|
||||
endif ()
|
||||
|
||||
## make flatuffer files
|
||||
include_directories("${CMAKE_BINARY_DIR}/predict/schema/inner")
|
||||
file(GLOB_RECURSE FLATBUFFER_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/schema/*.fbs")
|
||||
set(FLATBUFFER_OU "${CMAKE_BINARY_DIR}/predict/schema/inner")
|
||||
ms_build_flatbuffers("${FLATBUFFER_IN}" "${FLATBUFFER_IN}" flat_input "${FLATBUFFER_OU}")
|
||||
|
||||
## make protobuf files
|
||||
file(COPY "${ms_onnx_INC}/onnx/onnx.proto" DESTINATION ${CMAKE_BINARY_DIR}/proto)
|
||||
|
@ -104,13 +99,9 @@ endif ()
|
|||
|
||||
if (ENABLE_D)
|
||||
include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu")
|
||||
include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir")
|
||||
file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto")
|
||||
ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
|
||||
|
||||
file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
|
||||
ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
|
||||
|
||||
file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto")
|
||||
ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP})
|
||||
|
||||
|
@ -139,7 +130,7 @@ set(SUB_COMP
|
|||
frontend/operator
|
||||
pipeline/jit
|
||||
pipeline/pynative
|
||||
common debug gvar predict pybind_api utils vm
|
||||
common debug gvar pybind_api utils vm
|
||||
)
|
||||
|
||||
foreach (_comp ${SUB_COMP})
|
||||
|
@ -147,7 +138,7 @@ foreach (_comp ${SUB_COMP})
|
|||
string(REPLACE "/" "_" sub ${_comp})
|
||||
if (TARGET _mindspore_${sub}_obj)
|
||||
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>)
|
||||
add_dependencies(_mindspore_${sub}_obj proto_input flat_input)
|
||||
add_dependencies(_mindspore_${sub}_obj proto_input )
|
||||
endif ()
|
||||
endforeach ()
|
||||
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/base base)
|
||||
|
@ -158,7 +149,7 @@ add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/utils util)
|
|||
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_core_utils_obj>)
|
||||
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir)
|
||||
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_ir_obj>)
|
||||
add_dependencies(_mindspore_core_utils_obj _mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input flat_input)
|
||||
add_dependencies(_mindspore_core_utils_obj _mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input )
|
||||
|
||||
set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
|
||||
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
|
||||
|
|
|
@ -34,7 +34,6 @@
|
|||
#include "runtime/device/kernel_adjust.h"
|
||||
#include "runtime/device/ascend/ascend_stream_assign.h"
|
||||
#include "runtime/device/ascend/ascend_label_assign.h"
|
||||
#include "predict/predict.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "ir/scalar.h"
|
||||
#include "debug/anf_ir_dump.h"
|
||||
|
@ -303,8 +302,6 @@ void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) {
|
|||
save_graphs_path + "/" + "select_kernel_after" + "_graph_" + std::to_string(child_graph->graph_id()) + ".ir";
|
||||
DumpIR(file_path, child_graph);
|
||||
}
|
||||
// convert kernel Graph to model
|
||||
predictmodel::StepConvertGraph(child_graph);
|
||||
// optimize graph
|
||||
HardwareOptimize(child_graph);
|
||||
// assign static memory of parameters
|
||||
|
@ -333,8 +330,6 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::
|
|||
InitPSParamAndOptim(kernel_graph, inputs);
|
||||
}
|
||||
#endif
|
||||
// convert inputs to model
|
||||
predictmodel::StepConvertWeight(inputs);
|
||||
{
|
||||
py::gil_scoped_release release;
|
||||
// run task on device
|
||||
|
@ -1036,8 +1031,6 @@ void AscendSession::HardwareOptimize(NotNull<KernelGraphPtr> graph,
|
|||
memo->insert(graph.get());
|
||||
|
||||
MS_LOG(INFO) << "Start to do HardwareOptimize in graph: " << graph->graph_id();
|
||||
// convert kernel Graph to model
|
||||
predictmodel::StepConvertGraph(graph.get());
|
||||
|
||||
HardwareOptimize(graph.get());
|
||||
for (auto &child_graph : graph->child_graph_order()) {
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
#include "common/utils.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "predict/predict.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
#include "runtime/device/cpu/kernel_select_cpu.h"
|
||||
#include "backend/optimizer/common/optimizer.h"
|
||||
|
@ -79,7 +78,6 @@ GraphId CPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList
|
|||
Optimize(graph);
|
||||
}
|
||||
#endif
|
||||
predictmodel::StepConvertGraph(graph);
|
||||
MS_LOG(INFO) << "Build kernel";
|
||||
BuildKernel(graph.get());
|
||||
MS_LOG(INFO) << "Assign kernel address";
|
||||
|
@ -100,7 +98,6 @@ void CPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
|
|||
std::vector<tensor::TensorPtr> need_sync_outputs;
|
||||
runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs, &need_sync_outputs);
|
||||
MS_LOG(INFO) << "Run graph start";
|
||||
predictmodel::StepConvertWeight(inputs);
|
||||
auto execution_order = kernel_graph->execution_order();
|
||||
Reorder(&execution_order);
|
||||
|
||||
|
|
|
@ -31,7 +31,6 @@
|
|||
#include "backend/optimizer/gpu/replace_momentum_cast_fusion.h"
|
||||
#include "backend/optimizer/gpu/replace_addn_fusion.h"
|
||||
#include "runtime/device/kernel_runtime_manager.h"
|
||||
#include "predict/predict.h"
|
||||
#include "common/utils.h"
|
||||
#include "common/trans.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
|
@ -190,8 +189,6 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList
|
|||
// Assign parameter keys.
|
||||
AssignParamKey(graph);
|
||||
#endif
|
||||
// Convert kernel Graph to model
|
||||
predictmodel::StepConvertGraph(graph);
|
||||
// Start gpu kernel runtime
|
||||
StartKernelRT();
|
||||
// Dump .pb graph before hardware optimization
|
||||
|
@ -245,8 +242,6 @@ void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
|
|||
}
|
||||
#endif
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
// Convert inputs to model
|
||||
predictmodel::StepConvertWeight(inputs);
|
||||
{
|
||||
py::gil_scoped_release gil_release;
|
||||
// Run graph on GPU
|
||||
|
|
|
@ -123,10 +123,6 @@ PYBIND11_MODULE(_c_expression, m) {
|
|||
"Set whether to enable reduce precision.")
|
||||
.def("get_save_graphs_path", &mindspore::MsContext::save_graphs_path, "Get save graphs path.")
|
||||
.def("set_save_graphs_path", &mindspore::MsContext::set_save_graphs_path, "Set save graphs path.")
|
||||
.def("get_save_ms_model_flag", &mindspore::MsContext::save_ms_model_flag, "Get whether to save ms model.")
|
||||
.def("set_save_ms_model_flag", &mindspore::MsContext::set_save_ms_model_flag, "Set whether to save ms model.")
|
||||
.def("get_save_ms_model_path", &mindspore::MsContext::save_ms_model_path, "Get path to save ms model.")
|
||||
.def("set_save_ms_model_path", &mindspore::MsContext::set_save_ms_model_path, "Set path to save ms model")
|
||||
.def("get_enable_dump", &mindspore::MsContext::enable_dump, "Get whether to enable dump.")
|
||||
.def("set_enable_dump", &mindspore::MsContext::set_enable_dump, "Set whether to enable dump.")
|
||||
.def("get_save_dump_path", &mindspore::MsContext::save_dump_path, "Get path to dump.")
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
file(GLOB_RECURSE _PREDICT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"predict.cc"
|
||||
"generator/utils/ir_model_util.cc"
|
||||
"converter/*.cc"
|
||||
"converter/attr_utils/*.cc"
|
||||
"converter/lite_model/*.cc"
|
||||
"converter/lite_model/operations/*.cc"
|
||||
)
|
||||
|
||||
if (ENABLE_D)
|
||||
file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "generator/ir/*.cc")
|
||||
list(APPEND _PREDICT_SRC_LIST ${_D_SRC_LIST})
|
||||
endif ()
|
||||
add_library(_mindspore_predict_obj OBJECT ${_PREDICT_SRC_LIST})
|
|
@ -1,229 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/attr_utils/convert_util.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace utils {
|
||||
TypePtr GetTypePtr(const AnfNodePtr &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
TypePtr type_ptr = anf_node->Type();
|
||||
MS_EXCEPTION_IF_NULL(type_ptr);
|
||||
if (type_ptr->isa<TensorType>()) {
|
||||
auto tensor_ptr = type_ptr->cast<TensorTypePtr>();
|
||||
MS_EXCEPTION_IF_NULL(tensor_ptr);
|
||||
TypePtr elem = tensor_ptr->element();
|
||||
return elem;
|
||||
} else if (type_ptr->isa<Tuple>()) {
|
||||
auto tuple_ptr = type_ptr->cast<TuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(tuple_ptr);
|
||||
auto tuple_i = (*tuple_ptr)[0];
|
||||
MS_EXCEPTION_IF_NULL(tuple_i);
|
||||
if (tuple_i->isa<TensorType>()) {
|
||||
auto tensor_ptr = tuple_i->cast<TensorTypePtr>();
|
||||
MS_EXCEPTION_IF_NULL(tensor_ptr);
|
||||
TypePtr elem = tensor_ptr->element();
|
||||
MS_EXCEPTION_IF_NULL(elem);
|
||||
return elem;
|
||||
} else if (tuple_i->isa<Number>()) {
|
||||
return type_ptr;
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "unsupported type: " << type_ptr->ToString();
|
||||
}
|
||||
} else if (type_ptr->isa<Number>()) {
|
||||
return type_ptr;
|
||||
}
|
||||
std::string type_name = type_ptr->ToString();
|
||||
MS_LOG(EXCEPTION)
|
||||
<< "The output type of node should be a tensor type a number or a tuple of tensor type, but this is: "
|
||||
<< type_name;
|
||||
}
|
||||
|
||||
MsDataType GetMSDataType(TypeId ori_data_type) {
|
||||
MsDataType dst_data_type;
|
||||
switch (ori_data_type) {
|
||||
case kNumberTypeFloat16:
|
||||
dst_data_type = mindspore::predict::DataType_DT_FLOAT16;
|
||||
return dst_data_type;
|
||||
case kNumberTypeFloat32:
|
||||
dst_data_type = mindspore::predict::DataType_DT_FLOAT;
|
||||
return dst_data_type;
|
||||
case kNumberTypeInt8:
|
||||
dst_data_type = mindspore::predict::DataType_DT_INT8;
|
||||
return dst_data_type;
|
||||
case kNumberTypeInt32:
|
||||
dst_data_type = mindspore::predict::DataType_DT_INT32;
|
||||
return dst_data_type;
|
||||
case kNumberTypeUInt8:
|
||||
dst_data_type = mindspore::predict::DataType_DT_UINT8;
|
||||
return dst_data_type;
|
||||
case kNumberTypeUInt32:
|
||||
dst_data_type = mindspore::predict::DataType_DT_UINT32;
|
||||
return dst_data_type;
|
||||
case kTypeUnknown:
|
||||
dst_data_type = mindspore::predict::DataType_DT_UNDEFINED;
|
||||
return dst_data_type;
|
||||
default:
|
||||
MS_LOG(EXCEPTION) << "Ms don't support this DataType";
|
||||
}
|
||||
}
|
||||
|
||||
MsFormat GetMsFormat(const std::string &format_str) {
|
||||
if (format_str == kOpFormat_DEFAULT) {
|
||||
MsFormat ms_format = predict::Format_NCHW;
|
||||
return ms_format;
|
||||
} else {
|
||||
// all middle format default to NCHW
|
||||
return predict::Format_NCHW;
|
||||
}
|
||||
}
|
||||
|
||||
TensorPtr GetParaAscendTensor(const AnfNodePtr &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
if (!anf_node->isa<Parameter>()) {
|
||||
return nullptr;
|
||||
}
|
||||
auto device_type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, 0);
|
||||
// device type_ptr
|
||||
auto device_type_ptr = GetTypePtr(anf_node);
|
||||
// device shape
|
||||
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, 0);
|
||||
std::vector<int> tensor_shape;
|
||||
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(tensor_shape), SizeToInt);
|
||||
// device format
|
||||
auto format = AnfAlgo::GetOutputFormat(anf_node, 0);
|
||||
// device tensor
|
||||
TensorPtr device_tensor = std::make_shared<tensor::Tensor>(device_type_id, tensor_shape);
|
||||
// device info
|
||||
device_tensor->SetDeviceInfo(format, device_type_ptr);
|
||||
return device_tensor;
|
||||
}
|
||||
|
||||
TensorPtr GetParaCpuTensor(const AnfNodePtr &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
if (!(anf_node->isa<Parameter>())) {
|
||||
return nullptr;
|
||||
} else {
|
||||
auto ori_type_id = AnfAlgo::GetOutputInferDataType(anf_node, 0);
|
||||
auto ori_type_ptr = GetTypePtr(anf_node);
|
||||
auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, 0);
|
||||
std::vector<int> tensor_shape;
|
||||
(void)std::transform(ori_shape.begin(), ori_shape.end(), std::back_inserter(tensor_shape), SizeToInt);
|
||||
auto ori_format = AnfAlgo::GetOutputFormat(anf_node, 0);
|
||||
TensorPtr cpu_tensor = std::make_shared<tensor::Tensor>(ori_type_id, tensor_shape);
|
||||
cpu_tensor->SetDeviceInfo(ori_format, ori_type_ptr);
|
||||
return cpu_tensor;
|
||||
}
|
||||
}
|
||||
|
||||
TensorPtr GetValueTensor(const ValueNodePtr &const_node) {
|
||||
MS_EXCEPTION_IF_NULL(const_node);
|
||||
auto value_ptr = const_node->value();
|
||||
MS_EXCEPTION_IF_NULL(value_ptr);
|
||||
if (!value_ptr->isa<tensor::Tensor>()) {
|
||||
return nullptr;
|
||||
}
|
||||
TensorPtr tensor = value_ptr->cast<TensorPtr>();
|
||||
MS_EXCEPTION_IF_NULL(tensor);
|
||||
auto data_type = tensor->Dtype();
|
||||
MS_EXCEPTION_IF_NULL(data_type);
|
||||
auto type_id = data_type->type_id();
|
||||
auto shape = tensor->shape();
|
||||
TensorPtr tensor_constant = std::make_shared<tensor::Tensor>(type_id, shape);
|
||||
tensor_constant->SetDeviceInfo(tensor->device_info().format_, tensor->device_info().data_type_);
|
||||
return tensor_constant;
|
||||
}
|
||||
|
||||
TensorPtr GetKernelCpuTensor(const CNodePtr &c_node_ptr, size_t inx) {
|
||||
if (c_node_ptr == nullptr || inx >= AnfAlgo::GetOutputTensorNum(c_node_ptr)) {
|
||||
MS_LOG(ERROR) << "GetKernelCpuTensor failed";
|
||||
return nullptr;
|
||||
}
|
||||
auto ori_shape = AnfAlgo::GetOutputInferShape(c_node_ptr, inx);
|
||||
auto ori_type_id = AnfAlgo::GetOutputInferDataType(c_node_ptr, inx);
|
||||
std::vector<int> tensor_shape;
|
||||
(void)std::transform(ori_shape.begin(), ori_shape.end(), std::back_inserter(tensor_shape), SizeToInt);
|
||||
auto ori_output_type = GetTypePtr(c_node_ptr);
|
||||
TensorPtr device_tensor = std::make_shared<tensor::Tensor>(ori_type_id, tensor_shape);
|
||||
auto format = AnfAlgo::GetOutputFormat(c_node_ptr, inx);
|
||||
device_tensor->SetDeviceInfo(format, ori_output_type);
|
||||
return device_tensor;
|
||||
}
|
||||
|
||||
TensorPtr GetKernelAscendTensor(const CNodePtr &c_node_ptr, size_t inx) {
|
||||
if (c_node_ptr == nullptr || inx >= AnfAlgo::GetOutputTensorNum(c_node_ptr)) {
|
||||
MS_LOG(ERROR) << "GetKernelAscendTensor failed";
|
||||
return nullptr;
|
||||
}
|
||||
auto shape = AnfAlgo::GetOutputDeviceShape(c_node_ptr, inx);
|
||||
std::vector<int> tensor_shape;
|
||||
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(tensor_shape), SizeToInt);
|
||||
auto format = AnfAlgo::GetOutputFormat(c_node_ptr, inx);
|
||||
auto type_id = AnfAlgo::GetOutputDeviceDataType(c_node_ptr, inx);
|
||||
auto output_type_ptr = GetTypePtr(c_node_ptr);
|
||||
TensorPtr device_tensor = std::make_shared<tensor::Tensor>(type_id, tensor_shape);
|
||||
device_tensor->SetDeviceInfo(format, output_type_ptr);
|
||||
return device_tensor;
|
||||
}
|
||||
|
||||
TensorPtr GetOutputTensor(const AnfNodePtr &out_node, size_t inx) {
|
||||
MS_EXCEPTION_IF_NULL(out_node);
|
||||
auto shape = AnfAlgo::GetOutputInferShape(out_node, inx);
|
||||
std::vector<int> tensor_shape;
|
||||
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(tensor_shape), SizeToInt);
|
||||
auto type_id = AnfAlgo::GetOutputInferDataType(out_node, inx);
|
||||
auto output_type_ptr = GetTypePtr(out_node);
|
||||
auto format = AnfAlgo::GetOutputFormat(out_node, inx);
|
||||
TensorPtr output_tensor = std::make_shared<tensor::Tensor>(type_id, tensor_shape);
|
||||
output_tensor->SetDeviceInfo(format, output_type_ptr);
|
||||
return output_tensor;
|
||||
}
|
||||
|
||||
bool FindNodeInMap(const std::unordered_map<MsKernelKey, int> &node_map, const AnfNodePtr &node) {
|
||||
return std::any_of(node_map.begin(), node_map.end(),
|
||||
[node](const std::pair<MsKernelKey, int> &kernel_key) { return kernel_key.first == node.get(); });
|
||||
}
|
||||
|
||||
bool SaveDeviceModelUtil(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name,
|
||||
SubGraphDefT *sub_graph) {
|
||||
MS_EXCEPTION_IF_NULL(new_ms_graph_ptr);
|
||||
MS_EXCEPTION_IF_NULL(sub_graph);
|
||||
// save mindspore schema to file
|
||||
new_ms_graph_ptr->name = "default_graph";
|
||||
std::unique_ptr<mindspore::predict::SubGraphDefT> sub_graph_ptr(sub_graph);
|
||||
new_ms_graph_ptr->subgraphs.emplace_back(std::move(sub_graph_ptr));
|
||||
// get flatbuffer builder
|
||||
flatbuffers::FlatBufferBuilder builder(1024);
|
||||
auto offset = mindspore::predict::GraphDef::Pack(builder, new_ms_graph_ptr.get());
|
||||
builder.Finish(offset);
|
||||
auto size = builder.GetSize();
|
||||
if (size == 0) {
|
||||
MS_LOG(ERROR) << "builder has no size";
|
||||
return false;
|
||||
}
|
||||
auto content = builder.GetBufferPointer();
|
||||
std::ofstream output(save_path_name);
|
||||
if (!output.is_open()) {
|
||||
MS_LOG(EXCEPTION) << "mindspore.mindspoire output failed";
|
||||
}
|
||||
(void)output.write((const char *)content, size);
|
||||
output.close();
|
||||
return true;
|
||||
}
|
||||
} // namespace utils
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,60 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_ATTR_UTILS_CONVERT_UTIL_H_
|
||||
#define MINDSPORE_CCSRC_PREDICT_CONVERTER_ATTR_UTILS_CONVERT_UTIL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include "ir/tensor.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "predict/schema/inner/ms_generated.h"
|
||||
|
||||
using TensorPtr = mindspore::tensor::TensorPtr;
|
||||
using TensorPtrList = std::vector<mindspore::tensor::TensorPtr>;
|
||||
using AllOutputTensors = std::unordered_map<int, TensorPtrList>;
|
||||
using OpDefT = mindspore::predict::OpDefT;
|
||||
using GraphDefT = mindspore::predict::GraphDefT;
|
||||
using TensorDefT = mindspore::predict::TensorDefT;
|
||||
using SubGraphDefT = mindspore::predict::SubGraphDefT;
|
||||
using SubGraphPtr = std::unique_ptr<mindspore::predict::SubGraphDefT>;
|
||||
using MsDataType = mindspore::predict::DataType;
|
||||
using MsFormat = mindspore::predict::Format;
|
||||
using MsKernelKey = void *;
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace utils {
|
||||
TypePtr GetTypePtr(const AnfNodePtr &anf_node);
|
||||
MsDataType GetMSDataType(TypeId ori_data_type);
|
||||
MsFormat GetMsFormat(const std::string &format_str);
|
||||
TensorPtr GetParaAscendTensor(const AnfNodePtr &anf_node);
|
||||
TensorPtr GetParaCpuTensor(const AnfNodePtr &anf_node);
|
||||
TensorPtr GetValueTensor(const ValueNodePtr &const_node);
|
||||
TensorPtr GetKernelCpuTensor(const CNodePtr &c_node_ptr, size_t inx);
|
||||
TensorPtr GetKernelAscendTensor(const CNodePtr &c_node_ptr, size_t inx);
|
||||
TensorPtr GetOutputTensor(const AnfNodePtr &out_node, size_t inx);
|
||||
bool FindNodeInMap(const std::unordered_map<MsKernelKey, int> &Nodemap, const AnfNodePtr &node);
|
||||
bool SaveDeviceModelUtil(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name,
|
||||
SubGraphDefT *sub_graph_def_t);
|
||||
} // namespace utils
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_ATTR_UTILS_CONVERT_UTIL_H_
|
|
@ -1,65 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_CPU_ATTR_UTILS_OP_ATTR_TYPE_H_
|
||||
#define MINDSPORE_CCSRC_PREDICT_CONVERTER_CPU_ATTR_UTILS_OP_ATTR_TYPE_H_
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
typedef enum CpuOpType {
|
||||
CPU_OP_PAD = 0,
|
||||
CPU_OP_MAXIMUM,
|
||||
CPU_OP_CONCAT,
|
||||
CPU_OP_SOFTMAX,
|
||||
CPU_OP_ACTIVATION,
|
||||
CPU_OP_CONV2D,
|
||||
CPU_OP_FUSEDBATCHNORM,
|
||||
CPU_OP_CAFFEBATCHNORM,
|
||||
CPU_OP_SQUEEZE,
|
||||
CPU_OP_BIASADD,
|
||||
CPU_OP_POOLING,
|
||||
CPU_OP_DEPTHWISECONV2D,
|
||||
CPU_OP_DEDEPTHWISECONV2D,
|
||||
CPU_OP_RESIZE,
|
||||
CPU_OP_DETECTIONPOSTPROCESS,
|
||||
CPU_OP_FULLCONNECTION,
|
||||
CPU_OP_MEAN,
|
||||
CPU_OP_DECONV2D,
|
||||
CPU_OP_SCALE,
|
||||
CPU_OP_ELTWISE,
|
||||
CPU_OP_ADD,
|
||||
CPU_OP_SLICE,
|
||||
CPU_OP_MUL,
|
||||
CPU_OP_EXP,
|
||||
CPU_OP_RESHAPE,
|
||||
CPU_OP_POWER,
|
||||
CPU_OP_ARGMAX,
|
||||
CPU_OP_ARGMAX_NETOUTPUT,
|
||||
CPU_OP_MATMUL,
|
||||
CPU_OP_CAFFEPRELU,
|
||||
CPU_OP_STRIDEDSLICE,
|
||||
CPU_OP_STACK,
|
||||
CPU_OP_RANGE,
|
||||
CPU_OP_EXPANDDIMS,
|
||||
CPU_OP_TILE,
|
||||
CPU_OP_CAST,
|
||||
CPU_OP_CAFFECROP,
|
||||
CPU_OP_PRESERVEED = 37
|
||||
} CpuOpType_t;
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_CPU_ATTR_UTILS_OP_ATTR_TYPE_H_
|
|
@ -1,49 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/executor_tensor.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace executor {
|
||||
int TensorCache::addExTensor(int tensor_key, const TensorPtr &tensor, int refCount, const std::vector<int> &host_shape,
|
||||
ExTensorType stable, bool inc) {
|
||||
MS_EXCEPTION_IF_NULL(tensor);
|
||||
TensorPtr tmp_tensor = tensor;
|
||||
ExTensorPtr ex_tensor_ptr =
|
||||
std::make_shared<ExTensor>(tensor_key, tmp_tensor, refCount, nodeIndex, host_shape, stable);
|
||||
int pre_index = ex_tensor_ptr->index_;
|
||||
if (inc) {
|
||||
nodeIndex++;
|
||||
}
|
||||
// no need to judge,just add to map directly
|
||||
tensors[tensor_key].push_back(ex_tensor_ptr);
|
||||
return pre_index;
|
||||
}
|
||||
|
||||
std::vector<ExTensorPtr> TensorCache::findTensor(int key) {
|
||||
std::vector<ExTensorPtr> ex_tensors;
|
||||
auto iter = tensors.find(key);
|
||||
if (iter != tensors.end()) {
|
||||
return iter->second;
|
||||
} else {
|
||||
MS_LOG(INFO) << "can not find any tensorlist";
|
||||
return ex_tensors;
|
||||
}
|
||||
}
|
||||
|
||||
void TensorCache::deleteTensor(int key) { (void)tensors.erase(key); }
|
||||
} // namespace executor
|
||||
} // namespace mindspore
|
|
@ -1,70 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_EXECUTOR_TENSOR_H_
|
||||
#define MINDSPORE_CCSRC_PREDICT_CONVERTER_EXECUTOR_TENSOR_H_
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include "ir/tensor.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace executor {
|
||||
using TensorPtr = tensor::TensorPtr;
|
||||
static constexpr int MS_MAX_REFCOUNT = 999;
|
||||
enum ExTensorType { INPUTDATA, WEIGHTS, CONSTANT, KERNEL, OUTPUT };
|
||||
class ExTensor {
|
||||
public:
|
||||
int key_;
|
||||
TensorPtr device_tensor_ptr_;
|
||||
int ref_count_;
|
||||
int index_;
|
||||
std::vector<int> host_shape_;
|
||||
ExTensorType stable_;
|
||||
ExTensor(int key, TensorPtr tensor_ptr, int ref_count, int index, std::vector<int> host_shape,
|
||||
ExTensorType ex_tensor_type)
|
||||
: key_(key),
|
||||
device_tensor_ptr_(std::move(tensor_ptr)),
|
||||
ref_count_(ref_count),
|
||||
index_(index),
|
||||
host_shape_(std::move(host_shape)),
|
||||
stable_(ex_tensor_type) {}
|
||||
~ExTensor() { host_shape_.clear(); }
|
||||
};
|
||||
using ExTensorPtr = std::shared_ptr<ExTensor>;
|
||||
class TensorCache {
|
||||
public:
|
||||
TensorCache() = default;
|
||||
|
||||
~TensorCache() { tensors.clear(); }
|
||||
|
||||
int addExTensor(int tensor_key, const TensorPtr &tensor, int refCount, const std::vector<int> &host_shape,
|
||||
ExTensorType stable, bool inc = true);
|
||||
// just adjust for dynamic tensor
|
||||
std::vector<ExTensorPtr> findTensor(int key);
|
||||
void deleteTensor(int key);
|
||||
const std::unordered_map<int, std::vector<ExTensorPtr>> &GetCachedTensor() const { return tensors; }
|
||||
|
||||
private:
|
||||
std::unordered_map<int, std::vector<ExTensorPtr>> tensors;
|
||||
int nodeIndex = 0;
|
||||
};
|
||||
using TensorCachePtr = std::shared_ptr<TensorCache>;
|
||||
} // namespace executor
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_EXECUTOR_TENSOR_H_
|
|
@ -1,561 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/kernel2ms.h"
|
||||
#include <algorithm>
|
||||
#include "ir/anf.h"
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
#include "mindspore/ccsrc/frontend/operator/ops.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace executor {
|
||||
Kernel2Ms &Kernel2Ms::GetInstance() {
|
||||
static Kernel2Ms instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
bool Kernel2Ms::SetMemResue() const {
|
||||
MS_LOG(INFO) << "MemResue start";
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Kernel2Ms::SetAllTensors(const TensorCachePtr &tensor_cache, SubGraphDefT *ms_graph) {
|
||||
if (tensor_cache == nullptr || ms_graph == nullptr) {
|
||||
return false;
|
||||
}
|
||||
const std::unordered_map<int, std::vector<ExTensorPtr>> &cachedTensors = tensor_cache->GetCachedTensor();
|
||||
size_t total_size = 0;
|
||||
if (cachedTensors.empty()) {
|
||||
return false;
|
||||
}
|
||||
for (auto &iter : cachedTensors) {
|
||||
auto ex_tensors = iter.second;
|
||||
total_size += ex_tensors.size();
|
||||
}
|
||||
ms_graph->allTensors.resize(total_size);
|
||||
for (auto &iter : cachedTensors) {
|
||||
for (auto &ex_tensor : iter.second) {
|
||||
std::unique_ptr<TensorDefT> ms_tensor(new TensorDefT());
|
||||
auto device_tensor_tmp = ex_tensor->device_tensor_ptr_;
|
||||
auto device_d_type = device_tensor_tmp->data_type();
|
||||
ms_tensor->dataType = predict::utils::GetMSDataType(device_d_type);
|
||||
auto device_shape = device_tensor_tmp->shape();
|
||||
ms_tensor->dims.clear();
|
||||
if (device_shape.empty()) {
|
||||
ms_tensor->dims.push_back(1);
|
||||
} else {
|
||||
ms_tensor->dims.assign(device_shape.begin(), device_shape.end());
|
||||
}
|
||||
std::string format_str = device_tensor_tmp->device_info().format_;
|
||||
ms_tensor->format = predict::utils::GetMsFormat(format_str);
|
||||
ms_tensor->offset = 0;
|
||||
auto stable = ex_tensor->stable_;
|
||||
if (stable == INPUTDATA || stable == CONSTANT || stable == WEIGHTS) {
|
||||
ms_tensor->refCount = MS_MAX_REFCOUNT;
|
||||
} else {
|
||||
ms_tensor->refCount = 0;
|
||||
}
|
||||
ms_graph->allTensors[IntToSize(ex_tensor->index_)] = std::move(ms_tensor);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Kernel2Ms::SetGraphOutputIdx(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
|
||||
SubGraphDefT *ms_graph, AllOutputTensors *all_output_tensors) {
|
||||
MS_EXCEPTION_IF_NULL(tensor_cache);
|
||||
MS_EXCEPTION_IF_NULL(ms_graph);
|
||||
MS_EXCEPTION_IF_NULL(all_output_tensors);
|
||||
auto out_nodes = kernel_graph_ptr->outputs();
|
||||
if (out_nodes.empty()) {
|
||||
return false;
|
||||
}
|
||||
// maybe need to judge out_nodes is real && output must be CNode
|
||||
for (size_t i = 0; i < out_nodes.size(); ++i) {
|
||||
std::vector<AnfNodePtr> real_inputs_link;
|
||||
std::vector<size_t> real_output_idx_link;
|
||||
GetRealInpoutsPtr(out_nodes[i], &real_inputs_link, &real_output_idx_link);
|
||||
if (real_inputs_link.empty()) {
|
||||
MS_LOG(INFO) << "this graph output node is vitural node, has no real input";
|
||||
continue;
|
||||
}
|
||||
for (size_t k = 0; k < real_inputs_link.size(); ++k) {
|
||||
int key = node_indexs_[out_nodes[i].get()];
|
||||
auto ex_tensor_list = tensor_cache->findTensor(key);
|
||||
if (ex_tensor_list.empty()) {
|
||||
MS_LOG(INFO) << "SetGraphOutputIdx do not add Extensor ";
|
||||
continue;
|
||||
}
|
||||
auto ex_tensor = ex_tensor_list[real_output_idx_link[k]];
|
||||
ex_tensor_list.clear();
|
||||
ms_graph->outputIndex.push_back(ex_tensor->index_);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Kernel2Ms::SetOpOutputIdx(const CNodePtr &c_node_ptr, const TensorPtr &output_tensor,
|
||||
const TensorCachePtr &tensor_cache, int ref_count, size_t order_index, OpDefT *ms_node) {
|
||||
MS_EXCEPTION_IF_NULL(c_node_ptr);
|
||||
MS_EXCEPTION_IF_NULL(output_tensor);
|
||||
MS_EXCEPTION_IF_NULL(ms_node);
|
||||
MS_EXCEPTION_IF_NULL(tensor_cache);
|
||||
if (!predict::utils::FindNodeInMap(node_indexs_, c_node_ptr)) {
|
||||
MS_LOG(ERROR) << "can not find any pk_key in inited node_indexs map";
|
||||
return false;
|
||||
}
|
||||
int tensor_key = node_indexs_[c_node_ptr.get()];
|
||||
auto host_shape = AnfAlgo::GetOutputInferShape(c_node_ptr, order_index);
|
||||
std::vector<int> tensor_shape;
|
||||
(void)std::transform(host_shape.begin(), host_shape.end(), std::back_inserter(tensor_shape), SizeToInt);
|
||||
int outputIndex = tensor_cache->addExTensor(tensor_key, output_tensor, ref_count, tensor_shape, KERNEL);
|
||||
ms_node->outputIndex.push_back(outputIndex);
|
||||
return true;
|
||||
}
|
||||
|
||||
void Kernel2Ms::GetRealInpoutsPtr(const AnfNodePtr &node, std::vector<AnfNodePtr> *real_inputs,
|
||||
std::vector<size_t> *real_output_idx) {
|
||||
MS_EXCEPTION_IF_NULL(real_inputs);
|
||||
MS_EXCEPTION_IF_NULL(real_output_idx);
|
||||
size_t default_idx = 0;
|
||||
if (node->isa<CNode>()) {
|
||||
auto c_node = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(c_node);
|
||||
std::string c_node_name = GetCNodeFuncName(c_node);
|
||||
if (c_node_name == prim::kPrimTupleGetItem->name()) {
|
||||
auto v_node = c_node->inputs()[kTupleGetItemIndex]->cast<ValueNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(v_node);
|
||||
default_idx = IntToSize(GetValue<int>(v_node->value()));
|
||||
real_inputs->push_back(c_node->inputs()[1]);
|
||||
real_output_idx->push_back(default_idx);
|
||||
return;
|
||||
} else if (c_node_name == prim::kPrimDepend->name()) {
|
||||
GetRealInpoutsPtr(c_node->inputs()[1], real_inputs, real_output_idx);
|
||||
return;
|
||||
} else if (c_node_name == prim::kPrimMakeTuple->name()) {
|
||||
for (auto &in : c_node->inputs()) {
|
||||
GetRealInpoutsPtr(in, real_inputs, real_output_idx);
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
real_inputs->push_back(node);
|
||||
real_output_idx->push_back(default_idx);
|
||||
}
|
||||
} else if (node->isa<Parameter>()) {
|
||||
real_inputs->push_back(node);
|
||||
real_output_idx->push_back(default_idx);
|
||||
} else if (node->isa<ValueNode>()) {
|
||||
real_inputs->push_back(node);
|
||||
real_output_idx->push_back(default_idx);
|
||||
}
|
||||
}
|
||||
|
||||
bool Kernel2Ms::SetOpInputIdx(const CNodePtr &c_node_ptr, const TensorCachePtr &tensor_cache, OpDefT *ms_node) {
|
||||
MS_EXCEPTION_IF_NULL(c_node_ptr);
|
||||
MS_EXCEPTION_IF_NULL(tensor_cache);
|
||||
MS_EXCEPTION_IF_NULL(ms_node);
|
||||
for (size_t i = 1; i < c_node_ptr->inputs().size(); ++i) {
|
||||
std::vector<AnfNodePtr> real_inputs;
|
||||
std::vector<size_t> real_output_idx;
|
||||
GetRealInpoutsPtr(c_node_ptr->inputs()[i], &real_inputs, &real_output_idx);
|
||||
if (real_inputs.empty()) {
|
||||
MS_LOG(INFO) << "kernel has no inputs: " << c_node_ptr.get() << " input size[%lu]" << c_node_ptr->inputs().size();
|
||||
continue;
|
||||
}
|
||||
for (size_t j = 0; j < real_inputs.size(); ++j) {
|
||||
int key = node_indexs_[real_inputs[j].get()];
|
||||
std::vector<ExTensorPtr> ex_tensor_list = tensor_cache->findTensor(key);
|
||||
if (ex_tensor_list.empty()) {
|
||||
continue;
|
||||
}
|
||||
ExTensorPtr ex_tensor_ptr = ex_tensor_list[real_output_idx[j]];
|
||||
ex_tensor_list.clear();
|
||||
ms_node->inputIndex.push_back(ex_tensor_ptr->index_);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void Kernel2Ms::TransformGraphIndx() {
|
||||
// transform index && anfnodeptr
|
||||
if (node_indexs_.empty()) {
|
||||
MS_LOG(EXCEPTION) << "node_indexs_ not ininted";
|
||||
}
|
||||
for (auto &item : node_indexs_) {
|
||||
index_nodes_[item.second] = item.first;
|
||||
}
|
||||
}
|
||||
|
||||
bool Kernel2Ms::InitGraphInputsIndx(const KernelGraphPtr &kernel_graph_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
auto input_nodes = kernel_graph_ptr->inputs();
|
||||
if (input_nodes.empty()) {
|
||||
return false;
|
||||
}
|
||||
for (const auto &input_node : input_nodes) {
|
||||
if (input_node->isa<Parameter>()) {
|
||||
if (!predict::utils::FindNodeInMap(node_indexs_, input_node)) {
|
||||
// init every parameter node
|
||||
node_indexs_[input_node.get()] = graph_index_;
|
||||
graph_index_++;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(INFO) << "This node is anfnode, no need to handle, continue. node info: " << input_node->ToString();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
MS_LOG(DEBUG) << "inputs GraphIndex: " << graph_index_;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Kernel2Ms::InitGraphValueNodesIndx(const KernelGraphPtr &kernel_graph_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
if (kernel_graph_ptr->value_nodes().empty()) {
|
||||
return false;
|
||||
}
|
||||
for (auto &item : kernel_graph_ptr->value_nodes()) {
|
||||
if (item.first->isa<ValueNode>()) {
|
||||
auto value_node = item.first->cast<ValueNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(value_node);
|
||||
if (value_node == nullptr) {
|
||||
MS_LOG(WARNING) << "value_node is nullptr";
|
||||
return false;
|
||||
}
|
||||
if (value_node->value() == nullptr) {
|
||||
MS_LOG(ERROR) << "Constant value is null.";
|
||||
return false;
|
||||
}
|
||||
if (!value_node->value()->isa<tensor::Tensor>()) {
|
||||
continue;
|
||||
}
|
||||
if (!predict::utils::FindNodeInMap(node_indexs_, item.first)) {
|
||||
// init node
|
||||
auto node_ptr = item.first;
|
||||
node_indexs_[node_ptr.get()] = graph_index_;
|
||||
graph_index_++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Kernel2Ms::InitGraphOpsIndx(const KernelGraphPtr &kernel_graph_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
auto kernels = kernel_graph_ptr->execution_order();
|
||||
if (kernels.empty()) {
|
||||
MS_LOG(WARNING) << "this graph has no kernel";
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < kernels.size(); ++i) {
|
||||
// for each kernel's inputs foreach real_input
|
||||
if (kernels[i]->isa<CNode>()) {
|
||||
if (!predict::utils::FindNodeInMap(node_indexs_, kernels[i])) {
|
||||
// init node
|
||||
node_indexs_[kernels[i].get()] = graph_index_;
|
||||
graph_index_++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Kernel2Ms::InitGraphOutputsIndx(const KernelGraphPtr &kernel_graph_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
// graph output && their inputs should link together
|
||||
auto out_nodes = kernel_graph_ptr->outputs();
|
||||
if (out_nodes.empty()) {
|
||||
MS_LOG(ERROR) << "this graph has no outputs";
|
||||
return false;
|
||||
}
|
||||
for (auto &item : out_nodes) {
|
||||
if (!predict::utils::FindNodeInMap(node_indexs_, item)) {
|
||||
node_indexs_[item.get()] = graph_index_;
|
||||
graph_index_++;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Kernel2Ms::InitGraphIndx(const KernelGraphPtr &kernel_graph_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
// only parameter
|
||||
if (!InitGraphInputsIndx(kernel_graph_ptr)) {
|
||||
return false;
|
||||
}
|
||||
// init value node
|
||||
if (!InitGraphValueNodesIndx(kernel_graph_ptr)) {
|
||||
return false;
|
||||
}
|
||||
// init op
|
||||
if (!InitGraphOpsIndx(kernel_graph_ptr)) {
|
||||
return false;
|
||||
}
|
||||
// init Graphoutput attention: out_put nodes have inputs
|
||||
return InitGraphOutputsIndx(kernel_graph_ptr);
|
||||
}
|
||||
|
||||
bool Kernel2Ms::SetGraphInputTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
                                     SubGraphDefT *ms_graph) {
  // Caches every Parameter input of the graph. Data parameters go into the
  // sub-graph's inputIndex; weight parameters are tracked separately in
  // input_weight_idxs_. Both kinds are appended to all_input_idxs_.
  // Returns false on an unusable convert mode, an empty input list, a missing
  // device tensor, or an unregistered parameter.
  MS_EXCEPTION_IF_NULL(tensor_cache);
  MS_EXCEPTION_IF_NULL(ms_graph);
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  if (convert_mode_ == kConvertUnused) {
    return false;
  }
  if (kernel_graph_ptr->inputs().empty()) {
    return false;
  }
  for (const auto &input_node : kernel_graph_ptr->inputs()) {
    if (input_node->isa<Parameter>()) {
      ParameterPtr pk_node = std::dynamic_pointer_cast<Parameter>(input_node);
      // Fetch the device-side tensor according to the conversion target.
      TensorPtr device_tensor;
      if (convert_mode_ == kConvertCpuMode) {
        device_tensor = predict::utils::GetParaCpuTensor(input_node);
      } else {
        device_tensor = predict::utils::GetParaAscendTensor(input_node);
      }
      if (device_tensor == nullptr) {
        return false;
      }
      // Weight parameters get a different cache lifetime class than data inputs.
      ExTensorType node_type;
      if (AnfAlgo::IsParameterWeight(pk_node)) {
        node_type = WEIGHTS;
      } else {
        node_type = INPUTDATA;
      }
      if (!predict::utils::FindNodeInMap(node_indexs_, input_node)) {
        MS_LOG(WARNING) << "can not find any pk_key in inited node_indexs map";
        return false;
      }
      auto pk_key = node_indexs_[input_node.get()];
      all_output_tensors_[pk_key].push_back(device_tensor);
      int nodeRefCount = SizeToInt(AnfAlgo::GetOutputTensorNum(input_node));
      int nodeInputIdx =
        tensor_cache->addExTensor(pk_key, device_tensor, nodeRefCount, device_tensor->shape(), node_type);
      // Only non-weight parameters appear in the sub-graph's public inputIndex.
      if (!AnfAlgo::IsParameterWeight(pk_node)) {
        ms_graph->inputIndex.push_back(nodeInputIdx);
        all_input_idxs_.push_back(nodeInputIdx);
      } else {
        input_weight_idxs_.push_back(nodeInputIdx);
        all_input_idxs_.push_back(nodeInputIdx);
      }
    }
  }
  return true;
}
|
||||
|
||||
bool Kernel2Ms::SetGraphValueTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
MS_EXCEPTION_IF_NULL(tensor_cache);
|
||||
for (auto &item : kernel_graph_ptr->value_nodes()) {
|
||||
if (item.first->isa<ValueNode>()) {
|
||||
auto const_node = item.first->cast<ValueNodePtr>();
|
||||
auto tensor_constant = predict::utils::GetValueTensor(const_node);
|
||||
if (tensor_constant == nullptr) {
|
||||
continue;
|
||||
}
|
||||
if (!predict::utils::FindNodeInMap(node_indexs_, item.first)) {
|
||||
MS_LOG(WARNING) << "can not find any pk_key in inited node_indexs map";
|
||||
return false;
|
||||
}
|
||||
int constant_key = node_indexs_[(item.first).get()];
|
||||
all_output_tensors_[constant_key].push_back(tensor_constant);
|
||||
auto shape = tensor_constant->shape();
|
||||
(void)tensor_cache->addExTensor(constant_key, tensor_constant, 0, shape, CONSTANT);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Kernel2Ms::SetGraphOpTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
                                  SubGraphDefT *ms_graph) {
  // Builds one OpDefT per executed kernel (attributes packed via
  // OpAttrFactory) and caches every kernel output tensor. Finished ops are
  // parked as raw pointers in tmp_op_nodes_; KernelGraph2MsGraph later
  // re-wraps them and moves them into the sub-graph.
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  MS_EXCEPTION_IF_NULL(tensor_cache);
  MS_EXCEPTION_IF_NULL(ms_graph);
  auto kernels = kernel_graph_ptr->execution_order();
  if (kernels.empty()) {
    MS_LOG(ERROR) << "this graph has no kernels";
    return false;
  }
  for (auto &kernel : kernels) {
    if (!predict::utils::FindNodeInMap(node_indexs_, kernel)) {
      MS_LOG(ERROR) << "can not find any pk_key in inited node_indexs map";
      return false;
    }
    auto kernel_key = node_indexs_[kernel.get()];
    std::unique_ptr<OpDefT> ms_node(new OpDefT);
    ms_node->name = kernel->fullname_with_scope();
    // NOTE(review): fmkType is hard-coded to FmkType_CAFFE — confirm intended.
    ms_node->fmkType = mindspore::predict::FmkType_CAFFE;
    auto c_name = AnfAlgo::GetCNodeName(kernel);
    auto fun = predict::convert::OpAttrFactory::GetInstance()->GetPackFun(c_name);
    // A missing packer is tolerated (warn only); a failing packer is fatal.
    if (fun == nullptr) {
      MS_LOG(WARNING) << "get node [" << kernel->fullname_with_scope() << "] attr failed.";
    } else if (!fun(kernel, ms_node.get())) {
      MS_LOG(ERROR) << "set node [" << kernel->fullname_with_scope() << "] attr failed.";
      return false;
    }
    auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
    int nodeRefCount = SizeToInt(output_size);
    for (size_t j = 0; j < output_size; ++j) {
      // device_tensor stays null (default-constructed shared_ptr) when
      // convert_mode_ is kConvertUnused, which falls into the failure branch.
      TensorPtr device_tensor;
      if (convert_mode_ == kConvertCpuMode) {
        device_tensor = predict::utils::GetKernelCpuTensor(kernel, j);
      } else if (convert_mode_ == kConvertAscendMode) {
        device_tensor = predict::utils::GetKernelAscendTensor(kernel, j);
      }
      if (device_tensor == nullptr) {
        return false;
      }
      all_output_tensors_[kernel_key].push_back(device_tensor);
      if (!SetOpOutputIdx(kernel, device_tensor, tensor_cache, nodeRefCount, j, ms_node.get())) {
        return false;
      }
    }
    // Ownership is released here; the raw pointer is reclaimed later.
    tmp_op_nodes_.emplace_back(ms_node.release());
  }
  return true;
}
|
||||
|
||||
bool Kernel2Ms::KernelGraph2MsGraph(const KernelGraphPtr &kernel_graph_ptr) {
  // Top-level driver: indexes the graph's nodes, caches all tensors, builds
  // the op list, and assembles everything into sub_ms_graph_.
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  // Reset per-conversion state.
  // NOTE(review): tmp_op_nodes_ is NOT cleared here; stale raw pointers could
  // survive a previously failed conversion — confirm intended.
  graph_index_ = 0;
  all_output_tensors_.clear();
  node_indexs_.clear();
  index_nodes_.clear();
  std::unique_ptr<SubGraphDefT> sub_ms_graph(new SubGraphDefT());
  if (!InitGraphIndx(kernel_graph_ptr)) {
    return false;
  }
  // Build the reverse index -> node lookup.
  TransformGraphIndx();
  tensor_cache_ptr_ = std::make_shared<TensorCache>();
  // foreach node to init it's real output tensor
  if (!SetGraphInputTensors(kernel_graph_ptr, tensor_cache_ptr_, sub_ms_graph.get())) {
    return false;
  }
  // Get KernelGraph value node
  if (!SetGraphValueTensors(kernel_graph_ptr, tensor_cache_ptr_)) {
    return false;
  }
  // Get KernelGraph apply_kernel && add opNode
  if (!SetGraphOpTensors(kernel_graph_ptr, tensor_cache_ptr_, sub_ms_graph.get())) {
    return false;
  }
  // Get KernelGraph outputs
  if (!SetGraphOutputIdx(kernel_graph_ptr, tensor_cache_ptr_, sub_ms_graph.get(), &all_output_tensors_)) {
    return false;
  }
  // Wire each op's inputs, then transfer ownership of the raw OpDefT pointers
  // from tmp_op_nodes_ into the sub-graph.
  // NOTE(review): on an early return below, OpDefT pointers still held only by
  // tmp_op_nodes_ are leaked — confirm acceptable for this error path.
  auto kernels = kernel_graph_ptr->execution_order();
  for (size_t i = 0; i < kernels.size(); ++i) {
    auto ms_node = tmp_op_nodes_[i];
    if (!SetOpInputIdx(kernels[i], tensor_cache_ptr_, ms_node)) {
      return false;
    }
    std::unique_ptr<OpDefT> ms_node_tmp(ms_node);
    sub_ms_graph->nodes.emplace_back(std::move(ms_node_tmp));
  }
  if (!SetAllTensors(tensor_cache_ptr_, sub_ms_graph.get())) {
    return false;
  }
  if (!SetMemResue()) {
    return false;
  }
  // Publish the finished sub-graph.
  sub_ms_graph_ = std::move(sub_ms_graph);
  sub_ms_graph_->name = "default_sub_graph";
  return true;
}
|
||||
|
||||
bool Kernel2Ms::CheckInputSizes(const std::vector<TensorPtr> &input_tensors,
|
||||
const std::vector<uint32_t> &all_input_idxs) {
|
||||
if (input_tensors.size() != all_input_idxs.size()) {
|
||||
MS_LOG(EXCEPTION) << "real input tensors size:" << input_tensors.size()
|
||||
<< "not equal converted tesnors size:" << all_input_idxs.size() << "the graph has changed";
|
||||
}
|
||||
for (auto in : all_input_idxs) {
|
||||
if (in < sub_ms_graph_->allTensors.size()) {
|
||||
auto real_tensor = input_tensors[in];
|
||||
auto convert_dims = sub_ms_graph_->allTensors[in]->dims;
|
||||
auto real_dims = real_tensor->shape();
|
||||
if (real_dims.size() != convert_dims.size()) {
|
||||
return false;
|
||||
} else {
|
||||
for (size_t i = 0; i < convert_dims.size(); ++i) {
|
||||
if (convert_dims[i] != real_dims[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "index: " << in << "in all_input_idxs is valid";
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void Kernel2Ms::ReleaseContextRes() {
  // Drops all per-conversion state so the next conversion starts clean.
  tmp_op_nodes_.clear();
  node_indexs_.clear();
  index_nodes_.clear();
  all_output_tensors_.clear();
  tensor_cache_ptr_ = nullptr;
}
|
||||
|
||||
bool Kernel2Ms::KernelInput2MS(const std::vector<TensorPtr> &input_tensors) {
|
||||
const std::unordered_map<int, std::vector<ExTensorPtr>> &cache_tensors = tensor_cache_ptr_->GetCachedTensor();
|
||||
if (cache_tensors.empty()) {
|
||||
return false;
|
||||
}
|
||||
auto all_weights_idxs = GetAllInputWeightIdxs();
|
||||
auto all_input_idxs = GetAllInputIdxs();
|
||||
auto real_input_size = input_tensors.size();
|
||||
// check tensor size
|
||||
bool ret = CheckInputSizes(input_tensors, all_input_idxs);
|
||||
std::vector<uint32_t> match_to_rel_idxs;
|
||||
// indx order not matched,macth to it
|
||||
if (!ret) {
|
||||
for (auto idx : all_weights_idxs) {
|
||||
auto macth_idx = real_input_size - idx;
|
||||
match_to_rel_idxs.push_back(macth_idx);
|
||||
}
|
||||
} else {
|
||||
match_to_rel_idxs = all_weights_idxs;
|
||||
}
|
||||
if (match_to_rel_idxs.size() == all_weights_idxs.size()) {
|
||||
for (size_t j = 0; j < all_weights_idxs.size(); ++j) {
|
||||
auto cache_idx = all_weights_idxs[j];
|
||||
auto match_idx = match_to_rel_idxs[j];
|
||||
auto real_tensor = input_tensors[match_idx];
|
||||
auto real_size = LongToSize(real_tensor->data().nbytes());
|
||||
auto real_data = real_tensor->data_c();
|
||||
MS_EXCEPTION_IF_NULL(real_data);
|
||||
if (sub_ms_graph_->allTensors[cache_idx] != nullptr) {
|
||||
sub_ms_graph_->allTensors[cache_idx]->data.resize(real_size);
|
||||
}
|
||||
if (memcpy_s(sub_ms_graph_->allTensors[cache_idx]->data.data(), real_size, real_data, real_size) != 0) {
|
||||
MS_LOG(ERROR) << "KernelInput2MS memcpy_s failed";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
ReleaseContextRes();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Kernel2Ms::SaveDeviceModel(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name) {
  // Serializes the converted sub-graph into the given model under save_path_name.
  // NOTE(review): sub_ms_graph_.release() hands ownership to the util call and
  // leaves sub_ms_graph_ empty — a second call would pass nullptr; confirm
  // that one-shot use is intended.
  MS_EXCEPTION_IF_NULL(new_ms_graph_ptr);
  return predict::utils::SaveDeviceModelUtil(new_ms_graph_ptr, save_path_name, sub_ms_graph_.release());
}
|
||||
} // namespace executor
|
||||
} // namespace mindspore
|
|
@ -1,118 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_KERNEL_TO_MS_H_
|
||||
#define MINDSPORE_CCSRC_PREDICT_CONVERTER_KERNEL_TO_MS_H_
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "backend/session/kernel_graph.h"
|
||||
#include "predict/converter/executor_tensor.h"
|
||||
#include "predict/schema/inner/ms_generated.h"
|
||||
#include "predict/converter/attr_utils/convert_util.h"
|
||||
|
||||
static constexpr size_t kTupleGetItemIndex = 2;
|
||||
namespace mindspore {
|
||||
namespace executor {
|
||||
using KernelGraphPtr = std::shared_ptr<mindspore::session::KernelGraph>;
|
||||
enum ConvertMode { kConvertCpuMode, kConvertAscendMode, kConvertUnused };
|
||||
enum TargetMode { kCPUTarget, kGPUTarget, kUnknowTarget };
|
||||
// Singleton converter that translates a backend session::KernelGraph into the
// predict (lite) schema representation and can serialize it to disk.
class Kernel2Ms {
 public:
  // Process-wide singleton accessor.
  static Kernel2Ms &GetInstance();

  Kernel2Ms(const Kernel2Ms &) = delete;

  Kernel2Ms &operator=(const Kernel2Ms &) = delete;

  // Converts a KernelGraph into sub_ms_graph_; returns false on any failure.
  bool KernelGraph2MsGraph(const KernelGraphPtr &kernel_graph_ptr);

  // Copies real weight tensor data into the converted graph's cached tensors.
  bool KernelInput2MS(const std::vector<TensorPtr> &input_tensors);

  ConvertMode convert_mode() const { return convert_mode_; }

  void set_convert_mode(ConvertMode convert_mode) { convert_mode_ = convert_mode; }

  TargetMode device_target() const { return device_target_; }

  void set_device_target(TargetMode device_target) { device_target_ = device_target; }

  // Serializes the converted sub-graph; consumes sub_ms_graph_ (release()).
  bool SaveDeviceModel(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name);

 private:
  Kernel2Ms() : graph_index_(0) {}

  // Clears all per-conversion bookkeeping state.
  void ReleaseContextRes();

  ~Kernel2Ms() = default;

  // Copies every cached tensor into the sub-graph's allTensors table.
  bool SetAllTensors(const TensorCachePtr &tensor_cache, SubGraphDefT *sub_graph_def_t);

  // Resolves and records an op's input tensor indices.
  bool SetOpInputIdx(const CNodePtr &c_node_ptr, const TensorCachePtr &tensor_cache, OpDefT *ms_node);

  // Caches one kernel output tensor and records its index on the op.
  bool SetOpOutputIdx(const CNodePtr &c_node_ptr, const TensorPtr &output_tensor, const TensorCachePtr &tensor_cache,
                      int ref_count, size_t order_index, OpDefT *ms_node);

  // Records the cached-tensor index of every real graph output.
  bool SetGraphOutputIdx(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
                         SubGraphDefT *sub_graph_def_t, AllOutputTensors *all_output_tensors);

  // Builds index_nodes_ (the reverse of node_indexs_).
  void TransformGraphIndx();

  // Resolves wrapper ops (TupleGetItem/Depend/MakeTuple) to real producers.
  void GetRealInpoutsPtr(const AnfNodePtr &node, std::vector<AnfNodePtr> *real_inputs,
                         std::vector<size_t> *real_output_idx);

  // Runs the four indexing passes below, in order.
  bool InitGraphIndx(const KernelGraphPtr &kernel_graph_ptr);

  // Indexes Parameter inputs.
  bool InitGraphInputsIndx(const KernelGraphPtr &kernel_graph_ptr);

  // Indexes tensor-valued constants.
  bool InitGraphValueNodesIndx(const KernelGraphPtr &kernel_graph_ptr);

  // Indexes executed kernel CNodes.
  bool InitGraphOpsIndx(const KernelGraphPtr &kernel_graph_ptr);

  // Indexes graph output nodes.
  bool InitGraphOutputsIndx(const KernelGraphPtr &kernel_graph_ptr);

  // Caches Parameter input tensors (data vs. weight handled separately).
  bool SetGraphInputTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
                            SubGraphDefT *sub_graph_def_t);

  // Caches constant tensors with CONSTANT lifetime.
  bool SetGraphValueTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache);

  // Builds one OpDefT per kernel and caches each kernel output tensor.
  bool SetGraphOpTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
                         SubGraphDefT *sub_graph_def_t);
  std::vector<uint32_t> GetAllInputWeightIdxs() const { return input_weight_idxs_; }
  std::vector<uint32_t> GetAllInputIdxs() const { return all_input_idxs_; }

  // Verifies real input shapes still match the converted tensors.
  bool CheckInputSizes(const std::vector<TensorPtr> &input_tensors, const std::vector<uint32_t> &all_input_idxs);

  bool SetMemResue() const;
  // The converted sub-graph; consumed by SaveDeviceModel.
  SubGraphPtr sub_ms_graph_;
  // node index -> tensors produced by that node
  AllOutputTensors all_output_tensors_;
  // raw OpDefT pointers parked between SetGraphOpTensors and final assembly
  std::vector<OpDefT *> tmp_op_nodes_;
  // node pointer -> graph index
  std::unordered_map<MsKernelKey, int> node_indexs_;
  // graph index -> node pointer (reverse of node_indexs_)
  std::unordered_map<int, MsKernelKey> index_nodes_;
  int graph_index_ = 0;
  TensorCachePtr tensor_cache_ptr_ = nullptr;
  ConvertMode convert_mode_ = kConvertCpuMode;
  TargetMode device_target_ = kCPUTarget;
  // cache indices of weight parameters only
  std::vector<uint32_t> input_weight_idxs_;
  // cache indices of all graph inputs (data + weight)
  std::vector<uint32_t> all_input_idxs_;
};
|
||||
using Kernel2MsPtr = std::shared_ptr<Kernel2Ms>;
|
||||
} // namespace executor
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_KERNEL_TO_MS_H_
|
|
@ -1,110 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
#include "./securec.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
// forward declare
|
||||
bool Conv2dPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool MatMulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool BiasAddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool ReshapePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool ActivationPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool PoolingPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool FusedBatchNormPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool AddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool CastPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool MeanPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool SoftmaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool ScalePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool AddFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool ArgMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool BatchNormFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool FakeQuantWithMinMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool FakeQuantWithMinMaxPerChannelPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool MulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool MulFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
bool SqueezePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
|
||||
OpAttrFactory::OpAttrFactory() {
  // Registry mapping a CNode op name to the function that packs its
  // attributes into the predict OpDefT representation. Several frontend
  // aliases deliberately share one packer (e.g. all activation variants map
  // to ActivationPacker, the pooling variants to PoolingPacker).
  pack_funs_ = {{"Conv2D", Conv2dPacker},
                {"MatMul", MatMulPacker},
                {"BiasAdd", BiasAddPacker},
                {"Reshape", ReshapePacker},
                {"Activation", ActivationPacker},
                {"ReLU", ActivationPacker},
                {"ReLU6", ActivationPacker},
                {"EReLU", ActivationPacker},
                {"LeakyReLU", ActivationPacker},
                {"Sigmoid", ActivationPacker},
                {"Softsign", ActivationPacker},
                {"Softplus", ActivationPacker},
                {"Tanh", ActivationPacker},
                {"HSwish", ActivationPacker},
                {"HSigmoid", ActivationPacker},
                {"MaxPool", PoolingPacker},
                {"MaxPool2D", PoolingPacker},
                {"MeanPool", PoolingPacker},
                {"GlobalPool", PoolingPacker},
                {"FusedBatchNorm", FusedBatchNormPacker},
                {"FusedBatchNormGrad", FusedBatchNormPacker},
                {"Cast", CastPacker},
                {"TensorAdd", AddPacker},
                {"SoftMax", SoftmaxPacker},
                {"SimpleMean", MeanPacker},
                {"ReduceMean", MeanPacker},
                {"AddFold", AddFoldPacker},
                {"ArgMax", ArgMaxPacker},
                {"BatchNorm", BatchNormFoldPacker},
                {"FakeQuantPerLayer", FakeQuantWithMinMaxPacker},
                {"FakeQuantPerChannel", FakeQuantWithMinMaxPerChannelPacker},
                {"Mul", MulPacker},
                {"MulFold", MulFoldPacker},
                {"Squeeze", SqueezePacker}};
}
|
||||
OpAttrPackFun OpAttrFactory::GetPackFun(const std::string &opType) {
  // Returns the attribute-packing function registered for opType, or nullptr
  // (with a warning) when no packer exists.
  // perf fix: single lookup via iterator — the old code did find() followed by
  // operator[], performing the hash lookup twice.
  auto iter = pack_funs_.find(opType);
  if (iter == pack_funs_.end()) {
    MS_LOG(WARNING) << "Op Attr pack fun [" << opType << "] not found.";
    return nullptr;
  }
  return iter->second;
}
|
||||
|
||||
mindspore::predict::Format GetAttrFormat(const std::string &format) {
  // Maps a MindSpore layout string onto the predict schema's Format enum;
  // anything other than NCHW/NHWC falls through to the sentinel value.
  if (format == kOpFormat_NCHW) {
    return predict::Format::Format_NCHW;
  }
  if (format == kOpFormat_NHWC) {
    return predict::Format::Format_NHWC;
  }
  return predict::Format::Format_NUM_OF_FORMAT;
}
|
||||
|
||||
mindspore::predict::PadMode GetAttrPadMode(const std::string &pad_mode) {
  // Translates a pad-mode string into the predict schema enum; unrecognized
  // strings map to PadMode_NOTSET.
  if (pad_mode == "same") {
    return mindspore::predict::PadMode::PadMode_SAME;
  }
  if (pad_mode == "valid") {
    return mindspore::predict::PadMode::PadMode_VALID;
  }
  return mindspore::predict::PadMode::PadMode_NOTSET;
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,58 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_OP_ATTR_PACKER_H_
|
||||
#define MINDSPORE_CCSRC_PREDICT_CONVERTER_OP_ATTR_PACKER_H_
|
||||
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "predict/schema/inner/ms_generated.h"
|
||||
|
||||
static constexpr size_t kNIndex = 0;
|
||||
static constexpr size_t kCIndex = 1;
|
||||
static constexpr size_t kHIndex = 2;
|
||||
static constexpr size_t kWIndex = 3;
|
||||
static constexpr size_t kNCHWSize = 4;
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
using OpAttrPackFun = bool (*)(const CNodePtr &c_node_ptr, OpDefT *ms_op);
|
||||
// Registry of per-op attribute packers used when converting CNodes into the
// predict OpDefT representation.
class OpAttrFactory {
 public:
  // Process-wide singleton (Meyers pattern; thread-safe init since C++11).
  static OpAttrFactory *GetInstance() {
    static OpAttrFactory instance;
    return &instance;
  }
  OpAttrFactory(const OpAttrFactory &) = delete;
  OpAttrFactory &operator=(const OpAttrFactory &) = delete;
  // Returns the packer registered for op_type, or nullptr with a warning.
  OpAttrPackFun GetPackFun(const std::string &op_type);
  ~OpAttrFactory() { pack_funs_.clear(); }
  // NOTE(review): the constructor is public although the class is used only
  // through GetInstance() — consider making it private.
  OpAttrFactory();

 private:
  // op name -> attribute packing function
  std::unordered_map<std::string, OpAttrPackFun> pack_funs_;
};
|
||||
|
||||
mindspore::predict::Format GetAttrFormat(const std::string &format);
|
||||
|
||||
mindspore::predict::PadMode GetAttrPadMode(const std::string &pad_mode);
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_CPU_OP_INFO_OP_ATTR_FACTORY_H_
|
|
@ -1,59 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool ActivationPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<ActivationT> attr(new ActivationT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
if (AnfAlgo::GetCNodeName(c_node_ptr) == "ReLU") {
|
||||
attr->type = predict::ActivationType::ActivationType_RELU;
|
||||
} else if (AnfAlgo::GetCNodeName(c_node_ptr) == "Sigmoid") {
|
||||
attr->type = predict::ActivationType::ActivationType_SIGMOID;
|
||||
} else if (AnfAlgo::GetCNodeName(c_node_ptr) == "ReLU6") {
|
||||
attr->type = predict::ActivationType::ActivationType_RELU6;
|
||||
} else if (AnfAlgo::GetCNodeName(c_node_ptr) == "ELU") {
|
||||
attr->type = predict::ActivationType::ActivationType_ELU;
|
||||
} else if (AnfAlgo::GetCNodeName(c_node_ptr) == "Leaky_ReLU") {
|
||||
attr->type = predict::ActivationType::ActivationType_LEAKY_RELU;
|
||||
} else if (AnfAlgo::GetCNodeName(c_node_ptr) == "ABS") {
|
||||
attr->type = predict::ActivationType::ActivationType_ABS;
|
||||
} else if (AnfAlgo::GetCNodeName(c_node_ptr) == "ReLU1") {
|
||||
attr->type = predict::ActivationType::ActivationType_RELU1;
|
||||
} else if (AnfAlgo::GetCNodeName(c_node_ptr) == "Softsign") {
|
||||
attr->type = predict::ActivationType::ActivationType_SOFTSIGN;
|
||||
} else if (AnfAlgo::GetCNodeName(c_node_ptr) == "Softplus") {
|
||||
attr->type = predict::ActivationType::ActivationType_SOFTPLUS;
|
||||
} else if (AnfAlgo::GetCNodeName(c_node_ptr) == "Tanh") {
|
||||
attr->type = predict::ActivationType::ActivationType_TANH;
|
||||
} else {
|
||||
attr->type = predict::ActivationType::ActivationType_UNKNOW;
|
||||
MS_LOG(WARNING) << "unknow Activation";
|
||||
}
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_Activation;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,35 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool AddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<AddT> attr(new AddT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_Add;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,34 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool AddFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<AddFoldT> attr(new AddFoldT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
ms_op->attr.type = OpT_AddFold;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,34 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool ArgMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<ArgMaxT> attr(new ArgMaxT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
ms_op->attr.type = OpT_ArgMax;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,34 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool BatchNormFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<BatchNormFoldT> attr(new BatchNormFoldT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
ms_op->attr.type = OpT_BatchNormFold;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,37 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool BiasAddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<BiasAddT> attr(new BiasAddT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
attr->axis = {1};
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_BiasAdd;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,37 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool CastPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<CastT> attr(new CastT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
attr->srcT = 0;
|
||||
attr->dstT = 0;
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_Cast;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,63 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool Conv2dPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
int kernel_group_value = AnfAlgo::GetNodeAttr<int>(c_node_ptr, "group");
|
||||
int kernel_channel_value = AnfAlgo::GetNodeAttr<int>(c_node_ptr, "out_channel");
|
||||
std::vector<int> kernel_size_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "kernel_size");
|
||||
std::string kernel_pad_mode_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "pad_mode");
|
||||
int kernel_pad_value = AnfAlgo::GetNodeAttr<int>(c_node_ptr, "pad");
|
||||
auto kernel_stride_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "stride");
|
||||
auto kernel_dilation_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "dilation");
|
||||
std::string kernel_data_format_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "data_format");
|
||||
std::unique_ptr<Conv2DT> attr(new Conv2DT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
attr->format = GetAttrFormat(kernel_data_format_value);
|
||||
attr->group = kernel_group_value;
|
||||
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(c_node_ptr, 1);
|
||||
if (in_shape.size() != kNCHWSize) {
|
||||
return false;
|
||||
}
|
||||
attr->channelIn = SizeToInt(in_shape[1]);
|
||||
attr->channelOut = kernel_channel_value;
|
||||
attr->kernelW = kernel_size_value[0];
|
||||
attr->kernelH = kernel_size_value[1];
|
||||
attr->strideW = kernel_stride_value[0];
|
||||
attr->strideH = kernel_stride_value[1];
|
||||
attr->padMode = GetAttrPadMode(kernel_pad_mode_value);
|
||||
attr->padUp = kernel_pad_value;
|
||||
attr->padDown = kernel_pad_value;
|
||||
attr->padLeft = kernel_pad_value;
|
||||
attr->padRight = kernel_pad_value;
|
||||
attr->dilateW = kernel_dilation_value[0];
|
||||
attr->dilateH = kernel_dilation_value[1];
|
||||
attr->hasBias = false;
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_Conv2D;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,34 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool FakeQuantWithMinMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<FakeQuantWithMinMaxT> attr(new FakeQuantWithMinMaxT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
ms_op->attr.type = OpT_FakeQuantWithMinMax;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,34 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool FakeQuantWithMinMaxPerChannelPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<FakeQuantWithMinMaxPerChannelT> attr(new FakeQuantWithMinMaxPerChannelT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
ms_op->attr.type = OpT_FakeQuantWithMinMaxPerChannel;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,37 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool FusedBatchNormPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<FusedBatchNormT> attr(new FusedBatchNormT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
auto kernel_epsilon = AnfAlgo::GetNodeAttr<float>(c_node_ptr, "epsilon");
|
||||
attr->epsilon = kernel_epsilon;
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_FusedBatchNorm;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,39 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool MatMulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
bool kernel_transpore_a = AnfAlgo::GetNodeAttr<bool>(c_node_ptr, "transpose_a");
|
||||
bool kernel_transpore_b = AnfAlgo::GetNodeAttr<bool>(c_node_ptr, "transpose_b");
|
||||
std::unique_ptr<MatMulT> attr(new MatMulT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
attr->transposeA = kernel_transpore_a;
|
||||
attr->transposeB = kernel_transpore_b;
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_MatMul;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,37 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool MeanPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<MeanT> attr(new MeanT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
attr->axis = {1};
|
||||
attr->keepDims = false;
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_Mean;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,34 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool MulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<MulT> attr(new MulT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
ms_op->attr.type = OpT_Mul;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,35 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool MulFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<MulFoldT> attr(new MulFoldT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_MulFold;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,61 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool PoolingPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<PoolingT> attr(new PoolingT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
std::string kernel_format_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "data_format");
|
||||
attr->format = GetAttrFormat(kernel_format_value);
|
||||
auto c_name = AnfAlgo::GetCNodeName(c_node_ptr);
|
||||
if (c_name == "MaxPool") {
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
attr->poolingMode = mindspore::predict::PoolMode::PoolMode_MAX_POOLING;
|
||||
} else if (c_name == "MeanPool") {
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
attr->poolingMode = mindspore::predict::PoolMode::PoolMode_MEAN_POOLING;
|
||||
} else if (c_name == "GlobalPool") {
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
} else {
|
||||
MS_LOG(ERROR) << "unknowed pooling type.";
|
||||
return false;
|
||||
}
|
||||
std::vector<int> kernel_ksize = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "ksize");
|
||||
attr->windowW = kernel_ksize[kHIndex];
|
||||
attr->windowH = kernel_ksize[kWIndex];
|
||||
std::vector<int> kernel_strides = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "strides");
|
||||
attr->strideW = kernel_strides[kHIndex];
|
||||
attr->strideH = kernel_strides[kWIndex];
|
||||
std::string kernel_pad_mode_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "padding");
|
||||
attr->padMode = GetAttrPadMode(kernel_pad_mode_value);
|
||||
attr->padUp = 0;
|
||||
attr->padDown = 0;
|
||||
attr->padLeft = 0;
|
||||
attr->padRight = 0;
|
||||
ms_op->attr.type = OpT_Pooling;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,36 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool ReshapePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<ReshapeT> attr(new ReshapeT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
attr->format = predict::Format::Format_NCHW;
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_Reshape;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,36 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool ScalePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<ScaleT> attr(new ScaleT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
attr->format = predict::Format::Format_NCHW;
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_Scale;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,36 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool SoftmaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<SoftMaxT> attr(new SoftMaxT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
attr->axis = {1};
|
||||
ms_op->name = c_node_ptr->fullname_with_scope();
|
||||
ms_op->attr.type = OpT_SoftMax;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,38 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/converter/lite_model/op_attr_packer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
namespace convert {
|
||||
bool SqueezePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
|
||||
if (c_node_ptr == nullptr || ms_op == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<SqueezeT> attr(new SqueezeT());
|
||||
MS_EXCEPTION_IF_NULL(attr);
|
||||
|
||||
std::vector<int> kernel_axis_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "axis");
|
||||
attr->axis = kernel_axis_value;
|
||||
|
||||
ms_op->attr.type = OpT_Squeeze;
|
||||
ms_op->attr.value = attr.release();
|
||||
return true;
|
||||
}
|
||||
} // namespace convert
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,31 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/generator/ir/ir_model.h"
|
||||
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace generator {
|
||||
IRModel::~IRModel() { ir_tasks_.clear(); }
|
||||
void IRModel::SetIrTaskInfos(const std::vector<IRtaskInfoPtr> &ir_tasks) {
|
||||
(void)std::copy(ir_tasks.begin(), ir_tasks.end(), std::back_inserter(ir_tasks_));
|
||||
}
|
||||
} // namespace generator
|
||||
} // namespace mindspore
|
|
@ -1,37 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_MODEL_H_
|
||||
#define MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_MODEL_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "predict/generator/ir/ir_task_info.h"
|
||||
namespace mindspore {
|
||||
namespace generator {
|
||||
class IRModel {
|
||||
public:
|
||||
void SetIrTaskInfos(const std::vector<IRtaskInfoPtr> &ir_tasks);
|
||||
IRModel() = default;
|
||||
~IRModel();
|
||||
|
||||
private:
|
||||
std::vector<IRtaskInfoPtr> ir_tasks_;
|
||||
};
|
||||
using IrModelPtr = std::shared_ptr<IRModel>;
|
||||
} // namespace generator
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_MODEL_H_
|
|
@ -1,244 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/generator/ir/ir_task_info.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace generator {
|
||||
bool CceIRTaskInfo::SerializeIRToProto() {
|
||||
auto cce_task_def_ptr = std::unique_ptr<ge::model_runner::CceTaskDef>();
|
||||
auto kernel_context_ptr = std::unique_ptr<ge::model_runner::KernelContext>();
|
||||
MS_EXCEPTION_IF_NULL(cce_task_def_ptr);
|
||||
MS_EXCEPTION_IF_NULL(kernel_context_ptr);
|
||||
kernel_context_ptr->set_kernel_type(k_ctx_.kernel_type);
|
||||
kernel_context_ptr->set_op_id(k_ctx_.op_id);
|
||||
kernel_context_ptr->set_kernel_func_id(k_ctx_.kernel_func_id);
|
||||
kernel_context_ptr->set_op_index(k_ctx_.op_index);
|
||||
kernel_context_ptr->set_is_flowtable(k_ctx_.is_flowtable);
|
||||
kernel_context_ptr->set_args_count(k_ctx_.args_count);
|
||||
for (unsigned int i : k_ctx_.origin_op_index) {
|
||||
kernel_context_ptr->add_origin_op_index(i);
|
||||
}
|
||||
void *tmp_args_offset = static_cast<void *>((k_ctx_.args_offset).data());
|
||||
if (tmp_args_offset == nullptr) {
|
||||
MS_LOG(WARNING) << "tmp_args_offset have no data";
|
||||
return false;
|
||||
}
|
||||
kernel_context_ptr->set_args_offset(tmp_args_offset, k_ctx_.args_offset.size());
|
||||
cce_task_def_ptr->set_allocated_kernel_context(std::move(kernel_context_ptr).get());
|
||||
cce_task_def_ptr->set_stub_func(stub_func_);
|
||||
cce_task_def_ptr->set_block_dim(block_dim_);
|
||||
cce_task_def_ptr->set_args_size(args_size_);
|
||||
void *tmp_sm_desc = static_cast<void *>(sm_desc_.data());
|
||||
if (tmp_sm_desc == nullptr) {
|
||||
MS_LOG(WARNING) << "tmp_sm_desc have no data";
|
||||
return false;
|
||||
}
|
||||
cce_task_def_ptr->set_sm_desc(tmp_sm_desc, sm_desc_.size());
|
||||
|
||||
void *tmp_flow_table = static_cast<void *>(flow_table_.data());
|
||||
if (tmp_flow_table == nullptr) {
|
||||
MS_LOG(WARNING) << "tmp_flow_table have no data";
|
||||
return false;
|
||||
}
|
||||
cce_task_def_ptr->set_flow_table(tmp_flow_table, flow_table_.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
CceIRTaskInfo::~CceIRTaskInfo() {
|
||||
args_.clear();
|
||||
sm_desc_.clear();
|
||||
flow_table_.clear();
|
||||
}
|
||||
|
||||
bool TbeIRTaskInfo::SerializeIRToProto() {
|
||||
auto tbe_task_def_ptr = std::unique_ptr<ge::model_runner::TbeTaskDef>();
|
||||
MS_EXCEPTION_IF_NULL(tbe_task_def_ptr);
|
||||
tbe_task_def_ptr->set_stub_func(stub_func_);
|
||||
tbe_task_def_ptr->set_block_dim(block_dim_);
|
||||
tbe_task_def_ptr->set_args_size(args_size_);
|
||||
void *tmp_args = static_cast<void *>(args_.data());
|
||||
if (tmp_args == nullptr) {
|
||||
MS_LOG(WARNING) << "tmp_args have no data";
|
||||
return false;
|
||||
}
|
||||
tbe_task_def_ptr->set_args(tmp_args, args_.size());
|
||||
void *tmp_sm_desc = static_cast<void *>(sm_desc_.data());
|
||||
if (tmp_sm_desc == nullptr) {
|
||||
MS_LOG(WARNING) << "tmp_sm_desc have no data";
|
||||
return false;
|
||||
}
|
||||
tbe_task_def_ptr->set_sm_desc(tmp_sm_desc, sm_desc_.size());
|
||||
void *tmp_meta_data = static_cast<void *>(meta_data_.data());
|
||||
if (tmp_meta_data == nullptr) {
|
||||
MS_LOG(WARNING) << "tmp_meta_data have no data";
|
||||
return false;
|
||||
}
|
||||
tbe_task_def_ptr->set_meta_data(tmp_meta_data, meta_data_.size());
|
||||
for (auto &in : input_data_addrs_) {
|
||||
tbe_task_def_ptr->add_input_addrs(in);
|
||||
}
|
||||
for (auto &ou : output_data_addrs_) {
|
||||
tbe_task_def_ptr->add_output_addrs(ou);
|
||||
}
|
||||
for (auto &wk : workspace_addrs_) {
|
||||
tbe_task_def_ptr->add_workspace_addrs(wk);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
TbeIRTaskInfo::~TbeIRTaskInfo() {
|
||||
args_.clear();
|
||||
sm_desc_.clear();
|
||||
meta_data_.clear();
|
||||
input_data_addrs_.clear();
|
||||
output_data_addrs_.clear();
|
||||
workspace_addrs_.clear();
|
||||
}
|
||||
|
||||
bool AicpuIRTaskInfo::SerializeIRToProto() {
|
||||
auto aicpu_task_def_ptr = std::unique_ptr<ge::model_runner::AicpuTaskDef>();
|
||||
MS_EXCEPTION_IF_NULL(aicpu_task_def_ptr);
|
||||
aicpu_task_def_ptr->set_op_type(op_type_);
|
||||
aicpu_task_def_ptr->set_flag(flag_);
|
||||
for (auto &shape : input_data_shapes_) {
|
||||
auto in_shape_ptr = aicpu_task_def_ptr->add_input_shapes();
|
||||
for (auto &in_sh : shape) {
|
||||
in_shape_ptr->add_shape(static_cast<uint32_t>(in_sh));
|
||||
}
|
||||
}
|
||||
for (auto &shape : output_data_shapes_) {
|
||||
auto ou_shape_ptr = aicpu_task_def_ptr->add_output_shapes();
|
||||
for (auto &ou_sh : shape) {
|
||||
ou_shape_ptr->add_shape(static_cast<uint32_t>(ou_sh));
|
||||
}
|
||||
}
|
||||
for (auto &in_type : input_data_types_) {
|
||||
aicpu_task_def_ptr->add_input_types(in_type);
|
||||
}
|
||||
for (auto &ou_type : output_data_types_) {
|
||||
aicpu_task_def_ptr->add_output_types(ou_type);
|
||||
}
|
||||
for (auto &in_addr : input_data_addrs_) {
|
||||
aicpu_task_def_ptr->add_input_addrs(in_addr);
|
||||
}
|
||||
for (auto &ou_addr : output_data_addrs_) {
|
||||
aicpu_task_def_ptr->add_output_addrs(ou_addr);
|
||||
}
|
||||
void *tmp_node_def = static_cast<void *>(node_def_.data());
|
||||
if (tmp_node_def == nullptr) {
|
||||
MS_LOG(WARNING) << "tmp_node_def have no data";
|
||||
return false;
|
||||
}
|
||||
aicpu_task_def_ptr->set_node_def(tmp_node_def, node_def_.size());
|
||||
void *tmp_func_def = static_cast<void *>(func_def_.data());
|
||||
if (tmp_func_def == nullptr) {
|
||||
MS_LOG(WARNING) << "tmp_func_def have no data";
|
||||
return false;
|
||||
}
|
||||
aicpu_task_def_ptr->set_func_def(tmp_func_def, func_def_.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
AicpuIRTaskInfo::~AicpuIRTaskInfo() {
|
||||
input_data_types_.clear();
|
||||
input_data_shapes_.clear();
|
||||
input_data_addrs_.clear();
|
||||
output_data_types_.clear();
|
||||
output_data_shapes_.clear();
|
||||
output_data_addrs_.clear();
|
||||
node_def_.clear();
|
||||
func_def_.clear();
|
||||
}
|
||||
|
||||
bool LabelIRTaskInfo::SerializeIRToProto() {
|
||||
auto label_task_def_ptr = std::unique_ptr<ge::model_runner::LabelTaskDef>();
|
||||
MS_EXCEPTION_IF_NULL(label_task_def_ptr);
|
||||
label_task_def_ptr->set_label_id(label_id_);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EventIRTaskInfo::SerializeIRToProto() {
|
||||
auto event_task_def_ptr = std::unique_ptr<ge::model_runner::EventTaskDef>();
|
||||
MS_EXCEPTION_IF_NULL(event_task_def_ptr);
|
||||
event_task_def_ptr->set_event_id(event_id_);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HcclIRTaskInfo::SerializeIRToProto() {
|
||||
auto hccl_task_def_ptr = std::unique_ptr<ge::model_runner::HcclTaskDef>();
|
||||
MS_EXCEPTION_IF_NULL(hccl_task_def_ptr);
|
||||
hccl_task_def_ptr->set_hccl_type(hccl_type_);
|
||||
hccl_task_def_ptr->set_input_addr(input_data_addr_);
|
||||
hccl_task_def_ptr->set_output_addr(output_data_addr_);
|
||||
auto tmp_wk = static_cast<void *>(workspace_.data());
|
||||
hccl_task_def_ptr->set_workspace(tmp_wk, workspace_.size());
|
||||
hccl_task_def_ptr->set_workspace_num(workspace_num_);
|
||||
auto tmp_pri_def = static_cast<void *>(private_def_.data());
|
||||
hccl_task_def_ptr->set_private_def(tmp_pri_def, private_def_.size());
|
||||
hccl_task_def_ptr->set_ops_kernel_store(ops_kernel_store_);
|
||||
hccl_task_def_ptr->set_count(count_);
|
||||
hccl_task_def_ptr->set_root_id(root_id_);
|
||||
hccl_task_def_ptr->set_op_type(op_type_);
|
||||
hccl_task_def_ptr->set_data_type(data_type_);
|
||||
return true;
|
||||
}
|
||||
|
||||
HcclIRTaskInfo::~HcclIRTaskInfo() {
|
||||
workspace_.clear();
|
||||
private_def_.clear();
|
||||
}
|
||||
|
||||
bool ProfilerIRTaskInfo::SerializeIRToProto() {
|
||||
auto profiler_task_def_ptr = std::unique_ptr<ge::model_runner::ProfilerTaskDef>();
|
||||
MS_EXCEPTION_IF_NULL(profiler_task_def_ptr);
|
||||
profiler_task_def_ptr->set_log_id(log_id_);
|
||||
profiler_task_def_ptr->set_flat(flat_);
|
||||
profiler_task_def_ptr->set_notify(notify_);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MemcpyAsyncIRTaskInfo::SerializeIRToProto() {
|
||||
auto mem_task_def_ptr = std::unique_ptr<ge::model_runner::MemcpyAsyncTaskDef>();
|
||||
MS_EXCEPTION_IF_NULL(mem_task_def_ptr);
|
||||
mem_task_def_ptr->set_dst(dst_);
|
||||
mem_task_def_ptr->set_dst_max(dst_max_);
|
||||
mem_task_def_ptr->set_src(src_);
|
||||
mem_task_def_ptr->set_count(count_);
|
||||
mem_task_def_ptr->set_kind(kind_);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StreamSwitchIRTaskInfo::SerializeIRToProto() {
|
||||
auto stream_switch_task_def_ptr = std::unique_ptr<ge::model_runner::StreamSwitchTaskDef>();
|
||||
MS_EXCEPTION_IF_NULL(stream_switch_task_def_ptr);
|
||||
stream_switch_task_def_ptr->set_true_stream_id(true_stream_id_);
|
||||
stream_switch_task_def_ptr->set_input_addr(input_addr_);
|
||||
stream_switch_task_def_ptr->set_value_addr(value_addr_);
|
||||
stream_switch_task_def_ptr->set_cond(cond_);
|
||||
stream_switch_task_def_ptr->set_data_type(data_type_);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StreamActiveIRTaskInfo::SerializeIRToProto() {
|
||||
auto stream_active_task_def_ptr = std::unique_ptr<ge::model_runner::StreamActiveTaskDef>();
|
||||
MS_EXCEPTION_IF_NULL(stream_active_task_def_ptr);
|
||||
stream_active_task_def_ptr->set_active_stream_id(active_stream_id_);
|
||||
return true;
|
||||
}
|
||||
} // namespace generator
|
||||
} // namespace mindspore
|
|
@ -1,295 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_TASK_H_
|
||||
#define MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_TASK_H_
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "proto/ge_runtime_taskinfo.pb.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace generator {
|
||||
using TaskType = ::ge::model_runner::TaskDef_TaskType;
|
||||
enum TaskTmpType {
|
||||
CCE_TMP_DEF = 0,
|
||||
TBE_TMP_DEF = 1,
|
||||
AICPU_TMP_DEF = 2,
|
||||
LABEL_TMP_DEF = 3,
|
||||
EVENT_TMP_DEF = 4,
|
||||
HCCL_TMP_DEF = 5,
|
||||
PROFILER_TRACE_TMP_DEF = 6,
|
||||
MEMCPY_ASYNC_TMP_DEF = 7,
|
||||
STREAM_SWITCH_TMP_DEF = 8,
|
||||
STREAM_ACTIVE_TMP_DEF = 9
|
||||
};
|
||||
|
||||
struct KernelContext {
|
||||
uint32_t kernel_type = 0;
|
||||
uint32_t op_id = 0;
|
||||
uint32_t kernel_func_id = 0;
|
||||
uint32_t op_index = 0;
|
||||
bool is_flowtable = false;
|
||||
std::vector<uint8_t> args_offset;
|
||||
uint32_t args_count = 0;
|
||||
std::vector<uint32_t> origin_op_index;
|
||||
};
|
||||
|
||||
class IRtaskInfo {
|
||||
public:
|
||||
virtual ~IRtaskInfo() = default;
|
||||
virtual bool SerializeIRToProto() = 0;
|
||||
|
||||
protected:
|
||||
IRtaskInfo(TaskType task_type, TaskTmpType task_tmp_type, uint64_t stream_id)
|
||||
: task_type_(task_type), task_tmp_type_(task_tmp_type), stream_id_(stream_id) {}
|
||||
|
||||
public:
|
||||
uint64_t GetStreamId() const { return stream_id_; }
|
||||
TaskType GetTaskType() const { return task_type_; }
|
||||
TaskTmpType GetTaskTmpType() const { return task_tmp_type_; }
|
||||
|
||||
private:
|
||||
TaskType task_type_;
|
||||
TaskTmpType task_tmp_type_;
|
||||
uint64_t stream_id_ = 0;
|
||||
};
|
||||
|
||||
using IRtaskInfoPtr = std::shared_ptr<IRtaskInfo>;
|
||||
|
||||
class CceIRTaskInfo : public IRtaskInfo {
|
||||
public:
|
||||
CceIRTaskInfo(TaskType task_type, uint64_t stream_id, KernelContext k_ctx, std::string stub_func, uint32_t block_dim,
|
||||
std::vector<uint8_t> args, uint32_t args_size, std::vector<uint8_t> sm_desc,
|
||||
std::vector<uint8_t> flow_table)
|
||||
: IRtaskInfo(task_type, CCE_TMP_DEF, stream_id),
|
||||
k_ctx_(std::move(k_ctx)),
|
||||
stub_func_(std::move(stub_func)),
|
||||
block_dim_(block_dim),
|
||||
args_(std::move(args)),
|
||||
args_size_(args_size),
|
||||
sm_desc_(std::move(sm_desc)),
|
||||
flow_table_(std::move(flow_table)) {}
|
||||
~CceIRTaskInfo() override;
|
||||
bool SerializeIRToProto() override;
|
||||
|
||||
private:
|
||||
KernelContext k_ctx_;
|
||||
std::string stub_func_;
|
||||
uint32_t block_dim_ = 0;
|
||||
std::vector<uint8_t> args_;
|
||||
// uintptr_t args_addr_;
|
||||
uint32_t args_size_ = 0;
|
||||
std::vector<uint8_t> sm_desc_;
|
||||
std::vector<uint8_t> flow_table_;
|
||||
};
|
||||
|
||||
class TbeIRTaskInfo : public IRtaskInfo {
|
||||
public:
|
||||
TbeIRTaskInfo(TaskType task_type, uint64_t stream_id, std::string stub_func, uint32_t block_dim,
|
||||
std::vector<uint8_t> args, uint32_t args_size, std::vector<uint8_t> sm_desc,
|
||||
std::vector<uint8_t> meta_data, std::vector<uintptr_t> input_data_addrs,
|
||||
std::vector<uintptr_t> output_data_addrs, std::vector<uintptr_t> workspace_addrs)
|
||||
: IRtaskInfo(task_type, TBE_TMP_DEF, stream_id),
|
||||
stub_func_(std::move(stub_func)),
|
||||
block_dim_(block_dim),
|
||||
args_(std::move(args)),
|
||||
args_size_(args_size),
|
||||
sm_desc_(std::move(sm_desc)),
|
||||
meta_data_(std::move(meta_data)),
|
||||
input_data_addrs_(std::move(input_data_addrs)),
|
||||
output_data_addrs_(std::move(output_data_addrs)),
|
||||
workspace_addrs_(std::move(workspace_addrs)) {}
|
||||
~TbeIRTaskInfo() override;
|
||||
bool SerializeIRToProto() override;
|
||||
|
||||
private:
|
||||
std::string stub_func_;
|
||||
uint32_t block_dim_ = 0;
|
||||
std::vector<uint8_t> args_;
|
||||
uint32_t args_size_ = 0;
|
||||
std::vector<uint8_t> sm_desc_;
|
||||
// uintptr_t binary_;
|
||||
// uint32_t binary_size_;
|
||||
std::vector<uint8_t> meta_data_;
|
||||
std::vector<uintptr_t> input_data_addrs_;
|
||||
std::vector<uintptr_t> output_data_addrs_;
|
||||
std::vector<uintptr_t> workspace_addrs_;
|
||||
// std::vector<uint8_t> flow_table_;
|
||||
};
|
||||
|
||||
class AicpuIRTaskInfo : public IRtaskInfo {
|
||||
public:
|
||||
AicpuIRTaskInfo(TaskType task_type, uint64_t stream_id, std::string op_type, uint32_t flag,
|
||||
std::vector<uint32_t> input_data_types, std::vector<std::vector<size_t>> input_data_shapes,
|
||||
std::vector<uintptr_t> input_data_addrs, std::vector<uint32_t> output_data_types,
|
||||
std::vector<std::vector<size_t>> output_data_shapes, std::vector<uintptr_t> output_data_addrs,
|
||||
std::vector<uint8_t> node_def, std::vector<uint8_t> func_def)
|
||||
: IRtaskInfo(task_type, AICPU_TMP_DEF, stream_id),
|
||||
op_type_(std::move(op_type)),
|
||||
flag_(flag),
|
||||
input_data_types_(std::move(input_data_types)),
|
||||
input_data_shapes_(std::move(input_data_shapes)),
|
||||
input_data_addrs_(std::move(input_data_addrs)),
|
||||
output_data_types_(std::move(output_data_types)),
|
||||
output_data_shapes_(std::move(output_data_shapes)),
|
||||
output_data_addrs_(std::move(output_data_addrs)),
|
||||
node_def_(std::move(node_def)),
|
||||
func_def_(std::move(func_def)) {}
|
||||
~AicpuIRTaskInfo() override;
|
||||
bool SerializeIRToProto() override;
|
||||
|
||||
private:
|
||||
std::string op_type_;
|
||||
uint32_t flag_ = 0;
|
||||
std::vector<uint32_t> input_data_types_;
|
||||
std::vector<std::vector<size_t>> input_data_shapes_;
|
||||
std::vector<uintptr_t> input_data_addrs_;
|
||||
std::vector<uint32_t> output_data_types_;
|
||||
std::vector<std::vector<size_t>> output_data_shapes_;
|
||||
std::vector<uintptr_t> output_data_addrs_;
|
||||
std::vector<uint8_t> node_def_;
|
||||
std::vector<uint8_t> func_def_;
|
||||
};
|
||||
|
||||
class LabelIRTaskInfo : public IRtaskInfo {
|
||||
public:
|
||||
LabelIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t label_id)
|
||||
: IRtaskInfo(task_type, LABEL_TMP_DEF, stream_id), label_id_(label_id) {}
|
||||
~LabelIRTaskInfo() override {}
|
||||
bool SerializeIRToProto() override;
|
||||
|
||||
private:
|
||||
uint32_t label_id_ = 0;
|
||||
};
|
||||
|
||||
class EventIRTaskInfo : public IRtaskInfo {
|
||||
public:
|
||||
EventIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t event_id)
|
||||
: IRtaskInfo(task_type, EVENT_TMP_DEF, stream_id), event_id_(event_id) {}
|
||||
~EventIRTaskInfo() override {}
|
||||
bool SerializeIRToProto() override;
|
||||
|
||||
private:
|
||||
uint32_t event_id_ = 0;
|
||||
};
|
||||
|
||||
class HcclIRTaskInfo : public IRtaskInfo {
|
||||
public:
|
||||
HcclIRTaskInfo(TaskType task_type, uint64_t stream_id, std::string hccl_type, uintptr_t input_data_addr,
|
||||
uintptr_t output_data_addr, std::vector<uint8_t> workspace, int64_t workspace_num,
|
||||
std::vector<uint8_t> private_def, uintptr_t ops_kernel_store, int32_t count, int64_t root_id,
|
||||
int64_t op_type, int64_t data_type)
|
||||
: IRtaskInfo(task_type, HCCL_TMP_DEF, stream_id),
|
||||
hccl_type_(std::move(hccl_type)),
|
||||
input_data_addr_(input_data_addr),
|
||||
output_data_addr_(output_data_addr),
|
||||
workspace_(std::move(workspace)),
|
||||
workspace_num_(workspace_num),
|
||||
private_def_(std::move(private_def)),
|
||||
ops_kernel_store_(ops_kernel_store),
|
||||
count_(count),
|
||||
root_id_(root_id),
|
||||
op_type_(op_type),
|
||||
data_type_(data_type) {}
|
||||
~HcclIRTaskInfo() override;
|
||||
bool SerializeIRToProto() override;
|
||||
|
||||
private:
|
||||
std::string hccl_type_;
|
||||
uintptr_t input_data_addr_ = 0;
|
||||
uintptr_t output_data_addr_ = 0;
|
||||
std::vector<uint8_t> workspace_;
|
||||
int64_t workspace_num_ = 0;
|
||||
std::vector<uint8_t> private_def_;
|
||||
uintptr_t ops_kernel_store_ = 0;
|
||||
int32_t count_ = 0;
|
||||
int64_t root_id_ = 0;
|
||||
int64_t op_type_ = 0;
|
||||
int64_t data_type_ = 0;
|
||||
};
|
||||
|
||||
class ProfilerIRTaskInfo : public IRtaskInfo {
|
||||
public:
|
||||
ProfilerIRTaskInfo(TaskType task_type, uint64_t stream_id, uint64_t log_id, bool notify, uint32_t flat)
|
||||
: IRtaskInfo(task_type, PROFILER_TRACE_TMP_DEF, stream_id), log_id_(log_id), notify_(notify), flat_(flat) {}
|
||||
~ProfilerIRTaskInfo() override {}
|
||||
bool SerializeIRToProto() override;
|
||||
|
||||
private:
|
||||
uint64_t log_id_ = 0;
|
||||
bool notify_ = false;
|
||||
uint32_t flat_ = 0;
|
||||
};
|
||||
|
||||
class MemcpyAsyncIRTaskInfo : public IRtaskInfo {
|
||||
public:
|
||||
MemcpyAsyncIRTaskInfo(TaskType task_type, uint32_t stream_id, uint64_t dst, uint64_t dst_max, uint64_t src,
|
||||
uint64_t count, int64_t kind)
|
||||
: IRtaskInfo(task_type, MEMCPY_ASYNC_TMP_DEF, stream_id),
|
||||
dst_(dst),
|
||||
dst_max_(dst_max),
|
||||
src_(src),
|
||||
count_(count),
|
||||
kind_(kind) {}
|
||||
~MemcpyAsyncIRTaskInfo() override {}
|
||||
bool SerializeIRToProto() override;
|
||||
|
||||
private:
|
||||
uint64_t dst_ = 0;
|
||||
uint64_t dst_max_ = 0;
|
||||
uint64_t src_ = 0;
|
||||
uint64_t count_ = 0;
|
||||
uint32_t kind_ = 0;
|
||||
};
|
||||
|
||||
class StreamSwitchIRTaskInfo : public IRtaskInfo {
|
||||
public:
|
||||
StreamSwitchIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t true_stream_id, uintptr_t input_addr,
|
||||
uintptr_t value_addr, uint32_t cond, int64_t data_type)
|
||||
: IRtaskInfo(task_type, STREAM_SWITCH_TMP_DEF, stream_id),
|
||||
true_stream_id_(true_stream_id),
|
||||
input_addr_(input_addr),
|
||||
value_addr_(value_addr),
|
||||
cond_(cond),
|
||||
data_type_(data_type) {}
|
||||
~StreamSwitchIRTaskInfo() override {}
|
||||
bool SerializeIRToProto() override;
|
||||
|
||||
private:
|
||||
uint32_t true_stream_id_ = 0;
|
||||
uintptr_t input_addr_ = 0;
|
||||
uintptr_t value_addr_ = 0;
|
||||
uint32_t cond_ = 0;
|
||||
int64_t data_type_ = 0;
|
||||
};
|
||||
|
||||
class StreamActiveIRTaskInfo : public IRtaskInfo {
|
||||
public:
|
||||
StreamActiveIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t active_stream_id)
|
||||
: IRtaskInfo(task_type, STREAM_ACTIVE_TMP_DEF, stream_id), active_stream_id_(active_stream_id) {}
|
||||
~StreamActiveIRTaskInfo() override {}
|
||||
bool SerializeIRToProto() override;
|
||||
|
||||
private:
|
||||
uint32_t active_stream_id_ = 0;
|
||||
};
|
||||
}; // namespace generator
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_TASK_H_
|
|
@ -1,43 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/generator/utils/ir_model_util.h"
|
||||
namespace mindspore {
|
||||
namespace generator {
|
||||
IRModelUtil &IRModelUtil::GetInstance() {
|
||||
static IRModelUtil instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
void IRModelUtil::Init() {
|
||||
MS_LOG(INFO) << "IRModel init success";
|
||||
version_ = "defaultVersion";
|
||||
stream_num_ = 0;
|
||||
event_num_ = 0;
|
||||
batch_num_ = 0;
|
||||
memory_size_ = 0;
|
||||
weight_size_ = 0;
|
||||
var_size_ = 0;
|
||||
logic_mem_base_ = 0;
|
||||
logic_var_base_ = 0;
|
||||
logic_var_base_ = 0;
|
||||
priority_ = 0;
|
||||
is_enable_save_model_ = false;
|
||||
min_static_offset_ = 0;
|
||||
max_dynamic_offset_ = 0;
|
||||
}
|
||||
} // namespace generator
|
||||
} // namespace mindspore
|
|
@ -1,92 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PREDICT_GENERATOR_IR_IR_MODEL_UTIL_H_
|
||||
#define MINDSPORE_CCSRC_PREDICT_GENERATOR_IR_IR_MODEL_UTIL_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace generator {
|
||||
class IRModelUtil {
|
||||
public:
|
||||
static IRModelUtil &GetInstance();
|
||||
IRModelUtil(const IRModelUtil &) = delete;
|
||||
IRModelUtil &operator=(const IRModelUtil &) = delete;
|
||||
void Init();
|
||||
|
||||
void set_version(const std::string &version) { version_ = version; }
|
||||
void set_stream_num(uint32_t stream_num) { stream_num_ = stream_num; }
|
||||
void set_event_num(uint32_t event_num) { event_num_ = event_num; }
|
||||
void set_batch_num(uint32_t batch_num) { batch_num_ = batch_num; }
|
||||
void set_memory_size(uint32_t memory_size) { memory_size_ = memory_size; }
|
||||
void set_weight_size(uint32_t weight_size) { weight_size_ = weight_size; }
|
||||
void set_var_size(uint32_t var_size) { var_size_ = var_size; }
|
||||
void set_logic_mem_base(uint32_t logic_mem_base) { logic_mem_base_ = logic_mem_base; }
|
||||
void set_logic_weight_base(uint32_t logic_weight_base) { logic_weight_base_ = logic_weight_base; }
|
||||
void set_logic_var_base(uint32_t logic_var_base) { logic_var_base_ = logic_var_base; }
|
||||
void set_priority(uint32_t priority) { priority_ = priority; }
|
||||
void set_is_enable_save_model(bool is_enable_save_model) { is_enable_save_model_ = is_enable_save_model; }
|
||||
void set_min_static_offset(uint64_t min_static_offset) { min_static_offset_ = min_static_offset; }
|
||||
void set_max_dynamic_offset(uint64_t max_dynamic_offset) { max_dynamic_offset_ = max_dynamic_offset; }
|
||||
void set_max_mem_size(uint64_t max_mem_size) { max_mem_size_ = max_mem_size; }
|
||||
void set_irmodel_mem_base(uint8_t irmodel_mem_base) { irmodel_mem_base_ = irmodel_mem_base; }
|
||||
|
||||
std::string version() const { return version_; }
|
||||
uint32_t stream_num() const { return stream_num_; }
|
||||
uint32_t event_num() const { return event_num_; }
|
||||
uint32_t batch_num() const { return batch_num_; }
|
||||
uint64_t memory_size() const { return memory_size_; }
|
||||
uint64_t weight_size() const { return weight_size_; }
|
||||
uint64_t var_size() const { return var_size_; }
|
||||
uint64_t logic_mem_base() const { return logic_mem_base_; }
|
||||
uint64_t logic_weight_base() const { return logic_weight_base_; }
|
||||
uint64_t logic_var_base() const { return logic_var_base_; }
|
||||
uint32_t priority() const { return priority_; }
|
||||
bool is_enable_save_model() const { return is_enable_save_model_; }
|
||||
uint64_t min_static_offset() const { return min_static_offset_; }
|
||||
uint64_t max_dynamic_offset() const { return max_dynamic_offset_; }
|
||||
uint64_t max_mem_size() const { return max_mem_size_; }
|
||||
uint8_t irmodel_mem_base() const { return irmodel_mem_base_; }
|
||||
|
||||
private:
|
||||
IRModelUtil() = default;
|
||||
~IRModelUtil() = default;
|
||||
std::string version_;
|
||||
uint32_t stream_num_ = 0;
|
||||
uint32_t event_num_ = 0;
|
||||
uint32_t batch_num_ = 0;
|
||||
uint64_t memory_size_ = 0;
|
||||
uint64_t weight_size_ = 0;
|
||||
uint64_t var_size_ = 0;
|
||||
uint64_t logic_mem_base_ = 0;
|
||||
uint64_t logic_weight_base_ = 0;
|
||||
uint64_t logic_var_base_ = 0;
|
||||
uint32_t priority_ = 0;
|
||||
bool is_enable_save_model_ = false;
|
||||
uint64_t min_static_offset_ = 0;
|
||||
uint64_t max_dynamic_offset_ = 0;
|
||||
uint64_t max_mem_size_ = 0;
|
||||
uint8_t irmodel_mem_base_ = 0;
|
||||
};
|
||||
} // namespace generator
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_PREDICT_GENERATOR_IR_IR_MODEL_UTIL_H_
|
|
@ -1,69 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "predict/predict.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace mindspore {
|
||||
namespace predictmodel {
|
||||
void StepConvertGraph(const KernelGraphPtr &kernel_graph_ptr) {
|
||||
MS_LOG(INFO) << "start convert_graph step";
|
||||
// get kernel_graph. this graph can be origin or device, depends on which steps to persistence
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
bool save_ms_model = MsContext::GetInstance()->save_ms_model_flag();
|
||||
if (save_ms_model) {
|
||||
if (kernel_graph_ptr->inputs().empty()) {
|
||||
return;
|
||||
}
|
||||
// set convert_mode: convert cpu info or convert Davnici
|
||||
executor::Kernel2Ms::GetInstance().set_convert_mode(executor::kConvertCpuMode);
|
||||
// convert kernel_graph to sub_ms_graph
|
||||
bool ret = executor::Kernel2Ms::GetInstance().KernelGraph2MsGraph(kernel_graph_ptr);
|
||||
if (!ret) {
|
||||
MS_LOG(WARNING) << "convert to mindsporeGraph failed";
|
||||
} else {
|
||||
MS_LOG(INFO) << "convert to Graph success";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void StepConvertWeight(const std::vector<tensor::TensorPtr> &inputs) {
|
||||
MS_LOG(INFO) << "start convert_input step";
|
||||
// get all inputs tensor
|
||||
bool save_ms_model = MsContext::GetInstance()->save_ms_model_flag();
|
||||
std::string save_path = MsContext::GetInstance()->save_ms_model_path();
|
||||
if (save_ms_model) {
|
||||
if (inputs.empty()) {
|
||||
return;
|
||||
}
|
||||
MS_LOG(INFO) << "save ms model is true to path " << save_path;
|
||||
if (!executor::Kernel2Ms::GetInstance().KernelInput2MS(inputs)) {
|
||||
MS_LOG(WARNING) << "convert mindspore kernel input failed";
|
||||
}
|
||||
auto new_ms_graph_ptr = std::make_shared<mindspore::predict::GraphDefT>();
|
||||
bool ret = executor::Kernel2Ms::GetInstance().SaveDeviceModel(new_ms_graph_ptr, save_path);
|
||||
if (!ret) {
|
||||
MS_LOG(WARNING) << "convert to mindsporeGraph failed";
|
||||
} else {
|
||||
MS_LOG(INFO) << "save ms model success";
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace predictmodel
|
||||
} // namespace mindspore
|
|
@ -1,32 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PREDICT_H_
|
||||
#define MINDSPORE_CCSRC_PREDICT_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "backend/session/session_basic.h"
|
||||
#include "predict/converter/kernel2ms.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predictmodel {
|
||||
using KernelGraphPtr = std::shared_ptr<mindspore::session::KernelGraph>;
|
||||
void StepConvertGraph(const KernelGraphPtr &kernel_graph_ptr);
|
||||
void StepConvertWeight(const std::vector<tensor::TensorPtr> &inputs);
|
||||
} // namespace predictmodel
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_PREDICT_H_
|
|
@ -1,42 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
syntax = "proto3";
|
||||
import public "Graph_ir.proto";
|
||||
import public "ge_runtime_taskinfo.proto";
|
||||
package ge.model_runner;
|
||||
option cc_enable_arenas = true;
|
||||
|
||||
message ModelTaskDef {
|
||||
|
||||
string version = 1;
|
||||
|
||||
repeated TaskDef task = 10;
|
||||
|
||||
uint32 stream_num = 11;
|
||||
uint32 event_num = 12;
|
||||
uint32 batch_num_ = 13;
|
||||
|
||||
uint64 memory_size = 14;
|
||||
uint64 weight_size = 15;
|
||||
uint64 var_size_ = 16;
|
||||
|
||||
uint64 logic_mem_base_ = 17;
|
||||
uint64 logic_weight_base_ = 18;
|
||||
uint64 logic_var_base_ = 19;
|
||||
|
||||
uint32 priority_ = 20;
|
||||
}
|
|
@ -1,125 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package mindspore;
|
||||
|
||||
// Data type definition
|
||||
enum DataType {
|
||||
DT_UNDEFINED = 0;
|
||||
// Basic types.
|
||||
DT_BOOL = 1; // bool
|
||||
|
||||
DT_INT8 = 2; // int8_t
|
||||
DT_INT16 = 3; // int16_t
|
||||
DT_INT32 = 4; // int32_t
|
||||
DT_INT64 = 5; // int64_t
|
||||
|
||||
DT_UINT8 = 6; // uint8_t
|
||||
DT_UINT16 = 7; // uint16_t
|
||||
DT_UINT32 = 8; // uint32_t
|
||||
DT_UINT64 = 9; // uint64_t
|
||||
|
||||
DT_FLOAT16 = 10; // float 16
|
||||
DT_FLOAT32 = 11; // float 32
|
||||
DT_FLOAT64 = 12; // float 64
|
||||
|
||||
DT_STRING = 13; // string
|
||||
DT_TENSOR = 14; // tensor
|
||||
DT_GRAPH = 15; // graph
|
||||
|
||||
// list type
|
||||
DT_BOOLS = 16; // list of bool
|
||||
|
||||
DT_INTS8 = 17; // list of int8_t
|
||||
DT_INTS16 = 18; // list of int16_t
|
||||
DT_INTS32 = 19; // list of int32_t
|
||||
DT_INTS64 = 20; // list of int64_t
|
||||
|
||||
DT_UINTS8 = 21; // list of uint8_t
|
||||
DT_UINTS16 = 22; // list of uint16_t
|
||||
DT_UINTS32 = 23; // list of uint32_t
|
||||
DT_UINTS64 = 24; // list of uint64_t
|
||||
|
||||
DT_FLOATS16 = 25; // list of float16
|
||||
DT_FLOATS32 = 26; // list of float32
|
||||
DT_FLOATS64 = 27; // list of float64
|
||||
|
||||
DT_STRINGS = 28; // list of string
|
||||
DT_TENSORS = 29; // list of tensor
|
||||
DT_GRAPHS = 30; // list of graph
|
||||
|
||||
DT_TUPLE = 31; // tuple
|
||||
DT_LIST = 32; // list
|
||||
DT_DICT = 33; // dictionary
|
||||
|
||||
// other types
|
||||
DT_NONE = 34; // None
|
||||
DT_SYM_INST = 35; // Symbolic Key Instance
|
||||
|
||||
// type related type
|
||||
DT_BASE_INT = 36; // type generic int
|
||||
DT_BASE_UINT = 37; // type generate unsigned int
|
||||
DT_BASE_FLOAT = 38; // type generate float
|
||||
DT_TYPE = 39; // type type
|
||||
DT_ANYTHING = 40; // type anything
|
||||
};
|
||||
|
||||
enum MSConst {
|
||||
DEFAULT_REFCOUNT = 0;
|
||||
WEIGHT_REFCOUNT = 999;
|
||||
};
|
||||
|
||||
message TensorDef {
|
||||
DataType data_type = 1;
|
||||
|
||||
repeated int64 dims = 2;
|
||||
|
||||
string format = 3;
|
||||
string layout = 4;
|
||||
uint32 refCount = 5;
|
||||
uint64 offset = 6;
|
||||
uint64 size = 7;
|
||||
uint64 weight_size = 8;
|
||||
bytes data = 9;
|
||||
}
|
||||
|
||||
message OpDef {
|
||||
string name = 1;
|
||||
string type = 2;
|
||||
|
||||
string fwk_type = 3;
|
||||
string opAttr = 4;
|
||||
repeated int64 input_index = 5;
|
||||
repeated int64 output_index = 6;
|
||||
}
|
||||
|
||||
message GraphDef {
|
||||
string name = 1;
|
||||
|
||||
repeated int64 input_index = 2;
|
||||
|
||||
repeated int64 output_index = 3;
|
||||
uint64 mempool_size = 4;
|
||||
|
||||
repeated OpDef opdefs = 5;
|
||||
|
||||
repeated TensorDef alltensors = 6;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1,155 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package ge.model_runner;
|
||||
option cc_enable_arenas = true;
|
||||
|
||||
message TaskDef {
|
||||
enum TaskType {
|
||||
CCE = 0;
|
||||
TBE = 1;
|
||||
AICPU = 2;
|
||||
LABEL_SET = 3;
|
||||
LABEL_SWITCH = 4;
|
||||
LABEL_GOTO = 5;
|
||||
EVENT_RECORD = 6;
|
||||
EVENT_WAIT = 7;
|
||||
FUSION_START = 8;
|
||||
FUSION_END = 9;
|
||||
HCCL = 10;
|
||||
PROFILER_TRACE = 11;
|
||||
MEMCPY_ASYNC = 12;
|
||||
STREAM_SWITCH = 13;
|
||||
STREAM_ACTIVE = 14;
|
||||
// insert new task type here
|
||||
REVSERVED = 23;
|
||||
};
|
||||
|
||||
TaskType task_type = 1;
|
||||
uint64 stream_id = 2;
|
||||
oneof subclass {
|
||||
CceTaskDef cce_task_def = 3;
|
||||
TbeTaskDef tbe_task_def = 4;
|
||||
AicpuTaskDef aicpu_task_def = 5;
|
||||
LabelTaskDef label_task_def = 6;
|
||||
EventTaskDef event_task_def = 7;
|
||||
HcclTaskDef hccl_task_def = 8;
|
||||
ProfilerTaskDef profiler_task_def = 9;
|
||||
MemcpyAsyncTaskDef memcpy_async_task_def = 10;
|
||||
StreamSwitchTaskDef stream_switch_task_def = 11;
|
||||
StreamActiveTaskDef stream_active_task_def = 12;
|
||||
}
|
||||
}
|
||||
|
||||
message CceTaskDef {
|
||||
KernelContext kernel_context = 1;
|
||||
string stub_func = 2;
|
||||
uint32 block_dim = 3;
|
||||
bytes args = 4;
|
||||
uint32 args_size = 5;
|
||||
bytes sm_desc = 6;
|
||||
bytes flow_table = 7;
|
||||
}
|
||||
|
||||
message TbeTaskDef {
|
||||
string stub_func = 1;
|
||||
uint32 block_dim = 2;
|
||||
bytes args = 3;
|
||||
uint32 args_size = 4;
|
||||
bytes sm_desc = 5;
|
||||
bytes meta_data = 8;
|
||||
repeated uint64 input_addrs = 9;
|
||||
repeated uint64 output_addrs = 10;
|
||||
repeated uint64 workspace_addrs = 11;
|
||||
}
|
||||
|
||||
message AicpuTaskDef {
|
||||
string op_type = 1;
|
||||
uint32 flag = 2;
|
||||
repeated uint32 input_types = 3;
|
||||
repeated Shape input_shapes = 4;
|
||||
repeated uint64 input_addrs = 5;
|
||||
repeated uint32 output_types = 6;
|
||||
repeated Shape output_shapes = 7;
|
||||
repeated uint64 output_addrs = 8;
|
||||
bytes node_def = 9;
|
||||
bytes func_def = 10;
|
||||
}
|
||||
|
||||
message Shape {
|
||||
repeated uint32 shape = 1;
|
||||
}
|
||||
|
||||
message LabelTaskDef {
|
||||
uint32 label_id = 1;
|
||||
}
|
||||
|
||||
message EventTaskDef {
|
||||
uint32 event_id = 1;
|
||||
}
|
||||
|
||||
message HcclTaskDef {
|
||||
string hccl_type = 1;
|
||||
uint64 input_addr = 2;
|
||||
uint64 output_addr = 3;
|
||||
bytes workspace = 4;
|
||||
int64 workspace_num = 5;
|
||||
bytes private_def = 6;
|
||||
uint64 ops_kernel_store = 7;
|
||||
int32 count = 8;
|
||||
int64 root_id = 9;
|
||||
int64 op_type = 10;
|
||||
int64 data_type = 11;
|
||||
}
|
||||
|
||||
message ProfilerTaskDef {
|
||||
uint64 log_id = 1;
|
||||
bool notify = 2;
|
||||
uint32 flat = 3;
|
||||
}
|
||||
|
||||
message MemcpyAsyncTaskDef {
|
||||
uint64 dst = 1;
|
||||
uint64 dst_max = 2;
|
||||
uint64 src = 3;
|
||||
uint64 count = 4;
|
||||
uint32 kind = 5;
|
||||
}
|
||||
|
||||
message StreamSwitchTaskDef {
|
||||
uint32 true_stream_id = 1;
|
||||
uint64 input_addr = 2;
|
||||
uint64 value_addr = 3;
|
||||
int64 cond = 4;
|
||||
int64 data_type = 5;
|
||||
}
|
||||
|
||||
message StreamActiveTaskDef {
|
||||
uint32 active_stream_id = 1;
|
||||
}
|
||||
|
||||
message KernelContext {
|
||||
uint32 kernel_type = 1;
|
||||
uint32 op_id = 2;
|
||||
uint32 kernel_func_id = 3;
|
||||
uint32 op_index = 4;
|
||||
bool is_flowtable = 5;
|
||||
bytes args_offset = 6;
|
||||
uint32 args_count = 7;
|
||||
repeated uint32 origin_op_index = 8;
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
this is a dictory for predict including saving model &&& saving taskinfos.
|
|
@ -1 +0,0 @@
|
|||
this is a dictory for predict to gen fbs headers
|
|
@ -1,212 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
include "op.fbs";
|
||||
|
||||
namespace mindspore.predict;
|
||||
|
||||
enum MSCONST: int {
|
||||
WEIGHT_REFCOUNT = 999
|
||||
}
|
||||
|
||||
table QuantParam {
|
||||
scale: double;
|
||||
zeroPoint: int;
|
||||
min: double = 0;
|
||||
max: double = 0;
|
||||
narrowRange: bool = true;
|
||||
numBits: int = 8;
|
||||
}
|
||||
|
||||
table QuantParamArray {
|
||||
param: [QuantParam]; //pre-channel
|
||||
}
|
||||
|
||||
table TensorDef {
|
||||
// data type
|
||||
dataType: DataType;
|
||||
// shape
|
||||
dims: [int];
|
||||
format: Format;
|
||||
refCount: int;
|
||||
offset: int;
|
||||
data: [ubyte];
|
||||
}
|
||||
|
||||
union OpT {
|
||||
Concat,
|
||||
SoftMax,
|
||||
Activation,
|
||||
Conv2D,
|
||||
FusedBatchNorm,
|
||||
CaffeBatchNorm,
|
||||
BiasAdd,
|
||||
Pooling,
|
||||
DepthwiseConv2D,
|
||||
DeDepthwiseConv2D,
|
||||
Resize,
|
||||
DetectionPostProcess,
|
||||
FullConnection,
|
||||
Mean,
|
||||
DeConv2D,
|
||||
Scale,
|
||||
Reshape,
|
||||
Eltwise,
|
||||
NetOutput,
|
||||
Add,
|
||||
Sub,
|
||||
MatMul,
|
||||
StridedSlice,
|
||||
Power,
|
||||
Slice,
|
||||
Stack,
|
||||
Mul,
|
||||
RealDiv,
|
||||
Pad,
|
||||
Maximum,
|
||||
Minimum,
|
||||
CaffePReLU,
|
||||
LeakyReLU,
|
||||
ArgMax,
|
||||
ArgMin,
|
||||
Exp,
|
||||
CaffeCrop,
|
||||
Range,
|
||||
Rsqrt,
|
||||
ExpandDims,
|
||||
Tile,
|
||||
Cast,
|
||||
Shape,
|
||||
Nchw2Nhwc,
|
||||
Nhwc2Nchw,
|
||||
QuantDTypeCast,
|
||||
Split,
|
||||
Permute,
|
||||
FakeQuantWithMinMaxVars,
|
||||
Equal,
|
||||
Less,
|
||||
Greater,
|
||||
Min,
|
||||
Floor,
|
||||
Abs,
|
||||
Neg,
|
||||
Cos,
|
||||
Sin,
|
||||
Sqrt,
|
||||
Square,
|
||||
Constant,
|
||||
Log,
|
||||
Tan,
|
||||
Atan,
|
||||
Asin,
|
||||
Clip,
|
||||
Transpose,
|
||||
Squeeze,
|
||||
Unsqueeze,
|
||||
Upsample,
|
||||
Dropout,
|
||||
Broadcast,
|
||||
Lrn,
|
||||
Prelu,
|
||||
ZerosLike,
|
||||
TopK,
|
||||
SpaceToDepth,
|
||||
SpaceToBatch,
|
||||
SparseToDense,
|
||||
ReverseSequence,
|
||||
Rank,
|
||||
Gather,
|
||||
GatherNd,
|
||||
Fill,
|
||||
Elu,
|
||||
DepthToSpace,
|
||||
BatchToSpace,
|
||||
AddN,
|
||||
Ceil,
|
||||
EmbeddingLookup,
|
||||
EmbeddingLookupSparse,
|
||||
FloorDiv,
|
||||
FloorMod,
|
||||
L2Norm,
|
||||
LocalResponseNormalization,
|
||||
MatrixDiag,
|
||||
Reduce,
|
||||
Reverse,
|
||||
Round,
|
||||
Select,
|
||||
Scatter,
|
||||
Unique,
|
||||
Unstack,
|
||||
LogicalAnd,
|
||||
LogicalOr,
|
||||
LogicalXor,
|
||||
LogicalNot,
|
||||
OnnxInt8Quantize,
|
||||
OnnxInt8Dequantize,
|
||||
FakeQuantWithMinMax,
|
||||
FakeQuantWithMinMaxPerChannel,
|
||||
BatchNormFold,
|
||||
MulFold,
|
||||
AddFold,
|
||||
SquaredDifference
|
||||
}
|
||||
|
||||
enum QuantType: int {
|
||||
QUANT_NONE,
|
||||
AwareTrainning,
|
||||
WeightQuant,
|
||||
PostTraining
|
||||
}
|
||||
|
||||
enum FmkType: int {
|
||||
TF,
|
||||
CAFFE,
|
||||
ONNX,
|
||||
MS,
|
||||
TFLITE
|
||||
}
|
||||
|
||||
table OpDef {
|
||||
name: string;
|
||||
fmkType: FmkType;
|
||||
attr: OpT;
|
||||
inputIndex: [uint];
|
||||
outputIndex: [uint];
|
||||
quantType: QuantType = QUANT_NONE;
|
||||
quantParam: [QuantParamArray];
|
||||
}
|
||||
|
||||
table SubGraphDef {
|
||||
name: string;
|
||||
inputIndex: [uint];
|
||||
outputIndex: [uint];
|
||||
mempoolSize: uint;
|
||||
nodes: [OpDef];
|
||||
allTensors: [TensorDef]; // weight + input + output
|
||||
}
|
||||
|
||||
table MempoolCfg {
|
||||
size: uint;
|
||||
shiftFactor: uint;
|
||||
}
|
||||
|
||||
table GraphDef {
|
||||
name: string;
|
||||
mempoolCfg: MempoolCfg;
|
||||
subgraphs: [SubGraphDef];
|
||||
}
|
||||
|
||||
root_type GraphDef;
|
|
@ -1,699 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
namespace mindspore.predict;
|
||||
|
||||
enum ResizeMethod: byte {
|
||||
UNKNOW = -1,
|
||||
BILINEAR = 0,
|
||||
NEAREST_NEIGHBOR = 1
|
||||
}
|
||||
|
||||
enum DataType : int {
|
||||
DT_FLOAT = 0,
|
||||
DT_FLOAT16 = 1,
|
||||
DT_INT8 = 2,
|
||||
DT_INT32 = 3,
|
||||
DT_UINT8 = 4,
|
||||
DT_INT16 = 5,
|
||||
DT_UINT32 = 8,
|
||||
DT_INT64 = 9,
|
||||
DT_UINT16 = 10,
|
||||
DT_UNDEFINED = 16
|
||||
}
|
||||
|
||||
enum Format : int {
|
||||
NCHW = 0,
|
||||
NHWC,
|
||||
HWKC,
|
||||
HWCK,
|
||||
KCHW,
|
||||
CKHW,
|
||||
KHWC,
|
||||
CHWK,
|
||||
NC4HW4 = 100,
|
||||
NUM_OF_FORMAT
|
||||
}
|
||||
|
||||
enum ActivationType : byte {
|
||||
NO_ACTIVATION = 0,
|
||||
RELU = 1,
|
||||
SIGMOID = 2,
|
||||
RELU6 = 3,
|
||||
ELU = 4,
|
||||
LEAKY_RELU = 5,
|
||||
ABS = 6,
|
||||
RELU1 = 7,
|
||||
SOFTSIGN = 8,
|
||||
SOFTPLUS = 9,
|
||||
TANH = 10,
|
||||
SELU = 11,
|
||||
HSWISH = 12,
|
||||
HSIGMOID = 13,
|
||||
THRESHOLDRELU = 14,
|
||||
LINEAR = 15,
|
||||
UNKNOW = 16
|
||||
}
|
||||
|
||||
enum ReduceType : byte {
|
||||
REDUCE_MAX = 0,
|
||||
REDUCE_MEAN = 1,
|
||||
REDUCE_ALL = 2,
|
||||
REDUCE_ANY = 3,
|
||||
REDUCE_LOG_SUM_EXP = 4,
|
||||
REDUCE_PROD = 5,
|
||||
REDUCE_SUM = 6,
|
||||
UNKNOW = 7
|
||||
}
|
||||
|
||||
enum PoolMode : byte {
|
||||
MAX_POOLING = 0,
|
||||
MEAN_POOLING = 1,
|
||||
}
|
||||
|
||||
enum EltwiseMode : byte {
|
||||
PROD = 0,
|
||||
SUM = 1,
|
||||
MAXIMUM = 2,
|
||||
UNKNOW = 3
|
||||
}
|
||||
|
||||
enum PadMode : byte {
|
||||
NOTSET = 0,
|
||||
SAME = 1,
|
||||
VALID = 2,
|
||||
CAFFE = 4
|
||||
}
|
||||
|
||||
enum RoundMode : byte {
|
||||
FLOOR = 0,
|
||||
CEIL = 1
|
||||
}
|
||||
|
||||
enum PaddingMode : byte {
|
||||
CONSTANT = 0,
|
||||
REFLECT = 1,
|
||||
SYMMETRIC = 2,
|
||||
MODE_RESERVED = 3
|
||||
}
|
||||
|
||||
table Pad {
|
||||
paddingmode: PaddingMode;
|
||||
paddings: [int];
|
||||
}
|
||||
|
||||
table Maximum {
|
||||
}
|
||||
|
||||
table Minimum {
|
||||
}
|
||||
|
||||
table Concat {
|
||||
axis: int;
|
||||
n: int;
|
||||
}
|
||||
|
||||
table SoftMax {
|
||||
axis: [int];
|
||||
}
|
||||
|
||||
table Activation {
|
||||
type: ActivationType = 0;
|
||||
}
|
||||
|
||||
table Conv2D {
|
||||
format: Format = 0;
|
||||
group: int;
|
||||
channelIn: int;
|
||||
channelOut: int;
|
||||
kernelW: int;
|
||||
kernelH: int;
|
||||
strideW: int;
|
||||
strideH: int;
|
||||
padMode: PadMode;
|
||||
padUp: int;
|
||||
padDown: int;
|
||||
padLeft: int;
|
||||
padRight: int;
|
||||
dilateW: int;
|
||||
dilateH: int;
|
||||
hasBias: bool = false;
|
||||
activationType: ActivationType = 0;
|
||||
}
|
||||
|
||||
table FusedBatchNorm {
|
||||
epsilon: float = 0.00001; // eg. epsilon=0.001
|
||||
momentum: float = 0.9;
|
||||
spatial: int = 1;
|
||||
}
|
||||
|
||||
table CaffeBatchNorm {
|
||||
epsilon: float; // eg. epsilon=0.001
|
||||
}
|
||||
|
||||
table Shape {
|
||||
}
|
||||
|
||||
table Nchw2Nhwc {
|
||||
|
||||
}
|
||||
|
||||
table Nhwc2Nchw {
|
||||
|
||||
}
|
||||
|
||||
table FakeQuantWithMinMaxVars {
|
||||
narrowRange: bool;
|
||||
numBits: int;
|
||||
}
|
||||
|
||||
table BiasAdd {
|
||||
axis: [int];
|
||||
}
|
||||
|
||||
table Pooling {
|
||||
format: Format = 0;
|
||||
poolingMode: PoolMode;
|
||||
global: bool = false;
|
||||
windowW: int;
|
||||
windowH: int;
|
||||
strideW: int;
|
||||
strideH: int;
|
||||
padMode: PadMode;
|
||||
padUp: int;
|
||||
padDown: int;
|
||||
padLeft: int;
|
||||
padRight: int;
|
||||
roundMode: RoundMode;
|
||||
}
|
||||
|
||||
table DepthwiseConv2D {
|
||||
format: Format = 0;
|
||||
channelIn: int;
|
||||
channelMultiplier: int;
|
||||
kernelW: int;
|
||||
kernelH: int;
|
||||
strideW: int;
|
||||
strideH: int;
|
||||
padMode: PadMode;
|
||||
padUp: int;
|
||||
padDown: int;
|
||||
padLeft: int;
|
||||
padRight: int;
|
||||
dilateW: int;
|
||||
dilateH: int;
|
||||
hasBias: bool = false;
|
||||
activationType: ActivationType = 0;
|
||||
}
|
||||
|
||||
table DeDepthwiseConv2D {
|
||||
format: Format = 0;
|
||||
channelIn: int;
|
||||
channelMultiplier: int;
|
||||
kernelW: int;
|
||||
kernelH: int;
|
||||
strideW: int;
|
||||
strideH: int;
|
||||
padMode: PadMode;
|
||||
padUp: int;
|
||||
padDown: int;
|
||||
padLeft: int;
|
||||
padRight: int;
|
||||
dilateW: int;
|
||||
dilateH: int;
|
||||
hasBias: bool = false;
|
||||
activationType: ActivationType = 0;
|
||||
}
|
||||
|
||||
|
||||
table Resize {
|
||||
format: Format = 0;
|
||||
method: ResizeMethod;
|
||||
newHeight: long;
|
||||
newWidth: long;
|
||||
alignCorners: bool = false;
|
||||
preserveAspectRatio: bool = false;
|
||||
}
|
||||
|
||||
table DetectionPostProcess {
|
||||
format: Format = 0;
|
||||
inputSize: int;
|
||||
hScale: float;
|
||||
wScale: float;
|
||||
xScale: float;
|
||||
yScale: float;
|
||||
NmsIouThreshold: float;
|
||||
NmsScoreThreshold: float;
|
||||
MaxDetections: long;
|
||||
DetectionsPreClass: long;
|
||||
MaxClassesPreDetection: long;
|
||||
NumClasses: long;
|
||||
UseRegularNms: bool;
|
||||
}
|
||||
|
||||
table FullConnection {
|
||||
hasBias: bool;
|
||||
axis: int;
|
||||
}
|
||||
|
||||
// Mean(input_tensor, axis, keep_dims)
|
||||
table Mean {
|
||||
axis: [int];
|
||||
keepDims: bool = false;
|
||||
}
|
||||
|
||||
table DeConv2D {
|
||||
format: Format = 0;
|
||||
group: int;
|
||||
channelIn: int;
|
||||
channelOut: int;
|
||||
kernelW: int;
|
||||
kernelH: int;
|
||||
strideW: int;
|
||||
strideH: int;
|
||||
padMode: PadMode;
|
||||
padUp: int;
|
||||
padDown: int;
|
||||
padLeft: int;
|
||||
padRight: int;
|
||||
dilateW: int;
|
||||
dilateH: int;
|
||||
hasBias: bool = false;
|
||||
activationType: ActivationType = 0;
|
||||
}
|
||||
|
||||
table Scale {
|
||||
format: Format = 0;
|
||||
}
|
||||
|
||||
table Eltwise {
|
||||
mode: EltwiseMode;
|
||||
}
|
||||
|
||||
table Add {
|
||||
}
|
||||
|
||||
table Sub {
|
||||
}
|
||||
|
||||
table Mul {
|
||||
}
|
||||
|
||||
table RealDiv {
|
||||
}
|
||||
|
||||
table Rsqrt {
|
||||
}
|
||||
|
||||
table Equal {
|
||||
}
|
||||
|
||||
table Less {
|
||||
}
|
||||
|
||||
table Greater {
|
||||
}
|
||||
|
||||
table Min {
|
||||
}
|
||||
|
||||
table Slice {
|
||||
format: Format = 0;
|
||||
begin: [int];
|
||||
size: [int];
|
||||
}
|
||||
|
||||
table Floor {
|
||||
}
|
||||
|
||||
table Abs {
|
||||
}
|
||||
|
||||
table Neg {
|
||||
}
|
||||
|
||||
table Exp {
|
||||
}
|
||||
|
||||
table Cos {
|
||||
}
|
||||
|
||||
table Sin {
|
||||
}
|
||||
|
||||
table Sqrt {
|
||||
}
|
||||
|
||||
table Square {
|
||||
}
|
||||
|
||||
table Ceil {
|
||||
}
|
||||
|
||||
table Log {
|
||||
}
|
||||
|
||||
table Tan {
|
||||
}
|
||||
|
||||
table Atan {
|
||||
}
|
||||
|
||||
table Asin {
|
||||
}
|
||||
|
||||
table Reshape {
|
||||
format: Format = 0;
|
||||
shape: [long];
|
||||
}
|
||||
|
||||
table Power {
|
||||
power: float;
|
||||
scale: float;
|
||||
shift: float;
|
||||
}
|
||||
|
||||
table ArgMax {
|
||||
axis: int;
|
||||
outMaxValue: bool;
|
||||
topK: int = 1;
|
||||
keepDims: bool;
|
||||
axisType: int;
|
||||
}
|
||||
|
||||
table ArgMin {
|
||||
axis: int;
|
||||
outMaxValue: bool;
|
||||
topK: int = 1;
|
||||
keepDims: bool;
|
||||
axisType: int;
|
||||
}
|
||||
|
||||
table NetOutput {
|
||||
}
|
||||
|
||||
table MatMul {
|
||||
transposeA : bool = false;
|
||||
transposeB : bool = false;
|
||||
}
|
||||
|
||||
table CaffePReLU {
|
||||
channelShared : bool = false;
|
||||
}
|
||||
|
||||
table LeakyReLU {
|
||||
negativeSlope: float;
|
||||
}
|
||||
|
||||
table StridedSlice {
|
||||
beginMask: int;
|
||||
endMask: int;
|
||||
ellipsisMask: int;
|
||||
newAxisMask: int;
|
||||
shrinkAxisMask: int;
|
||||
begin: [int];
|
||||
end: [int];
|
||||
stride: [int];
|
||||
isScale: [int];
|
||||
}
|
||||
|
||||
table Stack {
|
||||
axis: int;
|
||||
n: int;
|
||||
isScale: [int];
|
||||
}
|
||||
|
||||
table Range {
|
||||
dType: DataType;
|
||||
start: int;
|
||||
limit: int;
|
||||
delta: int;
|
||||
}
|
||||
|
||||
table ExpandDims {
|
||||
dim: int;
|
||||
}
|
||||
|
||||
table Tile {
|
||||
multiples: [int];
|
||||
}
|
||||
|
||||
table Cast {
|
||||
srcT: int;
|
||||
dstT: int;
|
||||
}
|
||||
|
||||
table QuantDTypeCast {
|
||||
srcT: DataType;
|
||||
dstT: DataType;
|
||||
}
|
||||
|
||||
table Split {
|
||||
numberSplit: int;
|
||||
sizeSplits: [int];
|
||||
splitDim: int;
|
||||
}
|
||||
|
||||
table CaffeCrop {
|
||||
axis : long;
|
||||
offsets : [long];
|
||||
}
|
||||
|
||||
table Permute {
|
||||
order: [long];
|
||||
}
|
||||
|
||||
table Clip {
|
||||
max: float;
|
||||
min: float;
|
||||
}
|
||||
|
||||
table Constant {
|
||||
}
|
||||
|
||||
|
||||
table Elu {
|
||||
alpha: float = 1.0;
|
||||
}
|
||||
|
||||
table Broadcast {
|
||||
}
|
||||
|
||||
table Lrn {
|
||||
alpha: float = 0.0001;
|
||||
beta: float = 0.75;
|
||||
bias: float = 1.0;
|
||||
size: int;
|
||||
}
|
||||
|
||||
enum ReduceMode : byte {
|
||||
ReduceMean = 0,
|
||||
ReduceMax = 1,
|
||||
ReduceMin = 2,
|
||||
ReduceProd = 3,
|
||||
ReduceSum = 4,
|
||||
ReduceSumSquare = 5
|
||||
}
|
||||
|
||||
table Reduce {
|
||||
axes: [int];
|
||||
keepDims: int;
|
||||
mode: ReduceMode;
|
||||
}
|
||||
|
||||
table Prelu {
|
||||
slope: [float];
|
||||
}
|
||||
|
||||
table Transpose {
|
||||
perm: [int];
|
||||
conjugate: bool = false;
|
||||
}
|
||||
|
||||
table Squeeze {
|
||||
axis: [int];
|
||||
}
|
||||
|
||||
table Unsqueeze {
|
||||
axis: [int];
|
||||
}
|
||||
|
||||
table Upsample {
|
||||
mode: string;
|
||||
scales: [float];
|
||||
}
|
||||
|
||||
table Dropout {
|
||||
ratio : float = 0.5;
|
||||
}
|
||||
|
||||
table LocalResponseNormalization {
|
||||
depth_radius: int;
|
||||
bias: float;
|
||||
alpha: float;
|
||||
beta: float;
|
||||
}
|
||||
|
||||
table ZerosLike {
|
||||
}
|
||||
|
||||
table TopK {
|
||||
k : int;
|
||||
sorted : bool = true;
|
||||
}
|
||||
|
||||
table SpaceToDepth {
|
||||
blockSize : int;
|
||||
format: Format = 0;
|
||||
}
|
||||
|
||||
table SpaceToBatch {
|
||||
blockShape : [int];
|
||||
paddings : [int];
|
||||
}
|
||||
|
||||
table SparseToDense {
|
||||
validateIndices: bool;
|
||||
}
|
||||
|
||||
table ReverseSequence {
|
||||
seqAxis: int;
|
||||
batchAxis: int;
|
||||
}
|
||||
|
||||
table Rank {
|
||||
}
|
||||
|
||||
|
||||
table Gather {
|
||||
axis: int;
|
||||
batchDims: int;
|
||||
}
|
||||
|
||||
table GatherNd {
|
||||
batchDims: int;
|
||||
}
|
||||
|
||||
table Fill {
|
||||
dims: [int];
|
||||
}
|
||||
|
||||
table DepthToSpace {
|
||||
blockSize: int;
|
||||
format: Format = 0;
|
||||
}
|
||||
|
||||
|
||||
table BatchToSpace {
|
||||
blockShape: [int];
|
||||
crops: [int];
|
||||
}
|
||||
|
||||
table AddN {
|
||||
N: int;
|
||||
}
|
||||
|
||||
|
||||
table EmbeddingLookup {
|
||||
ids: [int];
|
||||
maxNorm: float;
|
||||
}
|
||||
|
||||
table EmbeddingLookupSparse {
|
||||
spIds: [int];
|
||||
spWeights: [float];
|
||||
//combiner: Combiner=0;
|
||||
maxNortm: float;
|
||||
}
|
||||
|
||||
table FloorDiv {
|
||||
}
|
||||
|
||||
table FloorMod {
|
||||
}
|
||||
|
||||
table L2Norm {
|
||||
axis: [int];
|
||||
epsilon: float;
|
||||
}
|
||||
|
||||
table LogicalAnd {
|
||||
}
|
||||
|
||||
table LogicalOr {
|
||||
}
|
||||
|
||||
table LogicalXor {
|
||||
}
|
||||
|
||||
table LogicalNot {
|
||||
}
|
||||
|
||||
table MatrixDiag {
|
||||
k: int;
|
||||
numRows: int;
|
||||
numCols: int;
|
||||
paddingValue: float;
|
||||
}
|
||||
|
||||
table Select {
|
||||
}
|
||||
|
||||
table TfReduce {
|
||||
type: ReduceType = 7;
|
||||
}
|
||||
|
||||
table Reverse {
|
||||
axis: [int];
|
||||
}
|
||||
|
||||
table Round {
|
||||
}
|
||||
|
||||
table Scatter {
|
||||
}
|
||||
|
||||
table Unique {
|
||||
}
|
||||
|
||||
table Unstack {
|
||||
num: int;
|
||||
axis: int;
|
||||
}
|
||||
|
||||
table OnnxInt8Quantize {
|
||||
}
|
||||
|
||||
table OnnxInt8Dequantize {
|
||||
}
|
||||
|
||||
table FakeQuantWithMinMax {
|
||||
}
|
||||
|
||||
table FakeQuantWithMinMaxPerChannel {
|
||||
}
|
||||
|
||||
table BatchNormFold {
|
||||
}
|
||||
|
||||
table MulFold {
|
||||
}
|
||||
|
||||
table AddFold {
|
||||
}
|
||||
|
||||
table SquaredDifference {
|
||||
}
|
|
@ -24,7 +24,6 @@
|
|||
#include "common/utils.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "runtime/device/kernel_adjust.h"
|
||||
#include "predict/generator/utils/ir_model_util.h"
|
||||
#include "backend/optimizer/common/helper.h"
|
||||
#include "utils/utils.h"
|
||||
|
||||
|
@ -53,13 +52,6 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr)
|
|||
GetStreamRelations();
|
||||
PrintStreamGroups();
|
||||
FindEventRelations(graph_ptr);
|
||||
|
||||
// Get info for D Model
|
||||
AscendResourceMng &resource_manager = AscendResourceMng::GetInstance();
|
||||
generator::IRModelUtil::GetInstance().set_event_num(resource_manager.get_cur_event_num());
|
||||
generator::IRModelUtil::GetInstance().set_stream_num(resource_manager.get_cur_stream_num());
|
||||
// Init to 1,temporarily
|
||||
generator::IRModelUtil::GetInstance().set_batch_num(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -24,7 +24,6 @@
|
|||
#include "runtime/device/device_address.h"
|
||||
#include "ir/tensor.h"
|
||||
#include "utils/convert_utils.h"
|
||||
#include "predict/generator/utils/ir_model_util.h"
|
||||
#ifdef ENABLE_DUMP_E2E
|
||||
#include "debug/e2e_dump.h"
|
||||
#endif
|
||||
|
|
|
@ -50,8 +50,6 @@ std::map<std::string, MsBackendPolicy> MsContext::policy_map_ = {{"ge", kMsBacke
|
|||
MsContext::MsContext(const std::string &policy, const std::string &target) {
|
||||
save_graphs_flag_ = false;
|
||||
save_graphs_path_ = ".";
|
||||
save_ms_model_flag_ = false;
|
||||
save_ms_model_path_ = "./model.ms";
|
||||
enable_dump_ = false;
|
||||
save_dump_path_ = ".";
|
||||
tsd_ref_ = 0;
|
||||
|
|
|
@ -102,12 +102,6 @@ class MsContext {
|
|||
void set_enable_mem_reuse(bool enable_mem_reuse) { enable_mem_reuse_ = enable_mem_reuse; }
|
||||
bool enable_mem_reuse() const { return enable_mem_reuse_; }
|
||||
|
||||
bool save_ms_model_flag() const { return save_ms_model_flag_; }
|
||||
void set_save_ms_model_flag(bool save_ms_model_flag) { save_ms_model_flag_ = save_ms_model_flag; }
|
||||
|
||||
std::string save_ms_model_path() const { return save_ms_model_path_; }
|
||||
void set_save_ms_model_path(const std::string &save_ms_model_path) { save_ms_model_path_ = save_ms_model_path; }
|
||||
|
||||
void set_enable_gpu_summary(bool enable_gpu_summary) { enable_gpu_summary_ = enable_gpu_summary; }
|
||||
bool enable_gpu_summary() const { return enable_gpu_summary_; }
|
||||
|
||||
|
@ -190,8 +184,6 @@ class MsContext {
|
|||
bool enable_reduce_precision_;
|
||||
bool enable_loop_sink_;
|
||||
bool enable_mem_reuse_;
|
||||
std::string save_ms_model_path_;
|
||||
bool save_ms_model_flag_;
|
||||
bool enable_gpu_summary_;
|
||||
bool enable_dump_;
|
||||
std::string save_dump_path_;
|
||||
|
|
|
@ -234,22 +234,6 @@ class _Context:
|
|||
if not success:
|
||||
raise RuntimeError("Device id set failed!!!")
|
||||
|
||||
@property
|
||||
def save_ms_model(self):
|
||||
return self._context_handle.get_save_ms_model_flag()
|
||||
|
||||
@save_ms_model.setter
|
||||
def save_ms_model(self, save_ms_model_flag):
|
||||
self._context_handle.set_save_ms_model_flag(save_ms_model_flag)
|
||||
|
||||
@property
|
||||
def save_ms_model_path(self):
|
||||
return self._context_handle.get_save_ms_model_path()
|
||||
|
||||
@save_ms_model_path.setter
|
||||
def save_ms_model_path(self, save_ms_model_path):
|
||||
self._context_handle.set_save_ms_model_path(save_ms_model_path)
|
||||
|
||||
@property
|
||||
def enable_auto_mixed_precision(self):
|
||||
return self._context_handle.get_auto_mixed_precision_flag()
|
||||
|
@ -541,7 +525,7 @@ def reset_auto_parallel_context():
|
|||
|
||||
|
||||
@args_type_check(mode=int, precompile_only=bool, device_target=str, device_id=int, save_graphs=bool,
|
||||
save_graphs_path=str, save_ms_model=bool, save_ms_model_path=str, enable_dump=bool,
|
||||
save_graphs_path=str, enable_dump=bool,
|
||||
save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str,
|
||||
enable_profiling=bool, profiling_options=str, enable_auto_mixed_precision=bool,
|
||||
enable_graph_kernel=bool, check_bprop=bool, max_device_memory=str, print_file_path=str,
|
||||
|
@ -569,8 +553,6 @@ def set_context(**kwargs):
|
|||
device_id (int): Id of target device, the value must be in [0, device_num_per_host-1],
|
||||
while device_num_per_host should no more than 4096. Default: 0.
|
||||
save_graphs (bool): Whether to save graphs. Default: False.
|
||||
save_ms_model (bool): Whether to save lite model converted by graph. Default: False.
|
||||
save_ms_model_path (str): Path to save converted lite model. Default: "."
|
||||
save_graphs_path (str): Path to save graphs. Default: "."
|
||||
enable_auto_mixed_precision (bool): Whether to enable auto mixed precision. Default: True.
|
||||
enable_graph_kernel (bool): Whether to enable composition of basic primitives. These primitives would be
|
||||
|
@ -615,7 +597,6 @@ def set_context(**kwargs):
|
|||
>>> context.set_context(device_id=0)
|
||||
>>> context.set_context(save_graphs=True, save_graphs_path="./model.ms")
|
||||
>>> context.set_context(enable_reduce_precision=True)
|
||||
>>> context.set_context(save_ms_model=True, save_ms_model_path=".")
|
||||
>>> context.set_context(enable_dump=True, save_dump_path=".")
|
||||
>>> context.set_context(reserve_class_name_in_scope=True)
|
||||
>>> context.set_context(variable_memory_max_size="6GB")
|
||||
|
|
|
@ -20,7 +20,6 @@ from threading import Thread, Lock
|
|||
import numpy as np
|
||||
|
||||
import mindspore.nn as nn
|
||||
import mindspore.context as context
|
||||
from mindspore import log as logger
|
||||
from mindspore.train.checkpoint_pb2 import Checkpoint
|
||||
from mindspore.train.print_pb2 import Print
|
||||
|
@ -457,18 +456,17 @@ def export(net, *inputs, file_name, file_format='GEIR'):
|
|||
net (Cell): MindSpore network.
|
||||
inputs (Tensor): Inputs of the `net`.
|
||||
file_name (str): File name of model to export.
|
||||
file_format (str): MindSpore currently supports 'GEIR', 'ONNX' 'LITE' and 'BINARY' format for exported model.
|
||||
file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'BINARY' format for exported model.
|
||||
|
||||
- GEIR: Graph Engine Intermidiate Representation. An intermidiate representation format of
|
||||
Ascend model.
|
||||
- ONNX: Open Neural Network eXchange. An open format built to represent machine learning models.
|
||||
- LITE: Huawei model format for mobile. A lite model only for the MindSpore Lite
|
||||
- BINARY: Binary format for model. An intermidiate representation format for models.
|
||||
"""
|
||||
logger.info("exporting model file:%s format:%s.", file_name, file_format)
|
||||
check_input_data(*inputs, data_class=Tensor)
|
||||
|
||||
supported_formats = ['GEIR', 'ONNX', 'LITE', 'BINARY']
|
||||
supported_formats = ['GEIR', 'ONNX', 'BINARY']
|
||||
if file_format not in supported_formats:
|
||||
raise ValueError(f'Illegal file format {file_format}, it must be one of {supported_formats}')
|
||||
# switch network mode to infer when it is training
|
||||
|
@ -497,9 +495,6 @@ def export(net, *inputs, file_name, file_format='GEIR'):
|
|||
with open(file_name, 'wb') as f:
|
||||
os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR)
|
||||
f.write(onnx_stream)
|
||||
elif file_format == 'LITE': # file_format is 'LITE'
|
||||
context.set_context(save_ms_model=True, save_ms_model_path=file_name)
|
||||
net(*inputs)
|
||||
# restore network training mode
|
||||
if is_training:
|
||||
net.set_train(mode=True)
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
# git ignore file for predict
|
||||
|
||||
#flatbuf generated file
|
||||
schema/*_generated.h
|
||||
schema/inner/*_generated.h
|
||||
module/tvm_module/lite/include/*_generated.h
|
||||
|
||||
#tvm fbs files
|
||||
module/tvm_module/lite/tune/convert/*.fbs
|
||||
|
||||
#doTest dir
|
||||
test/doTest/
|
||||
|
||||
|
|
@ -1,79 +0,0 @@
|
|||
cmake_minimum_required(VERSION 3.12.1)
|
||||
project (mindspore-predict)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
|
||||
set(CMAKE_BUILD_TYPE "Release")
|
||||
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s")
|
||||
|
||||
option(ENABLE_ASAN "Enable Google Sanitizer to find memory bugs" OFF)
|
||||
option(ENABLE_PREDICT_ARM64 "predict arm64" OFF)
|
||||
option(ENABLE_PREDICT_ARM32 "predict arm32" OFF)
|
||||
|
||||
set(PREDICT_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
set(PREDICT_BUILD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/build)
|
||||
set(3RD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../third_party)
|
||||
set(DOTEST_DIR ${PREDICT_BUILD_DIR}/test/doTest)
|
||||
|
||||
include_directories(${3RD_DIR})
|
||||
include_directories(${3RD_DIR}/flatbuffers/include/)
|
||||
include_directories(${3RD_DIR}/protobuf/build/include/)
|
||||
include_directories(${3RD_DIR}/googletest/googletest/include/)
|
||||
include_directories(${3RD_DIR}/googletest/googlemock/include/)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/module/tvm_kernel/lite/include/)
|
||||
include_directories(${PREDICT_DIR}/module/tvm_kernel/incubator-tvm/3rdparty/dlpack/include)
|
||||
include_directories(common)
|
||||
|
||||
if(ENABLE_PREDICT_ARM64 OR ENABLE_PREDICT_ARM32)
|
||||
message("*********************predict compile arm*********************")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_USE_ARM=1")
|
||||
set(ANDROID_NDK $ENV{ANDROID_NDK})
|
||||
if(ANDROID_NDK)
|
||||
add_subdirectory(${3RD_DIR}/googletest ${CMAKE_BINARY_DIR}/googletest)
|
||||
link_directories(${PREDICT_BUILD_DIR}/googletest/googlemock/gtest)
|
||||
|
||||
add_subdirectory(${3RD_DIR}/securec ${CMAKE_BINARY_DIR}/securec)
|
||||
link_directories(${PREDICT_BUILD_DIR}/securec/src)
|
||||
else()
|
||||
message(FATAL_ERROR "please set ANDROID_NDK in environment variable for example: export ANDROID_NDK=/root/usr/android-ndk-r16b/")
|
||||
endif()
|
||||
|
||||
include_directories(${ANDROID_SYSROOT}/usr/include/)
|
||||
if(${ANDROID_ABI} STREQUAL "armeabi-v7a")
|
||||
include_directories(${ANDROID_SYSROOT}/usr/include/arm-linux-androideabi)
|
||||
elseif(${ANDROID_ABI} STREQUAL "arm64-v8a")
|
||||
include_directories(${ANDROID_SYSROOT}/usr/include/aarch64-linux-android)
|
||||
else()
|
||||
include_directories(${ANDROID_SYSROOT}/usr/include/arm-linux-androideabi)
|
||||
endif()
|
||||
|
||||
else()
|
||||
# include libsecurec.a x86
|
||||
message("*********************predict compile x86*********************")
|
||||
if(EXISTS "${PREDICT_DIR}/../build/mindspore/securec/src/libsecurec.a")
|
||||
link_directories(${PREDICT_DIR}/../build/mindspore/securec/src)
|
||||
else()
|
||||
include(${PREDICT_DIR}/../cmake/dependency_securec.cmake)
|
||||
link_directories(${PREDICT_BUILD_DIR}/securec/src)
|
||||
endif()
|
||||
|
||||
# include libgtest.so x86
|
||||
if(EXISTS "${PREDICT_DIR}/../build/googletest/googlemock/gtest/libgtest.so")
|
||||
link_directories(${PREDICT_DIR}/../build/googletest/googlemock/gtest)
|
||||
else()
|
||||
include(${PREDICT_DIR}/../cmake/dependency_gtest.cmake)
|
||||
link_directories(${PREDICT_BUILD_DIR}/googletest/googlemock/gtest)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (CODE_COVERAGE)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0")
|
||||
endif()
|
||||
|
||||
add_subdirectory(common)
|
||||
add_subdirectory(src)
|
||||
add_subdirectory(benchmark)
|
||||
add_subdirectory(test)
|
||||
add_subdirectory(module)
|
|
@ -1,38 +0,0 @@
|
|||
|
||||
cmake_minimum_required(VERSION 3.12)
|
||||
project(benchmark)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_BUILD_TYPE "Debug")
|
||||
|
||||
#include 3rd
|
||||
include_directories(${3RD_DIR}/protobuf/build/include)
|
||||
include_directories(${3RD_DIR}/securec/include)
|
||||
include_directories(${3RD_DIR}/flatbuffers/include)
|
||||
include_directories(${3RD_DIR}/googletest/googletest/include)
|
||||
include_directories(${3RD_DIR}/googletest/googlemock/include)
|
||||
include_directories(${PREDICT_DIR}/module/tvm_kernel/incubator-tvm/3rdparty/dlpack/include)
|
||||
include_directories(${3RD_DIR}/flatbuffers/include)
|
||||
include_directories(${3RD_DIR}/securec/include)
|
||||
|
||||
#include ms
|
||||
include_directories(.)
|
||||
include_directories(${PREDICT_DIR})
|
||||
|
||||
set(COMMON_SRC ${PREDICT_DIR}/common/flag_parser.cc
|
||||
${PREDICT_DIR}/common/file_utils.cc
|
||||
${PREDICT_DIR}/common/func_utils.cc
|
||||
${PREDICT_DIR}/common/mslog.cc
|
||||
${PREDICT_DIR}/common/utils.cc)
|
||||
|
||||
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/../output/lib/)
|
||||
|
||||
add_executable(benchmark main.cc benchmark.cc ${COMMON_SRC})
|
||||
|
||||
target_link_libraries(benchmark mspredict libsecurec.a)
|
||||
add_dependencies(benchmark tvm_kernel)
|
||||
add_dependencies(benchmark securec)
|
||||
|
||||
add_custom_command(TARGET benchmark POST_BUILD
|
||||
COMMAND mkdir -pv ${DOTEST_DIR}
|
||||
COMMAND cp ${PREDICT_BUILD_DIR}/benchmark/benchmark ${DOTEST_DIR})
|
|
@ -1,451 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include <random>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include "include/session.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
STATUS Benchmark::GenerateRandomData(size_t size, void *data) {
|
||||
MS_ASSERT(data != nullptr);
|
||||
char *castedData = static_cast<char *>(data);
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
castedData[i] = static_cast<char>(i);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
STATUS Benchmark::GenerateInputData() {
|
||||
for (Tensor *tensor : msInputs) {
|
||||
MS_ASSERT(tensor != nullptr);
|
||||
auto ret = tensor->MallocData();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOGE("MallocData for inTensor failed %d", ret);
|
||||
return ret;
|
||||
}
|
||||
MS_ASSERT(tensor->GetData() != nullptr);
|
||||
auto tensorByteSize = tensor->GetDataSize();
|
||||
auto status = GenerateRandomData(tensorByteSize, tensor->GetData());
|
||||
if (status != RET_OK) {
|
||||
MS_LOGE("GenerateRandomData for inTensor failed %d", status);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
STATUS Benchmark::LoadInput() {
|
||||
size_t size = 0;
|
||||
char *graphBuf = ReadFile(_flags->modelPath.c_str(), &size);
|
||||
if (graphBuf == nullptr) {
|
||||
MS_LOGE("Load graph failed, path %s", _flags->modelPath.c_str());
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
this->msInputs = session->GetInput();
|
||||
|
||||
if (_flags->inDataPath.empty()) {
|
||||
auto status = GenerateInputData();
|
||||
if (status != RET_OK) {
|
||||
delete graphBuf;
|
||||
MS_LOGE("Generate input data error %d", status);
|
||||
return status;
|
||||
}
|
||||
} else {
|
||||
auto status = ReadInputFile();
|
||||
if (status != RET_OK) {
|
||||
delete graphBuf;
|
||||
MS_LOGE("ReadInputFile error, %d", status);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
delete graphBuf;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
STATUS Benchmark::ReadInputFile() {
|
||||
MS_ASSERT(msInputs.size() <= 1);
|
||||
if (msInputs.empty()) {
|
||||
return RET_OK;
|
||||
}
|
||||
Tensor *inTensor = msInputs.at(0);
|
||||
MS_ASSERT(inTensor != nullptr);
|
||||
|
||||
size_t size;
|
||||
char *binBuf = ReadFile(_flags->inDataPath.c_str(), &size);
|
||||
if (binBuf == nullptr) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto tensorDataSize = inTensor->GetDataSize();
|
||||
if (size != tensorDataSize) {
|
||||
MS_LOGE("Input binary file size error, required: %zu, in fact: %zu", tensorDataSize, size);
|
||||
delete binBuf;
|
||||
return RET_ERROR;
|
||||
}
|
||||
inTensor->SetData(binBuf);
|
||||
binBuf = nullptr;
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// calibData is FP32
|
||||
STATUS Benchmark::ReadCalibData() {
|
||||
const char *calibDataPath = _flags->calibDataPath.c_str();
|
||||
// read calib data
|
||||
std::ifstream inFile(calibDataPath);
|
||||
if (!inFile.good()) {
|
||||
MS_LOGE("file: %s is not exist", calibDataPath);
|
||||
return RET_PARAM_INVALID;
|
||||
}
|
||||
|
||||
if (!inFile.is_open()) {
|
||||
MS_LOGE("file: %s open failed", calibDataPath);
|
||||
inFile.close();
|
||||
return RET_PARAM_INVALID;
|
||||
}
|
||||
|
||||
std::string line;
|
||||
MS_LOGI("Start reading calibData file");
|
||||
std::string tensorName;
|
||||
while (!inFile.eof()) {
|
||||
getline(inFile, line);
|
||||
std::stringstream stringLine1(line);
|
||||
size_t dim = 0;
|
||||
stringLine1 >> tensorName >> dim;
|
||||
std::vector<size_t> dims;
|
||||
size_t shapeSize = 1;
|
||||
for (size_t i = 0; i < dim; i++) {
|
||||
size_t tmpDim;
|
||||
stringLine1 >> tmpDim;
|
||||
dims.push_back(tmpDim);
|
||||
shapeSize *= tmpDim;
|
||||
}
|
||||
|
||||
getline(inFile, line);
|
||||
std::stringstream stringLine2(line);
|
||||
std::vector<float> tensorData;
|
||||
for (size_t i = 0; i < shapeSize; i++) {
|
||||
float tmpData;
|
||||
stringLine2 >> tmpData;
|
||||
tensorData.push_back(tmpData);
|
||||
}
|
||||
|
||||
std::unique_ptr<CheckTensor> checkTensor(new CheckTensor(dims, tensorData));
|
||||
this->calibData.insert(std::make_pair(tensorName, checkTensor.release()));
|
||||
}
|
||||
inFile.close();
|
||||
MS_LOGI("Finish reading calibData file");
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// tensorData need to be converter first
|
||||
float Benchmark::CompareData(const std::string &nodeName, std::vector<int64_t> msShape, float *msTensorData) {
|
||||
auto iter = this->calibData.find(nodeName);
|
||||
if (iter != this->calibData.end()) {
|
||||
std::vector<size_t> castedMSShape;
|
||||
size_t shapeSize = 1;
|
||||
for (int64_t dim : msShape) {
|
||||
castedMSShape.push_back(size_t(dim));
|
||||
shapeSize *= dim;
|
||||
}
|
||||
|
||||
CheckTensor *calibTensor = iter->second;
|
||||
if (calibTensor->shape != castedMSShape) {
|
||||
std::ostringstream oss;
|
||||
oss << "Shape of mslite output(";
|
||||
for (auto dim : castedMSShape) {
|
||||
oss << dim << ",";
|
||||
}
|
||||
oss << ") and shape source model output(";
|
||||
for (auto dim : calibTensor->shape) {
|
||||
oss << dim << ",";
|
||||
}
|
||||
oss << ") are different";
|
||||
MS_LOGE("%s", oss.str().c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
float meanBias = 0;
|
||||
std::ostringstream outputData;
|
||||
outputData << "Data of node " << nodeName << " : ";
|
||||
for (size_t j = 0; j < shapeSize; j++) {
|
||||
if (j < printNum) {
|
||||
outputData << msTensorData[j] << " ";
|
||||
}
|
||||
if (fabs(calibTensor->data.at(j)) > minFloatThr) {
|
||||
double bias = fabs(msTensorData[j] - calibTensor->data.at(j)) / fabs(calibTensor->data.at(j));
|
||||
meanBias += bias;
|
||||
}
|
||||
}
|
||||
meanBias /= shapeSize;
|
||||
MS_LOGI("%s", outputData.str().c_str());
|
||||
|
||||
if (meanBias <= minFloatThr) {
|
||||
MS_LOGI("Mean bias of node %s : 0%%", nodeName.c_str());
|
||||
} else {
|
||||
MS_LOGI("Mean bias of node %s : %f%%", nodeName.c_str(), meanBias * percentage);
|
||||
}
|
||||
return meanBias;
|
||||
} else {
|
||||
MS_LOGI("%s is not in Source Model output", nodeName.c_str());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
STATUS Benchmark::CompareOutput(const std::map<NODE_ID, std::vector<Tensor *>> &msOutputs) {
|
||||
float totalBias = 0;
|
||||
int totalSize = 0;
|
||||
bool hasError = false;
|
||||
for (const auto &msOutput : msOutputs) {
|
||||
std::string nodeName = msOutput.first;
|
||||
auto tensors = msOutput.second;
|
||||
for (auto tensor : tensors) {
|
||||
MS_ASSERT(tensor->GetData() != nullptr);
|
||||
float bias = CompareData(nodeName, tensor->GetDims(), static_cast<float *>(tensor->GetData()));
|
||||
if (bias >= 0) {
|
||||
totalBias += bias;
|
||||
totalSize++;
|
||||
} else {
|
||||
hasError = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasError) {
|
||||
float meanBias;
|
||||
if (totalSize != 0) {
|
||||
meanBias = totalBias / totalSize * percentage;
|
||||
} else {
|
||||
meanBias = 0;
|
||||
}
|
||||
|
||||
MS_LOGI("Mean bias all node : %f%%", meanBias);
|
||||
|
||||
if (meanBias > 1) {
|
||||
MS_LOGE("Mean bias of all nodes is too big: %f%%", meanBias);
|
||||
return RET_ERROR;
|
||||
} else {
|
||||
return RET_OK;
|
||||
}
|
||||
} else {
|
||||
MS_LOGE("Error in CompareData");
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
STATUS Benchmark::MarkPerformance() {
|
||||
MS_LOGI("Running warm up loops...");
|
||||
for (int i = 0; i < _flags->warmUpLoopCount; i++) {
|
||||
auto status = session->Run(msInputs);
|
||||
if (status != RET_OK) {
|
||||
MS_LOGE("Inference error %d", status);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
MS_LOGI("Running benchmark loops...");
|
||||
uint64_t timeMin = maxTimeThr;
|
||||
uint64_t timeMax = 0;
|
||||
uint64_t timeAvg = 0;
|
||||
for (int i = 0; i < _flags->loopCount; i++) {
|
||||
uint64_t start = GetTimeUs();
|
||||
auto status = session->Run(msInputs);
|
||||
if (status != RET_OK) {
|
||||
MS_LOGE("Inference error %d", status);
|
||||
return status;
|
||||
}
|
||||
|
||||
uint64_t end = GetTimeUs();
|
||||
uint64_t time = end - start;
|
||||
timeMin = std::min(timeMin, time);
|
||||
timeMax = std::max(timeMax, time);
|
||||
timeAvg += time;
|
||||
|
||||
msOutputs = session->GetAllOutput();
|
||||
if (cleanData) {
|
||||
for (auto &msOutput : msOutputs) {
|
||||
for (auto &outputTensor : msOutput.second) {
|
||||
delete outputTensor;
|
||||
}
|
||||
}
|
||||
msOutputs.clear();
|
||||
}
|
||||
}
|
||||
if (_flags->loopCount > 0) {
|
||||
timeAvg /= _flags->loopCount;
|
||||
MS_LOGI("MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms", timeMin / US2MS, timeMax / US2MS,
|
||||
timeAvg / US2MS);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
STATUS Benchmark::MarkAccuracy() {
|
||||
MS_LOGI("MarkAccuracy");
|
||||
|
||||
auto status = session->Run(msInputs);
|
||||
if (status != RET_OK) {
|
||||
MS_LOGE("Inference error %d", status);
|
||||
return status;
|
||||
}
|
||||
msOutputs = session->GetAllOutput();
|
||||
|
||||
ReadCalibData();
|
||||
status = CompareOutput(msOutputs);
|
||||
if (cleanData) {
|
||||
for (auto &msOutput : msOutputs) {
|
||||
for (auto &outputTensor : msOutput.second) {
|
||||
delete outputTensor;
|
||||
}
|
||||
}
|
||||
msOutputs.clear();
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
STATUS Benchmark::CleanData() {
|
||||
if (cleanData) {
|
||||
for (auto &msInput : msInputs) {
|
||||
delete msInput;
|
||||
}
|
||||
msInputs.clear();
|
||||
for (auto &data : calibData) {
|
||||
data.second->shape.clear();
|
||||
data.second->data.clear();
|
||||
delete data.second;
|
||||
}
|
||||
calibData.clear();
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
STATUS Benchmark::RunBenchmark() {
|
||||
// Load graph
|
||||
std::string comment = modelName;
|
||||
|
||||
MS_LOGI("start reading model file");
|
||||
size_t size = 0;
|
||||
char *graphBuf = ReadFile(_flags->modelPath.c_str(), &size);
|
||||
if (graphBuf == nullptr) {
|
||||
MS_LOGE("Load graph failed while running %s", comment.c_str());
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
uint64_t startPrepareTime = GetTimeUs();
|
||||
session = CreateSession(graphBuf, size, ctx);
|
||||
if (session == nullptr) {
|
||||
delete graphBuf;
|
||||
MS_LOGE("new session failed while running %s", comment.c_str());
|
||||
return RET_ERROR;
|
||||
}
|
||||
uint64_t endPrepareTime = GetTimeUs();
|
||||
MS_LOGI("PrepareTime = %f ms, ", (endPrepareTime - startPrepareTime) / US2MS);
|
||||
|
||||
// Load input
|
||||
MS_LOGI("start generate input data");
|
||||
auto status = LoadInput();
|
||||
if (status != RET_OK) {
|
||||
delete graphBuf;
|
||||
MS_LOGE("Generate input data error");
|
||||
return status;
|
||||
}
|
||||
|
||||
if (!_flags->calibDataPath.empty()) {
|
||||
status = MarkAccuracy();
|
||||
if (status != RET_OK) {
|
||||
delete graphBuf;
|
||||
MS_LOGE("Run MarkAccuracy error: %d", status);
|
||||
return status;
|
||||
}
|
||||
} else {
|
||||
status = MarkPerformance();
|
||||
if (status != RET_OK) {
|
||||
delete graphBuf;
|
||||
MS_LOGE("Run MarkPerformance error: %d", status);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
CleanData();
|
||||
delete graphBuf;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
STATUS Benchmark::Init() {
|
||||
if (this->_flags == nullptr) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
MS_LOGI("ModelPath = %s", this->_flags->modelPath.c_str());
|
||||
MS_LOGI("InDataPath = %s", this->_flags->inDataPath.c_str());
|
||||
MS_LOGI("TensorDataType = %s", this->_flags->tensorDataTypeIn.c_str());
|
||||
MS_LOGI("LoopCount = %d", this->_flags->loopCount);
|
||||
MS_LOGI("WarmUpLoopCount = %d", this->_flags->warmUpLoopCount);
|
||||
MS_LOGI("NumThreads = %d", this->_flags->numThreads);
|
||||
MS_LOGI("calibDataPath = %s", this->_flags->calibDataPath.c_str());
|
||||
|
||||
this->_flags->inDataType = this->_flags->inDataTypeIn == "img" ? kImage : kBinary;
|
||||
if (this->_flags->tensorDataTypeIn == "float") {
|
||||
this->_flags->tensorDataType = DataType_DT_FLOAT;
|
||||
}
|
||||
|
||||
if (_flags->modelPath.empty()) {
|
||||
MS_LOGE("modelPath is required");
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
modelName = _flags->modelPath.substr(_flags->modelPath.find_last_of("/") + 1);
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int RunBenchmark(int argc, const char **argv) {
|
||||
BenchmarkFlags flags;
|
||||
Option<std::string> err = flags.ParseFlags(argc, argv);
|
||||
|
||||
if (err.IsSome()) {
|
||||
std::cerr << err.Get() << std::endl;
|
||||
std::cerr << flags.Usage() << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (flags.help) {
|
||||
std::cerr << flags.Usage() << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
Benchmark mBenchmark(&flags);
|
||||
auto status = mBenchmark.Init();
|
||||
if (status != RET_OK) {
|
||||
MS_LOGE("Benchmark init Error : %d", status);
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = mBenchmark.RunBenchmark();
|
||||
if (status != RET_OK) {
|
||||
MS_LOGE("Run Benchmark Error : %d", status);
|
||||
return 1;
|
||||
}
|
||||
|
||||
MS_LOGI("end of benchmark");
|
||||
return 0;
|
||||
}
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,142 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_BENCHMARK_BENCHMARK_H_
|
||||
#define PREDICT_BENCHMARK_BENCHMARK_H_
|
||||
|
||||
#include <getopt.h>
|
||||
#include <signal.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/flag_parser.h"
|
||||
#include "common/file_utils.h"
|
||||
#include "common/func_utils.h"
|
||||
#include "common/mslog.h"
|
||||
#include "common/utils.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "include/session.h"
|
||||
#include "include/tensor.h"
|
||||
#include "schema/inner/ms_generated.h"
|
||||
#include "src/graph.h"
|
||||
#include "src/graph_execution.h"
|
||||
#include "src/op.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
enum InDataType { kImage = 0, kBinary = 1 };
|
||||
|
||||
struct CheckTensor {
|
||||
CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data) {
|
||||
this->shape = shape;
|
||||
this->data = data;
|
||||
}
|
||||
std::vector<size_t> shape;
|
||||
std::vector<float> data;
|
||||
};
|
||||
|
||||
class BenchmarkFlags : public virtual FlagParser {
|
||||
public:
|
||||
BenchmarkFlags() {
|
||||
// common
|
||||
AddFlag(&BenchmarkFlags::modelPath, "modelPath", "Input model path", "");
|
||||
AddFlag(&BenchmarkFlags::tensorDataTypeIn, "tensorDataType", "Data type of input Tensor. float", "float");
|
||||
AddFlag(&BenchmarkFlags::inDataPath, "inDataPath", "Input data path, if not set, use random input", "");
|
||||
// MarkPerformance
|
||||
AddFlag(&BenchmarkFlags::loopCount, "loopCount", "Run loop count", 10);
|
||||
AddFlag(&BenchmarkFlags::numThreads, "numThreads", "Run threads number", 2);
|
||||
AddFlag(&BenchmarkFlags::warmUpLoopCount, "warmUpLoopCount", "Run warm up loop", 3);
|
||||
// MarkAccuracy
|
||||
AddFlag(&BenchmarkFlags::calibDataPath, "calibDataPath", "Calibration data file path", "");
|
||||
}
|
||||
|
||||
~BenchmarkFlags() override = default;
|
||||
|
||||
public:
|
||||
// common
|
||||
std::string modelPath;
|
||||
std::string inDataPath;
|
||||
InDataType inDataType;
|
||||
std::string inDataTypeIn;
|
||||
DataType tensorDataType;
|
||||
std::string tensorDataTypeIn;
|
||||
// MarkPerformance
|
||||
int loopCount;
|
||||
int numThreads;
|
||||
int warmUpLoopCount;
|
||||
// MarkAccuracy
|
||||
std::string calibDataPath;
|
||||
};
|
||||
|
||||
class Benchmark {
|
||||
public:
|
||||
explicit Benchmark(BenchmarkFlags *flags) : _flags(flags) {}
|
||||
|
||||
virtual ~Benchmark() = default;
|
||||
|
||||
STATUS Init();
|
||||
STATUS RunBenchmark();
|
||||
|
||||
private:
|
||||
// call GenerateInputData or ReadInputFile to init inputTensors
|
||||
STATUS LoadInput();
|
||||
|
||||
// call GenerateRandomData to fill inputTensors
|
||||
STATUS GenerateInputData();
|
||||
|
||||
STATUS GenerateRandomData(size_t size, void *data);
|
||||
|
||||
STATUS ReadInputFile();
|
||||
|
||||
STATUS ReadCalibData();
|
||||
|
||||
STATUS CleanData();
|
||||
|
||||
STATUS CompareOutput(const std::map<NODE_ID, std::vector<Tensor *>> &msOutputs);
|
||||
|
||||
float CompareData(const std::string &nodeName, std::vector<int64_t> msShape, float *msTensorData);
|
||||
|
||||
STATUS MarkPerformance();
|
||||
|
||||
STATUS MarkAccuracy();
|
||||
|
||||
private:
|
||||
BenchmarkFlags *_flags;
|
||||
std::shared_ptr<Session> session;
|
||||
Context ctx;
|
||||
std::vector<Tensor *> msInputs;
|
||||
std::map<std::string, std::vector<Tensor *>> msOutputs;
|
||||
std::unordered_map<std::string, CheckTensor *> calibData;
|
||||
std::string modelName = "";
|
||||
bool cleanData = true;
|
||||
|
||||
const float US2MS = 1000.0f;
|
||||
const float percentage = 100.0f;
|
||||
const int printNum = 50;
|
||||
const float minFloatThr = 0.0000001f;
|
||||
|
||||
const uint64_t maxTimeThr = 1000000;
|
||||
};
|
||||
|
||||
int RunBenchmark(int argc, const char **argv);
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
#endif // PREDICT_BENCHMARK_BENCHMARK_H_
|
|
@ -1,24 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <random>
|
||||
#include <limits>
|
||||
#include "benchmark/benchmark.h"
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
signal(SIGSEGV, mindspore::predict::CoreDumpTraceFunc);
|
||||
return mindspore::predict::RunBenchmark(argc, argv);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../third_party)
|
||||
|
||||
add_compile_options(-fPIC)
|
||||
|
||||
add_library(common_mid OBJECT
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/graph_util.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/flag_parser.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/func_utils.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/module_registry.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mslog.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/storage.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/utils.cc)
|
|
@ -1,57 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_COMMON_COMMON_H_
|
||||
#define PREDICT_COMMON_COMMON_H_
|
||||
|
||||
#include <string>
|
||||
#include "schema/inner/ms_generated.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
enum NCHW_SHAPE { NCHW_N = 0, NCHW_C = 1, NCHW_H = 2, NCHW_W = 3 };
|
||||
enum NHWC_SHAPE { NHWC_N = 0, NHWC_H = 1, NHWC_W = 2, NHWC_C = 3 };
|
||||
enum HWCK_SHAPE { HWCK_H = 0, HWCK_W = 1, HWCK_C = 2, HWCK_K = 3 };
|
||||
enum KCHW_SHAPE { KCHW_K = 0, KCHW_C = 1, KCHW_H = 2, KCHW_W = 3 };
|
||||
enum CHW_SHAPE { CHW_C = 0, CHW_H = 1, CHW_W = 2 };
|
||||
enum HWC_SHAPE { HWC_H = 0, HWC_W = 1, HWC_C = 2 };
|
||||
|
||||
static constexpr int TENSOR_MAX_REFCOUNT = 999;
|
||||
|
||||
static const char *DELIM_COLON = ":";
|
||||
static const char *DELIM_COMMA = ",";
|
||||
static const char *DELIM_SLASH = "/";
|
||||
static const char *DELIM_DOUBLE_BACKSLASH = "\\";
|
||||
|
||||
// quantization relative
|
||||
static const char QUANTIZED_UINT8[] = "QUANTIZED_UINT8";
|
||||
static const char QUANTIZED_INT8[] = "QUANTIZED_INT8";
|
||||
static const char QUANTIZED_INT16[] = "QUANTIZED_INT16";
|
||||
static const char QUANTIZED_UINT16[] = "QUANTIZED_UINT16";
|
||||
static const char QUANTIZED_FLOAT16[] = "FLOAT16";
|
||||
static const char QUANTIZED_FLOAT32[] = "FLOAT32";
|
||||
static const char QUANTIZATION_TYPE_DYNAMIC[] = "DYNAMIC";
|
||||
static const char QUANTIZATION_TYPE_STATIC[] = "STATIC";
|
||||
static const char CALIB_NORM[] = "NORM";
|
||||
|
||||
// dims
|
||||
static const int32_t DIM_DEFAULT_SIZE = 4;
|
||||
|
||||
static const Format DEFAULT_FORMAT = Format_NCHW;
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // PREDICT_COMMON_COMMON_H_
|
|
@ -1,79 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "common/file_utils.h"
|
||||
#include <climits>
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
char *ReadFile(const char *file, size_t *size) {
|
||||
if (file == nullptr) {
|
||||
MS_LOGE("file is nullptr");
|
||||
return nullptr;
|
||||
}
|
||||
MS_ASSERT(size != nullptr);
|
||||
std::ifstream ifs(RealPath(file));
|
||||
if (!ifs.good()) {
|
||||
MS_LOGE("file: %s is not exist", file);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!ifs.is_open()) {
|
||||
MS_LOGE("file: %s open failed", file);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ifs.seekg(0, std::ios::end);
|
||||
*size = ifs.tellg();
|
||||
std::unique_ptr<char> buf(new (std::nothrow) char[*size]);
|
||||
if (buf == nullptr) {
|
||||
MS_LOGE("malloc buf failed, file:%s", file);
|
||||
ifs.close();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ifs.seekg(0, std::ios::beg);
|
||||
ifs.read(buf.get(), *size);
|
||||
ifs.close();
|
||||
|
||||
return buf.release();
|
||||
}
|
||||
|
||||
std::string RealPath(const char *path) {
|
||||
if (path == nullptr) {
|
||||
MS_LOGE("path is nullptr");
|
||||
return "";
|
||||
}
|
||||
if ((strlen(path)) >= PATH_MAX) {
|
||||
MS_LOGE("path is too long");
|
||||
return "";
|
||||
}
|
||||
|
||||
std::shared_ptr<char> resolvedPath(new (std::nothrow) char[PATH_MAX]{0});
|
||||
if (resolvedPath == nullptr) {
|
||||
MS_LOGE("new resolvedPath failed");
|
||||
return "";
|
||||
}
|
||||
|
||||
auto ret = realpath(path, resolvedPath.get());
|
||||
if (ret == nullptr) {
|
||||
MS_LOGE("realpath failed");
|
||||
return "";
|
||||
}
|
||||
return resolvedPath.get();
|
||||
}
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,39 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_COMMON_FILE_UTILS_H_
|
||||
#define PREDICT_COMMON_FILE_UTILS_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <fstream>
|
||||
#include "common/utils.h"
|
||||
#include "common/mslog.h"
|
||||
#include "include/tensor.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
char *ReadFile(const char *file, size_t *size);
|
||||
|
||||
std::string RealPath(const char *path);
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // PREDICT_COMMON_FILE_UTILS_H_
|
|
@ -1,179 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "common/flag_parser.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
// parse flags read from command line
|
||||
Option<std::string> FlagParser::ParseFlags(int argc, const char *const *argv, bool supportUnknown,
|
||||
bool supportDuplicate) {
|
||||
MS_ASSERT(argv != nullptr);
|
||||
const int FLAG_PREFIX_LEN = 2;
|
||||
// Get binary name
|
||||
binName = GetFileName(argv[0]);
|
||||
|
||||
std::multimap<std::string, Option<std::string>> keyValues;
|
||||
for (int i = 1; i < argc; i++) {
|
||||
std::string tmp = argv[i];
|
||||
Trim(&tmp);
|
||||
const std::string flagItem(tmp);
|
||||
|
||||
if (flagItem == "--") {
|
||||
break;
|
||||
}
|
||||
|
||||
if (flagItem.find("--") == std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string key;
|
||||
Option<std::string> value = Option<std::string>(None());
|
||||
|
||||
size_t pos = flagItem.find_first_of("=");
|
||||
if (pos == std::string::npos && flagItem.find("--no-") != std::string::npos) {
|
||||
key = flagItem.substr(FLAG_PREFIX_LEN);
|
||||
} else if (pos == std::string::npos) {
|
||||
key = flagItem.substr(FLAG_PREFIX_LEN);
|
||||
} else {
|
||||
key = flagItem.substr(FLAG_PREFIX_LEN, pos - FLAG_PREFIX_LEN);
|
||||
value = Option<std::string>(flagItem.substr(pos + 1));
|
||||
}
|
||||
|
||||
keyValues.insert(std::pair<std::string, Option<std::string>>(key, value));
|
||||
}
|
||||
|
||||
Option<std::string> ret = Option<std::string>(InnerParseFlags(&keyValues));
|
||||
if (ret.IsSome()) {
|
||||
return Option<std::string>(ret.Get());
|
||||
}
|
||||
|
||||
return Option<std::string>(None());
|
||||
}
|
||||
|
||||
bool FlagParser::GetRealFlagName(const std::string &oriFlagName, std::string *flagName) {
|
||||
MS_ASSERT(flagName != nullptr);
|
||||
const int BOOL_TYPE_FLAG_PREFIX_LEN = 3;
|
||||
bool opaque = false;
|
||||
if (StartsWithPrefix(oriFlagName, "no-")) {
|
||||
*flagName = oriFlagName.substr(BOOL_TYPE_FLAG_PREFIX_LEN);
|
||||
opaque = true;
|
||||
} else {
|
||||
*flagName = oriFlagName;
|
||||
}
|
||||
return opaque;
|
||||
}
|
||||
|
||||
// Inner parse function
|
||||
Option<std::string> FlagParser::InnerParseFlags(std::multimap<std::string, Option<std::string>> *keyValues) {
|
||||
MS_ASSERT(keyValues != nullptr);
|
||||
for (auto it = keyValues->begin(); it != keyValues->end(); ++it) {
|
||||
std::string flagName;
|
||||
bool opaque = GetRealFlagName((*it).first, &flagName);
|
||||
Option<std::string> flagValue = (*it).second;
|
||||
|
||||
auto item = flags.find(flagName);
|
||||
if (item == flags.end()) {
|
||||
return Option<std::string>(std::string(flagName + " is not a valid flag"));
|
||||
}
|
||||
FlagInfo *flag = &(item->second);
|
||||
if (flag == nullptr) {
|
||||
return Option<std::string>("Failed: flag is nullptr");
|
||||
}
|
||||
if (flag->isParsed) {
|
||||
return Option<std::string>("Failed: already parsed flag: " + flagName);
|
||||
}
|
||||
std::string tmpValue;
|
||||
if (!flag->isBoolean) {
|
||||
if (opaque) {
|
||||
return Option<std::string>(flagName + " is not a boolean type");
|
||||
}
|
||||
if (flagValue.IsNone()) {
|
||||
return Option<std::string>("No value provided for non-boolean type: " + flagName);
|
||||
}
|
||||
tmpValue = flagValue.Get();
|
||||
} else {
|
||||
if (flagValue.IsNone() || flagValue.Get().empty()) {
|
||||
tmpValue = !opaque ? "true" : "false";
|
||||
} else if (!opaque) {
|
||||
tmpValue = flagValue.Get();
|
||||
} else {
|
||||
return Option<std::string>(std::string("Boolean flag can not have non-empty value"));
|
||||
}
|
||||
}
|
||||
// begin to parse value
|
||||
Option<Nothing> ret = flag->parse(this, tmpValue);
|
||||
if (ret.IsNone()) {
|
||||
return Option<std::string>("Failed to parse value for: " + flag->flagName);
|
||||
}
|
||||
flag->isParsed = true;
|
||||
}
|
||||
|
||||
// to check flags not given in command line but added as in constructor
|
||||
for (auto &flag : flags) {
|
||||
if (flag.second.isRequired && !flag.second.isParsed) {
|
||||
return Option<std::string>("Error, value of '" + flag.first + "' not provided");
|
||||
}
|
||||
}
|
||||
|
||||
return Option<std::string>(None());
|
||||
}
|
||||
|
||||
void Replaceall(std::string *str, const std::string &oldValue, const std::string &newValue) {
|
||||
if (str == nullptr) {
|
||||
MS_LOGE("Input str is nullptr");
|
||||
return;
|
||||
}
|
||||
while (true) {
|
||||
std::string::size_type pos(0);
|
||||
if ((pos = str->find(oldValue)) != std::string::npos) {
|
||||
str->replace(pos, oldValue.length(), newValue);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string FlagParser::Usage(const Option<std::string> &usgMsg) const {
|
||||
// first line, brief of the usage
|
||||
std::string usageString = usgMsg.IsSome() ? usgMsg.Get() + "\n" : "";
|
||||
// usage of bin name
|
||||
usageString += usageMsg.IsNone() ? "usage: " + binName + " [options]\n" : usageMsg.Get() + "\n";
|
||||
// help line of help message, usageLine:message of parametors
|
||||
std::string helpLine = "";
|
||||
std::string usageLine = "";
|
||||
uint32_t i = 0;
|
||||
for (auto flag = flags.begin(); flag != flags.end(); flag++) {
|
||||
std::string flagName = flag->second.flagName;
|
||||
std::string helpInfo = flag->second.helpInfo;
|
||||
// parameter line
|
||||
std::string thisLine = flag->second.isBoolean ? " --[no-]" + flagName : " --" + flagName + "=VALUE";
|
||||
if (++i < flags.size()) {
|
||||
// add paramter help message of each line
|
||||
thisLine += " " + helpInfo;
|
||||
Replaceall(&helpInfo, "\n\r", "\n");
|
||||
usageLine += thisLine + "\n";
|
||||
} else {
|
||||
// brief help message
|
||||
helpLine = thisLine + " " + helpInfo + "\n";
|
||||
}
|
||||
}
|
||||
// total usage is brief of usage+ brief of bin + help message + brief of
|
||||
// paramters
|
||||
return usageString + helpLine + usageLine;
|
||||
}
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,291 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_COMMON_FLAG_PARSER_H_
|
||||
#define PREDICT_COMMON_FLAG_PARSER_H_
|
||||
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
|
||||
#include "common/utils.h"
|
||||
#include "common/option.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
struct FlagInfo;
|
||||
|
||||
struct Nothing {};
|
||||
|
||||
class FlagParser {
|
||||
public:
|
||||
FlagParser() { AddFlag(&FlagParser::help, "help", "print usage message", false); }
|
||||
|
||||
virtual ~FlagParser() = default;
|
||||
|
||||
// only support read flags from command line
|
||||
virtual Option<std::string> ParseFlags(int argc, const char *const *argv, bool supportUnknown = false,
|
||||
bool supportDuplicate = false);
|
||||
std::string Usage(const Option<std::string> &usgMsg = Option<std::string>(None())) const;
|
||||
|
||||
template <typename Flags, typename T1, typename T2>
|
||||
void AddFlag(T1 *t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2);
|
||||
|
||||
template <typename Flags, typename T1, typename T2>
|
||||
void AddFlag(T1 Flags::*t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2);
|
||||
|
||||
template <typename Flags, typename T>
|
||||
void AddFlag(T Flags::*t, const std::string &flagName, const std::string &helpInfo);
|
||||
|
||||
// Option-type fields
|
||||
template <typename Flags, typename T>
|
||||
void AddFlag(Option<T> Flags::*t, const std::string &flagName, const std::string &helpInfo);
|
||||
bool help;
|
||||
|
||||
protected:
|
||||
std::string binName;
|
||||
Option<std::string> usageMsg;
|
||||
|
||||
private:
|
||||
struct FlagInfo {
|
||||
std::string flagName;
|
||||
bool isRequired;
|
||||
bool isBoolean;
|
||||
std::string helpInfo;
|
||||
bool isParsed;
|
||||
std::function<Option<Nothing>(FlagParser *, const std::string &)> parse;
|
||||
};
|
||||
|
||||
inline void AddFlag(const FlagInfo &flag);
|
||||
|
||||
// construct a temporary flag
|
||||
template <typename Flags, typename T>
|
||||
void ConstructFlag(Option<T> Flags::*t, const std::string &flagName, const std::string &helpInfo, FlagInfo *flag);
|
||||
|
||||
// construct a temporary flag
|
||||
template <typename Flags, typename T1>
|
||||
void ConstructFlag(T1 Flags::*t1, const std::string &flagName, const std::string &helpInfo, FlagInfo *flag);
|
||||
|
||||
Option<std::string> InnerParseFlags(std::multimap<std::string, Option<std::string>> *values);
|
||||
|
||||
bool GetRealFlagName(const std::string &oriFlagName, std::string *flagName);
|
||||
|
||||
std::map<std::string, FlagInfo> flags;
|
||||
};
|
||||
|
||||
// convert to std::string
|
||||
template <typename Flags, typename T>
|
||||
Option<std::string> ConvertToString(T Flags::*t, const FlagParser &baseFlag) {
|
||||
const Flags *flag = dynamic_cast<Flags *>(&baseFlag);
|
||||
if (flag != nullptr) {
|
||||
return std::to_string(flag->*t);
|
||||
}
|
||||
|
||||
return Option<std::string>(None());
|
||||
}
|
||||
|
||||
// construct for a Option-type flag
|
||||
template <typename Flags, typename T>
|
||||
void FlagParser::ConstructFlag(Option<T> Flags::*t1, const std::string &flagName, const std::string &helpInfo,
|
||||
FlagInfo *flag) {
|
||||
if (flag == nullptr) {
|
||||
MS_LOGE("FlagInfo is nullptr");
|
||||
return;
|
||||
}
|
||||
flag->flagName = flagName;
|
||||
flag->helpInfo = helpInfo;
|
||||
flag->isBoolean = typeid(T) == typeid(bool);
|
||||
flag->isParsed = false;
|
||||
}
|
||||
|
||||
// construct a temporary flag
|
||||
template <typename Flags, typename T>
|
||||
void FlagParser::ConstructFlag(T Flags::*t1, const std::string &flagName, const std::string &helpInfo, FlagInfo *flag) {
|
||||
if (flag == nullptr) {
|
||||
MS_LOGE("FlagInfo is nullptr");
|
||||
return;
|
||||
}
|
||||
if (t1 == nullptr) {
|
||||
MS_LOGE("t1 is nullptr");
|
||||
return;
|
||||
}
|
||||
flag->flagName = flagName;
|
||||
flag->helpInfo = helpInfo;
|
||||
flag->isBoolean = typeid(T) == typeid(bool);
|
||||
flag->isParsed = false;
|
||||
}
|
||||
|
||||
inline void FlagParser::AddFlag(const FlagInfo &flagItem) { flags[flagItem.flagName] = flagItem; }
|
||||
|
||||
template <typename Flags, typename T>
|
||||
void FlagParser::AddFlag(T Flags::*t, const std::string &flagName, const std::string &helpInfo) {
|
||||
if (t == nullptr) {
|
||||
MS_LOGE("t1 is nullptr");
|
||||
return;
|
||||
}
|
||||
|
||||
Flags *flag = dynamic_cast<Flags *>(this);
|
||||
if (flag == nullptr) {
|
||||
MS_LOGI("dynamic_cast failed");
|
||||
return;
|
||||
}
|
||||
|
||||
FlagInfo flagItem;
|
||||
|
||||
// flagItem is as a output parameter
|
||||
ConstructFlag(t, flagName, helpInfo, &flagItem);
|
||||
flagItem.parse = [t](FlagParser *base, const std::string &value) -> Option<Nothing> {
|
||||
Flags *flag = dynamic_cast<Flags *>(base);
|
||||
if (base != nullptr) {
|
||||
Option<T> ret = Option<T>(GenericParseValue<T>(value));
|
||||
if (ret.IsNone()) {
|
||||
return Option<Nothing>(None());
|
||||
} else {
|
||||
flag->*t = ret.Get();
|
||||
}
|
||||
}
|
||||
|
||||
return Option<Nothing>(Nothing());
|
||||
};
|
||||
|
||||
flagItem.isRequired = true;
|
||||
flagItem.helpInfo +=
|
||||
!helpInfo.empty() && helpInfo.find_last_of("\n\r") != helpInfo.size() - 1 ? " (default: " : "(default: ";
|
||||
flagItem.helpInfo += ")";
|
||||
|
||||
// add this flag to a std::map
|
||||
AddFlag(flagItem);
|
||||
}
|
||||
|
||||
template <typename Flags, typename T1, typename T2>
|
||||
void FlagParser::AddFlag(T1 *t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2) {
|
||||
if (t1 == nullptr) {
|
||||
MS_LOGE("t1 is nullptr");
|
||||
return;
|
||||
}
|
||||
|
||||
FlagInfo flagItem;
|
||||
|
||||
// flagItem is as a output parameter
|
||||
ConstructFlag(t1, flagName, helpInfo, flagItem);
|
||||
flagItem.parse = [t1](FlagParser *base, const std::string &value) -> Option<Nothing> {
|
||||
if (base != nullptr) {
|
||||
Option<T1> ret = Option<T1>(GenericParseValue<T1>(value));
|
||||
if (ret.IsNone()) {
|
||||
return Option<T1>(None());
|
||||
} else {
|
||||
*t1 = ret.Get();
|
||||
}
|
||||
}
|
||||
|
||||
return Option<Nothing>(Nothing());
|
||||
};
|
||||
|
||||
flagItem.isRequired = false;
|
||||
*t1 = t2;
|
||||
|
||||
flagItem.helpInfo +=
|
||||
!helpInfo.empty() && helpInfo.find_last_of("\n\r") != helpInfo.size() - 1 ? " (default: " : "(default: ";
|
||||
flagItem.helpInfo += ToString(t2).Get();
|
||||
flagItem.helpInfo += ")";
|
||||
|
||||
// add this flag to a std::map
|
||||
AddFlag(flagItem);
|
||||
}
|
||||
|
||||
template <typename Flags, typename T1, typename T2>
|
||||
void FlagParser::AddFlag(T1 Flags::*t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2) {
|
||||
if (t1 == nullptr) {
|
||||
MS_LOGE("t1 is nullptr");
|
||||
return;
|
||||
}
|
||||
|
||||
Flags *flag = dynamic_cast<Flags *>(this);
|
||||
if (flag == nullptr) {
|
||||
MS_LOGI("dynamic_cast failed");
|
||||
return;
|
||||
}
|
||||
|
||||
FlagInfo flagItem;
|
||||
|
||||
// flagItem is as a output parameter
|
||||
ConstructFlag(t1, flagName, helpInfo, &flagItem);
|
||||
flagItem.parse = [t1](FlagParser *base, const std::string &value) -> Option<Nothing> {
|
||||
Flags *flag = dynamic_cast<Flags *>(base);
|
||||
if (base != nullptr) {
|
||||
Option<T1> ret = Option<T1>(GenericParseValue<T1>(value));
|
||||
if (ret.IsNone()) {
|
||||
return Option<Nothing>(None());
|
||||
} else {
|
||||
flag->*t1 = ret.Get();
|
||||
}
|
||||
}
|
||||
|
||||
return Option<Nothing>(Nothing());
|
||||
};
|
||||
|
||||
flagItem.isRequired = false;
|
||||
flag->*t1 = t2;
|
||||
|
||||
flagItem.helpInfo +=
|
||||
!helpInfo.empty() && helpInfo.find_last_of("\n\r") != helpInfo.size() - 1 ? " (default: " : "(default: ";
|
||||
flagItem.helpInfo += ToString(t2).Get();
|
||||
flagItem.helpInfo += ")";
|
||||
|
||||
// add this flag to a std::map
|
||||
AddFlag(flagItem);
|
||||
}
|
||||
|
||||
// option-type add flag
|
||||
template <typename Flags, typename T>
|
||||
void FlagParser::AddFlag(Option<T> Flags::*t, const std::string &flagName, const std::string &helpInfo) {
|
||||
if (t == nullptr) {
|
||||
MS_LOGE("t is nullptr");
|
||||
return;
|
||||
}
|
||||
|
||||
Flags *flag = dynamic_cast<Flags *>(this);
|
||||
if (flag == nullptr) {
|
||||
MS_LOGE("dynamic_cast failed");
|
||||
return;
|
||||
}
|
||||
|
||||
FlagInfo flagItem;
|
||||
// flagItem is as a output parameter
|
||||
ConstructFlag(t, flagName, helpInfo, &flagItem);
|
||||
flagItem.isRequired = false;
|
||||
flagItem.parse = [t](FlagParser *base, const std::string &value) -> Option<Nothing> {
|
||||
Flags *flag = dynamic_cast<Flags *>(base);
|
||||
if (base != nullptr) {
|
||||
Option<T> ret = Option<std::string>(GenericParseValue<T>(value));
|
||||
if (ret.IsNone()) {
|
||||
return Option<Nothing>(None());
|
||||
} else {
|
||||
flag->*t = Option<T>(Some(ret.Get()));
|
||||
}
|
||||
}
|
||||
|
||||
return Option<Nothing>(Nothing());
|
||||
};
|
||||
|
||||
// add this flag to a std::map
|
||||
AddFlag(flagItem);
|
||||
}
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // PREDICT_COMMON_FLAG_PARSER_H_
|
|
@ -1,77 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "common/func_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
#if MS_USE_ARM
|
||||
_Unwind_Reason_Code PrintTraceArm(_Unwind_Context *ctx, void *d) {
|
||||
MS_ASSERT(ctx != nullptr);
|
||||
MS_ASSERT(d != nullptr);
|
||||
Dl_info info;
|
||||
int *depth = static_cast<int *>(d);
|
||||
auto ipAddr = static_cast<int64_t>(_Unwind_GetIP(ctx));
|
||||
if (dladdr(reinterpret_cast<void *>(ipAddr), &info)) {
|
||||
const char *symbol = "";
|
||||
const char *dlfile = "";
|
||||
if (info.dli_sname) {
|
||||
symbol = info.dli_sname;
|
||||
}
|
||||
if (info.dli_fname) {
|
||||
dlfile = info.dli_fname;
|
||||
}
|
||||
MS_PRINT_ERROR("#%d: (%08lx) %s %s ", *depth, ipAddr, dlfile, symbol);
|
||||
}
|
||||
|
||||
(*depth)++;
|
||||
return _URC_NO_REASON;
|
||||
}
|
||||
#endif
|
||||
|
||||
void CoreDumpTraceFunc(int iSignum) {
|
||||
MS_PRINT_ERROR("----- start get backtrace info -----");
|
||||
#if MS_USE_ARM
|
||||
int depth = 0;
|
||||
_Unwind_Backtrace(&PrintTraceArm, &depth);
|
||||
#else
|
||||
const auto maxDeep = 32;
|
||||
const auto maxStringLen = 100;
|
||||
void *apBuffer[maxStringLen];
|
||||
char **ppStrings;
|
||||
|
||||
auto iStackDepth = backtrace(apBuffer, maxDeep);
|
||||
if (0 > iStackDepth) {
|
||||
KillProcess("Get backtrace depth failed");
|
||||
return;
|
||||
}
|
||||
MS_PRINT_ERROR("Current stack depth is %d", iStackDepth);
|
||||
ppStrings = backtrace_symbols(apBuffer, iStackDepth);
|
||||
if (nullptr == ppStrings) {
|
||||
KillProcess("Get backtrace_symbols failed");
|
||||
return;
|
||||
}
|
||||
|
||||
for (int iLoop = 0; iLoop < iStackDepth; iLoop++) {
|
||||
MS_PRINT_ERROR("%s \n", ppStrings[iLoop]);
|
||||
}
|
||||
#endif
|
||||
MS_PRINT_ERROR("----- finish get backtrace info -----");
|
||||
KillProcess("Exit after core dump");
|
||||
return; // try exit 1
|
||||
}
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,35 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_COMMON_FUNC_UTILS_H_
|
||||
#define PREDICT_COMMON_FUNC_UTILS_H_
|
||||
|
||||
#if MS_USE_ARM
|
||||
#include <dlfcn.h>
|
||||
#include <unwind.h>
|
||||
#else
|
||||
#include <execinfo.h>
|
||||
#endif
|
||||
#include "include/errorcode.h"
|
||||
#include "common/mslog.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
void CoreDumpTraceFunc(int iSignum);
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // PREDICT_COMMON_FUNC_UTILS_H_
|
|
@ -1,167 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "common/graph_util.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include "common/mslog.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
OpGraph *OpGraph::Build(const SubGraphDef &subGraphDef) {
|
||||
auto graph = std::unique_ptr<OpGraph>(new OpGraph());
|
||||
if (graph == nullptr) {
|
||||
MS_LOGE("malloc opgraph failed");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto nodeDefs = subGraphDef.nodes();
|
||||
if (nodeDefs == nullptr) {
|
||||
MS_LOGE("nodeDefs from subGraphDef is nullptr");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
uint32_t opCount = nodeDefs->size();
|
||||
for (uint32_t i = 0; i < opCount; i++) {
|
||||
auto nodeDef = nodeDefs->GetAs<NodeDef>(i);
|
||||
MS_ASSERT(nodeDef != nullptr);
|
||||
auto ret = graph->AddEdge(*nodeDef, *nodeDefs);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOGE("%s add edge failed. ret:%d", nodeDef->opDef()->name()->c_str(), ret);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
return graph.release();
|
||||
}
|
||||
|
||||
int OpGraph::AddEdge(const NodeDef &srcNodeDef, const flatbuffers::Vector<flatbuffers::Offset<NodeDef>> &nodeDefs) {
|
||||
MS_ASSERT(srcNodeDef.opDef() != nullptr);
|
||||
MS_ASSERT(srcNodeDef.opDef()->name() != nullptr);
|
||||
NODE_ID srcId = std::string(srcNodeDef.opDef()->name()->c_str());
|
||||
uint32_t opCount = nodeDefs.size();
|
||||
|
||||
MS_ASSERT(srcNodeDef.opDef()->outputIndex() != nullptr);
|
||||
for (auto index : *(srcNodeDef.opDef()->outputIndex())) {
|
||||
for (uint32_t i = 0; i < opCount; i++) {
|
||||
auto dstNodeDef = nodeDefs.GetAs<NodeDef>(i);
|
||||
bool find = false;
|
||||
MS_ASSERT(dstNodeDef != nullptr);
|
||||
MS_ASSERT(dstNodeDef->opDef() != nullptr);
|
||||
auto inputIndex = dstNodeDef->opDef()->inputIndex();
|
||||
MS_ASSERT(inputIndex != nullptr);
|
||||
if (std::any_of(inputIndex->begin(), inputIndex->end(), [&index](int i) { return i == index; })) {
|
||||
find = true;
|
||||
}
|
||||
|
||||
if (!find) {
|
||||
continue;
|
||||
}
|
||||
MS_ASSERT(dstNodeDef->opDef()->name() != nullptr);
|
||||
NODE_ID dstId = std::string(dstNodeDef->opDef()->name()->c_str());
|
||||
auto ret = AddEdge(srcId, dstId);
|
||||
if (ret != RET_OK) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int OpGraph::AddEdge(const NODE_ID &srcId, const NODE_ID &dstId) {
|
||||
auto srcNode = AddNode(srcId);
|
||||
if (srcNode == nullptr) {
|
||||
MS_LOGE("add srcNode failed");
|
||||
return RET_ERROR;
|
||||
}
|
||||
srcNode->AddOutEdge(dstId);
|
||||
auto dstNode = AddNode(dstId);
|
||||
if (dstNode == nullptr) {
|
||||
MS_LOGE("add dstNode failed");
|
||||
return RET_ERROR;
|
||||
}
|
||||
dstNode->AddInEdge(srcId);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
OpNode *OpGraph::GetNode(const NODE_ID &nodeId) {
|
||||
auto node = nodes.find(nodeId);
|
||||
if (node == nodes.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return node->second;
|
||||
}
|
||||
|
||||
OpNode *OpGraph::AddNode(const NODE_ID &nodeId) {
|
||||
auto node = GetNode(nodeId);
|
||||
if (node != nullptr) {
|
||||
return node;
|
||||
}
|
||||
node = new (std::nothrow) OpNode(nodeId);
|
||||
if (node == nullptr) {
|
||||
MS_LOGE("new node failed");
|
||||
return nullptr;
|
||||
}
|
||||
nodes[nodeId] = node;
|
||||
return node;
|
||||
}
|
||||
|
||||
std::unordered_set<NODE_ID> OpGraph::GetInputNode() {
|
||||
std::unordered_set<NODE_ID> inputNodes;
|
||||
for (const auto &iter : nodes) {
|
||||
auto node = iter.second;
|
||||
MS_ASSERT(node != nullptr);
|
||||
if (node->GetAllInEdge().empty()) {
|
||||
inputNodes.insert(node->ID());
|
||||
}
|
||||
}
|
||||
return inputNodes;
|
||||
}
|
||||
|
||||
std::unordered_set<NODE_ID> OpGraph::GetOutputNode() {
|
||||
std::unordered_set<NODE_ID> outputNodes;
|
||||
for (const auto &iter : nodes) {
|
||||
auto node = iter.second;
|
||||
MS_ASSERT(node != nullptr);
|
||||
if (node->GetAllOutEdge().empty()) {
|
||||
outputNodes.insert(node->ID());
|
||||
}
|
||||
}
|
||||
return outputNodes;
|
||||
}
|
||||
|
||||
OpGraph::~OpGraph() {
|
||||
for (auto iter : nodes) {
|
||||
if (iter.second != nullptr) {
|
||||
delete iter.second;
|
||||
}
|
||||
}
|
||||
nodes.clear();
|
||||
}
|
||||
|
||||
NODE_ID OpNode::ID() { return id; }
|
||||
|
||||
void OpNode::AddInEdge(const NODE_ID &nodeId) { inEdges.insert(nodeId); }
|
||||
|
||||
void OpNode::AddOutEdge(const NODE_ID &nodeId) { outEdges.insert(nodeId); }
|
||||
|
||||
std::unordered_set<NODE_ID> OpNode::GetAllInEdge() { return inEdges; }
|
||||
|
||||
std::unordered_set<NODE_ID> OpNode::GetAllOutEdge() { return outEdges; }
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,71 +0,0 @@
|
|||
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef PREDICT_COMMON_GRAPH_UTIL_H_
#define PREDICT_COMMON_GRAPH_UTIL_H_

#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include <memory>
#include "common/utils.h"
#include "schema/inner/ms_generated.h"

namespace mindspore {
namespace predict {
using NODE_ID = std::string;

// A single operator node: an identifier plus the IDs of its predecessor
// and successor nodes.
class OpNode {
 public:
  explicit OpNode(NODE_ID nodeId) : id(std::move(nodeId)) {}
  NODE_ID ID();
  void AddInEdge(const NODE_ID &nodeId);
  void AddOutEdge(const NODE_ID &nodeId);
  std::unordered_set<NODE_ID> GetAllInEdge();
  std::unordered_set<NODE_ID> GetAllOutEdge();

 protected:
  NODE_ID id;
  std::unordered_set<NODE_ID> inEdges;
  std::unordered_set<NODE_ID> outEdges;
};

// Directed dependency graph over OpNodes, built from a flatbuffer
// SubGraphDef. Owns its nodes; they are deleted in the destructor.
class OpGraph {
 public:
  OpGraph() = default;

  ~OpGraph();

  // Builds a graph from the flatbuffer definition; caller takes ownership
  // of the returned graph. NOTE(review): nullptr-on-failure is inferred
  // from the pointer return type - confirm against graph_util.cc.
  static OpGraph *Build(const SubGraphDef &subGraphDef);

  OpNode *GetNode(const NODE_ID &nodeId);
  OpNode *AddNode(const NODE_ID &nodeId);
  std::unordered_set<NODE_ID> GetInputNode();   // IDs of nodes with no in-edges
  std::unordered_set<NODE_ID> GetOutputNode();  // IDs of nodes with no out-edges

 private:
  int AddEdge(const NODE_ID &srcId, const NODE_ID &dstId);
  int AddEdge(const NodeDef &srcNodeDef, const flatbuffers::Vector<flatbuffers::Offset<NodeDef>> &nodeDefs);

 protected:
  std::unordered_map<NODE_ID, OpNode *> nodes;
};
}  // namespace predict
}  // namespace mindspore

#endif  // PREDICT_COMMON_GRAPH_UTIL_H_
|
|
@ -1,26 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "common/module_registry.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
ModuleRegistry *GetRegistryInstance() {
|
||||
static ModuleRegistry registry;
|
||||
return ®istry;
|
||||
}
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,97 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_COMMON_MODULE_REGISTRY_H_
#define PREDICT_COMMON_MODULE_REGISTRY_H_
#include <memory>
#include <string>
#include <unordered_map>
#include "common/mslog.h"

#define MSPREDICT_API __attribute__((visibility("default")))

namespace mindspore {
namespace predict {
// Common base so heterogeneous Module<T> instances can share one map.
class ModuleBase {
 public:
  virtual ~ModuleBase() = default;
};

template <typename T>
class Module;

// Name -> module lookup table. Stores non-owning pointers: a registered
// object must outlive the registry (in practice, registrars pass statics).
class ModuleRegistry {
 public:
  ModuleRegistry() = default;

  virtual ~ModuleRegistry() = default;

  // Registers `t` under `name`, replacing any previous entry.
  // Always returns true (the bool return allows static-init registration).
  template <class T>
  bool Register(const std::string &name, const T &t) {
    modules[name] = &t;
    return true;
  }

  // Returns a new instance produced by the module registered under `name`,
  // or nullptr when the name is unknown.
  template <class T>
  std::shared_ptr<T> Create(const std::string &name) {
    auto found = modules.find(name);
    if (found == modules.end()) {
      return nullptr;
    }
    // C-style cast kept deliberately: it also removes const, which
    // static_cast alone would reject.
    auto *module = (Module<T> *)found->second;
    return (module != nullptr) ? module->Create() : nullptr;
  }

  // Returns the shared instance held by the module registered under
  // `name`, or nullptr when the name is unknown.
  template <class T>
  T *GetInstance(const std::string &name) {
    auto found = modules.find(name);
    if (found == modules.end()) {
      return nullptr;
    }
    auto *module = (Module<T> *)found->second;
    return (module != nullptr) ? module->GetInstance() : nullptr;
  }

 protected:
  std::unordered_map<std::string, const ModuleBase *> modules;
};

ModuleRegistry *GetRegistryInstance() MSPREDICT_API;

// Registers `module` with the global registry at construction time;
// intended to be instantiated as a file-scope static.
template <class T>
class ModuleRegistrar {
 public:
  ModuleRegistrar(const std::string &name, const T &module) {
    auto *registry = GetRegistryInstance();
    if (registry == nullptr) {
      MS_LOGW("registryInstance is nullptr.");
    } else {
      registry->Register(name, module);
    }
  }
};
}  // namespace predict
}  // namespace mindspore

#endif  // PREDICT_COMMON_MODULE_REGISTRY_H_
|
|
@ -1,47 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "common/mslog.h"
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <climits>
|
||||
#include <string>
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
// Reads the environment variable `envvar`; returns the empty string when
// it is unset.
std::string GetEnv(const std::string &envvar) {
  const char *raw = std::getenv(envvar.c_str());
  return (raw == nullptr) ? std::string() : std::string(raw);
}
|
||||
|
||||
bool IsPrint(int level) {
|
||||
auto envString = GetEnv("MSLOG");
|
||||
static int env = static_cast<int>(std::strtol(!envString.empty() ? envString.c_str() : "3", nullptr, 0));
|
||||
if (env == INT_MIN || env == INT_MAX) {
|
||||
env = WARN;
|
||||
// enable the SP for binscope checking
|
||||
std::string errorStr = "env exceeded the value that type int is able to represent";
|
||||
MS_LOGE("%s", errorStr.c_str());
|
||||
}
|
||||
|
||||
return level >= env;
|
||||
}
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,230 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_COMMON_MSLOG_H_
#define PREDICT_COMMON_MSLOG_H_

#include <syslog.h>
#include <unistd.h>
#include <csignal>
#include <iostream>
#include <sstream>
#include <string>

#if defined(__ANDROID__) || defined(ANDROID)
#include <android/log.h>
#endif
namespace mindspore {
namespace predict {
// Tag placed at the front of every log record.
constexpr const char *TAG = "MS_PREDICT";

// Severity levels; IsPrint() compares against the MSLOG threshold.
constexpr int DEBUG = 1;
constexpr int INFO = 2;
constexpr int WARN = 3;
constexpr int ERROR = 4;

#define MSPREDICT_API __attribute__((visibility("default")))

// Returns true when messages of severity `level` should be emitted.
bool MSPREDICT_API IsPrint(int level);

#if !defined(__ANDROID__) && !defined(ANDROID)

// Host builds: syslog sink when LOG_TO_FILE is set, stdout otherwise.
// NOTE(review): these macros stringize their format argument (#fmt), so
// the emitted text carries the literal's surrounding quotes - confirm
// this is intentional before changing it.
#if LOG_TO_FILE
#define MS_LOGD(fmt, args...)                                                  \
  {                                                                            \
    if (mindspore::predict::IsPrint(mindspore::predict::DEBUG)) {              \
      syslog(LOG_DEBUG, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG,       \
             getpid(), __func__, __LINE__, ##args);                            \
    }                                                                          \
  }
#define MS_LOGI(fmt, args...)                                                  \
  {                                                                            \
    if (mindspore::predict::IsPrint(mindspore::predict::INFO)) {               \
      syslog(LOG_INFO, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG,        \
             getpid(), __func__, __LINE__, ##args);                            \
    }                                                                          \
  }
#define MS_LOGW(fmt, args...)                                                  \
  {                                                                            \
    if (mindspore::predict::IsPrint(mindspore::predict::WARN)) {               \
      syslog(LOG_WARNING, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG,     \
             getpid(), __func__, __LINE__, ##args);                            \
    }                                                                          \
  }
#define MS_LOGE(fmt, args...)                                                  \
  {                                                                            \
    if (mindspore::predict::IsPrint(mindspore::predict::ERROR)) {              \
      syslog(LOG_ERR, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG,         \
             getpid(), __func__, __LINE__, ##args);                            \
    }                                                                          \
  }
#else

#define MS_LOGD(fmt, args...)                                                           \
  {                                                                                     \
    if (mindspore::predict::IsPrint(mindspore::predict::DEBUG)) {                       \
      printf("[DEBUG] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG,         \
             getpid(), __FILE__, __func__, __LINE__, ##args);                           \
    }                                                                                   \
  }
#define MS_LOGI(fmt, args...)                                                           \
  {                                                                                     \
    if (mindspore::predict::IsPrint(mindspore::predict::INFO)) {                        \
      printf("[INFO] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG,          \
             getpid(), __FILE__, __func__, __LINE__, ##args);                           \
    }                                                                                   \
  }
#define MS_LOGW(fmt, args...)                                                           \
  {                                                                                     \
    if (mindspore::predict::IsPrint(mindspore::predict::WARN)) {                        \
      printf("[WARN] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG,          \
             getpid(), __FILE__, __func__, __LINE__, ##args);                           \
    }                                                                                   \
  }
#define MS_LOGE(fmt, args...)                                                           \
  {                                                                                     \
    if (mindspore::predict::IsPrint(mindspore::predict::ERROR)) {                       \
      printf("[ERROR] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG,         \
             getpid(), __FILE__, __func__, __LINE__, ##args);                           \
    }                                                                                   \
  }
#endif

#else

// Android builds: route everything through the NDK logger. Note: the
// format argument here is concatenated (fmt), not stringized.
#define MS_LOGD(fmt, args...)                                                                         \
  {                                                                                                   \
    if (mindspore::predict::IsPrint(mindspore::predict::DEBUG))                                       \
      __android_log_print(ANDROID_LOG_DEBUG, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(),  \
                          __func__, __LINE__, ##args);                                                \
  }

#define MS_LOGI(fmt, args...)                                                                         \
  {                                                                                                   \
    if (mindspore::predict::IsPrint(mindspore::predict::INFO))                                        \
      __android_log_print(ANDROID_LOG_INFO, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(),   \
                          __func__, __LINE__, ##args);                                                \
  }

#define MS_LOGW(fmt, args...)                                                                         \
  {                                                                                                   \
    if (mindspore::predict::IsPrint(mindspore::predict::WARN))                                        \
      __android_log_print(ANDROID_LOG_WARN, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(),   \
                          __func__, __LINE__, ##args);                                                \
  }

#define MS_LOGE(fmt, args...)                                                                         \
  {                                                                                                   \
    if (mindspore::predict::IsPrint(mindspore::predict::ERROR))                                       \
      __android_log_print(ANDROID_LOG_ERROR, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(),  \
                          __func__, __LINE__, ##args);                                                \
  }

#endif

// Stream-style logging stubs: discard the severity and write to stdout.
#define MS_LOG(severity) std::cout << std::endl
#define MS_DLOG(verboselevel) std::cout << std::endl
// Kill the process for safe exiting.
inline void KillProcess(const std::string &ret) {
  MS_LOG(ERROR) << "mindspore Exit Tip:" << ret;
  if (raise(SIGKILL) != 0) {
    MS_LOGE("Send SIGKILL to kill process failed");
  }
}
}  // namespace predict
}  // namespace mindspore

// Fatal assert: logs the failed expression with file/line and kills the
// process (see KillProcess above).
#define MS_ASSERT(expression)                                                                        \
  do {                                                                                               \
    if (!(expression)) {                                                                             \
      std::stringstream ss;                                                                          \
      ss << "Assertion failed: " << #expression << ", file: " << __FILE__ << ", line: " << __LINE__; \
      mindspore::predict::KillProcess(ss.str());                                                     \
    }                                                                                                \
  } while (0)

// Fatal exit with a caller-supplied value, annotated with file/line.
#define MS_EXIT(ret)                                                           \
  do {                                                                         \
    std::stringstream ss;                                                      \
    ss << (ret) << " ( file: " << __FILE__ << ", line: " << __LINE__ << " )."; \
    mindspore::predict::KillProcess(ss.str());                                 \
  } while (0)

#define MS_PRINT_ERROR(fmt, args...) \
  printf(#fmt "\n", ##args);         \
  MS_LOGE(fmt, ##args);

#define MS_PRINT_INFO(fmt, args...) \
  printf(fmt "\n", ##args);         \
  MS_LOGI(fmt, ##args);

// Thresholds for the rate-limited logging helpers below.
constexpr int LOG_CHECK_EVERY_FIRSTNUM = 10;
constexpr int LOG_CHECK_EVERY_NUM1 = 10;
constexpr int LOG_CHECK_EVERY_NUM2 = 100;
constexpr int LOG_CHECK_EVERY_NUM3 = 1000;
constexpr int LOG_CHECK_EVERY_NUM4 = 10000;

#define LOG_CHECK_ID_CONCAT(word1, word2) word1##word2

#define LOG_CHECK_ID LOG_CHECK_ID_CONCAT(__FUNCTION__, __LINE__)

// Rate-limit predicates: each expands to a lambda holding a per-callsite
// static counter; true means "log this occurrence".
#define LOG_CHECK_FIRST_N                  \
  [](uint32_t firstNum) {                  \
    static uint32_t LOG_CHECK_ID = 0;      \
    ++LOG_CHECK_ID;                        \
    return (LOG_CHECK_ID <= firstNum);     \
  }

#define LOG_CHECK_EVERY_N1                                              \
  [](uint32_t firstNum, uint32_t num) {                                 \
    static uint32_t LOG_CHECK_ID = 0;                                   \
    ++LOG_CHECK_ID;                                                     \
    return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID % num == 0));   \
  }

#define LOG_CHECK_EVERY_N2                                                                 \
  [](uint32_t firstNum, uint32_t num1, uint32_t num2) {                                    \
    static uint32_t LOG_CHECK_ID = 0;                                                      \
    ++LOG_CHECK_ID;                                                                        \
    return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID < num2 && LOG_CHECK_ID % num1 == 0) || \
            (LOG_CHECK_ID % num2 == 0));                                                   \
  }

#define LOG_CHECK_EVERY_N3                                                                     \
  [](uint32_t firstNum, uint32_t num1, uint32_t num2, uint32_t num3) {                         \
    static uint32_t LOG_CHECK_ID = 0;                                                          \
    ++LOG_CHECK_ID;                                                                            \
    return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID < num2 && LOG_CHECK_ID % num1 == 0) || \
            (LOG_CHECK_ID < num3 && LOG_CHECK_ID % num2 == 0) || (LOG_CHECK_ID % num3 == 0));  \
  }

#define LOG_CHECK_EVERY_N4                                                                                       \
  [](uint32_t firstNum, uint32_t num1, uint32_t num2, uint32_t num3, uint32_t num4) {                            \
    static uint32_t LOG_CHECK_ID = 0;                                                                            \
    ++LOG_CHECK_ID;                                                                                              \
    return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID < num2 && LOG_CHECK_ID % num1 == 0) ||                   \
            (LOG_CHECK_ID < num3 && LOG_CHECK_ID % num2 == 0) || (LOG_CHECK_ID < num4 && LOG_CHECK_ID % num3 == 0) || \
            (LOG_CHECK_ID % num4 == 0));                                                                         \
  }

#define LOG_CHECK_EVERY_N                                                                    \
  []() {                                                                                     \
    static uint32_t LOG_CHECK_ID = 0;                                                        \
    ++LOG_CHECK_ID;                                                                          \
    return ((LOG_CHECK_ID <= LOG_CHECK_EVERY_FIRSTNUM) ||                                    \
            (LOG_CHECK_ID < LOG_CHECK_EVERY_NUM2 && LOG_CHECK_ID % LOG_CHECK_EVERY_NUM1 == 0) || \
            (LOG_CHECK_ID < LOG_CHECK_EVERY_NUM3 && LOG_CHECK_ID % LOG_CHECK_EVERY_NUM2 == 0) || \
            (LOG_CHECK_ID < LOG_CHECK_EVERY_NUM4 && LOG_CHECK_ID % LOG_CHECK_EVERY_NUM3 == 0) || \
            (LOG_CHECK_ID % LOG_CHECK_EVERY_NUM4 == 0));                                     \
  }

#endif  // PREDICT_COMMON_MSLOG_H_
|
|
@ -1,44 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_COMMON_OP_UTILS_H_
#define PREDICT_COMMON_OP_UTILS_H_

#include <functional>
#include <string>
#include "schema/inner/ms_generated.h"

namespace mindspore {
namespace predict {
// Accessors over the flatbuffer view types (OpDef / NodeDef).
inline OpT GetOpType(const OpDef &opDef) { return opDef.attr_type(); }

inline OpT GetOpType(const NodeDef &nodeDef) { return GetOpType(*(nodeDef.opDef())); }

inline std::string GetOpTypeName(const NodeDef &nodeDef) { return EnumNameOpT(GetOpType(nodeDef)); }

inline std::string GetOpTypeName(const OpDef &opDef) { return EnumNameOpT(GetOpType(opDef)); }

// Accessors over the unpacked object-API types (OpDefT / NodeDefT).
inline OpT GetOpType(const OpDefT &opDefT) { return opDefT.attr.type; }

inline OpT GetOpType(const NodeDefT &nodeDefT) { return GetOpType(*(nodeDefT.opDef.get())); }

inline std::string GetOpTypeName(const NodeDefT &nodeDefT) { return EnumNameOpT(GetOpType(nodeDefT)); }

inline std::string GetOpTypeName(const OpDefT &opDefT) { return EnumNameOpT(GetOpType(opDefT)); }
}  // namespace predict
}  // namespace mindspore

#endif  // PREDICT_COMMON_OP_UTILS_H_
|
|
@ -1,119 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_COMMON_OPTION_H_
#define PREDICT_COMMON_OPTION_H_

#include <new>
#include <type_traits>
#include <utility>
#include "common/mslog.h"

namespace mindspore {
namespace predict {
// Wrapper used by Some() to disambiguate "a present value" from the other
// Option constructors.
template <typename T>
struct InnerSome {
  // NOTE(review): std::move on a const ref still copies; kept for source
  // compatibility.
  explicit InnerSome(const T &t) : _t(std::move(t)) {}

  T _t;
};

template <typename T>
InnerSome<typename std::decay<T>::type> Some(T &&t) {
  return InnerSome<typename std::decay<T>::type>(std::forward<T>(t));
}

// Tag type for an absent value.
struct None {};

// Minimal optional<T>. Requires T to be default-constructible (the `data`
// member is a plain field that always holds a live T, even in NONE state).
template <typename T>
class Option {
 public:
  Option() : state(NONE) {}

  explicit Option(const T &t) : data(t), state(SOME) {}

  explicit Option(T &&t) : data(std::move(t)), state(SOME) {}

  explicit Option(const InnerSome<T> &some) : data(some._t), state(SOME) {}

  explicit Option(const None &none) : state(NONE) {}

  // FIX: `data` is a regular member, so it is already constructed before
  // the constructor body runs; the previous placement-new over the live
  // object leaked the old value (and skipped its destructor). A plain
  // member-init copy is the correct form.
  Option(const Option<T> &that) : data(that.data), state(that.state) {}

  virtual ~Option() = default;

  bool IsNone() const { return state == NONE; }

  bool IsSome() const { return state == SOME; }

  // Accessors assert presence; calling Get() on a NONE option is fatal.
  const T &Get() const & {
    MS_ASSERT(IsSome());
    return data;
  }

  T &Get() & {
    MS_ASSERT(IsSome());
    return data;
  }

  T &&Get() && {
    MS_ASSERT(IsSome());
    return std::move(data);
  }

  const T &&Get() const && {
    MS_ASSERT(IsSome());
    return std::move(data);
  }

  // oprerator override
  Option<T> &operator=(const Option<T> &that) {
    if (&that != this) {
      if (IsSome()) {
        data.~T();
      }
      state = that.state;
      // FIX: always reconstruct `data`. The old code left `data` destroyed
      // when assigning from a NONE option, so ~Option (which destroys the
      // plain member automatically) ran the destructor a second time - UB.
      if (that.IsSome()) {
        new (&data) T(that.data);
      } else {
        new (&data) T();
      }
    }

    return *this;
  }

  bool operator==(const Option<T> &that) const {
    return (IsNone() && that.IsNone()) || (IsSome() && that.IsSome() && data == that.data);
  }

  bool operator!=(const Option<T> &that) const { return !(*this == that); }

  bool operator==(const T &that) const { return IsSome() && data == that; }

  bool operator!=(const T &that) const { return !(*this == that); }

 private:
  enum State { NONE = 0, SOME = 1 };

  T data;
  State state;
};
}  // namespace predict
}  // namespace mindspore

#endif  // PREDICT_COMMON_OPTION_H_
|
|
@ -1,50 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "common/storage.h"
|
||||
#include "flatbuffers/flatbuffers.h"
|
||||
#include "common/mslog.h"
|
||||
#include "common/file_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
int Storage::Save(const GraphDefT &graph, const std::string &outputPath) {
|
||||
flatbuffers::FlatBufferBuilder builder(flatSize);
|
||||
auto offset = GraphDef::Pack(builder, &graph);
|
||||
builder.Finish(offset);
|
||||
int size = builder.GetSize();
|
||||
auto content = builder.GetBufferPointer();
|
||||
if (content == nullptr) {
|
||||
MS_LOGE("GetBufferPointer nullptr");
|
||||
return RET_ERROR;
|
||||
}
|
||||
std::string realPath = RealPath(outputPath.c_str());
|
||||
if (realPath.empty()) {
|
||||
MS_LOGE("Output file path '%s' is not valid", outputPath.c_str());
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
std::ofstream output(realPath, std::ofstream::binary);
|
||||
if (!output.is_open()) {
|
||||
MS_LOGE("ofstream open failed");
|
||||
return RET_ERROR;
|
||||
}
|
||||
output.write((const char *)content, size);
|
||||
output.close();
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,36 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_COMMON_STORAGE_H_
#define PREDICT_COMMON_STORAGE_H_

#include <fstream>
#include <string>
#include "include/errorcode.h"
#include "flatbuffers/flatbuffers.h"
#include "schema/inner/ms_generated.h"

namespace mindspore {
namespace predict {
// Persists a GraphDefT to disk in flatbuffer form.
class Storage {
 public:
  // Serializes `graph` and writes it to `outputPath`.
  // Returns RET_OK on success, RET_ERROR otherwise.
  int Save(const GraphDefT &graph, const std::string &outputPath);
  // Initial size hint (bytes) for the FlatBufferBuilder; the builder grows
  // beyond this as needed.
  const int flatSize = 1024;
};
}  // namespace predict
}  // namespace mindspore

#endif  // PREDICT_COMMON_STORAGE_H_
|
|
@ -1,228 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "common/utils.h"

#include <cstring>  // std::memcpy for bit-level float<->half conversion
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
uint64_t GetTimeUs() {
|
||||
struct timespec ts = {0, 0};
|
||||
if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
|
||||
return 0;
|
||||
}
|
||||
// USECS_IN_SEC *NSECS_IN_USEC;
|
||||
auto retval = static_cast<uint64_t>((ts.tv_sec * USEC) + (ts.tv_nsec / MSEC));
|
||||
return retval;
|
||||
}
|
||||
|
||||
// IEEE-754 binary32 layout constants.
static const unsigned int FP32_BIT_SIZE = 32;
static const unsigned int FP32_EXPONENT_BIAS = 127;
static const unsigned int FP32_SIGNIFICAND = 23;

static const unsigned int FP32_EXPONENT_MAX = 255;

// IEEE-754 binary16 layout constants.
static const unsigned int FP16_BIT_SIZE = 16;
static const unsigned int FP16_EXPONENT_BIAS = 15;
static const unsigned int FP16_SIGNIFICAND = 10;

static const int FP16_EXPONENT_MAX = 30;
static const int FP16_EXPONENT_MIN = -10;

// Converts an IEEE-754 binary16 value (carried in an int16_t) to float.
//
// FIX: the previous implementation assembled the correct float *bit
// pattern* in an int and then did static_cast<float>(*iRes), which
// converts the integer VALUE (e.g. 0x3F800000 -> ~1.06e9f) instead of
// reinterpreting its bits, so every result was wrong. The pattern is now
// transferred with std::memcpy, the well-defined way to type-pun.
float ShortToFloat32(int16_t srcValue) {
  const auto half = static_cast<uint16_t>(srcValue);
  const uint32_t sign = static_cast<uint32_t>(half & 0x8000) << FP16_BIT_SIZE;
  const uint32_t expHalf = (half >> FP16_SIGNIFICAND) & 0x1F;
  const uint32_t mantissa = half & 0x03FF;

  uint32_t bits;
  if (expHalf == 0x1F) {
    // Inf (mantissa == 0) or NaN (mantissa != 0): max float exponent,
    // payload widened into the larger significand field.
    bits = sign | (FP32_EXPONENT_MAX << FP32_SIGNIFICAND) | (mantissa << (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
  } else if (expHalf != 0) {
    // Normalized: rebias the exponent and widen the significand.
    bits = sign | ((expHalf + (FP32_EXPONENT_BIAS - FP16_EXPONENT_BIAS)) << FP32_SIGNIFICAND) |
           (mantissa << (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
  } else if (mantissa == 0) {
    bits = sign;  // +/- zero
  } else {
    // Subnormal half: normalize the mantissa; every such value is
    // representable as a normalized float.
    uint32_t m = mantissa;
    uint32_t shift = 0;
    while ((m & 0x0400) == 0) {
      m <<= 1;
      ++shift;
    }
    const uint32_t exp32 = FP32_EXPONENT_BIAS - FP16_EXPONENT_BIAS - shift + 1;
    bits = sign | (exp32 << FP32_SIGNIFICAND) | ((m & 0x03FF) << (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
  }

  float result;
  std::memcpy(&result, &bits, sizeof(result));
  return result;
}
|
||||
|
||||
// Converts a float to IEEE-754 binary16 (returned in an int16_t), rounding
// the significand by adding half an ULP before truncation.
//
// FIX: the previous implementation read the float's bits with
// static_cast<unsigned int>(srcValue), which converts the numeric VALUE
// (e.g. 1.5f -> 1u) instead of reinterpreting the bit pattern, so the
// sign/exponent/mantissa extraction operated on garbage. The bits are now
// obtained with std::memcpy. (Literal field constants are used here:
// float bias 127 / 23-bit significand, half bias 15 / 10-bit significand.)
int16_t Float32ToShort(float srcValue) {
  uint32_t srcBits;
  std::memcpy(&srcBits, &srcValue, sizeof(srcBits));

  const int sign = static_cast<int>(srcBits >> 31);
  int mantissa = static_cast<int>(srcBits & 0x007FFFFF);
  // Rebias the float exponent (bias 127) to the half exponent (bias 15).
  int exp = static_cast<int>((srcBits & 0x7F800000) >> 23) + 15 - 127;

  int16_t res;
  if (exp > 0 && exp < 30) {
    // Normal range: round the significand and pack the fields. A rounding
    // carry may propagate into the exponent field, which is correct.
    res = static_cast<int16_t>((sign << 15) | (exp << 10) | ((mantissa + 0x00001000) >> 13));
  } else if (srcBits == 0) {
    res = 0;  // +0.0f
  } else if (exp <= 0) {
    if (exp < -10) {
      // Magnitude below the smallest subnormal half: flush to zero.
      res = 0;
    } else {
      // Denormalized result: restore the implicit leading bit, shift into
      // range, round to nearest, and pack with a zero exponent field.
      mantissa = (mantissa | 0x00800000) >> (1 - exp);
      if ((mantissa & 0x00001000) > 0) {
        mantissa = mantissa + 0x00002000;
      }
      res = static_cast<int16_t>((sign << 15) | (mantissa >> 13));
    }
  } else if (exp == (255 - 127 + 15)) {
    if (mantissa == 0) {
      // Input is infinity: return half infinity.
      res = static_cast<int16_t>((sign << 15) | 0x7C00);
    } else {
      // Input is NaN: return a half NaN keeping the top payload bits.
      res = static_cast<int16_t>((sign << 15) | 0x7C00 | (mantissa >> 13));
    }
  } else {
    // exp >= 30 but finite: round first, then check for overflow.
    if ((mantissa & 0x00001000) > 0) {
      mantissa = mantissa + 0x00002000;
      if ((mantissa & 0x00800000) > 0) {
        mantissa = 0;
        exp = exp + 1;
      }
    }
    if (exp > 30) {
      // Exponent overflow: return infinity.
      res = static_cast<int16_t>((sign << 15) | 0x7C00);
    } else {
      res = static_cast<int16_t>((sign << 15) | (exp << 10) | (mantissa >> 13));
    }
  }
  return res;
}
|
||||
std::string Remove(const std::string &from, const std::string &subStr, Mode mode) {
|
||||
std::string result = from;
|
||||
if (mode == PREFIX) {
|
||||
if (from.substr(0, subStr.length()) == subStr) {
|
||||
result = from.substr(subStr.size());
|
||||
}
|
||||
} else if (mode == SUFFIX) {
|
||||
if (from.rfind(subStr) == from.size() - subStr.size()) {
|
||||
result = from.substr(0, from.size() - subStr.size());
|
||||
}
|
||||
} else {
|
||||
size_t index;
|
||||
while ((index = result.find(subStr)) != std::string::npos) {
|
||||
result = result.erase(index, subStr.size());
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Splits `str` on every occurrence of `pattern`, returning the pieces in
// order. A trailing copy of the pattern is appended internally so the last
// segment is emitted too; empty segments are preserved.
std::vector<std::string> StrSplit(const std::string &str, const std::string &pattern) {
  std::vector<std::string> pieces;
  const std::string padded(str + pattern);
  const std::string::size_type total = padded.size();

  for (std::string::size_type start = 0; start < total; start++) {
    const std::string::size_type hit = padded.find(pattern, start);
    if (hit < total) {
      pieces.push_back(padded.substr(start, hit - start));
      start = hit + pattern.size() - 1;
    }
  }
  return pieces;
}
|
||||
|
||||
std::vector<std::string> Tokenize(const std::string &src, const std::string &delimiters,
|
||||
const Option<size_t> &maxTokenNum) {
|
||||
if (maxTokenNum.IsSome() && maxTokenNum.Get() == 0) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<std::string> tokens;
|
||||
size_t offset = 0;
|
||||
|
||||
while (true) {
|
||||
size_t nonDelimiter = src.find_first_not_of(delimiters, offset);
|
||||
if (nonDelimiter == std::string::npos) {
|
||||
break;
|
||||
}
|
||||
size_t delimiter = src.find_first_of(delimiters, nonDelimiter);
|
||||
if (delimiter == std::string::npos || (maxTokenNum.IsSome() && tokens.size() == maxTokenNum.Get() - 1)) {
|
||||
tokens.push_back(src.substr(nonDelimiter));
|
||||
break;
|
||||
}
|
||||
|
||||
tokens.push_back(src.substr(nonDelimiter, delimiter - nonDelimiter));
|
||||
offset = delimiter;
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
void ShortToFloat32(const int16_t *srcdata, float *dstdata, size_t elementSize) {
|
||||
MS_ASSERT(srcdata != nullptr);
|
||||
MS_ASSERT(dstdata != nullptr);
|
||||
for (size_t i = 0; i < elementSize; i++) {
|
||||
dstdata[i] = ShortToFloat32(srcdata[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void Float32ToShort(const float *srcdata, int16_t *dstdata, size_t elementSize) {
|
||||
MS_ASSERT(srcdata != nullptr);
|
||||
MS_ASSERT(dstdata != nullptr);
|
||||
for (size_t i = 0; i < elementSize; i++) {
|
||||
dstdata[i] = Float32ToShort(srcdata[i]);
|
||||
}
|
||||
}
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
|
@ -1,154 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_COMMON_UTILS_H_
|
||||
#define PREDICT_COMMON_UTILS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <ctime>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "common/mslog.h"
|
||||
#include "common/option.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
const int USEC = 1000000;
|
||||
const int MSEC = 1000;
|
||||
|
||||
uint64_t GetTimeUs();
|
||||
|
||||
int16_t Float32ToShort(float srcValue);
|
||||
|
||||
float ShortToFloat32(int16_t srcValue);
|
||||
|
||||
void ShortToFloat32(const int16_t *srcData, float *dstData, size_t elementSize);
|
||||
|
||||
void Float32ToShort(const float *srcData, int16_t *dstData, size_t elementSize);
|
||||
|
||||
template <typename T>
|
||||
bool IsContain(const std::vector<T> &vec, T element) {
|
||||
for (auto iter = vec.begin(); iter != vec.end(); iter++) {
|
||||
if (*iter == element) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
const char WHITESPACE[] = "\t\n\v\f\r ";
|
||||
const char STR_TRUE[] = "true";
|
||||
const char STR_FALSE[] = "false";
|
||||
|
||||
template <typename T>
|
||||
Option<std::string> ToString(T t) {
|
||||
std::ostringstream out;
|
||||
out << t;
|
||||
if (!out.good()) {
|
||||
return Option<std::string>(None());
|
||||
}
|
||||
|
||||
return Option<std::string>(out.str());
|
||||
}
|
||||
|
||||
template <>
|
||||
inline Option<std::string> ToString(bool value) {
|
||||
return value ? Option<std::string>(STR_TRUE) : Option<std::string>(STR_FALSE);
|
||||
}
|
||||
|
||||
// get the file name from a given path
|
||||
// for example: "/usr/bin", we will get "bin"
|
||||
inline std::string GetFileName(const std::string &path) {
|
||||
char delim = '/';
|
||||
|
||||
size_t i = path.rfind(delim, path.length());
|
||||
if (i != std::string::npos) {
|
||||
return (path.substr(i + 1, path.length() - i));
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
// trim the white space character in a string
|
||||
// see also: macro WHITESPACE defined above
|
||||
inline void Trim(std::string *input) {
|
||||
if (input == nullptr) {
|
||||
return;
|
||||
}
|
||||
if (input->empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
input->erase(0, input->find_first_not_of(WHITESPACE));
|
||||
input->erase(input->find_last_not_of(WHITESPACE) + 1);
|
||||
}
|
||||
|
||||
// to judge whether a string is starting with prefix
|
||||
// for example: "hello world" is starting with "hello"
|
||||
inline bool StartsWithPrefix(const std::string &source, const std::string &prefix) {
|
||||
if (source.length() < prefix.length()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return (source.compare(0, prefix.length(), prefix) == 0);
|
||||
}
|
||||
|
||||
// split string
|
||||
std::vector<std::string> StrSplit(const std::string &str, const std::string &pattern);
|
||||
|
||||
// tokenize string
|
||||
std::vector<std::string> Tokenize(const std::string &src, const std::string &delimiters,
|
||||
const Option<size_t> &maxTokenNum = Option<size_t>(None()));
|
||||
|
||||
enum Mode { PREFIX, SUFFIX, ANY };
|
||||
|
||||
// remove redundant character
|
||||
std::string Remove(const std::string &from, const std::string &subStr, Mode mode = ANY);
|
||||
|
||||
template <typename T>
|
||||
inline Option<T> GenericParseValue(const std::string &value) {
|
||||
T ret;
|
||||
std::istringstream input(value);
|
||||
input >> ret;
|
||||
|
||||
if (input && input.eof()) {
|
||||
return Option<T>(ret);
|
||||
}
|
||||
|
||||
return Option<T>(None());
|
||||
}
|
||||
|
||||
template <>
|
||||
inline Option<std::string> GenericParseValue(const std::string &value) {
|
||||
return Option<std::string>(value);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline Option<bool> GenericParseValue(const std::string &value) {
|
||||
if (value == "true") {
|
||||
return Option<bool>(true);
|
||||
} else if (value == "false") {
|
||||
return Option<bool>(false);
|
||||
}
|
||||
|
||||
return Option<bool>(None());
|
||||
}
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // PREDICT_COMMON_UTILS_H_
|
|
@ -1,56 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_INCLUDE_CONTEXT_H_
|
||||
#define PREDICT_INCLUDE_CONTEXT_H_
|
||||
|
||||
#include <memory>
|
||||
#include "dlpack/dlpack.h"
|
||||
#include "include/tensor.h"
|
||||
|
||||
#define MSPREDICT_API __attribute__((visibility("default")))
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
///\brief Resource management definition of MindSpore predict.
|
||||
class MSPREDICT_API Context {
|
||||
public:
|
||||
///\brief Constructor of MindSpore predict context using default value for parameters.
|
||||
///
|
||||
///\return Instance of MindSpore predict context.
|
||||
Context();
|
||||
|
||||
///\brief Custum constructor of MindSpore predict context using input value for parameters.
|
||||
///
|
||||
///\param[in] threadNum The number of thread during the runtime.
|
||||
///\param[in] allocator The memory management during the runtime
|
||||
///\param[in] deviceCtx The device information during the runtime.
|
||||
///
|
||||
///\return Instance of MindSpore predict context.
|
||||
Context(int threadNum, std::shared_ptr<Allocator> allocator, DLContext deviceCtx);
|
||||
|
||||
///\brief Destructor of MindSpore predict context.
|
||||
virtual ~Context();
|
||||
|
||||
public:
|
||||
DLContext deviceCtx;
|
||||
int threadNum = 1;
|
||||
std::shared_ptr<Allocator> allocator;
|
||||
};
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // PREDICT_INCLUDE_CONTEXT_H_
|
|
@ -1,52 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_INCLUDE_ERRORCODE_H_
|
||||
#define PREDICT_INCLUDE_ERRORCODE_H_
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
using STATUS = int;
|
||||
|
||||
/* Success */
|
||||
constexpr int RET_OK = 0; /**< No error occurs. */
|
||||
|
||||
/* Common error code, range: [-1, -100]*/
|
||||
constexpr int RET_ERROR = -1; /**< Common error code. */
|
||||
constexpr int RET_NULL_PTR = -2; /**< NULL pointer returned.*/
|
||||
constexpr int RET_PARAM_INVALID = -3; /**< Invalid parameter.*/
|
||||
constexpr int RET_NO_CHANGE = -4; /**< No change. */
|
||||
|
||||
/* Executor error code, range: [-101,-200] */
|
||||
constexpr int RET_OUT_OF_TENSOR_RANGE = -101; /**< Failed to checking range. */
|
||||
constexpr int RET_INPUT_TENSOR_ERROR = -102; /**< Failed to checking input tensor. */
|
||||
constexpr int RET_REENTRANT_ERROR = -103; /**< Exist executor running. */
|
||||
|
||||
/* Graph error code, range: [-201,-300] */
|
||||
constexpr int RET_GRAPH_FILE_ERR = -201; /**< Failed to verify graph file. */
|
||||
|
||||
/* Node error code, range: [-301,-400] */
|
||||
constexpr int RET_NOT_FIND_OP = -301; /**< Failed to find OP. */
|
||||
constexpr int RET_INVALID_OP_NAME = -302; /**< Invalid OP name. */
|
||||
constexpr int RET_INVALID_OP_ATTR = -303; /**< Invalid OP attr. */
|
||||
constexpr int RET_OP_EXECUTE_FAILURE = -304; /**< Failed to execution OP. */
|
||||
|
||||
/* Tensor error code, range: [-401,-500] */
|
||||
constexpr int RET_FORMAT_ERR = -401; /**< Failed to checking tensor format. */
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // PREDICT_INCLUDE_ERRORCODE_H_
|
|
@ -1,139 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_INCLUDE_SESSION_H_
|
||||
#define PREDICT_INCLUDE_SESSION_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <unordered_set>
|
||||
#include "include/context.h"
|
||||
#include "include/tensor.h"
|
||||
|
||||
#define MSPREDICT_API __attribute__((visibility("default")))
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
using NODE_ID = std::string;
|
||||
|
||||
///\brief Graph defined by MindSpore predict.
|
||||
///
|
||||
///\note
|
||||
/// The caller does not need to care about detailed implementation of this class, so just list the class name here.
|
||||
class Graph;
|
||||
|
||||
///\brief GraphExecution defined by MindSpore predict.
|
||||
///
|
||||
///\note
|
||||
/// The caller does not need to care about detailed implementation of this class, so just list the class name here.
|
||||
class GraphExecution;
|
||||
|
||||
///\brief MindSpore predict session.
|
||||
///
|
||||
/// This class represents session of MindSpore predict.
|
||||
///
|
||||
///\note
|
||||
/// The caller needs to allocate and free memory of inputs and outputs.
|
||||
/// New Session is not suggested, please use CreateSession function to create new session class.
|
||||
class MSPREDICT_API Session {
|
||||
public:
|
||||
///\brief Constructor of MindSpore predict session.
|
||||
///
|
||||
///\param[in] ctx The context of the session.
|
||||
///
|
||||
///\return Instance of MindSpore predict session.
|
||||
explicit Session(const Context &ctx);
|
||||
|
||||
///\brief Destructor of MindSpore predict session.
|
||||
~Session();
|
||||
|
||||
///\brief Init the session.
|
||||
///
|
||||
///\param[in] ctx The context of the session.
|
||||
///\param[in] size The size of the session.
|
||||
///\param[in] graphBuf The buffer of the graph, used for build session.
|
||||
///
|
||||
///\return Return RET_OK if the initialization is success, otherwhise return RET_ERROR.
|
||||
int Init(const char *graphBuf, size_t size);
|
||||
|
||||
///\brief Get the input of session.
|
||||
///
|
||||
///\return Input node's input tensors if found, empty vector otherwise.
|
||||
///
|
||||
///\note
|
||||
/// The caller needs to allocate and free memory of inputs.
|
||||
std::vector<Tensor *> GetInput();
|
||||
|
||||
///\brief Run the session.
|
||||
///
|
||||
///\param[in] inputs The input of the session.
|
||||
///
|
||||
///\return Return RET_OK if run success, otherwhise return RET_ERROR.
|
||||
///\note
|
||||
/// Currently input tensors' data format only support FORMAT_NCHW.
|
||||
/// Currently input tensors' data type only support FLOAT.
|
||||
int Run(const std::vector<Tensor *> &inputs);
|
||||
|
||||
///\brief Get the output of session.
|
||||
///
|
||||
///\param[in] nodeName Given output node name.
|
||||
///
|
||||
///\return Output node's output tensors if found, empty vector otherwise.
|
||||
///
|
||||
///\note
|
||||
/// The caller needs to free memory of outputs.
|
||||
std::vector<Tensor *> GetOutput(const std::string &nodeName);
|
||||
|
||||
///\brief Get the all output of session.
|
||||
///
|
||||
///\return Every output node's output tensors.
|
||||
///
|
||||
///\note
|
||||
/// The caller needs to free memory of outputs.
|
||||
std::map<std::string, std::vector<Tensor *>> GetAllOutput();
|
||||
|
||||
protected:
|
||||
///\brief Init the executor.
|
||||
///
|
||||
///\return Return RET_OK if the initialization is success, otherwhise return RET_ERROR.
|
||||
int InitExecutor();
|
||||
|
||||
const Context &_ctx;
|
||||
Graph *_graph = nullptr;
|
||||
GraphExecution *_executor = nullptr;
|
||||
bool reinitExecutor = true;
|
||||
};
|
||||
|
||||
///\brief MindSpore predict neural network session create function
|
||||
///
|
||||
/// This function used to create MindSpore predict neural network session, which will be used to run the neural network.
|
||||
///
|
||||
///\param[in] sessionName The name of the session.
|
||||
///\param[in] graphBuf The buffer of the graph, used for build session.
|
||||
///\param[in] size The size of the session.
|
||||
///\param[in] ctx The context of the session.
|
||||
///
|
||||
///\return Instance of MindSpore predict session.
|
||||
///
|
||||
///\note
|
||||
/// The caller needs to allocate and free memory of graph buffer.
|
||||
std::shared_ptr<Session> MSPREDICT_API CreateSession(const char *graphBuf, size_t size, const Context &ctx);
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // PREDICT_INCLUDE_SESSION_H_
|
|
@ -1,259 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PREDICT_INCLUDE_TENSOR_H_
|
||||
#define PREDICT_INCLUDE_TENSOR_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "dlpack/dlpack.h"
|
||||
#include "schema/inner/ms_generated.h"
|
||||
|
||||
#define MSPREDICT_API __attribute__((visibility("default")))
|
||||
|
||||
namespace mindspore {
|
||||
namespace predict {
|
||||
///\brief Allocator definition of MindSpore predict.
|
||||
class Allocator;
|
||||
|
||||
///\brief Tensor definition of MindSpore predict.
|
||||
class MSPREDICT_API Tensor {
|
||||
public:
|
||||
///\brief Constructor of MindSpore predict tensor.
|
||||
///
|
||||
///\param[in] tensor Define the parameters of the tensor.
|
||||
///\param[in] copyData Malloc data for the tensor, and copy origin data from
|
||||
/// input tensor.
|
||||
///
|
||||
///\return Instance of MindSpore predict tensor.
|
||||
Tensor(const Tensor &tensor, bool copyData = false);
|
||||
|
||||
///\brief Constructor of MindSpore predict tensor.
|
||||
///
|
||||
///\param[in] dt Data Type of the tensor, see introduction to 'enum DataType'
|
||||
/// for supported type.
|
||||
///\param[in] dims Dimension Values such as height and width, which defined
|
||||
/// the shape of the tensor.
|
||||
///\param[in] format Tensor format, see introduction to 'enum Format' for
|
||||
/// supported format.
|
||||
///\param[in] data Data of the tensor.
|
||||
///
|
||||
///\return Instance of MindSpore predict tensor.
|
||||
///
|
||||
///\note
|
||||
/// Length of data should align with dt, format and dims, otherwise the
|
||||
/// application might run into unexpected error,
|
||||
/// such as segment fault.
|
||||
/// For example, dt is DT_FLOAT, format is FORMAT_NCHW, dims is [1,3,300,300],
|
||||
/// then minimum length of data should
|
||||
/// be 1 * 3 * 300 * 300 * sizeof(float).
|
||||
Tensor(DataType dt, const std::vector<int64_t> &dims, Format format, void *data);
|
||||
|
||||
///\brief Destructor of MindSpore predict tensor.
|
||||
~Tensor();
|
||||
|
||||
///\brief Get MindSpore predict tensor.
|
||||
///
|
||||
///\param[in] Definition of the tensor.
|
||||
///
|
||||
///\return Address of MindSpore predict tensor.
|
||||
static Tensor *CopyFromTensorDef(const TensorDef &tensordef);
|
||||
|
||||
///\brief Get dtype of MindSpore predict tensor.
|
||||
///
|
||||
///\return Dtype of MindSpore predict tensor.
|
||||
DLDataType GetTensorDtype() const;
|
||||
|
||||
///\brief Get data of MindSpore predict tensor.
|
||||
///
|
||||
///\return Address of MindSpore predict tensor data.
|
||||
void *GetData() const;
|
||||
|
||||
///\brief Set data of MindSpore predict tensor.
|
||||
///
|
||||
///\param[in] data Address for data of the MindSpore predict tensor instance.
|
||||
///
|
||||
///\note
|
||||
/// Length of data should align with dt, format and dims, otherwise the
|
||||
/// application might run into unexpected error,
|
||||
/// such as segment fault.
|
||||
/// For example, dt is DT_FLOAT, format is FORMAT_NCHW, dims is [1,3,300,300],
|
||||
/// then minimum length of data should
|
||||
/// be 1 * 3 * 300 * 300 * sizeof(float).
|
||||
void SetData(void *data);
|
||||
|
||||
///\brief Get data type of MindSpore predict tensor.
|
||||
///
|
||||
///\return Data Type of the tensor.
|
||||
DataType GetDataType() const;
|
||||
|
||||
///\brief Set data type of MindSpore predict tensor.
|
||||
///
|
||||
///\param[in] dt Data Type of the tensor, see introduction to 'enum DataType'
|
||||
/// for supported type.
|
||||
void SetDataType(DataType dt);
|
||||
|
||||
///\brief Get number of dimension of MindSpore predict tensor.
|
||||
///
|
||||
///\return Number of dimension of the MindSpore predict tensor.
|
||||
int GetNDim() const;
|
||||
|
||||
///\brief Get dimension of MindSpore predict tensor.
|
||||
///
|
||||
///\return Dimension of the MindSpore predict tensor.
|
||||
std::vector<int64_t> GetDims() const;
|
||||
|
||||
///\brief Set dimension of MindSpore predict tensor.
|
||||
///
|
||||
///\param[in] dims Vector that has values of dimension.
|
||||
void SetDims(const std::vector<int64_t> &dims);
|
||||
|
||||
///\brief Get format of MindSpore predict tensor.
|
||||
///
|
||||
///\return Format of the MindSpore predict tensor.
|
||||
Format GetFormat() const { return format; }
|
||||
|
||||
///\brief Set format of MindSpore predict tensor.
|
||||
///
|
||||
///\param[in] format Format of the tensor.
|
||||
void SetFormat(Format format) { this->format = format; }
|
||||
|
||||
///\brief Get reference count of MindSpore predict tensor.
|
||||
///
|
||||
///\return Reference count of the MindSpore predict tensor.
|
||||
int RefCount() { return refCount; }
|
||||
|
||||
///\brief Increase reference count of MindSpore predict tensor.
|
||||
///
|
||||
///\param[in] ref The increase of the reference count.
|
||||
void AddRef(int ref) { refCount += ref; }
|
||||
|
||||
///\brief Decrease reference count of MindSpore predict tensor.
|
||||
///
|
||||
///\param[in] ref The decrease of the reference count.
|
||||
void DefRef(int ref) { refCount -= ref; }
|
||||
|
||||
///\brief Get element size of MindSpore predict tensor.
|
||||
///
|
||||
///\return Element size of MindSpore predict tensor.
|
||||
size_t GetElementSize() const;
|
||||
|
||||
///\brief Get data size of MindSpore predict tensor.
|
||||
///
|
||||
///\return Data size of MindSpore predict tensor.
|
||||
size_t GetDataSize() const;
|
||||
|
||||
///\brief Get element size of MindSpore predict tensor in NC4HW4 format.
|
||||
///
|
||||
///\param[in] isNhwc Whether the current format is NHWC.
|
||||
///
|
||||
///\return Element size of MindSpore predict tensor in NC4HW4 format.
|
||||
size_t GetNC4HW4ElementSize(bool isNhwc);
|
||||
|
||||
///\brief Get data size of MindSpore predict tensor in NC4HW4 format.
|
||||
///
|
||||
///\param[in] isNhwc Whether the current format is NHWC.
|
||||
///
|
||||
///\return Data size of MindSpore predict tensor in NC4HW4 format.
|
||||
size_t GetNC4HW4DataSize(bool isNhwc);
|
||||
|
||||
///\brief Malloc data for the MindSpore predict tensor.
|
||||
///
|
||||
///\param[in] allocator The malloc source for data.
|
||||
///\param[in] refCount The reference count of the data.
|
||||
///
|
||||
///\return Return RET_OK if the data is successfully allocated, otherwhise return RET_ERROR.
|
||||
int MallocData(std::shared_ptr<Allocator> allocator = nullptr, int refCount = 0);
|
||||
|
||||
///\brief Free the MindSpore predict tensor.
|
||||
void FreeTensor();
|
||||
|
||||
///\brief Free the data of MindSpore predict tensor.
|
||||
void ForceFreeData();
|
||||
|
||||
///\brief Free the data of MindSpore predict tensor.
|
||||
void FreeData();
|
||||
|
||||
///\brief Compare data size of MindSpore predict tensor in NC4HW4 format.
|
||||
///
|
||||
///\param[in] dst The compare tensor.
|
||||
///
|
||||
///\return The result of fuction.
|
||||
bool CompareShape(const Tensor &dst);
|
||||
|
||||
///\brief Compare shape of MindSpore predict tensor with another shape.
|
||||
///
|
||||
///\param[in] other The compare shape information.
|
||||
///
|
||||
///\return The result of function.
|
||||
bool CompareShape(const std::vector<int64_t> &other);
|
||||
|
||||
///\brief Get instance of MindSpore predict tensor.
|
||||
///
|
||||
///\return Instance of MindSpore predict dlTensor.
|
||||
DLTensor *GetDLTensor() { return &dlTensor; }
|
||||
|
||||
///\brief Get height of MindSpore predict tensor.
|
||||
///
|
||||
///\return Height of MindSpore predict tensor.
|
||||
int64_t Height() const;
|
||||
|
||||
///\brief Get width of MindSpore predict tensor.
|
||||
///
|
||||
///\return Width of MindSpore predict tensor.
|
||||
int64_t Width() const;
|
||||
|
||||
///\brief Get channel of MindSpore predict tensor.
|
||||
///
|
||||
///\return Channel of MindSpore predict tensor.
|
||||
int64_t Channel() const;
|
||||
|
||||
///\brief Get batch of MindSpore predict tensor.
|
||||
///
|
||||
///\return Batch of MindSpore predict tensor.
|
||||
int64_t Batch() const;
|
||||
|
||||
///\brief Get stride of MindSpore predict tensor.
|
||||
///
|
||||
///\param[in] index the index of stride.
|
||||
///
|
||||
///\return Stride of MindSpore predict tensor.
|
||||
int64_t Stride(int index) const;
|
||||
|
||||
///\brief Set stride of MindSpore predict tensor by input.
|
||||
///
|
||||
///\param[in] index Index of stride
|
||||
///\param[in] stride The stride to set
|
||||
void SetStride(int index, int64_t stride);
|
||||
|
||||
///\brief Set stride of MindSpore predict tensor by dims.
|
||||
void SetStride();
|
||||
void SetScale(bool isScale = true);
|
||||
|
||||
private:
|
||||
bool isScale = false;
|
||||
int refCount = 0;
|
||||
int isConst;
|
||||
Format format;
|
||||
DLTensor dlTensor;
|
||||
std::shared_ptr<Allocator> allocator = nullptr;
|
||||
std::vector<float> scale;
|
||||
std::vector<int> zeroPoint;
|
||||
};
|
||||
} // namespace predict
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // PREDICT_INCLUDE_TENSOR_H_
|
|
@ -1 +0,0 @@
|
|||
add_subdirectory(tvm_kernel)
|
|
@ -1,27 +0,0 @@
|
|||
# Created by .ignore support plugin
|
||||
#
|
||||
|
||||
# filter python
|
||||
*.pyc
|
||||
|
||||
# filter build
|
||||
*.so
|
||||
*.o
|
||||
|
||||
# filter coverage
|
||||
coverage/
|
||||
|
||||
# filter report
|
||||
*.xml
|
||||
|
||||
# filter tvm
|
||||
3rdparty/
|
||||
|
||||
# filter build
|
||||
build/
|
||||
cmake-build-debug/
|
||||
.idea/
|
||||
TFLite_Detection_PostProcess_CI
|
||||
app_run
|
||||
output
|
||||
tvm
|
|
@ -1,4 +0,0 @@
|
|||
[submodule "3rdparty/incubator-tvm"]
|
||||
path = 3rdparty/incubator-tvm
|
||||
url = https://github.com/dmlc/tvm.git
|
||||
branch = v0.5
|
|
@ -1,25 +0,0 @@
|
|||
cmake_minimum_required(VERSION 3.12.1)
|
||||
project(autotensor LANGUAGES CXX)
|
||||
set (MINDSPORE "${PROJECT_SOURCE_DIR}/../../..")
|
||||
set (TVM_KERNEL_LITE "${PROJECT_SOURCE_DIR}/lite")
|
||||
set (THIRDPARTY "${MINDSPORE}/third_party")
|
||||
set (TVM_CLEAN_SOURCE "${THIRDPARTY}/incubator-tvm")
|
||||
set (TVM_BUILD_SOURCE "${PROJECT_SOURCE_DIR}/incubator-tvm")
|
||||
set (BUILD_DIR "${PROJECT_SOURCE_DIR}")
|
||||
set (TVM_KERNEL_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
set (TVM_OUTPUT_DIR ${TVM_KERNEL_OUTPUT_DIR}/incubator-tvm)
|
||||
|
||||
set (LLVM_CONFIG $ENV{LLVM_PATH})
|
||||
if (NOT LLVM_CONFIG)
|
||||
message(FATAL_ERROR "please set LLVM_PATH in env")
|
||||
endif()
|
||||
set (CMAKE_BUILD_TYPE "Release")
|
||||
|
||||
include(${TVM_BUILD_SOURCE}/cmake/util/Util.cmake)
|
||||
include(${TVM_BUILD_SOURCE}/cmake/util/FindLLVM.cmake)
|
||||
if(EXISTS ${TVM_BUILD_SOURCE}/cmake/config.cmake)
|
||||
include(${TVM_BUILD_SOURCE}/cmake/config.cmake)
|
||||
endif()
|
||||
add_subdirectory(${TVM_KERNEL_LITE})
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
|
@ -1,140 +0,0 @@
|
|||
cmake_minimum_required(VERSION 3.12)
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
|
||||
if(ENABLE_PREDICT_ARM64)
|
||||
set(TARGS "arm64")
|
||||
elseif(ENABLE_PREDICT_ARM32)
|
||||
set(TARGS "arm32")
|
||||
else()
|
||||
set(TARGS "x86")
|
||||
endif()
|
||||
message("TARGET is set to ${TARGS}")
|
||||
|
||||
set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||
set(CMAKE_SKIP_RPATH TRUE)
|
||||
|
||||
if(MSVC)
|
||||
message("not support MSVC")
|
||||
else(MSVC)
|
||||
include(CheckCXXCompilerFlag)
|
||||
check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||
message("Build in Debug mode")
|
||||
set(CMAKE_C_FLAGS "-O0 -g -Wall -Werror -fPIC [${CMAKE_C_FLAGS} -rdynamic")
|
||||
set(CMAKE_CXX_FLAGS "-O0 -g -Wall -Werror -fPIC -std=c++11 ${CMAKE_CXX_FLAGS} -rdynamic")
|
||||
else()
|
||||
set(CMAKE_C_FLAGS "-D_FORTIFY_SOURCE=2 -O2 -fno-rtti -fvisibility=hidden -Wall -Werror -fPIC -fstack-protector-strong ${CMAKE_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "-D_FORTIFY_SOURCE=2 -O2 -fno-rtti -fvisibility=hidden -Wall -Werror -fPIC -fstack-protector-strong -std=c++11 ${CMAKE_CXX_FLAGS}")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack")
|
||||
endif ()
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND
|
||||
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
|
||||
set(CMAKE_CXX_FLAGS "-Wall -Werror -faligned-new ${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
if (CODE_COVERAGE)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -fprofile-arcs -ftest-coverage -O0")
|
||||
endif()
|
||||
endif(MSVC)
|
||||
|
||||
|
||||
if("${TARGS}" STREQUAL "x86")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__x86_64__ -fno-strict-aliasing")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__x86_64__ -fno-strict-aliasing")
|
||||
endif()
|
||||
|
||||
|
||||
set(PRJ_SRC_DIR "${PROJECT_SOURCE_DIR}")
|
||||
set(PRJ_KLIB_DIR "${PROJECT_SOURCE_DIR}")
|
||||
set(PRJ_LITE_DIR "${PROJECT_SOURCE_DIR}/lite")
|
||||
|
||||
# include directories
|
||||
message("current PRJ DIR: ${PROJECT_SOURCE_DIR}")
|
||||
message("current SUB_PRJ DIR: ${PRJ_SRC_DIR}")
|
||||
message("current KLIB DIR: ${PRJ_KLIB_DIR}")
|
||||
message("current PRJ_LITE_DIR: ${PRJ_LITE_DIR}")
|
||||
message("CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")
|
||||
set(DMLC_CORE "${TVM_BUILD_SOURCE}/3rdparty/dmlc-core")
|
||||
set(DLPACK "${TVM_BUILD_SOURCE}/3rdparty/dlpack")
|
||||
set(PREDICT "${PRJ_SRC_DIR}/../../")
|
||||
set(SECUREC "${PRJ_SRC_DIR}/../../../third_party/securec")
|
||||
message("include dir: ${DLPACK}/include")
|
||||
include_directories(${DLPACK}/include)
|
||||
include_directories(${DMLC_CORE}/include)
|
||||
include_directories(${TVM_BUILD_SOURCE}/include)
|
||||
include_directories(${TVM_BUILD_SOURCE}/src/pass)
|
||||
include_directories(${PRJ_LITE_DIR})
|
||||
include_directories(${PRJ_LITE_DIR}/include)
|
||||
include_directories(${PRJ_LITE_DIR}/../../..)
|
||||
include_directories(${PRJ_LITE_DIR}/../../../include)
|
||||
include_directories(${PRJ_LITE_DIR}/../../../src/runtime)
|
||||
include_directories(${PRJ_LITE_DIR}/../../../common)
|
||||
include_directories(${SECUREC})
|
||||
message("SECUREC: " "${SECUREC}/build/src")
|
||||
include_directories(${PREDICT})
|
||||
include_directories(${PREDICT}/src)
|
||||
include_directories(${PRJ_SRC_DIR}/../../../third_party/flatbuffers/include)
|
||||
include_directories(${PRJ_SRC_DIR}/../../../third_party)
|
||||
# Source file lists
|
||||
file(GLOB_RECURSE TVM_KERNEL_SRC
|
||||
src/api/*.cc
|
||||
src/tflite/TFLite_Detection_PostProcess.cc)
|
||||
|
||||
set (TVM_RUNTIME_FLG $ENV{TVM_RUNTIME_ON})
|
||||
if ("${TVM_RUNTIME_FLG}" STREQUAL "true")
|
||||
message("Using TVM runtime function")
|
||||
file(GLOB TVM_RUNTIME_SRCS
|
||||
${TVM_ROOT}/apps/howto_deploy/tvm_runtime_pack.cc)
|
||||
else()
|
||||
message("Using LITE runtime function")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DLITE_RUNTIME_ON -DTVM_RUNTIME_HEADER_ONLY -DLITE_THREAD_POOL_SHARED")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLITE_RUNTIME_ON -DTVM_RUNTIME_HEADER_ONLY -DLITE_THREAD_POOL_SHARED")
|
||||
file(GLOB_RECURSE TVM_RUNTIME_SRCS
|
||||
${PREDICT}/src/runtime/*.cc)
|
||||
endif()
|
||||
|
||||
if("${TARGS}" STREQUAL "arm32" OR "${TARGS}" STREQUAL "arm64")
|
||||
set(CMAKE_SKIP_BUILD_RPATH TRUE)
|
||||
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
|
||||
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
|
||||
endif()
|
||||
|
||||
set(LIB_X86_PATH "${PRJ_KLIB_DIR}/build/lib_x86")
|
||||
set(LIB_ARM64_PATH "${PRJ_KLIB_DIR}/build/lib_arm64")
|
||||
set(LIB_ARM32_PATH "${PRJ_KLIB_DIR}/build/lib_arm32")
|
||||
if("${TARGS}" STREQUAL "x86")
|
||||
set(KLIBS_PATH "${LIB_X86_PATH}")
|
||||
elseif("${TARGS}" STREQUAL "arm64")
|
||||
set(KLIBS_PATH "${LIB_ARM64_PATH}")
|
||||
elseif("${TARGS}" STREQUAL "arm32")
|
||||
set(KLIBS_PATH "${LIB_ARM32_PATH}")
|
||||
else()
|
||||
message(ERROR " not suport ${TARGS}")
|
||||
endif()
|
||||
|
||||
file(GLOB_RECURSE KERNEL_LIBS "${KLIBS_PATH}/*.o")
|
||||
message("KERNEL_PATH= ${KLIBS_PATH}")
|
||||
|
||||
add_compile_options(-DTVM_CUDA_RUNTIM=0)
|
||||
add_compile_options(-DTVM_METAL_RUNTIM=0)
|
||||
add_compile_options(-DTVM_OPENCL_RUNTIM=0)
|
||||
|
||||
link_directories(${KLIBS_PATH})
|
||||
|
||||
add_library(tvm_runtime_pack STATIC ${TVM_RUNTIME_SRCS})
|
||||
add_library(kernel_manager STATIC ${TVM_KERNEL_SRC})
|
||||
add_library(tvm_kernel_static STATIC ${TVM_KERNEL_SRC} ${KERNEL_LIBS})
|
||||
add_library(tvm_kernel SHARED ${TVM_KERNEL_SRC} ${KERNEL_LIBS})
|
||||
set_target_properties(tvm_kernel PROPERTIES LINK_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack")
|
||||
|
||||
set(KERNEL_LD_LIB tvm_runtime_pack dl)
|
||||
|
||||
if("${TARGS}" STREQUAL "x86")
|
||||
set(KERNEL_LD_LIB ${KERNEL_LD_LIB} pthread)
|
||||
else()
|
||||
set(ANDROID_ALLOW_UNDEFINED_SYMBOLS TRUE)
|
||||
endif()
|
||||
|
||||
target_link_libraries(tvm_kernel ${KERNEL_LD_LIB} libsecurec.a)
|
||||
target_link_libraries(tvm_kernel_static OBJECT tvm_runtime_pack libsecurec.a)
|
||||
|
||||
add_dependencies(tvm_kernel securec)
|
|
@ -1,94 +0,0 @@
|
|||
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Public C++ API of the TVM kernel manager for the lite predict runtime:
// kernel lookup/invocation by id or OpDef, kernel-library loading, and
// thread-pool configuration. All symbols are exported with default
// visibility via the PUBLIC macro below.
#ifndef PREDICT_MODULE_TVM_KERNEL_LITE_INCLUDE_LITE_API_KM_API_H_
#define PREDICT_MODULE_TVM_KERNEL_LITE_INCLUDE_LITE_API_KM_API_H_

#include <dlpack/dlpack.h>
#include <functional>
#include <string>
#include <vector>
#include "schema/inner/ms_generated.h"
#include "schema/inner/op_generated.h"

// Marks a symbol as exported from the shared library (GCC/Clang visibility).
#define PUBLIC __attribute__((visibility("default")))

/*!
 * \brief Call tvm kernel.
 * \param fid tvm kernel id.
 * \param tensors tvm kernel arguments.
 * \return 0 if SUCCESS.
 */
PUBLIC int CallKernel(const std::string &fid, const std::vector<DLTensor *> &tensors);

/*!
 * \brief Get tvm kernel by id.
 * \param fid tvm kernel id.
 * \return std::function if SUCCESS else nullptr.
 */
PUBLIC std::function<int(const std::vector<DLTensor *> &)> GetKernel(const std::string &fid);

/*!
 * \brief Get tvm kernel by OpDef.
 * \param opdef defined by predict schema.
 * \param tensors tensor arguments used to resolve the kernel.
 * \param option kernel selection options (thread count, device).
 * \return std::function if SUCCESS else nullptr.
 */
// Options controlling kernel lookup/execution.
struct PUBLIC KernelOption {
  int numThreads = 0;   // 0 presumably means "use default" — confirm in impl.
  std::string device;   // target device name; empty selects the default.
};

PUBLIC std::function<int(const std::vector<DLTensor *> &)> GetKernel(const mindspore::predict::OpDef &opdef,
                                                                     const std::vector<DLTensor *> &tensors,
                                                                     const KernelOption &option);

/*!
 * \brief load TVM Kernel lib
 * \param mode 0 indicate shared lib
 * \param fname shared lib path when mode equals 0
 * \return 0 if SUCCESS
 */
PUBLIC void InitKernelManager(int mode, const std::string &fname);

/*
 * \brief config ThreadPool using mode
 * \param mode: -1 using mid speed cpu first, 1 using higher speed cpu first
 * \param nthreads: threads num to be used, can't exceed cpu num
 *        if mode==-1 bind mid cpu first
 *        if mode==1 bind higher cpu first
 *        if mode==0 no bind
 * \param execute_self: cur thread do arithmetic or not
 *        execute_self: true cur thread do arithmetic work
 *        execute_self: false cur thread not do arithmetic work
 */
PUBLIC void ConfigThreadPool(int mode = -1, int nthreads = 2, bool execute_self = true);

/*
 * \brief provide a simple api for mslite; mslite does not care about mode
 */
inline void CfgThreadPool(int nthread) { ConfigThreadPool(-1, nthread, true); }

/*
 * the Callback function to do cpu bind for master thread.
 */
PUBLIC void DoMasterThreadBind(bool bindflg);

PUBLIC void DoAllThreadBind(bool ifBind);

#undef PUBLIC

#endif  // PREDICT_MODULE_TVM_KERNEL_LITE_INCLUDE_LITE_API_KM_API_H_
|
|
@ -1,17 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Neural network operators package (TVM kernel lite); submodule imports are
currently disabled below."""
# Submodules left unimported here; callers import them explicitly.
# from . import arm_cpu
# from . import at_ops
|
|
@ -1,17 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Schedule for ARM CPU: registers the conv2d compute/schedule templates on
package import."""

# Importing conv2d triggers its autotvm template registrations.
from . import conv2d
|
|
@ -1,470 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Conv2D schedule for ARM CPU"""
|
||||
from __future__ import absolute_import as _abs
|
||||
|
||||
import functools
|
||||
|
||||
import tvm
|
||||
from tvm import autotvm
|
||||
import tvm.contrib.nnpack
|
||||
|
||||
from topi.generic import schedule_conv2d_nchw
|
||||
from topi.util import traverse_inline, get_const_tuple
|
||||
from topi.nn import pad, conv2d
|
||||
from topi.nn.util import get_const_int, get_pad_tuple
|
||||
|
||||
|
||||
@autotvm.register_topi_compute(conv2d, "arm_cpu", ["asm"])
def conv2d_arm_cpu(cfg, data, kernel, strides, padding, dilation, out_dtype):
    """TOPI compute callback for conv2d on ARM CPU ("asm" template).

    Builds the tuning config space, then delegates to the spatial-pack
    compute implementation.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template

    data : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]

    kernel : tvm.Tensor
        4-D with shape [num_filter, in_channel, filter_height, filter_width] or
        pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height,
        filter_width, num_filter_block]

    strides : list of two ints
        [stride_height, stride_width]

    padding : list of two ints
        [pad_height, pad_width]

    dilation : list of two ints
        [dilation_height, dilation_width]

    out_dtype: str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    # num_tile=2 selects the two-level (arm cpu) tiling scheme in _gen_cfg.
    spatial_args = _gen_cfg(cfg, data, kernel, strides, padding, dilation, num_tile=2)
    return _conv_spatial_pack_asm(
        spatial_args, data, kernel, strides, padding, dilation, out_dtype
    )
|
||||
|
||||
|
||||
@autotvm.register_topi_schedule(schedule_conv2d_nchw, "arm_cpu", ["asm"])
def schedule_conv2d_nchw_arm_cpu(outs):
    """TOPI schedule callback for conv2d on ARM CPU ("asm" template).

    Parameters
    ----------
    outs: Array of Tensor
        The computation graph description of conv2d
        in the format of an array of tensors.

    Returns
    -------
    s: Schedule
        The computation schedule for conv2d.
    """
    # All the real work happens in the module-level schedule builder.
    return _conv_schedule_asm(outs)
|
||||
|
||||
|
||||
def _gen_cfg(cfg, data, kernel, strides, padding, dilation, num_tile):
    """Define the autotvm tuning space for the "asm" conv2d template.

    Derives output extents from the data/kernel shapes, declares split knobs
    tile_co/tile_oh/tile_ow (2-level for arm cpu, 3-level for mali gpu) plus a
    fixed reorder knob, and returns the chosen inner-tile sizes.

    Returns (is_var, vh_, vw_, vc_): is_var is always False here; vh_/vw_/vc_
    are the innermost tile extents read back from cfg.
    """
    # Kernel may arrive as plain 4-D [CO, CI, KH, KW] or pre-packed 5-D
    # [CO_chunk, CI, KH, KW, VC]; recover the full output-channel count.
    if len(kernel.shape) == 4:
        co_, _, kh_, kw_ = get_const_tuple(kernel.shape)
    else:  # kernel tensor is pre packed
        co_, _, kh_, kw_, vc_ = get_const_tuple(kernel.shape)
        co_ = co_ * vc_

    if isinstance(dilation, int):
        dilation_h = dilation_w = dilation
    else:
        dilation_h, dilation_w = dilation

    n_, ci_, ih_, iw_ = get_const_tuple(data.shape)

    # Effective kernel footprint under dilation.
    dilated_kernel_h = (kh_ - 1) * dilation_h + 1
    dilated_kernel_w = (kw_ - 1) * dilation_w + 1
    pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
        padding, (dilated_kernel_h, dilated_kernel_w)
    )
    hstr, wstr = strides if isinstance(strides, (tuple, list)) else (strides, strides)
    # Standard convolution output extents.
    oh_ = (ih_ + pad_top + pad_bottom - dilated_kernel_h) // hstr + 1
    ow_ = (iw_ + pad_left + pad_right - dilated_kernel_w) // wstr + 1

    # Declare the iteration axes of the tuning space.
    n, co, oh, ow = cfg.axis(n_), cfg.axis(co_), cfg.axis(oh_), cfg.axis(ow_)
    ci, kh, kw = cfg.reduce_axis(ci_), cfg.reduce_axis(kh_), cfg.reduce_axis(kw_)

    if num_tile == 2:  # for arm cpu
        # Only exact divisors of co_ are valid candidates for the co split.
        candidate_vc = []
        for iv in range(3, co_):
            if co_ % iv == 0:
                candidate_vc.append([co_ // iv, iv])
        candidate_vc.append([1, co_])
        co, vc = cfg.define_split(
            "tile_co", co, num_outputs=2, policy="candidate", candidate=candidate_vc
        )
        oh, vh = cfg.define_split("tile_oh", oh, num_outputs=2)
        ow, vw = cfg.define_split("tile_ow", ow, num_outputs=2)
    elif num_tile == 3:  # for mali gpu
        co, _, vc = cfg.define_split("tile_co", co, num_outputs=3)
        oh, _, vh = cfg.define_split("tile_oh", oh, num_outputs=3)
        ow, _, vw = cfg.define_split("tile_ow", ow, num_outputs=3)
    else:
        raise RuntimeError("Invalid num_tile")

    # Single-candidate reorder: effectively pins the loop order.
    cfg.define_reorder(
        "reorder_0",
        [n, co, oh, ow, ci, kh, kw, vh, vw, vc],
        policy="candidate",
        candidate=[[n, co, oh, ow, ci, kh, kw, vh, vw, vc],],
    )

    # Innermost tile sizes selected by the (possibly fallback) config.
    vc_ = cfg["tile_co"].size[-1]
    vh_ = cfg["tile_oh"].size[-1]
    vw_ = cfg["tile_ow"].size[-1]
    is_var = False
    return (is_var, vh_, vw_, vc_)
|
||||
|
||||
def _conv_spatial_pack_asm(args, data, kernel, strides, padding,
                           dilation, out_dtype):
    """Spatial-pack conv2d compute for the ARM "asm" template.

    Packs the padded input and the kernel into tiled layouts (data_vec /
    kernel_vec), performs the tiled convolution into `conv`, then unpacks to
    the final NCHW output tagged "asm_conv2d_output" for the schedule pass.

    args is the (is_var, vh_, vw_, vc_) tuple produced by _gen_cfg.
    """
    is_var, vh_, vw_, vc_ = args

    # create workload according to raw arguments
    out_dtype = out_dtype or data.dtype
    n_, ci_, ih_, iw_ = data.shape if is_var else get_const_tuple(data.shape)

    if isinstance(dilation, int):
        dilation_h = dilation_w = dilation
    else:
        dilation_h, dilation_w = dilation

    # 4-D kernel is packed below; 5-D kernel is already pre-packed upstream.
    if len(kernel.shape) == 4:
        pre_packed = False
        co_, _, kh_, kw_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
    else:  # kernel tensor is pre packed
        pre_packed = True
        co_, _, kh_, kw_, vc_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
        co_ = co_ * vc_

    dilated_kernel_h = (kh_ - 1) * dilation_h + 1
    dilated_kernel_w = (kw_ - 1) * dilation_w + 1
    pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
        padding, (dilated_kernel_h, dilated_kernel_w)
    )
    hstr, wstr = strides if isinstance(strides, (tuple, list)) else (strides, strides)
    oh_ = (ih_ + pad_top + pad_bottom - dilated_kernel_h) // hstr + 1
    ow_ = (iw_ + pad_left + pad_right - dilated_kernel_w) // wstr + 1
    data_pad = pad(data, [0, 0, pad_top, pad_left], [0, 0, pad_bottom, pad_right])

    # NOTE(review): oh_/ow_ are floor-divided by the tile sizes — output
    # extents that are not multiples of vh_/vw_ would be truncated; presumably
    # the config only offers exact divisors. Confirm with _gen_cfg candidates.
    oh_div = oh_ // vh_
    ow_div = ow_ // vw_
    kvshape = (co_ // vc_, ci_, kh_, kw_, vc_)
    ovshape = (n_, co_ // vc_, oh_div, ow_div, vh_, vw_, vc_)
    oshape = (n_, co_, oh_div * vh_, ow_div * vw_)

    if dilation_h != 1 or dilation_w != 1:
        # undilate input data: gather the dilated taps explicitly so the
        # inner compute can index densely.
        dvshape = (n_, oh_ // vh_, ow_ // vw_, kh_, kw_, vh_, vw_, ci_)
        data_vec = tvm.compute(
            dvshape,
            lambda n, h, w, kh, kw, vh, vw, ci: data_pad[n][ci][
                (h * vh_ + vh) * hstr + kh * dilation_h
            ][(w * vw_ + vw) * wstr + kw * dilation_w],
            name="data_vec_undilated",
        )
    else:
        # Dense case: each tile carries the full receptive field of the tile.
        dvshape = (
            n_,
            oh_ // vh_,
            ow_ // vw_,
            (vh_ - 1) * hstr + kh_,
            (vw_ - 1) * wstr + kw_,
            ci_,
        )
        data_vec = tvm.compute(
            dvshape,
            lambda n, h, w, vh, vw, ci: data_pad[n][ci][h * vh_ * hstr + vh][
                w * vw_ * wstr + vw
            ],
            name="data_vec",
        )

    if pre_packed:
        kernel_vec = kernel
    else:
        # Pack kernel to [CO_chunk, CI, KH, KW, VC].
        kernel_vec = tvm.compute(
            kvshape,
            lambda co, ci, kh, kw, vc: kernel[co * vc_ + vc][ci][kh][kw],
            name="kernel_vec",
        )

    ci = tvm.reduce_axis((0, ci_), name="ci")
    kh = tvm.reduce_axis((0, kh_), name="kh")
    kw = tvm.reduce_axis((0, kw_), name="kw")

    # asm begin----
    # Accumulator dtype per input dtype; attrs are consumed by the asm
    # intrinsic/schedule side (see _schedule_asm / intrin_conv).
    type_map = {
        "int8": "int32",
        "uint8": "uint32",
        "float32": "float32",
        "float16": "float16",
    }
    acum_dtype = type_map[data.dtype]
    attrs = {
        "SH": hstr,
        "SW": wstr,
        "PH": pad_top,
        "PW": pad_left,
        "DILA_H": dilation_h,
        "DILA_W": dilation_w,
        "VH": vh_,
        "VW": vw_,
        "VC": vc_,
        "ACUM_DTYPE": acum_dtype,
    }
    # asm end----

    if dilation_h != 1 or dilation_w != 1:
        conv = tvm.compute(
            ovshape,
            lambda n, co, h, w, vh, vw, vc: tvm.sum(
                data_vec[n, h, w, kh, kw, vh, vw, ci].astype(out_dtype)
                * kernel_vec[co, ci, kh, kw, vc].astype(out_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
            attrs=attrs,
        )
    else:
        conv = tvm.compute(
            ovshape,
            lambda n, co, h, w, vh, vw, vc: tvm.sum(
                data_vec[n, h, w, vh * hstr + kh, vw * wstr + kw, ci].astype(out_dtype)
                * kernel_vec[co, ci, kh, kw, vc].astype(out_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
            attrs=attrs,
        )

    # Unpack the tiled accumulator back to NCHW; the tag is what
    # _conv_schedule_asm keys on.
    output = tvm.compute(
        oshape,
        lambda n, co, h, w: conv[n][co // vc_][h // vh_][w // vw_][h % vh_][w % vw_][
            co % vc_
        ],
        name="output_unpack",
        tag="asm_conv2d_output",
    )

    return output
|
||||
|
||||
|
||||
def intrin_conv(args):
    """Declare a tensor intrinsic that lowers the inner conv tile to an
    external asm routine `opname`.

    Builds placeholder tensors/buffers matching one (vh_, vw_, vc_) output
    tile and binds them so tensorization replaces the tile computation with a
    single call_extern to the hand-written kernel.
    """
    (
        ci_,
        vh_,
        vw_,
        vc_,
        kh_,
        kw_,
        sh_,
        sw_,
        dila_h,
        dila_w,
        dtype,
        acum_dtype,
        opname,
        core_id,
    ) = args
    hstr, wstr = sh_, sw_
    # ci_ may be unknown at declaration time; use a symbolic var then.
    ci_ = tvm.var("ci_") if ci_ is None else ci_
    kvshape = (ci_, kh_, kw_, vc_)
    ovshape = (vh_, vw_, vc_)
    if dila_h != 1 or dila_w != 1:
        # Matches the "data_vec_undilated" layout of the compute side.
        dvshape = (kh_, kw_, vh_, vw_, ci_)
    else:
        dvshape = ((vh_ - 1) * hstr + kh_, (vw_ - 1) * wstr + kw_, ci_)

    data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
    kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
    ci = tvm.reduce_axis((0, ci_), name="ci")
    kh = tvm.reduce_axis((0, kh_), name="kh")
    kw = tvm.reduce_axis((0, kw_), name="kw")
    # The compute below only *describes* the tile semantics; the emitted code
    # is the call_extern in intrin_func.
    if dila_h != 1 or dila_w != 1:
        conv = tvm.compute(
            ovshape,
            lambda vh, vw, vc: tvm.sum(
                data_vec[kh, kw, vh, vw, ci].astype(acum_dtype)
                * kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
        )
    else:
        conv = tvm.compute(
            ovshape,
            lambda vh, vw, vc: tvm.sum(
                data_vec[vh * hstr + kh, vw * wstr + kw, ci].astype(acum_dtype)
                * kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
        )

    # Row-major strides for each buffer (product of trailing extents).
    stride_a = [
        functools.reduce(lambda x, y: x * y, dvshape[i + 1: len(dvshape)])
        for i in range(0, len(dvshape) - 1)
    ]
    stride_a.append(1)
    stride_b = [
        functools.reduce(lambda x, y: x * y, kvshape[i + 1: len(kvshape)])
        for i in range(0, len(kvshape) - 1)
    ]
    stride_b.append(1)
    stride_c = [
        functools.reduce(lambda x, y: x * y, ovshape[i + 1: len(ovshape)])
        for i in range(0, len(ovshape) - 1)
    ]
    stride_c.append(1)

    a_buffer = tvm.decl_buffer(
        data_vec.shape, data_vec.dtype, name="A", offset_factor=1, strides=stride_a
    )
    b_buffer = tvm.decl_buffer(
        kernel_vec.shape, kernel_vec.dtype, name="B", offset_factor=1, strides=stride_b
    )
    c_buffer = tvm.decl_buffer(
        conv.shape, conv.dtype, name="C", offset_factor=1, strides=stride_c
    )

    def intrin_func(ins, outs):
        # ins/outs arrive in binds order: (data, kernel) -> (conv,).
        aa, bb = ins
        cc = outs[0]

        def _body():
            ib = tvm.ir_builder.create()
            # NOTE(review): only kh_/sh_ are forwarded, not kw_/sw_ — the asm
            # routine presumably assumes square kernels/strides; confirm.
            ib.emit(
                tvm.call_extern(
                    "int32",
                    opname,
                    cc.access_ptr("w"),
                    aa.access_ptr("r"),
                    bb.access_ptr("r"),
                    ci_,
                    vh_,
                    vw_,
                    vc_,
                    kh_,
                    sh_,
                    core_id,
                )
            )
            return ib.get()

        return _body()

    return tvm.decl_tensor_intrin(
        conv.op, intrin_func, binds={data_vec: a_buffer, kernel_vec: b_buffer, conv: c_buffer}
    )
|
||||
|
||||
|
||||
def _schedule_asm(s, data_vec, kernel_vec, conv, output, last):
    """Schedule the packed conv stages: tile/reorder the fused output,
    compute conv at the tile level, and parallelize the packing stages.

    `last` is the final consumer (possibly a fused elementwise op); `output`
    is the unpack stage. Returns the mutated schedule.
    """
    n, co, oh, ow, vh, vw, vc = s[conv].op.axis

    # Inner-tile extents recovered from the conv stage itself.
    axis_extent = []
    for i in (vh, vw, vc):
        axis_extent.append(get_const_int(i.dom.extent))
    # reduce_extent is computed but unused below — kept as in original.
    reduce_extent = []
    for i in s[conv].op.reduce_axis[1:]:
        reduce_extent.append(get_const_int(i.dom.extent))
    vh_, vw_, vc_ = axis_extent

    # schedule fusion: re-tile the flat NCHW consumer to match conv's tiling.
    n, co, h, w = s[last].op.axis
    co, vc = s[last].split(co, vc_)
    oh, vh = s[last].split(h, vh_)
    ow, vw = s[last].split(w, vw_)
    s[last].reorder(n, co, oh, ow, vh, vw, vc)
    if last != output:
        # Unpack stage is absorbed into the fused consumer.
        s[output].compute_inline()

    s[conv].compute_at(s[last], ow)

    # mark parallel
    s[last].parallel(co)

    # Axis count differs between the dilated and dense packing layouts.
    if data_vec.op.name == "data_vec_undilated":
        _, h, _, _, _, _, _, _ = s[data_vec].op.axis
    else:
        _, h, _, _, _, _ = s[data_vec].op.axis
    s[data_vec].parallel(h)

    if kernel_vec.op.name == "kernel_vec":
        co, _, _, _, _ = s[kernel_vec].op.axis
        if autotvm.GLOBAL_SCOPE.in_tuning:
            # kernel packing will be pre-computed during compilation, so we skip
            # this part to make tuning records correct
            s[kernel_vec].pragma(co, "debug_skip_region")
        else:
            s[kernel_vec].parallel(co)
    elif kernel_vec.op.name == "kernel_vec_conv2d_transpose":  # for conv2d transpose
        co, _, _, _, _ = s[kernel_vec].op.axis
        s[kernel_vec].parallel(co)

    return s
|
||||
|
||||
|
||||
def _conv_schedule_asm(outs):
    """Create the schedule for the "asm" conv2d template.

    Walks the output graph, and for every op tagged "asm_conv2d_output"
    inlines padding / dilated kernels and applies _schedule_asm.

    Parameters
    ----------
    outs: Array of Tensor
        Output tensors of the conv2d computation.

    Returns
    -------
    s: Schedule
        The computation schedule.
    """
    s = tvm.create_schedule([x.op for x in outs])

    def _callback(op):
        if "asm_conv2d_output" in op.tag:
            # schedule conv2d
            output = op.output(0)
            conv = op.input_tensors[0]

            # If the first input is the synthetic "attr" tensor, the real
            # data tensor is shifted by one position.
            sidx = 0
            if conv.op.input_tensors[0].name == "attr":
                sidx = 1
            data_vec = conv.op.input_tensors[sidx]
            data_pad = data_vec.op.input_tensors[0]
            s[data_pad].compute_inline()

            kernel_vec = conv.op.input_tensors[sidx + 1]
            if kernel_vec.op.name == "kernel_vec":
                kernel = kernel_vec.op.input_tensors[0]
            else:
                kernel = kernel_vec
            if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
                s[kernel].compute_inline()

            # FIX: the original had an if/else on the "attr" input whose two
            # branches were byte-identical; collapsed to a single call.
            _schedule_asm(s, data_vec, kernel_vec, conv, output, outs[0])

    traverse_inline(s, outs[0].op, _callback)
    return s
|
|
@ -1,477 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Conv2D_transpose of stride=2, kernel=2*2 schedule for ARM CPU"""
|
||||
from __future__ import absolute_import as _abs
|
||||
|
||||
import functools
|
||||
|
||||
import tvm
|
||||
from tvm import autotvm
|
||||
import tvm.contrib.nnpack
|
||||
|
||||
from topi.generic import schedule_conv2d_nchw
|
||||
from topi.util import traverse_inline, get_const_tuple
|
||||
from topi.nn import conv2d
|
||||
|
||||
|
||||
@autotvm.register_topi_compute(conv2d, "arm_cpu", ["deconv"])
def conv2d_arm_cpu_deconv(cfg, data, kernel, out_dtype):
    """TOPI compute callback for the "deconv" (stride-2, 2x2) template.

    Defines the tuning space, then hands off to the spatial-pack deconv
    implementation.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template

    data : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]

    kernel : tvm.Tensor
        4-D with shape [num_filter, in_channel, filter_height, filter_width] or
        pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height,
        filter_width, num_filter_block]

    out_dtype: str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    # num_tile=2 selects the two-level arm-cpu tiling in _gen_cfg_deconv.
    cfg_args = _gen_cfg_deconv(cfg, data, kernel, num_tile=2)
    return _conv_spatial_pack_deconv(cfg_args, data, kernel, out_dtype)
|
||||
|
||||
|
||||
@autotvm.register_topi_schedule(schedule_conv2d_nchw, "arm_cpu", ["deconv"])
def schedule_conv2d_nchw_arm_cpu_deconv(cfg, outs):
    """TOPI schedule callback for the "deconv" conv2d template.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template

    outs: Array of Tensor
        The computation graph description of conv2d
        in the format of an array of tensors.

    Returns
    -------
    s: Schedule
        The computation schedule for conv2d.
    """
    # Delegate directly to the module-level schedule builder.
    return _conv_schedule_deconv(cfg, outs)
|
||||
|
||||
|
||||
def _gen_cfg_deconv(cfg, data, kernel, num_tile):
    """generation config from input args.

    Tuning-space definition for the stride-2 deconv template: output is
    always 2x the input spatially; split knobs tile_co/tile_oh/tile_ow plus
    unroll/vectorize annotations. Returns (is_var, vh_, vw_, vc_).
    """
    if len(kernel.shape) == 4:
        co_, _, _, _ = get_const_tuple(kernel.shape)
    else:  # kernel tensor is pre packed
        co_, _, _, _, vc_ = get_const_tuple(kernel.shape)
        co_ = co_ * vc_

    # 4-D data is treated as channel-packed by 4 (NC4HWc4-style); 5-D data
    # already carries the pack factor in its last axis.
    if len(data.shape) == 4:
        _, ci_, ih_, iw_ = get_const_tuple(data.shape)
        c4 = 4
        ci_ = ci_ // 4
    else:
        _, ci_, ih_, iw_, c4 = get_const_tuple(data.shape)

    # Fixed stride-2 upsampling: output is exactly twice the input.
    oh_ = ih_ * 2
    ow_ = iw_ * 2

    co, oh, ow = cfg.axis(co_), cfg.axis(oh_), cfg.axis(ow_)
    ci, ki = cfg.reduce_axis(ci_), cfg.reduce_axis(c4)

    if num_tile == 2:  # for arm cpu
        # co is only ever split by the channel-pack factor.
        candidate_vc = [[co_ // c4, c4]]
        co, vc = cfg.define_split(
            "tile_co", co, num_outputs=2, policy="candidate", candidate=candidate_vc
        )
        # Width tiles: multiples of 4 that evenly divide ow_.
        candidate_vw = []
        for iv in range(4, ow_ + 1):  # [4, 6, 8, 12, 16, 24, 32, 40]:
            if iv % 4 == 0 and (ow_ % iv == 0):
                candidate_vw.append([ow_ // iv, iv])
        ow, vw = cfg.define_split(
            "tile_ow", ow, num_outputs=2, policy="candidate", candidate=candidate_vw
        )
        # Height tile is pinned to 2 (one stride-2 output pair).
        candidate_vh = [[1, 2]]
        oh, vh = cfg.define_split(
            "tile_oh", oh, num_outputs=2, policy="candidate", candidate=candidate_vh
        )
    elif num_tile == 3:  # for mali gpu
        co, _, vc = cfg.define_split("tile_co", co, num_outputs=3)
        oh, _, vh = cfg.define_split("tile_oh", oh, num_outputs=3)
        ow, _, vw = cfg.define_split("tile_ow", ow, num_outputs=3)
    else:
        raise RuntimeError("Invalid num_tile")

    # Unroll/vectorize knobs consumed by _schedule_deconv.
    cfg.define_annotate("ann_reduce", [ci, ki], policy="try_unroll")
    cfg.define_annotate("ann_spatial", [vh, vw, vc], policy="try_unroll_vec")

    vc_ = cfg["tile_co"].size[-1]
    vh_ = cfg["tile_oh"].size[-1]
    vw_ = cfg["tile_ow"].size[-1]
    is_var = False
    return (is_var, vh_, vw_, vc_)
|
||||
|
||||
|
||||
def _conv_spatial_pack_deconv(args, data, kernel, out_dtype):
    """conv2d_arm_cpu_deconv inner implement.

    Spatial-pack compute for the fixed stride-2, 2x2-kernel transposed conv:
    packs input and kernel, accumulates over (ci, ki), and unpacks to either
    NCHW (4-D input) or channel-packed NC4HWc4 (5-D input) output tagged
    "deconv_conv2d_output".
    """
    is_var, vh_, vw_, vc_ = args
    # create workload according to raw arguments
    out_dtype = out_dtype or data.dtype
    # c4 is the channel-pack factor (4 for plain NCHW input).
    if len(data.shape) == 4:
        n_, ci_, ih_, iw_ = data.shape if is_var else get_const_tuple(data.shape)
        c4 = 4
        ci_ = ci_ // c4
    else:
        n_, ci_, ih_, iw_, c4 = data.shape if is_var else get_const_tuple(data.shape)

    if len(kernel.shape) == 4:
        pre_packed = False
        _, co_, kh_, kw_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
    else:  # kernel tensor is pre packed
        pre_packed = True
        _, co_, kh_, kw_, vc_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
        co_ = co_ * c4

    # Fixed 2x spatial upsampling.
    oh_ = ih_ * 2
    ow_ = iw_ * 2
    ow_div = ow_ // vw_
    oh_div = oh_ // vh_
    kvshape = (co_ // vc_, kh_, kw_, ci_, c4, c4)
    ovshape = (n_, co_ // vc_, oh_div, ow_div, vh_, vw_, c4)

    # Each output tile of (vh_, vw_) maps back to an input patch of half size.
    dvshape = (n_, ih_ // (vh_ // 2), iw_ // (vw_ // 2), vh_ // 2, ci_, vw_ // 2, c4)
    if len(data.shape) == 4:
        data_vec = tvm.compute(
            dvshape,
            lambda n, h, w, vh, ci, vw, ki: data[n][ci * c4 + ki][h * vh_ // 2 + vh][
                w * vw_ // 2 + vw
            ],
            name="data_vec",
        )
    else:
        data_vec = tvm.compute(
            dvshape,
            lambda n, h, w, vh, ci, vw, ki: data[n][ci][h * vh_ // 2 + vh][
                w * vw_ // 2 + vw
            ][ki],
            name="data_vec",
        )

    if pre_packed:
        kernel_vec = kernel
    else:
        # Pack kernel from [CI, CO, KH, KW] into [CO_chunk, KH, KW, CI, c4, c4].
        kernel_vec = tvm.compute(
            kvshape,
            lambda co, kh, kw, ci, ki, vc: kernel[ci * c4 + ki][co * vc_ + vc][kh][kw],
            name="kernel_vec",
        )

    ci = tvm.reduce_axis((0, ci_), name="ci")
    ki = tvm.reduce_axis((0, c4), name="ki")

    # Accumulator dtype per input dtype; attrs consumed by the schedule side.
    type_map = {
        "int8": "int32",
        "uint8": "uint32",
        "float32": "float32",
        "float16": "float16",
    }
    acum_dtype = type_map[data.dtype]
    attrs = {
        "SH": 2,
        "SW": 2,
        "PH": 0,
        "PW": 0,
        "DILA_H": 1,
        "DILA_W": 1,
        "VH": vh_,
        "VW": vw_,
        "VC": vc_,
        "ACUM_DTYPE": acum_dtype,
    }

    # The kernel tap is selected by output-position parity ((...) % 2),
    # which is how the 2x2/stride-2 transposed conv is expressed directly.
    conv = tvm.compute(
        ovshape,
        lambda n, co, h, w, vh, vw, vc: tvm.sum(
            data_vec[n, h, w, vh // 2, ci, vw // 2, ki].astype(out_dtype)
            * kernel_vec[co, (h * vh_ + vh) % 2, (w * vw_ + vw) % 2, ci, ki, vc].astype(
                out_dtype
            ),
            axis=[ci, ki],
        ),
        name="conv",
        attrs=attrs,
    )
    if len(data.shape) == 4:
        # Unpack to plain NCHW.
        osshape = (n_, co_, oh_, ow_div * vw_)
        output = tvm.compute(
            osshape,
            lambda n, co, h, w: conv[n][co // c4][h][w // vw_][w % vw_][co % c4],
            name="output_unpack",
            tag="deconv_conv2d_output",
        )
    else:
        # Keep the channel-packed layout on the way out.
        osshape = (n_, co_ // c4, oh_, ow_div * vw_, c4)
        output = tvm.compute(
            osshape,
            lambda n, co, h, w, vc: conv[n][co][h // vh_][w // vw_][h % vh_][w % vw_][vc],
            name="output_unpack",
            tag="deconv_conv2d_output",
        )

    return output
|
||||
|
||||
|
||||
def intrin_deconv(args):
    """deconv inner implement.

    Declares a tensor intrinsic that lowers one (vh_, vw_, vc_) deconv output
    tile to the external asm routine `opname` via call_extern. Mirrors
    intrin_conv's structure.
    """
    (
        ci_,
        vh_,
        vw_,
        vc_,
        kh_,
        kw_,
        sh_,
        sw_,
        dila_h,
        dila_w,
        dtype,
        acum_dtype,
        opname,
        core_id,
    ) = args
    hstr, wstr = sh_, sw_
    # ci_ may be unknown at declaration time; use a symbolic var then.
    ci_ = tvm.var("ci_") if ci_ is None else ci_
    kvshape = (ci_, kh_, kw_, vc_)
    ovshape = (vh_, vw_, vc_)
    if dila_h != 1 or dila_w != 1:
        dvshape = (kh_, kw_, vh_, vw_, ci_)
    else:
        dvshape = ((vh_ - 1) * hstr + kh_, (vw_ - 1) * wstr + kw_, ci_)

    data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
    kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
    ci = tvm.reduce_axis((0, ci_), name="ci")
    kh = tvm.reduce_axis((0, kh_), name="kh")
    kw = tvm.reduce_axis((0, kw_), name="kw")
    # FIX: original tested the undefined name DILA_H (NameError at runtime);
    # the local unpacked above is dila_h, matching intrin_conv.
    if dila_h != 1 or dila_w != 1:
        conv = tvm.compute(
            ovshape,
            lambda vh, vw, vc: tvm.sum(
                data_vec[kh, kw, vh, vw, ci].astype(acum_dtype)
                * kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
        )
    else:
        conv = tvm.compute(
            ovshape,
            lambda vh, vw, vc: tvm.sum(
                data_vec[vh * hstr + kh, vw * wstr + kw, ci].astype(acum_dtype)
                * kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
        )

    # Row-major strides for each buffer (product of trailing extents).
    stride_a = [
        functools.reduce(lambda x, y: x * y, dvshape[i + 1: len(dvshape)])
        for i in range(0, len(dvshape) - 1)
    ]
    stride_a.append(1)
    stride_b = [
        functools.reduce(lambda x, y: x * y, kvshape[i + 1: len(kvshape)])
        for i in range(0, len(kvshape) - 1)
    ]
    stride_b.append(1)
    stride_c = [
        functools.reduce(lambda x, y: x * y, ovshape[i + 1: len(ovshape)])
        for i in range(0, len(ovshape) - 1)
    ]
    stride_c.append(1)

    a_buffer = tvm.decl_buffer(
        data_vec.shape, data_vec.dtype, name="A", offset_factor=1, strides=stride_a
    )
    b_buffer = tvm.decl_buffer(
        kernel_vec.shape, kernel_vec.dtype, name="B", offset_factor=1, strides=stride_b
    )
    c_buffer = tvm.decl_buffer(
        conv.shape, conv.dtype, name="C", offset_factor=1, strides=stride_c
    )

    def intrin_func(ins, outs):
        # ins/outs arrive in binds order: (data, kernel) -> (conv,).
        aa, bb = ins
        cc = outs[0]

        def _body():
            ib = tvm.ir_builder.create()
            # NOTE(review): only kh_/sh_ are forwarded, not kw_/sw_ — the asm
            # routine presumably assumes square kernels/strides; confirm.
            ib.emit(
                tvm.call_extern(
                    "int32",
                    opname,
                    cc.access_ptr("w"),
                    aa.access_ptr("r"),
                    bb.access_ptr("r"),
                    ci_,
                    vh_,
                    vw_,
                    vc_,
                    kh_,
                    sh_,
                    core_id,
                )
            )
            return ib.get()

        return _body()

    return tvm.decl_tensor_intrin(
        conv.op, intrin_func, binds={data_vec: a_buffer, kernel_vec: b_buffer, conv: c_buffer}
    )
|
||||
|
||||
|
||||
def _schedule_deconv(cfg, s, data_vec, kernel_vec, conv, output, last):
|
||||
"""schedule implementation"""
|
||||
is_tune = bool(isinstance(cfg, (tvm.autotvm.ConfigEntity, tvm.autotvm.ConfigSpace)))
|
||||
if is_tune:
|
||||
vh_ = cfg["tile_oh"].size[-1]
|
||||
vw_ = cfg["tile_ow"].size[-1]
|
||||
vc_ = cfg["tile_co"].size[-1]
|
||||
cfg = {
|
||||
"ci_": tvm.var("ci_"),
|
||||
"VH": vh_,
|
||||
"VW": vw_,
|
||||
"VC": vc_,
|
||||
"tile_oh": vh_,
|
||||
"tile_ow": vw_,
|
||||
"tile_co": vc_,
|
||||
"tile_ci": 4,
|
||||
"ann_reduce": cfg["ann_reduce"].anns,
|
||||
"ann_spatial": cfg["ann_spatial"].anns,
|
||||
} # ,'reorder_0':cfg['reorder_0'].perm}
|
||||
else:
|
||||
pass
|
||||
n, co, oh, ow, vh, vw, vc = s[conv].op.axis
|
||||
ci, ki = s[conv].op.reduce_axis
|
||||
s[conv].reorder(n, co, oh, ow, ci, vw, ki, vc)
|
||||
if cfg["ann_reduce"][0] == "unroll":
|
||||
s[conv].unroll(ci)
|
||||
elif cfg["ann_reduce"][0] == "vec":
|
||||
s[conv].vectorize(ci)
|
||||
if cfg["ann_reduce"][1] == "unroll":
|
||||
s[conv].unroll(ki)
|
||||
elif cfg["ann_reduce"][1] == "vec":
|
||||
s[conv].vectorize(ki)
|
||||
if cfg["ann_spatial"][0] == "vec":
|
||||
s[conv].vectorize(vh)
|
||||
elif cfg["ann_spatial"][0] == "unroll":
|
||||
s[conv].unroll(vh)
|
||||
if cfg["ann_spatial"][1] == "vec":
|
||||
s[conv].vectorize(vw)
|
||||
elif cfg["ann_spatial"][1] == "unroll":
|
||||
s[conv].unroll(vw)
|
||||
if cfg["ann_spatial"][2] == "vec":
|
||||
s[conv].vectorize(vc)
|
||||
elif cfg["ann_spatial"][2] == "unroll":
|
||||
s[conv].unroll(vc)
|
||||
|
||||
# schedule conv
|
||||
attrs = conv.op.attrs
|
||||
vh_, vw_, vc_ = (attrs["VH"].value, attrs["VW"].value, attrs["VC"].value)
|
||||
|
||||
# schedule fusion
|
||||
if len(s[last].op.axis) == 4:
|
||||
n, co, h, w = s[last].op.axis
|
||||
co, vc = s[last].split(co, vc_)
|
||||
ow, vw = s[last].split(w, vw_)
|
||||
oh, vh = s[last].split(h, vh_)
|
||||
s[last].reorder(n, co, oh, ow, vh, vw, vc)
|
||||
else:
|
||||
n, co, h, w, vc = s[last].op.axis
|
||||
oh, vh = s[last].split(h, vh_)
|
||||
ow, vw = s[last].split(w, vw_)
|
||||
s[last].reorder(n, co, oh, ow, vh, vw, vc)
|
||||
if last != output and isinstance(output.op, tvm.tensor.ComputeOp):
|
||||
s[output].compute_inline()
|
||||
if cfg["ann_spatial"][0] == "vec":
|
||||
s[last].vectorize(vh)
|
||||
elif cfg["ann_spatial"][0] == "unroll":
|
||||
s[last].unroll(vh)
|
||||
if cfg["ann_spatial"][1] == "vec":
|
||||
s[last].vectorize(vw)
|
||||
elif cfg["ann_spatial"][1] == "unroll":
|
||||
s[last].unroll(vw)
|
||||
if cfg["ann_spatial"][2] == "vec":
|
||||
s[last].vectorize(vc)
|
||||
elif cfg["ann_spatial"][2] == "unroll":
|
||||
s[last].unroll(vc)
|
||||
|
||||
s[conv].compute_at(s[last], ow)
|
||||
|
||||
# mark parallel
|
||||
s[last].parallel(co)
|
||||
|
||||
if data_vec.op.name == "data_vec_undilated":
|
||||
_, h, _, _, _, _, _, _, _ = s[data_vec].op.axis
|
||||
else:
|
||||
_, h, _, _, _, _, _ = s[data_vec].op.axis
|
||||
s[data_vec].parallel(h)
|
||||
|
||||
co, _, _, _, _, vc = s[kernel_vec].op.axis
|
||||
s[kernel_vec].parallel(co)
|
||||
if cfg["ann_spatial"][2] == "vec":
|
||||
s[kernel_vec].vectorize(vc)
|
||||
elif cfg["ann_spatial"][2] == "unroll":
|
||||
s[kernel_vec].unroll(vc)
|
||||
return s
|
||||
|
||||
|
||||
def _conv_schedule_deconv(cfg, outs):
|
||||
"""schedule_conv2d_nchw_arm_cpu_deconv inner implementation"""
|
||||
s = tvm.create_schedule([x.op for x in outs])
|
||||
|
||||
def _callback(op):
|
||||
if "deconv_conv2d_output" in op.tag:
|
||||
# schedule conv2d
|
||||
output = op.output(0)
|
||||
conv = op.input_tensors[0]
|
||||
|
||||
sidx = 0
|
||||
if conv.op.input_tensors[0].name == "attr":
|
||||
sidx = 1
|
||||
data_vec = conv.op.input_tensors[sidx]
|
||||
|
||||
kernel_vec = conv.op.input_tensors[sidx + 1]
|
||||
if kernel_vec.op.name == "kernel_vec":
|
||||
kernel = kernel_vec.op.input_tensors[0]
|
||||
else:
|
||||
kernel = kernel_vec
|
||||
if (isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag):
|
||||
s[kernel].compute_inline()
|
||||
|
||||
_schedule_deconv(cfg, s, data_vec, kernel_vec, conv, output, outs[0])
|
||||
|
||||
traverse_inline(s, outs[0].op, _callback)
|
||||
return s
|
|
@ -1,289 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Depthwise convolution schedule for ARM CPU"""
|
||||
|
||||
import tvm
|
||||
from tvm import autotvm
|
||||
|
||||
from topi.generic import schedule_depthwise_conv2d_nchw
|
||||
from topi.nn import depthwise_conv2d_nchw, pad
|
||||
from topi.util import traverse_inline, get_const_tuple
|
||||
from topi.nn.util import get_pad_tuple
|
||||
|
||||
# register customized schedule for arm cpu.
|
||||
@autotvm.register_topi_schedule(
|
||||
schedule_depthwise_conv2d_nchw, ["arm_cpu", "cpu"], ["custom"]
|
||||
)
|
||||
def schedule_depthwise_conv2d_nchw_arm(cfg, outs):
|
||||
"""Schedule depthwise conv2d
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cfg: ConfigEntity
|
||||
The configuration of this template
|
||||
outs: Array of Tensor
|
||||
The computation graph description of depthwise convolution2d
|
||||
in the format of an array of tensors.
|
||||
|
||||
Returns
|
||||
-------
|
||||
s: Schedule
|
||||
The computation schedule for depthwise_conv2d nchw.
|
||||
"""
|
||||
s = _depthwise_schedule_spatial_pack(cfg, outs)
|
||||
return s
|
||||
|
||||
|
||||
@autotvm.register_topi_compute(depthwise_conv2d_nchw, ["arm_cpu", "cpu"], ["custom"])
|
||||
def depthwise_conv2d_arm_cpu(cfg, data, kernel, strides, padding, dilation, out_dtype):
|
||||
"""TOPI compute callback for depthwise_conv2d nchw
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cfg: ConfigEntity
|
||||
The config for this template
|
||||
|
||||
data : tvm.Tensor
|
||||
4-D with shape [batch, in_channel, in_height, in_width]
|
||||
|
||||
kernel : tvm.Tensor
|
||||
4-D with shape [num_filter, multiplier, filter_height, filter_width] or
|
||||
pre-packed 5-D with shape [num_filter_chunk, multiplier, filter_height,
|
||||
filter_width, num_filter_block]
|
||||
|
||||
strides : list of two ints
|
||||
[stride_height, stride_width]
|
||||
|
||||
padding : list of two ints
|
||||
[pad_height, pad_width]
|
||||
|
||||
dilation : list of two ints
|
||||
[dilation_height, dilation_width]
|
||||
|
||||
out_dtype: str
|
||||
The output type. This is used for mixed precision.
|
||||
|
||||
Returns
|
||||
-------
|
||||
output : tvm.Tensor
|
||||
4-D with shape [batch, out_channel, out_height, out_width]
|
||||
"""
|
||||
|
||||
return _depthwise_spatial_pack(
|
||||
cfg, data, kernel, strides, padding, dilation, out_dtype
|
||||
)
|
||||
|
||||
|
||||
def _depthwise_spatial_pack(args, data, kernel, strides, padding, dilation, out_dtype):
|
||||
"""depthwise_conv2d_arm_cpu's inner implement"""
|
||||
is_var, u_vh, u_vw, u_vc = args
|
||||
out_dtype = out_dtype or data.dtype
|
||||
|
||||
u_n, u_c, ih, iw = data.shape if is_var else get_const_tuple(data.shape)
|
||||
|
||||
if isinstance(dilation, int):
|
||||
dilation_h = dilation_w = dilation
|
||||
else:
|
||||
dilation_h, dilation_w = dilation
|
||||
|
||||
if len(kernel.shape) == 4:
|
||||
pre_packed = False
|
||||
u_c, um, ukh, ukw = kernel.shape if is_var else get_const_tuple(kernel.shape)
|
||||
else: # kernel tensor is pre packed
|
||||
pre_packed = True
|
||||
u_c, um, ukh, ukw, u_vc = kernel.shape if is_var else get_const_tuple(kernel.shape)
|
||||
u_c = u_c * u_vc
|
||||
|
||||
dilated_kernel_h = (ukh - 1) * dilation_h + 1
|
||||
dilated_kernel_w = (ukw - 1) * dilation_w + 1
|
||||
|
||||
pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
|
||||
padding, (dilated_kernel_h, dilated_kernel_w)
|
||||
)
|
||||
hstr, wstr = strides if isinstance(strides, (tuple, list)) else (strides, strides)
|
||||
u_oh = (ih + pad_top + pad_down - dilated_kernel_h) // hstr + 1
|
||||
u_ow = (iw + pad_left + pad_right - dilated_kernel_w) // wstr + 1
|
||||
# pack data
|
||||
hpad = pad_top + pad_down
|
||||
wpad = pad_left + pad_right
|
||||
dopad = hpad != 0 or wpad != 0
|
||||
if dopad:
|
||||
data_pad = pad(
|
||||
data,
|
||||
(0, 0, pad_top, pad_left),
|
||||
(0, 0, pad_down, pad_right),
|
||||
name="data_pad",
|
||||
)
|
||||
else:
|
||||
data_pad = data
|
||||
|
||||
oh_div = u_oh // u_vh
|
||||
ow_div = u_ow // u_vw
|
||||
kvshape = (u_c // u_vc, um, ukh, ukw, u_vc)
|
||||
ovshape = (u_n, u_c * um // u_vc, oh_div, u_ow // u_vw, u_vh, u_vw, u_vc)
|
||||
oshape = (u_n, u_c * um, oh_div * u_vh, ow_div * u_vw)
|
||||
|
||||
if dilation_h != 1 or dilation_w != 1:
|
||||
# undilate input data
|
||||
dvshape = (u_n, oh_div, ow_div, u_c, ukh, ukw, u_vh, u_vw)
|
||||
data_vec = tvm.compute(
|
||||
dvshape,
|
||||
lambda n, h, w, c, kh, kw, vh, vw: data_pad[n][c][
|
||||
(h * u_vh + vh) * hstr + kh * dilation_h
|
||||
][(w * u_vw + vw) * wstr + kw * dilation_w],
|
||||
name="data_vec_undilated",
|
||||
)
|
||||
else:
|
||||
dvshape = (u_n, oh_div, ow_div, u_c, u_vh * hstr + ukh - 1, u_vw * wstr + ukw - 1)
|
||||
data_vec = tvm.compute(
|
||||
dvshape,
|
||||
lambda n, h, w, c, vh, vw: data_pad[n][c][h * u_vh * hstr + vh][
|
||||
w * u_vw * wstr + vw
|
||||
],
|
||||
name="data_vec",
|
||||
)
|
||||
|
||||
if pre_packed:
|
||||
kernel_vec = kernel
|
||||
else:
|
||||
kernel_vec = tvm.compute(
|
||||
kvshape,
|
||||
lambda co, m, kh, kw, vc: kernel[co * u_vc + vc][m][kh][kw],
|
||||
name="kernel_vec",
|
||||
)
|
||||
|
||||
kh = tvm.reduce_axis((0, ukh), name="kh")
|
||||
kw = tvm.reduce_axis((0, ukw), name="kw")
|
||||
|
||||
if dilation_h != 1 or dilation_w != 1:
|
||||
conv = tvm.compute(
|
||||
ovshape,
|
||||
lambda n, co, h, w, vh, vw, vc: tvm.sum(
|
||||
data_vec[n, h, w, (co * u_vc + vc) // um, kh, kw, vh, vw].astype(out_dtype)
|
||||
* kernel_vec[co // um, co % um, kh, kw, vc].astype(out_dtype),
|
||||
axis=[kh, kw],
|
||||
),
|
||||
name="depthwise_conv",
|
||||
)
|
||||
else:
|
||||
conv = tvm.compute(
|
||||
ovshape,
|
||||
lambda n, co, h, w, vh, vw, vc: tvm.sum(
|
||||
data_vec[
|
||||
n, h, w, (co * u_vc + vc) // um, vh * hstr + kh, vw * wstr + kw
|
||||
].astype(out_dtype)
|
||||
* kernel_vec[co // um, co % um, kh, kw, vc].astype(out_dtype),
|
||||
axis=[kh, kw],
|
||||
),
|
||||
name="depthwise_conv",
|
||||
)
|
||||
|
||||
output = tvm.compute(
|
||||
oshape,
|
||||
lambda n, co, h, w: conv[n][co // u_vc][h // u_vh][w // u_vw][h % u_vh][w % u_vw][
|
||||
co % u_vc
|
||||
],
|
||||
name="output_unpack",
|
||||
tag="spatial_depthwise_conv_nchw_output",
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _schedule_spatial_pack(cfg, s, data_vec, kernel_vec, conv, output, last):
|
||||
"""schedule implementation"""
|
||||
u_vc = cfg["tile_co"].size[-1] if not isinstance(cfg, dict) else cfg["VC"]
|
||||
u_vh = cfg["tile_oh"].size[-1] if not isinstance(cfg, dict) else cfg["VH"]
|
||||
u_vw = cfg["tile_ow"].size[-1] if not isinstance(cfg, dict) else cfg["VW"]
|
||||
|
||||
n, co, oh, ow, vh, vw, vc = s[conv].op.axis
|
||||
kh, kw = s[conv].op.reduce_axis
|
||||
|
||||
if data_vec.op.name == "data_vec_undilated":
|
||||
_, _, dv_ow, _, _, _, _, _ = s[data_vec].op.axis
|
||||
else:
|
||||
_, _, dv_ow, _, _, _ = s[data_vec].op.axis
|
||||
|
||||
data_pad = data_vec.op.input_tensors[0]
|
||||
|
||||
if isinstance(data_pad.op, tvm.tensor.ComputeOp):
|
||||
s[data_pad].vectorize(list(s[data_pad].op.axis)[-1])
|
||||
s[data_pad].compute_at(s[data_vec], dv_ow)
|
||||
|
||||
s[data_vec].vectorize(list(s[data_vec].op.axis)[-1])
|
||||
s[data_vec].compute_at(s[conv], ow)
|
||||
|
||||
# schedule conv
|
||||
s[conv].reorder(n, co, oh, ow, kh, kw, vh, vw, vc)
|
||||
s[conv].unroll(kh)
|
||||
s[conv].unroll(vh)
|
||||
s[conv].vectorize(vw)
|
||||
s[conv].unroll(vc)
|
||||
s[conv].parallel(co)
|
||||
|
||||
n, co, h, w = s[last].op.axis
|
||||
co, vc = s[last].split(co, u_vc)
|
||||
oh, vh = s[last].split(h, u_vh)
|
||||
ow, vw = s[last].split(w, u_vw)
|
||||
if last != output:
|
||||
s[output].compute_inline()
|
||||
s[last].vectorize(vw)
|
||||
s[last].unroll(vc)
|
||||
else:
|
||||
s[last].vectorize(vw)
|
||||
s[conv].compute_at(s[last], oh)
|
||||
|
||||
# mark parallel
|
||||
s[last].parallel(co)
|
||||
|
||||
if data_vec.op.name == "data_vec_undilated":
|
||||
_, h, _, _, _, _, _, _ = s[data_vec].op.axis
|
||||
else:
|
||||
_, h, _, _, _, _ = s[data_vec].op.axis
|
||||
s[data_vec].parallel(h)
|
||||
|
||||
if kernel_vec.op.name == "kernel_vec":
|
||||
co, _, _, _, _ = s[kernel_vec].op.axis
|
||||
if autotvm.GLOBAL_SCOPE.in_tuning:
|
||||
# kernel packing will be pre-computed during compliation, so we skip
|
||||
# this part to make tuning records correct
|
||||
s[kernel_vec].pragma(co, "debug_skip_region")
|
||||
else:
|
||||
s[kernel_vec].parallel(co)
|
||||
|
||||
return s
|
||||
|
||||
|
||||
def _depthwise_schedule_spatial_pack(cfg, outs):
|
||||
"""schedule_depthwise_conv2d_nchw_arm's inner implement"""
|
||||
outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
|
||||
s = tvm.create_schedule([x.op for x in outs])
|
||||
|
||||
def _callback(op):
|
||||
if op.tag == "spatial_depthwise_conv_nchw_output":
|
||||
output = op.output(0)
|
||||
conv = op.input_tensors[0]
|
||||
data_vec = conv.op.input_tensors[0]
|
||||
kernel_vec = conv.op.input_tensors[1]
|
||||
if kernel_vec.op.name == "kernel_vec":
|
||||
kernel = kernel_vec.op.input_tensors[0]
|
||||
else:
|
||||
kernel = kernel_vec
|
||||
if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
|
||||
s[kernel].compute_inline()
|
||||
|
||||
_schedule_spatial_pack(cfg, s, data_vec, kernel_vec, conv, output, outs[0])
|
||||
|
||||
traverse_inline(s, outs[0].op, _callback)
|
||||
return s
|
|
@ -1,472 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Conv2D schedule for ARM CPU"""
|
||||
from __future__ import absolute_import as _abs
|
||||
|
||||
import functools
|
||||
|
||||
import tvm
|
||||
from tvm import autotvm
|
||||
import tvm.contrib.nnpack
|
||||
|
||||
from topi.generic import schedule_conv2d_nchw
|
||||
from topi.util import traverse_inline
|
||||
from topi.nn import conv2d
|
||||
|
||||
|
||||
@autotvm.register_topi_compute(conv2d, "arm_cpu", ["matmul"])
|
||||
def matmul_arm_cpu(cfg, a_, b_, layout, out_dtype):
|
||||
"""TOPI compute callback for
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cfg: ConfigEntity
|
||||
The config for this template
|
||||
|
||||
a_ : tvm.Tensor
|
||||
2-D with shape [M, k_]
|
||||
|
||||
b_ : tvm.Tensor
|
||||
2-D with shape [k_, N]
|
||||
|
||||
out_dtype: str
|
||||
The output type. This is used for mixed precision.
|
||||
|
||||
Returns
|
||||
-------
|
||||
output : tvm.Tensor
|
||||
4-D with shape [batch, out_channel, out_height, out_width]
|
||||
"""
|
||||
args = _gen_cfg(cfg, a_, b_)
|
||||
return _matmul_spatial_pack_asm(args, a_, b_, layout, out_dtype)
|
||||
|
||||
|
||||
@autotvm.register_topi_schedule(schedule_conv2d_nchw, "arm_cpu", ["matmul"])
|
||||
def schedule_matmul_arm_cpu(cfg, outs):
|
||||
"""TOPI schedule callback for conv2d
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cfg: ConfigEntity
|
||||
The config for this template
|
||||
|
||||
outs: Array of Tensor
|
||||
The computation graph description of conv2d
|
||||
in the format of an array of tensors.
|
||||
|
||||
Returns
|
||||
-------
|
||||
s: Schedule
|
||||
The computation schedule for conv2d.
|
||||
"""
|
||||
s = _matmul_schedule_asm(cfg, outs)
|
||||
return s
|
||||
|
||||
|
||||
def _gen_cfg(cfg, a_, b_):
|
||||
"""get best loginfo from cfg"""
|
||||
if len(a_.shape) == 2:
|
||||
w_, ci_ = get_const_tuple(a_.shape)
|
||||
h_ = 1
|
||||
elif len(a_.shape) == 3:
|
||||
_, ci_, w_ = get_const_tuple(a_.shape)
|
||||
h_ = 1
|
||||
elif len(a_.shape) == 4:
|
||||
_, ci_, h_, w_ = get_const_tuple(a_.shape)
|
||||
else:
|
||||
raise ValueError("not support shape: " + a_.shape)
|
||||
|
||||
co_, k_ = get_const_tuple(b_.shape)
|
||||
|
||||
oh, ow = cfg.axis(h_), cfg.axis(w_)
|
||||
co = cfg.axis(co_)
|
||||
k = cfg.reduce_axis(k_)
|
||||
|
||||
oh, vh = cfg.define_split("tile_oh", oh, num_outputs=2)
|
||||
ow, vw = cfg.define_split("tile_ow", ow, num_outputs=2)
|
||||
oc, vc = cfg.define_split("tile_co", co, num_outputs=2)
|
||||
|
||||
cfg.define_reorder(
|
||||
"reorder_0",
|
||||
[oc, oh, ow, k, vh, vw, vc],
|
||||
policy="candidate",
|
||||
candidate=[[oc, oh, ow, k, vh, vw, vc],],
|
||||
)
|
||||
|
||||
vh_ = cfg["tile_oh"].size[-1]
|
||||
vw_ = cfg["tile_ow"].size[-1]
|
||||
vc_ = cfg["tile_co"].size[-1]
|
||||
is_var = False
|
||||
is_transpose = False
|
||||
return (is_var, is_transpose, ci_, vh_, vw_, vc_)
|
||||
|
||||
|
||||
def _matmul_spatial_pack_asm(args, a_, b_, layout, out_dtype):
|
||||
"""matmul_spatial_pack_asm's inner interace"""
|
||||
is_var, is_transpose, ci_, vh_, vw_, vc_ = args
|
||||
|
||||
# create workload according to raw arguments
|
||||
out_dtype = out_dtype or a_.dtype
|
||||
if layout == "NCHW":
|
||||
batch, k_, h_, w_ = a_.shape if is_var else get_const_tuple(a_.shape)
|
||||
n_, _ = b_.shape if is_var else get_const_tuple(b_.shape)
|
||||
elif layout == "NCH":
|
||||
batch, k_, h_ = a_.shape if is_var else get_const_tuple(a_.shape)
|
||||
n_, _ = b_.shape if is_var else get_const_tuple(b_.shape)
|
||||
w_ = 1
|
||||
elif layout == "NC":
|
||||
w_, k_ = a_.shape if is_var else get_const_tuple(a_.shape)
|
||||
n_, _ = b_.shape if is_var else get_const_tuple(b_.shape)
|
||||
h_ = 1
|
||||
else:
|
||||
raise ValueError("not support layout: " + layout)
|
||||
|
||||
ki = tvm.reduce_axis((0, k_), name="ki")
|
||||
type_map = {
|
||||
"int8": "int32",
|
||||
"uint8": "uint32",
|
||||
"float32": "float32",
|
||||
"float16": "float16",
|
||||
}
|
||||
acum_dtype = type_map[a_.dtype]
|
||||
attrs = {"ci_": ci_, "vh_": vh_, "vw_": vw_, "vc_": vc_, "ACUM_DTYPE": acum_dtype}
|
||||
|
||||
if layout == "NCHW":
|
||||
h_div = h_ // vh_
|
||||
w_div = w_ // vw_
|
||||
n_div = n_ // vc_
|
||||
avshape = (batch, h_div, w_div, vh_, vw_, k_)
|
||||
bvshape = (n_div, k_, vc_)
|
||||
ovshape = (batch, n_div, h_div, w_div, vh_, vw_, vc_)
|
||||
|
||||
a_vec = tvm.compute(
|
||||
avshape,
|
||||
lambda n, oh, ow, vh, vw, ci: a_[n][ci][oh * vh_ + vh][ow * vw_ + vw],
|
||||
name="a_vec",
|
||||
)
|
||||
b_vec = tvm.compute(
|
||||
bvshape, lambda oc, ci, vc: b_[oc * vc_ + vc][ci], name="b_vec"
|
||||
)
|
||||
|
||||
ma = tvm.compute(
|
||||
ovshape,
|
||||
lambda n, oc, oh, ow, vh, vw, vc: tvm.sum(
|
||||
a_vec[n, oh, ow, vh, vw, ki].astype(out_dtype)
|
||||
* b_vec[oc, ki, vc].astype(out_dtype),
|
||||
axis=[ki],
|
||||
),
|
||||
name="matmul",
|
||||
attrs=attrs,
|
||||
)
|
||||
|
||||
if is_transpose:
|
||||
oshape = (batch, h_div * vh_, w_div * vw_, n_div * vc_)
|
||||
|
||||
output = tvm.compute(
|
||||
oshape,
|
||||
lambda n, h, w, c: ma[n][c // vc_][h // vh_][w // vw_][h % vh_][w % vw_][
|
||||
c % vc_
|
||||
],
|
||||
name="output_unpack",
|
||||
tag="asm_matmul_output",
|
||||
)
|
||||
else:
|
||||
oshape = (batch, n_div * vc_, h_div * vh_, w_div * vw_)
|
||||
output = tvm.compute(
|
||||
oshape,
|
||||
lambda n, c, h, w: ma[n][c // vc_][h // vh_][w // vw_][h % vh_][w % vw_][
|
||||
c % vc_
|
||||
],
|
||||
name="output_unpack",
|
||||
tag="asm_matmul_output",
|
||||
)
|
||||
elif layout == "NCH":
|
||||
w_div = w_ // vw_
|
||||
n_div = n_ // vc_
|
||||
avshape = (batch, w_div, vw_, k_)
|
||||
bvshape = (n_div, k_, vc_)
|
||||
ovshape = (batch, n_div, w_div, vw_, vc_)
|
||||
oshape = (batch, n_div * vc_, w_div * vw_)
|
||||
|
||||
a_vec = tvm.compute(
|
||||
avshape, lambda b, om, vw, ci: a_[b][ci][om * vw_ + vw], name="a_vec"
|
||||
)
|
||||
b_vec = tvm.compute(
|
||||
bvshape, lambda on, ci, vc: b_[on * vc_ + vc][ci], name="b_vec"
|
||||
)
|
||||
|
||||
ma = tvm.compute(
|
||||
ovshape,
|
||||
lambda b, on, om, vm, vn: tvm.sum(
|
||||
a_vec[b, om, vm, ki].astype(out_dtype)
|
||||
* b_vec[on, ki, vn].astype(out_dtype),
|
||||
axis=[ki],
|
||||
),
|
||||
name="matmul",
|
||||
attrs=attrs,
|
||||
)
|
||||
|
||||
output = tvm.compute(
|
||||
oshape,
|
||||
lambda b, n, m: ma[b][n // vc_][m // vw_][m % vw_][n % vc_],
|
||||
name="output_unpack",
|
||||
tag="asm_matmul_output",
|
||||
)
|
||||
elif layout == "NC":
|
||||
w_div = w_ // vw_
|
||||
n_div = n_ // vc_
|
||||
avshape = (w_div, vw_, k_)
|
||||
bvshape = (n_div, k_, vc_)
|
||||
ovshape = (w_div, n_div, vw_, vc_)
|
||||
oshape = (w_div * vw_, n_div * vc_)
|
||||
|
||||
a_vec = tvm.compute(
|
||||
avshape, lambda om, vw, ci: a_[om * vw_ + vw][ci], name="a_vec"
|
||||
)
|
||||
b_vec = tvm.compute(
|
||||
bvshape, lambda on, ci, vc: b_[on * vc_ + vc][ci], name="b_vec"
|
||||
)
|
||||
|
||||
ma = tvm.compute(
|
||||
ovshape,
|
||||
lambda om, on, vm, vn: tvm.sum(
|
||||
a_vec[om, vm, ki].astype(out_dtype)
|
||||
* b_vec[on, ki, vn].astype(out_dtype),
|
||||
axis=[ki],
|
||||
),
|
||||
name="matmul",
|
||||
attrs=attrs,
|
||||
)
|
||||
|
||||
output = tvm.compute(
|
||||
oshape,
|
||||
lambda m, n: ma[m // vw_][n // vc_][m % vw_][n % vc_],
|
||||
name="output_unpack",
|
||||
tag="asm_matmul_output",
|
||||
)
|
||||
else:
|
||||
raise ValueError("not support layout: " + layout)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def intrin_conv(args):
|
||||
"""intrin_conv is a conv inner interface"""
|
||||
(
|
||||
ndim,
|
||||
ci_,
|
||||
vh_,
|
||||
vw_,
|
||||
vc_,
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
dtype,
|
||||
acum_dtype,
|
||||
opname,
|
||||
core_id,
|
||||
) = args
|
||||
ci_ = tvm.var("ci_") if ci_ is None else ci_
|
||||
kvshape = (ci_, vc_)
|
||||
if ndim == 2:
|
||||
dvshape = (vw_, ci_)
|
||||
ovshape = (vw_, vc_)
|
||||
|
||||
data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
|
||||
kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
|
||||
ci = tvm.reduce_axis((0, ci_), name="ci")
|
||||
conv = tvm.compute(
|
||||
ovshape,
|
||||
lambda vw, vc: tvm.sum(
|
||||
data_vec[vw, ci].astype(acum_dtype)
|
||||
* kernel_vec[ci, vc].astype(acum_dtype),
|
||||
axis=[ci],
|
||||
),
|
||||
name="conv",
|
||||
)
|
||||
else:
|
||||
dvshape = (vh_, vw_, ci_)
|
||||
ovshape = (vh_, vw_, vc_)
|
||||
|
||||
data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
|
||||
kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
|
||||
ci = tvm.reduce_axis((0, ci_), name="ci")
|
||||
conv = tvm.compute(
|
||||
ovshape,
|
||||
lambda vh, vw, vc: tvm.sum(
|
||||
data_vec[vh, vw, ci].astype(acum_dtype)
|
||||
* kernel_vec[ci, vc].astype(acum_dtype),
|
||||
axis=[ci],
|
||||
),
|
||||
name="conv",
|
||||
)
|
||||
|
||||
stride_a = [
|
||||
functools.reduce(lambda x, y: x * y, dvshape[i + 1: len(dvshape)])
|
||||
for i in range(0, len(dvshape) - 1)
|
||||
]
|
||||
stride_a.append(1)
|
||||
stride_b = [
|
||||
functools.reduce(lambda x, y: x * y, kvshape[i + 1: len(kvshape)])
|
||||
for i in range(0, len(kvshape) - 1)
|
||||
]
|
||||
stride_b.append(1)
|
||||
stride_c = [
|
||||
functools.reduce(lambda x, y: x * y, ovshape[i + 1: len(ovshape)])
|
||||
for i in range(0, len(ovshape) - 1)
|
||||
]
|
||||
stride_c.append(1)
|
||||
|
||||
ab_ = tvm.decl_buffer(
|
||||
data_vec.shape, data_vec.dtype, name="a_", offset_factor=1, strides=stride_a
|
||||
)
|
||||
bb_ = tvm.decl_buffer(
|
||||
kernel_vec.shape, kernel_vec.dtype, name="b_", offset_factor=1, strides=stride_b
|
||||
)
|
||||
cb_ = tvm.decl_buffer(
|
||||
conv.shape, conv.dtype, name="C", offset_factor=1, strides=stride_c
|
||||
)
|
||||
|
||||
def intrin_func(ins, outs):
|
||||
aa, bb = ins
|
||||
cc = outs[0]
|
||||
|
||||
def _body():
|
||||
b_ = tvm.ir_builder.create()
|
||||
b_.emit(
|
||||
tvm.call_extern(
|
||||
"int32",
|
||||
opname,
|
||||
cc.access_ptr("w"),
|
||||
aa.access_ptr("r"),
|
||||
bb.access_ptr("r"),
|
||||
ci_,
|
||||
vh_,
|
||||
vw_,
|
||||
vc_,
|
||||
core_id,
|
||||
)
|
||||
)
|
||||
return b_.get()
|
||||
|
||||
return _body()
|
||||
|
||||
return tvm.decl_tensor_intrin(
|
||||
conv.op, intrin_func, binds={data_vec: ab_, kernel_vec: bb_, conv: cb_}
|
||||
)
|
||||
|
||||
|
||||
def _schedule_asm(cfg, s, a_vec, b_vec, mat, output, last):
|
||||
"""schedule implementation"""
|
||||
is_transpose = 0 if not isinstance(cfg, dict) else cfg["is_transpose"]
|
||||
attrs = mat.op.attrs
|
||||
vh_, vw_, vc_ = (attrs["vh_"].value, attrs["vw_"].value, attrs["vc_"].value)
|
||||
|
||||
# axis split and reorder
|
||||
if len(a_vec.shape) == 3:
|
||||
ow, oc = s[last].op.axis
|
||||
oc, vc = s[last].split(oc, vc_)
|
||||
ow, vw = s[last].split(ow, vw_)
|
||||
s[last].reorder(ow, oc, vw, vc)
|
||||
s[last].vectorize(vc)
|
||||
oh = ow = oc
|
||||
elif len(a_vec.shape) == 4:
|
||||
n, oc, ow, vw, vc = s[last].op.axis
|
||||
oc, vc = s[last].split(oc, vc_)
|
||||
ow, vw = s[last].split(ow, vw_)
|
||||
s[last].reorder(n, oc, ow, vw, vc)
|
||||
elif len(a_vec.shape) == 6:
|
||||
if is_transpose:
|
||||
n, oh, ow, oc = s[last].op.axis
|
||||
else:
|
||||
n, oc, oh, ow = s[last].op.axis
|
||||
oc, vc = s[last].split(oc, vc_)
|
||||
oh, vh = s[last].split(oh, vh_)
|
||||
ow, vw = s[last].split(ow, vw_)
|
||||
s[last].reorder(n, oc, oh, ow, vh, vw, vc)
|
||||
else:
|
||||
raise ValueError("not support a_vec: " + str(len(a_vec.shape)))
|
||||
if last != output and isinstance(output.op, tvm.tensor.ComputeOp):
|
||||
s[output].compute_inline()
|
||||
|
||||
s[mat].compute_at(s[last], ow)
|
||||
s[mat].vectorize(s[mat].op.axis[-1])
|
||||
|
||||
# mark parallel
|
||||
s[last].parallel(oh)
|
||||
|
||||
if len(a_vec.shape) == 3:
|
||||
om, _, _ = s[a_vec].op.axis
|
||||
s[a_vec].compute_at(s[last], ow)
|
||||
s[a_vec].parallel(om)
|
||||
elif len(a_vec.shape) == 4:
|
||||
_, om, _, _ = s[a_vec].op.axis
|
||||
s[a_vec].compute_at(s[last], ow)
|
||||
s[a_vec].parallel(om)
|
||||
else:
|
||||
_, oh, _, _, _, _ = s[a_vec].op.axis
|
||||
s[a_vec].parallel(oh)
|
||||
s[a_vec].vectorize(s[a_vec].op.axis[-1])
|
||||
s[a_vec].compute_inline()
|
||||
|
||||
oc, _, _ = s[b_vec].op.axis
|
||||
s[b_vec].parallel(oc)
|
||||
s[b_vec].vectorize(s[b_vec].op.axis[-1])
|
||||
s[b_vec].compute_inline()
|
||||
return s
|
||||
|
||||
|
||||
def _matmul_schedule_asm(cfg, outs):
|
||||
"""schedule_conv2d_nchw schedule implementation"""
|
||||
s = tvm.create_schedule([x.op for x in outs])
|
||||
|
||||
def _callback(op):
|
||||
if "asm_matmul_output" in op.tag:
|
||||
# schedule conv2d
|
||||
output = op.output(0)
|
||||
mat = op.input_tensors[0]
|
||||
|
||||
sidx = 0
|
||||
if mat.op.input_tensors[0].name == "attr":
|
||||
sidx = 1
|
||||
a_vec = mat.op.input_tensors[sidx]
|
||||
b_vec = mat.op.input_tensors[sidx + 1]
|
||||
|
||||
def recurs_inline(a_):
|
||||
if a_.op.input_tensors:
|
||||
a1 = a_.op.input_tensors[0]
|
||||
if a1.shape == a_.shape:
|
||||
s[a1].compute_inline()
|
||||
recurs_inline(a1)
|
||||
|
||||
def recurs_inline_(a_):
|
||||
if isinstance(a_, tvm.tensor.ComputeOp):
|
||||
if a_.op.input_tensors:
|
||||
a1 = a_.op.input_tensors[0]
|
||||
s[a1].compute_inline()
|
||||
recurs_inline_(a1)
|
||||
|
||||
recurs_inline_(a_vec)
|
||||
recurs_inline_(b_vec)
|
||||
|
||||
_schedule_asm(cfg, s, a_vec, b_vec, mat, output, outs[0])
|
||||
|
||||
traverse_inline(s, outs[0].op, _callback)
|
||||
return s
|
|
@ -1,17 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Neural network operators"""
|
||||
# from .at_lib import *
|
||||
# from .at_gen import *
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue