forked from mindspore-Ecosystem/mindspore
commit
d27205ad6c
|
@ -40,13 +40,6 @@ class InnerKernel : public Kernel {
|
||||||
const lite::Context *ctx)
|
const lite::Context *ctx)
|
||||||
: op_parameter_(parameter), in_tensors_(std::move(in_tensors)), out_tensors_(std::move(out_tensors)) {
|
: op_parameter_(parameter), in_tensors_(std::move(in_tensors)), out_tensors_(std::move(out_tensors)) {
|
||||||
context_ = ctx;
|
context_ = ctx;
|
||||||
if (parameter != nullptr && parameter->thread_num_ == 0) {
|
|
||||||
if (ctx != nullptr) {
|
|
||||||
op_parameter_->thread_num_ = ctx->thread_num_;
|
|
||||||
} else {
|
|
||||||
op_parameter_->thread_num_ = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~InnerKernel() {
|
virtual ~InnerKernel() {
|
||||||
|
|
|
@ -28,24 +28,12 @@ namespace mindspore::lite {
|
||||||
void LiteOpActor::RunOpData(OpData<lite::Tensor> *inputs, OpContext<lite::Tensor> *context) {
|
void LiteOpActor::RunOpData(OpData<lite::Tensor> *inputs, OpContext<lite::Tensor> *context) {
|
||||||
auto op_uuid = context->sequential_num_;
|
auto op_uuid = context->sequential_num_;
|
||||||
input_op_datas_[op_uuid].push_back(inputs);
|
input_op_datas_[op_uuid].push_back(inputs);
|
||||||
|
|
||||||
inputs_data_[inputs->index_] = inputs->data_;
|
inputs_data_[inputs->index_] = inputs->data_;
|
||||||
/* in-case infershape done in runtime */
|
|
||||||
kernel_->in_tensors()[inputs->index_]->set_shape(inputs->data_->shape());
|
|
||||||
kernel_->in_tensors()[inputs->index_]->set_format(inputs->data_->format());
|
|
||||||
|
|
||||||
if (input_op_datas_[op_uuid].size() < kernel_->in_tensors().size()) {
|
if (input_op_datas_[op_uuid].size() < kernel_->in_tensors().size()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ret = CheckInputData();
|
auto ret = SetInputData();
|
||||||
if (ret != RET_OK) {
|
|
||||||
input_op_datas_.erase(op_uuid);
|
|
||||||
context->SetFailed(ret);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = SetInputData();
|
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
input_op_datas_.erase(op_uuid);
|
input_op_datas_.erase(op_uuid);
|
||||||
context->SetFailed(ret);
|
context->SetFailed(ret);
|
||||||
|
@ -87,7 +75,7 @@ void LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *ac
|
||||||
for (QuantArg quant : old_tensor->quant_params()) {
|
for (QuantArg quant : old_tensor->quant_params()) {
|
||||||
new_tensor->AddQuantParam(quant);
|
new_tensor->AddQuantParam(quant);
|
||||||
}
|
}
|
||||||
isolate_input_map_.insert(std::make_pair(old_tensor, new_tensor));
|
isolate_input_map_.insert(std::make_pair(new_tensor, old_tensor));
|
||||||
|
|
||||||
int ref_count = 0;
|
int ref_count = 0;
|
||||||
/* set op input for calculate */
|
/* set op input for calculate */
|
||||||
|
@ -126,8 +114,8 @@ int LiteOpActor::LiteActorInit(std::vector<std::shared_ptr<LiteOpActor>> *actors
|
||||||
int LiteOpActor::ResizeGraphInput(const std::vector<mindspore::tensor::MSTensor *> &inputs,
|
int LiteOpActor::ResizeGraphInput(const std::vector<mindspore::tensor::MSTensor *> &inputs,
|
||||||
const std::vector<std::vector<int>> &dims) {
|
const std::vector<std::vector<int>> &dims) {
|
||||||
for (auto map : isolate_input_map_) {
|
for (auto map : isolate_input_map_) {
|
||||||
auto src_tensor = map.first;
|
auto isolate_tensor = map.first;
|
||||||
auto isolate_tensor = map.second;
|
auto src_tensor = map.second;
|
||||||
for (size_t i = 0; i < inputs.size(); i++) {
|
for (size_t i = 0; i < inputs.size(); i++) {
|
||||||
if (src_tensor == inputs[i]) {
|
if (src_tensor == inputs[i]) {
|
||||||
isolate_tensor->set_shape(dims[i]);
|
isolate_tensor->set_shape(dims[i]);
|
||||||
|
@ -225,23 +213,6 @@ int LiteOpActor::CompileArrow() {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int LiteOpActor::CheckInputData() {
|
|
||||||
if (kernel_->in_tensors().size() != inputs_data_.size()) {
|
|
||||||
MS_LOG(ERROR) << "kernel:" << kernel_->name() << " inputs_data_.size(): " << inputs_data_.size()
|
|
||||||
<< " vs kernel_->in_tensors().size(): " << kernel_->in_tensors().size() << " are not equal.";
|
|
||||||
return RET_PARAM_INVALID;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0; i < inputs_data_.size(); ++i) {
|
|
||||||
if (kernel_->in_tensors()[i]->shape() != inputs_data_[i]->shape()) {
|
|
||||||
MS_LOG(ERROR) << "inputs_data_[" << i << "].shape: " << inputs_data_[i]->shape() << " vs kernel_->in_tensors()["
|
|
||||||
<< i << "].shape: " << kernel_->in_tensors()[i]->shape() << " are not equal.";
|
|
||||||
return RET_PARAM_INVALID;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return RET_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
void LiteOpActor::MoveInputData(Tensor *dst_tensor, Tensor *src_tensor) {
|
void LiteOpActor::MoveInputData(Tensor *dst_tensor, Tensor *src_tensor) {
|
||||||
MS_ASSERT(src_tensor != dst_tensor);
|
MS_ASSERT(src_tensor != dst_tensor);
|
||||||
|
|
||||||
|
@ -302,6 +273,11 @@ int LiteOpActor::SetInputData() {
|
||||||
for (size_t i = 0; i < inputs_data_.size(); ++i) {
|
for (size_t i = 0; i < inputs_data_.size(); ++i) {
|
||||||
auto dst_tensor = kernel_->in_tensors()[i];
|
auto dst_tensor = kernel_->in_tensors()[i];
|
||||||
auto src_tensor = inputs_data_[i];
|
auto src_tensor = inputs_data_[i];
|
||||||
|
|
||||||
|
/* infershape done in runtime */
|
||||||
|
dst_tensor->set_shape(src_tensor->shape());
|
||||||
|
dst_tensor->set_format(src_tensor->format());
|
||||||
|
|
||||||
if (src_tensor->data_type() != dst_tensor->data_type()) {
|
if (src_tensor->data_type() != dst_tensor->data_type()) {
|
||||||
CopyInputData(dst_tensor, src_tensor);
|
CopyInputData(dst_tensor, src_tensor);
|
||||||
} else {
|
} else {
|
||||||
|
@ -567,10 +543,7 @@ void LiteSwitchOpActor::AsyncFalseBranchOutput(OpContext<Tensor> *context) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int MindrtInit(bool enable_parallel) {
|
int MindrtInit() { return mindspore::Initialize("tcp://127.0.0.1:8080", "", "", ""); }
|
||||||
int thread_count = enable_parallel ? 2 : 1;
|
|
||||||
return mindspore::Initialize("tcp://127.0.0.1:8080", "", "", "", thread_count);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MindrtTerminate(const std::vector<std::shared_ptr<LiteOpActor>> &actor_list) {
|
void MindrtTerminate(const std::vector<std::shared_ptr<LiteOpActor>> &actor_list) {
|
||||||
for (const auto &actor : actor_list) {
|
for (const auto &actor : actor_list) {
|
||||||
|
|
|
@ -42,7 +42,7 @@ class LiteOpActor : public OpActor<lite::Tensor> {
|
||||||
}
|
}
|
||||||
~LiteOpActor() override {
|
~LiteOpActor() override {
|
||||||
for (auto map : isolate_input_map_) {
|
for (auto map : isolate_input_map_) {
|
||||||
auto isolate_input_tensor = map.second;
|
auto isolate_input_tensor = map.first;
|
||||||
isolate_input_tensor->set_data(nullptr);
|
isolate_input_tensor->set_data(nullptr);
|
||||||
delete isolate_input_tensor;
|
delete isolate_input_tensor;
|
||||||
}
|
}
|
||||||
|
@ -67,7 +67,6 @@ class LiteOpActor : public OpActor<lite::Tensor> {
|
||||||
void SetPartialMap(const std::unordered_map<size_t, AID> &partial_map) { subgraph_index_to_actor = partial_map; }
|
void SetPartialMap(const std::unordered_map<size_t, AID> &partial_map) { subgraph_index_to_actor = partial_map; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
int CheckInputData();
|
|
||||||
int SetInputData();
|
int SetInputData();
|
||||||
void SetOutputData(OpContext<Tensor> *context);
|
void SetOutputData(OpContext<Tensor> *context);
|
||||||
void AsyncOutput(OpContext<Tensor> *context);
|
void AsyncOutput(OpContext<Tensor> *context);
|
||||||
|
@ -89,7 +88,7 @@ class LiteOpActor : public OpActor<lite::Tensor> {
|
||||||
private:
|
private:
|
||||||
kernel::LiteKernel *partial_node_ = nullptr;
|
kernel::LiteKernel *partial_node_ = nullptr;
|
||||||
kernel::LiteKernel *call_node_ = nullptr;
|
kernel::LiteKernel *call_node_ = nullptr;
|
||||||
std::unordered_map<Tensor *, Tensor *> isolate_input_map_;
|
std::unordered_map<Tensor *, Tensor *> isolate_input_map_; /* <calculate-tensor, src-input-tensor> */
|
||||||
};
|
};
|
||||||
|
|
||||||
class LiteSwitchOpActor : public LiteOpActor {
|
class LiteSwitchOpActor : public LiteOpActor {
|
||||||
|
@ -104,14 +103,7 @@ class LiteSwitchOpActor : public LiteOpActor {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ret = CheckInputData();
|
auto ret = SetInputData();
|
||||||
if (ret != RET_OK) {
|
|
||||||
input_op_datas_.erase(op_uuid);
|
|
||||||
context->SetFailed(ret);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = SetInputData();
|
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
input_op_datas_.erase(op_uuid);
|
input_op_datas_.erase(op_uuid);
|
||||||
context->SetFailed(ret);
|
context->SetFailed(ret);
|
||||||
|
@ -182,7 +174,7 @@ class LiteSwitchOpActor : public LiteOpActor {
|
||||||
std::vector<OpDataPtr<Tensor>> false_branch_outputs_data_;
|
std::vector<OpDataPtr<Tensor>> false_branch_outputs_data_;
|
||||||
};
|
};
|
||||||
|
|
||||||
int MindrtInit(bool subgraph_split = false);
|
int MindrtInit();
|
||||||
void MindrtTerminate(const std::vector<std::shared_ptr<LiteOpActor>> &);
|
void MindrtTerminate(const std::vector<std::shared_ptr<LiteOpActor>> &);
|
||||||
|
|
||||||
std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel::LiteKernel *> &kernels,
|
std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel::LiteKernel *> &kernels,
|
||||||
|
|
|
@ -77,9 +77,7 @@ int MindrtExecutor::Resize(const std::vector<mindspore::tensor::MSTensor *> &inp
|
||||||
|
|
||||||
int MindrtExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
|
int MindrtExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
|
||||||
const std::vector<Tensor *> &outputs, const lite::InnerContext *ctx) {
|
const std::vector<Tensor *> &outputs, const lite::InnerContext *ctx) {
|
||||||
MS_ASSERT(kernels.size() != 0);
|
auto ret = MindrtInit();
|
||||||
|
|
||||||
auto ret = MindrtInit(ctx->enable_parallel_);
|
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
MS_LOG(ERROR) << "MindrtInit failed";
|
MS_LOG(ERROR) << "MindrtInit failed";
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
@ -40,6 +40,7 @@ kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor
|
||||||
}
|
}
|
||||||
memset(transpose_param, 0, sizeof(TransposeParameter));
|
memset(transpose_param, 0, sizeof(TransposeParameter));
|
||||||
transpose_param->op_parameter_.type_ = schema::PrimitiveType_Transpose;
|
transpose_param->op_parameter_.type_ = schema::PrimitiveType_Transpose;
|
||||||
|
transpose_param->op_parameter_.thread_num_ = ctx->thread_num_;
|
||||||
transpose_param->perm_[0] = 0;
|
transpose_param->perm_[0] = 0;
|
||||||
transpose_param->perm_[1] = 2;
|
transpose_param->perm_[1] = 2;
|
||||||
transpose_param->perm_[2] = 3;
|
transpose_param->perm_[2] = 3;
|
||||||
|
@ -76,6 +77,7 @@ kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor
|
||||||
}
|
}
|
||||||
memset(transpose_param, 0, sizeof(TransposeParameter));
|
memset(transpose_param, 0, sizeof(TransposeParameter));
|
||||||
transpose_param->op_parameter_.type_ = schema::PrimitiveType_Transpose;
|
transpose_param->op_parameter_.type_ = schema::PrimitiveType_Transpose;
|
||||||
|
transpose_param->op_parameter_.thread_num_ = ctx->thread_num_;
|
||||||
transpose_param->perm_[0] = 0;
|
transpose_param->perm_[0] = 0;
|
||||||
transpose_param->perm_[1] = 3;
|
transpose_param->perm_[1] = 3;
|
||||||
transpose_param->perm_[2] = 1;
|
transpose_param->perm_[2] = 1;
|
||||||
|
|
|
@ -88,13 +88,7 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
|
||||||
if (context_->enable_parallel_) {
|
if (context_->enable_parallel_) {
|
||||||
auto search_sub_graph =
|
auto search_sub_graph =
|
||||||
SearchSubGraph(context_, src_model_, src_tensors_, &op_parameters_, &graph_output_node_indexes_);
|
SearchSubGraph(context_, src_model_, src_tensors_, &op_parameters_, &graph_output_node_indexes_);
|
||||||
|
search_sub_graph.SubGraphSplit();
|
||||||
bool offline_parallel_enable = src_model_->all_nodes_.front()->device_type_ != kDefaultDeviceType;
|
|
||||||
if (offline_parallel_enable) {
|
|
||||||
search_sub_graph.SubGraphSplitByOffLineParallel();
|
|
||||||
} else {
|
|
||||||
search_sub_graph.SubGraphSplitByOutput();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = ScheduleSubGraphToKernels(kMainSubGraphIndex, dst_kernels, nullptr, nullptr);
|
ret = ScheduleSubGraphToKernels(kMainSubGraphIndex, dst_kernels, nullptr, nullptr);
|
||||||
|
@ -111,14 +105,7 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
FindAllInoutKernels(*dst_kernels);
|
FindAllInoutKernels(*dst_kernels);
|
||||||
// origin kernel init
|
|
||||||
for (size_t i = 0; i < dst_kernels->size(); i++) {
|
|
||||||
ret = (*dst_kernels)[i]->Init();
|
|
||||||
if (ret != RET_OK) {
|
|
||||||
MS_LOG(ERROR) << "Kernel " << (*dst_kernels)[i]->name() << " Init failed.";
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ret = RunPass(dst_kernels);
|
ret = RunPass(dst_kernels);
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
MS_LOG(ERROR) << "Schedule run pass failed.";
|
MS_LOG(ERROR) << "Schedule run pass failed.";
|
||||||
|
@ -679,13 +666,6 @@ kernel::LiteKernel *Scheduler::SchedulePartialToKernel(const lite::Model::Node *
|
||||||
MS_LOG(ERROR) << "Schedule partial failed, name: " << src_node->name_;
|
MS_LOG(ERROR) << "Schedule partial failed, name: " << src_node->name_;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
for (auto kernel : sub_kernels) {
|
|
||||||
ret = kernel->Init();
|
|
||||||
if (ret != RET_OK) {
|
|
||||||
MS_LOG(ERROR) << "Schedule partial kernel init failed, name: " << kernel->name();
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
FindAllInoutKernels(sub_kernels);
|
FindAllInoutKernels(sub_kernels);
|
||||||
ret = RunPass(&sub_kernels);
|
ret = RunPass(&sub_kernels);
|
||||||
|
@ -725,6 +705,7 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kern
|
||||||
MS_ASSERT(dst_kernels->empty());
|
MS_ASSERT(dst_kernels->empty());
|
||||||
auto subgraph = src_model_->sub_graphs_.at(subgraph_index);
|
auto subgraph = src_model_->sub_graphs_.at(subgraph_index);
|
||||||
for (auto node_index : subgraph->node_indices_) {
|
for (auto node_index : subgraph->node_indices_) {
|
||||||
|
auto ret = RET_OK;
|
||||||
auto node = src_model_->all_nodes_[node_index];
|
auto node = src_model_->all_nodes_[node_index];
|
||||||
MS_ASSERT(node != nullptr);
|
MS_ASSERT(node != nullptr);
|
||||||
auto *primitive = node->primitive_;
|
auto *primitive = node->primitive_;
|
||||||
|
@ -735,8 +716,11 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kern
|
||||||
kernel = SchedulePartialToKernel(node);
|
kernel = SchedulePartialToKernel(node);
|
||||||
} else { // kernel
|
} else { // kernel
|
||||||
kernel = ScheduleNodeToKernel(node, prefer_data_type);
|
kernel = ScheduleNodeToKernel(node, prefer_data_type);
|
||||||
|
if (kernel != nullptr) {
|
||||||
|
ret = kernel->Init();
|
||||||
}
|
}
|
||||||
if (kernel == nullptr) {
|
}
|
||||||
|
if (kernel == nullptr || ret != RET_OK) {
|
||||||
MS_LOG(ERROR) << "FindBackendKernel return nullptr, name: " << node->name_
|
MS_LOG(ERROR) << "FindBackendKernel return nullptr, name: " << node->name_
|
||||||
<< ", type: " << PrimitiveTypeName(prim_type);
|
<< ", type: " << PrimitiveTypeName(prim_type);
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
|
|
|
@ -559,4 +559,13 @@ void SearchSubGraph::SubGraphSplitByOffLineParallel() {
|
||||||
MS_LOG(DEBUG) << "end to split offline parallel subgraph";
|
MS_LOG(DEBUG) << "end to split offline parallel subgraph";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SearchSubGraph::SubGraphSplit() {
|
||||||
|
bool offline_parallel_enable = model_->all_nodes_.front()->device_type_ != kDefaultDeviceType;
|
||||||
|
if (offline_parallel_enable) {
|
||||||
|
SubGraphSplitByOffLineParallel();
|
||||||
|
} else {
|
||||||
|
SubGraphSplitByOutput();
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
} // namespace mindspore::lite
|
} // namespace mindspore::lite
|
||||||
|
|
|
@ -73,14 +73,19 @@ class SearchSubGraph {
|
||||||
~SearchSubGraph() = default;
|
~SearchSubGraph() = default;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void SubGraphSplitByOutput();
|
void SubGraphSplit();
|
||||||
void SubGraphSplitByMiddle();
|
|
||||||
void SubGraphSplitByOffLineParallel();
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void SubGraphSplitByOutput();
|
||||||
void InitSearchSubGraphByOutput();
|
void InitSearchSubGraphByOutput();
|
||||||
|
|
||||||
|
private:
|
||||||
|
void SubGraphSplitByMiddle();
|
||||||
void InitSearchSubGraphByMiddle();
|
void InitSearchSubGraphByMiddle();
|
||||||
|
|
||||||
|
private:
|
||||||
|
void SubGraphSplitByOffLineParallel();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void InitSearchTensor();
|
void InitSearchTensor();
|
||||||
void InitSearchParallelSubGraph();
|
void InitSearchParallelSubGraph();
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# mindrt enable parallel model
|
# mindrt enable parallel model
|
||||||
# model run both CPU-CPU & GPU-CPU
|
# model run both CPU-CPU & GPU-CPU
|
||||||
# model_file ### accuracy_limit ### enable_fp16(true or false)
|
# model_file ### accuracy_limit ### enable_fp16(true or false)
|
||||||
mtk_model_normalize_object_scene_ps_20200519_f32.tflite 0.5 false
|
mtk_model_normalize_object_scene_ps_20200519_f32.tflite;0.5;false
|
||||||
|
# end
|
|
@ -277,17 +277,17 @@ function Run_mindrt_parallel() {
|
||||||
|
|
||||||
data_path="/data/local/tmp/input_output/"
|
data_path="/data/local/tmp/input_output/"
|
||||||
output=${data_path}'output/'${model_name}'.ms.out'
|
output=${data_path}'output/'${model_name}'.ms.out'
|
||||||
input=${model_name}.ms.bin
|
input=${data_path}'input/'${model_name}'.ms.bin'
|
||||||
model=${model_name}'.ms'
|
model=${model_name}'.ms'
|
||||||
echo ${model_name} >> "${run_parallel_log_file}"
|
echo ${model_name} >> "${run_parallel_log_file}"
|
||||||
echo "run mindrt parallel test : ${model_name}"
|
echo "run mindrt parallel test : ${model_name}"
|
||||||
|
|
||||||
########## RUN CPU-GPU parallel
|
########## RUN CPU-CPU parallel
|
||||||
echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
|
echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
|
||||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test' > adb_run_cmd.txt
|
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test' >> adb_run_cmd.txt
|
||||||
|
|
||||||
echo './benchmark --enableParallel=true --device=GPU --enableFp16='${fp16}' --accuracyThreshold='${limit}' --modelFile='${model}' --inDataFile='${input}' --benchmarkDataFile='${output} >> adb_run_cmd.txt
|
echo './benchmark --enableParallel=true --enableFp16='${fp16}' --accuracyThreshold='${limit}' --modelFile='${model}' --inDataFile='${input}' --benchmarkDataFile='${output} >> adb_run_cmd.txt
|
||||||
echo './benchmark --enableParallel=true --device=GPU --enableFp16='${fp16}' --accuracyThreshold='${limit}' --modelFile='${model}' --inDataFile='${input}' --benchmarkDataFile='${output} >> "${run_parallel_log_file}"
|
echo './benchmark --enableParallel=true --enableFp16='${fp16}' --accuracyThreshold='${limit}' --modelFile='${model}' --inDataFile='${input}' --benchmarkDataFile='${output} >> "${run_parallel_log_file}"
|
||||||
|
|
||||||
adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_parallel_log_file}"
|
adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_parallel_log_file}"
|
||||||
if [ $? = 0 ]; then
|
if [ $? = 0 ]; then
|
||||||
|
@ -296,12 +296,12 @@ function Run_mindrt_parallel() {
|
||||||
run_result='mindrt_parallel_CPU_GPU: '${model_name}' failed'; echo ${run_result} >> ${run_parallel_result_file}; return 1
|
run_result='mindrt_parallel_CPU_GPU: '${model_name}' failed'; echo ${run_result} >> ${run_parallel_result_file}; return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
########## RUN CPU-CPU parallel
|
########## RUN CPU-GPU parallel
|
||||||
echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
|
echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
|
||||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test' > adb_run_cmd.txt
|
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test' >> adb_run_cmd.txt
|
||||||
|
|
||||||
echo './benchmark --enableParallel=true --enableFp16='${fp16}' --accuracyThreshold='${limit}' --modelFile='${model}' --inDataFile='${input}' --benchmarkDataFile='${output} >> adb_run_cmd.txt
|
echo './benchmark --enableParallel=true --device=GPU --enableFp16='${fp16}' --accuracyThreshold='${limit}' --modelFile='${model}' --inDataFile='${input}' --benchmarkDataFile='${output} >> adb_run_cmd.txt
|
||||||
echo './benchmark --enableParallel=true --enableFp16='${fp16}' --accuracyThreshold='${limit}' --modelFile='${model}' --inDataFile='${input}' --benchmarkDataFile='${output} >> "${run_parallel_log_file}"
|
echo './benchmark --enableParallel=true --device=GPU --enableFp16='${fp16}' --accuracyThreshold='${limit}' --modelFile='${model}' --inDataFile='${input}' --benchmarkDataFile='${output} >> "${run_parallel_log_file}"
|
||||||
|
|
||||||
adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_parallel_log_file}"
|
adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_parallel_log_file}"
|
||||||
if [ $? = 0 ]; then
|
if [ $? = 0 ]; then
|
||||||
|
@ -486,7 +486,7 @@ if [[ $backend == "all" || $backend == "gpu" ]]; then
|
||||||
|
|
||||||
if [[ ${Run_mindrt_parallel_status} != 0 ]];then
|
if [[ ${Run_mindrt_parallel_status} != 0 ]];then
|
||||||
echo "Run_mindrt_parallel failed"
|
echo "Run_mindrt_parallel failed"
|
||||||
cat ${run_gpu_log_file}
|
cat ${run_parallel_log_file}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Run_parallel is ended"
|
echo "Run_parallel is ended"
|
||||||
|
|
Loading…
Reference in New Issue