Feature: ThreadPool supports unequal task division

This commit is contained in:
yangjie159 2021-06-04 21:12:38 +08:00
parent b0a1963aa2
commit a1062ab942
213 changed files with 421 additions and 324 deletions

View File

@ -35,6 +35,7 @@
// Smaller of x and y. The selected argument is evaluated twice, so avoid arguments with side effects.
#define MSMIN(x, y) ((x) < (y) ? (x) : (y))
// Larger of x and y. The selected argument is evaluated twice, so avoid arguments with side effects.
#define MSMAX(x, y) ((x) > (y) ? (x) : (y))
// Rounds x up to an int: adds 1 before the final (int) cast when x minus its truncated value is positive.
// x is evaluated three times.
#define MSCEIL(x) (int)((x) + (((x) - (int)(x)) > 0 ? 1 : 0))
// Computes (x + y - 1) / y, i.e. x / y rounded up for integer operands.
// NOTE(review): presumably intended for non-negative x and positive y - confirm against callers.
#define UP_DIV(x, y) (((x) + (y) - (1)) / (y))
// Computes UP_DIV(x, y) * y, i.e. x rounded up to a multiple of y for integer operands.
#define UP_ROUND(x, y) (((x) + (y) - (1)) / (y) * (y))

View File

@ -27,7 +27,6 @@
#include "thread/threadpool.h"
namespace mindspore {
#define MAX_PATH_SIZE (256)
enum Arch {
@ -215,8 +214,10 @@ int CoreAffinity::InitHardwareCoreInfo() {
core_num_ = std::thread::hardware_concurrency();
std::vector<CpuInfo> freq_set;
freq_set.resize(core_num_);
core_freq_.resize(core_num_);
for (size_t i = 0; i < core_num_; ++i) {
int max_freq = GetMaxFrequency(i);
core_freq_[i] = max_freq;
freq_set[i].core_id = i;
freq_set[i].max_freq = max_freq;
freq_set[i].arch = UnKnown_Arch;
@ -329,6 +330,7 @@ int CoreAffinity::BindThreadsToCoreList(const std::vector<Worker *> &workers) co
return THREAD_ERROR;
}
THREAD_INFO("set thread[%zu] affinity to core[%d] success", i, bind_id_[i % window]);
workers[i]->frequency = core_freq_[bind_id_[i]];
}
#endif // BIND_CORE
return THREAD_OK;

View File

@ -26,7 +26,6 @@
#endif
namespace mindspore {
enum BindMode {
Power_NoBind = 0, // free schedule
Power_Higher = 1,
@ -61,6 +60,9 @@ class CoreAffinity {
// sorted_id contains the ordered CPU core id
// the size of sorted_id is equal to the size of hardware_concurrency
std::vector<int> sorted_id_;
// stores the max frequency of each core
// indexed by core id
std::vector<int> core_freq_;
size_t core_num_{0};
size_t higher_num_{0};
};

View File

@ -18,7 +18,6 @@
#include "thread/core_affinity.h"
namespace mindspore {
InterThreadPool::~InterThreadPool() {
{
THREAD_INFO("wait util actor queue is empty");
@ -31,6 +30,17 @@ InterThreadPool::~InterThreadPool() {
DestructThreads();
}
// Main loop of a pool thread: keeps serving work for as long as the pool is alive.
// A kernel-type worker runs KernelThreadRun, an actor-type worker runs ActorThreadRun.
// Does nothing when worker is null.
void InterThreadPool::ThreadAsyncRun(Worker *worker) {
  if (worker == nullptr) {
    return;
  }
  // The worker type is checked again on every pass of the loop.
  while (alive_) {
    if (worker->type == kKernelThread) {
      KernelThreadRun(worker);
      continue;
    }
    if (worker->type == kActorThread) {
      ActorThreadRun();
    }
  }
}
void InterThreadPool::ActorThreadRun() {
ActorReference actor;
{
@ -46,17 +56,6 @@ void InterThreadPool::ActorThreadRun() {
finish_cond_var_.notify_one();
}
// Main loop of a pool thread: keeps serving work for as long as the pool is alive.
// A kernel-type worker runs KernelThreadRun, an actor-type worker runs ActorThreadRun.
// Does nothing when worker is null.
void InterThreadPool::ThreadAsyncRun(Worker *worker) {
  if (worker == nullptr) {
    return;
  }
  // The worker type is checked again on every pass of the loop.
  while (alive_) {
    if (worker->type == kKernelThread) {
      KernelThreadRun(worker);
      continue;
    }
    if (worker->type == kActorThread) {
      ActorThreadRun();
    }
  }
}
void InterThreadPool::EnqueReadyActor(const ActorReference &actor) {
{
std::lock_guard<std::mutex> _l(actor_mutex_);

View File

@ -25,7 +25,6 @@
#include "actor/actor.h"
namespace mindspore {
class InterThreadPool : public ThreadPool {
public:
// create ThreadPool that contains inter thread and intra thread

View File

@ -0,0 +1,48 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_LOG_H_
#define MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_LOG_H_

// printf is only needed by the debug logging macros below. The header is
// included at global scope on purpose: including it inside the namespace
// would declare the C stdio names as members of mindspore on first inclusion.
#ifdef THREAD_POOL_DEBUG
#include <stdio.h>
#endif

namespace mindspore {
// Logging macros. With THREAD_POOL_DEBUG defined they print the calling
// function name and line number followed by the stringified message and its
// arguments; otherwise they expand to nothing.
#ifdef THREAD_POOL_DEBUG
#define THREAD_INFO(content, args...) \
  { printf("[INFO] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
#define THREAD_ERROR(content, args...) \
  { printf("[ERROR] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
#else
#define THREAD_INFO(content, args...)
#define THREAD_ERROR(content, args...)
#endif

// Returns THREAD_ERROR from the enclosing function when ptr is null.
// For use in functions whose return type accepts a ThreadRet value.
#define THREAD_ERROR_IF_NULL(ptr) \
  do {                            \
    if ((ptr) == nullptr) {       \
      return THREAD_ERROR;        \
    }                             \
  } while (0)

// Returns from the enclosing void function when ptr is null.
#define THREAD_RETURN_IF_NULL(ptr) \
  do {                             \
    if ((ptr) == nullptr) {        \
      return;                      \
    }                              \
  } while (0)

// Status codes of the thread pool API. The THREAD_ERROR enumerator shares its
// name with the function-like THREAD_ERROR macro above; the macro is only
// expanded when the name is followed by '(', so the two do not clash.
enum ThreadRet { THREAD_OK = 0, THREAD_ERROR = 1 };
}  // namespace mindspore
#endif  // MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_LOG_H_

View File

@ -19,9 +19,10 @@
#include "thread/core_affinity.h"
namespace mindspore {
constexpr int kDefaultSpinCount = 300000;
// Returns the share of `total` represented by `partial` as a float fraction.
// The value is computed in double precision (scaled by ten, then scaled back)
// and narrowed to float on return.
float PartialScale(int partial, int total) {
  const double tenths = partial * 10.0 / total;
  return tenths / 10.0;
}
ThreadPool::~ThreadPool() {
alive_.store(false);
DestructThreads();
@ -37,10 +38,8 @@ void ThreadPool::DestructThreads() {
worker = nullptr;
}
workers_.clear();
if (affinity_ != nullptr) {
delete affinity_;
affinity_ = nullptr;
}
delete affinity_;
affinity_ = nullptr;
THREAD_INFO("deconstruct threads success");
}
@ -65,11 +64,18 @@ int ThreadPool::CreateThreads(size_t thread_num) {
return THREAD_OK;
}
// Main loop of a pool thread: repeatedly runs KernelThreadRun for the given
// worker until the pool is no longer alive. Does nothing when worker is null.
void ThreadPool::ThreadAsyncRun(Worker *worker) {
  if (worker == nullptr) {
    return;
  }
  while (alive_) {
    KernelThreadRun(worker);
  }
}
void ThreadPool::KernelThreadRun(Worker *worker) {
if (worker->active) {
Task *task = worker->task;
THREAD_RETURN_IF_NULL(task);
task->status |= task->func(task->content, ++task->task_id);
task->status |= task->func(task->content, worker->task_id, worker->lhs_scale, worker->rhs_scale);
{
std::lock_guard<std::mutex> _l(worker->mutex);
worker->task = nullptr;
@ -91,21 +97,18 @@ void ThreadPool::KernelThreadRun(Worker *worker) {
}
}
// Main loop of a pool thread: repeatedly runs KernelThreadRun for the given
// worker until the pool is no longer alive. Does nothing when worker is null.
void ThreadPool::ThreadAsyncRun(Worker *worker) {
  if (worker == nullptr) {
    return;
  }
  while (alive_) {
    KernelThreadRun(worker);
  }
}
int ThreadPool::ParallelLaunch(const Func &func, Contend contend, int task_num) {
int ThreadPool::ParallelLaunch(const Func &func, Content content, int task_num) {
// distribute task to the KernelThread and the free ActorThread,
// if the task num is greater than the KernelThread num
Task task = Task(func, contend);
DistributeTask(&task, task_num);
task.status |= task.func(task.content, 0);
++task.finished;
Task task = Task(func, content);
Worker *curr = CurrentWorker();
if (inter_thread_num_ == thread_num_ || curr == nullptr) {
SyncRunTask(&task, task_num);
} else {
DistributeTask(&task, task_num);
task.status |= task.func(task.content, 0, curr->lhs_scale, curr->rhs_scale);
++task.finished;
}
// synchronization
// wait until the finished is equal to task_num
while (task.finished != task_num) {
@ -118,10 +121,28 @@ int ThreadPool::ParallelLaunch(const Func &func, Contend contend, int task_num)
return THREAD_OK;
}
// Runs all task_num slices of the task one after another on the calling thread.
// Slice i receives the range [i * step, (i + 1) * step) of the total work, where
// step is kMaxScale / task_num; the last slice always ends exactly at kMaxScale.
// Every call's return value is OR-ed into task->status and task->finished is
// incremented once per slice.
void ThreadPool::SyncRunTask(Task *task, int task_num) const {
  const float step = kMaxScale / task_num;
  int index = 0;
  while (index < task_num) {
    const float begin = index * step;
    // Pin the final boundary so the slices cover the whole range.
    const float end = (index + 1 == task_num) ? kMaxScale : (index + 1) * step;
    task->status |= task->func(task->content, index, begin, end);
    ++task->finished;
    ++index;
  }
}
void ThreadPool::DistributeTask(Task *task, int task_num) {
int count = 0;
int count = 1;
int sum_frequency = 0;
std::vector<Worker *> assigned;
Worker *curr = CurrentWorker();
THREAD_RETURN_IF_NULL(curr);
assigned.push_back(curr);
sum_frequency += curr->frequency;
Worker *worker;
while (count < task_num - 1) {
while (count < task_num) {
{
std::lock_guard<std::mutex> _l(pool_mutex_);
if (freelist_.empty()) {
@ -130,14 +151,46 @@ void ThreadPool::DistributeTask(Task *task, int task_num) {
worker = freelist_.back();
freelist_.pop_back();
}
{
std::lock_guard<std::mutex> _l(worker->mutex);
worker->task = task;
worker->active = true;
}
worker->cond_var.notify_one();
assigned.push_back(worker);
sum_frequency += worker->frequency;
count++;
}
CalculateScales(assigned, sum_frequency);
ActiveWorkers(assigned, task, task_num);
}
// Splits the range [0, kMaxScale] between the assigned workers in proportion to
// their core frequency.
// Each worker gets a contiguous slice [lhs_scale, rhs_scale]: a worker's slice
// starts where the previous one ended, and its width is the worker's share of
// sum_frequency. Boundaries are capped at kMaxScale.
// The last worker's rhs_scale is always set to kMaxScale: the float shares can
// add up to slightly less than one, which would otherwise leave the tail of the
// work unassigned (SyncRunTask pins its last slice the same way).
// Stops at the first null worker, leaving the remaining workers untouched.
void ThreadPool::CalculateScales(const std::vector<Worker *> &assigned, int sum_frequency) const {
  // Divide task according to computing power(core frequency)
  float start = 0.;
  const size_t worker_num = assigned.size();
  for (size_t i = 0; i < worker_num; ++i) {
    Worker *worker = assigned[i];
    THREAD_RETURN_IF_NULL(worker);
    worker->lhs_scale = start;
    start += PartialScale(worker->frequency, sum_frequency);
    start = start < kMaxScale ? start : kMaxScale;
    if (i + 1 == worker_num) {
      // Close the range exactly, regardless of accumulated rounding error.
      start = kMaxScale;
    }
    worker->rhs_scale = start;
  }
}
// Hands the task to workers[1] .. workers[task_num - 1] and wakes them up.
// Index 0 is skipped here; in DistributeTask that slot holds the calling
// thread's own worker.
// For every activated worker, under the worker's mutex: the task pointer and the
// task id (the worker's index) are stored, the worker is marked active and its
// condition variable is notified.
// The loop never reads past the end of `workers`, even if task_num is larger
// than the number of workers supplied. Stops at the first null worker.
void ThreadPool::ActiveWorkers(const std::vector<Worker *> &workers, Task *task, int task_num) const {
  const int worker_num = static_cast<int>(workers.size());
  // Guard against indexing beyond the supplied workers.
  const int limit = task_num < worker_num ? task_num : worker_num;
  for (int i = 1; i < limit; ++i) {
    Worker *worker = workers[i];
    THREAD_RETURN_IF_NULL(worker);
    std::lock_guard<std::mutex> _l(worker->mutex);
    worker->task = task;
    worker->task_id = i;
    worker->active = true;
    worker->cond_var.notify_one();
  }
}
// Finds the pool worker whose thread is the calling thread.
// Returns nullptr when the caller is not one of the pool's worker threads.
Worker *ThreadPool::CurrentWorker() const {
  const auto self_id = std::this_thread::get_id();
  for (auto it = workers_.begin(); it != workers_.end(); ++it) {
    Worker *candidate = *it;
    if (candidate->thread.get_id() == self_id) {
      return candidate;
    }
  }
  return nullptr;
}
int ThreadPool::InitAffinityInfo() {

View File

@ -24,46 +24,24 @@
#include <condition_variable>
#include <mutex>
#include <new>
#include "thread/threadlog.h"
#include "thread/core_affinity.h"
namespace mindspore {
constexpr int kDefaultFrequency = 1;
constexpr float kMaxScale = 1.;
#ifdef THREAD_POOL_DEBUG
#include <stdio.h>
#define THREAD_INFO(content, args...) \
{ printf("[INFO] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
#define THREAD_ERROR(content, args...) \
{ printf("[ERROR] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
#else
#define THREAD_INFO(content, ...)
#define THREAD_ERROR(content, ...)
#endif
#define THREAD_ERROR_IF_NULL(ptr) \
do { \
if ((ptr) == nullptr) { \
return THREAD_ERROR; \
} \
} while (0)
#define THREAD_RETURN_IF_NULL(ptr) \
do { \
if ((ptr) == nullptr) { \
return; \
} \
} while (0)
enum ThreadRet { THREAD_OK = 0, THREAD_ERROR = 1 };
enum ThreadType { kActorThread = 0, kKernelThread = 1 };
using Func = int (*)(void *arg, int);
using Contend = void *;
// used in scenarios with unequal division of task
// the parameters indicate the start and end coefficients
using Func = int (*)(void *, int, float, float);
using Content = void *;
typedef struct Task {
Task(Func f, Contend c) : func(f), content(c) {}
Task(Func f, Content c) : func(f), content(c) {}
Func func;
Contend content;
std::atomic_int task_id{0};
Content content;
std::atomic_int finished{0};
std::atomic_int status{THREAD_OK}; // return status, RET_OK
} Task;
@ -72,9 +50,13 @@ typedef struct Worker {
std::thread thread;
std::atomic_int type{kActorThread};
std::atomic_bool active{false};
Task *task{nullptr};
std::mutex mutex;
std::condition_variable cond_var;
Task *task{nullptr};
int task_id{0};
float lhs_scale{0.};
float rhs_scale{kMaxScale};
int frequency{kDefaultFrequency};
int spin{0};
} Worker;
@ -90,7 +72,7 @@ class ThreadPool {
int SetProcessAffinity(BindMode bind_mode) const;
int ParallelLaunch(const Func &func, Contend contend, int task_num);
int ParallelLaunch(const Func &func, Content content, int task_num);
protected:
ThreadPool() = default;
@ -103,7 +85,13 @@ class ThreadPool {
virtual void ThreadAsyncRun(Worker *worker);
void KernelThreadRun(Worker *worker);
void SyncRunTask(Task *task, int task_num) const;
void DistributeTask(Task *task, int task_num);
void CalculateScales(const std::vector<Worker *> &workers, int sum_frequency) const;
void ActiveWorkers(const std::vector<Worker *> &workers, Task *task, int task_num) const;
Worker *CurrentWorker() const;
std::mutex pool_mutex_;

View File

@ -47,7 +47,7 @@ int SetCoreAffinity(int bind_mode);
int GetCurrentThreadNum();
int ParallelLaunch(int (*func)(void *, int), void *content, int task_num);
int ParallelLaunch(int (*func)(void *, int, float, float), void *content, int task_num);
void ClearThreadPool();

View File

@ -184,9 +184,10 @@ int DeConvolutionFP32Coder::DoCode(CoderContext *const context) {
code.CodeBaseStruct("DeConvFp32Args", kRunArgs, packed_input_, packed_weight_, packed_bias_, packed_output_,
output_ptr_, tmp_buffer_, "&matmul_parameter", "&conv_parameter");
if (!support_parallel_) {
code.CodeFunction("DeConvFp32Run", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("DeConvFp32Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
} else {
code.CodeFunction(kParallelLaunch, "DeConvFp32Run", kRunArgsAddr, "conv_parameter.thread_num_");
code.CodeFunction(kParallelLaunch, "DeConvFp32Run", kRunArgsAddr, "conv_parameter.thread_num_", kLhsScale,
kRhsScale);
}
}
context->AppendCode(code.str());

View File

@ -156,15 +156,15 @@ int AddInt8Coder::DoCode(CoderContext *const context) {
support_opt_add_, input0, input1, output_tensor_);
if (support_parallel_) {
if (arith_para_->broadcasting_) {
code.CodeFunction(kParallelLaunch, "AddBroadcastInt8Run", kRunArgsAddr, gThreadNum);
code.CodeFunction(kParallelLaunch, "AddBroadcastInt8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
} else {
code.CodeFunction(kParallelLaunch, "AddInt8Run", kRunArgsAddr, gThreadNum);
code.CodeFunction(kParallelLaunch, "AddInt8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
}
} else {
if (arith_para_->broadcasting_) {
code.CodeFunction("AddBroadcastInt8Run", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("AddBroadcastInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
} else {
code.CodeFunction("AddInt8Run", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("AddInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
}
}
context->AppendCode(code.str());

View File

@ -113,9 +113,9 @@ int ConcatInt8Coder::DoCode(CoderContext *const context) {
code.CodeBaseStruct<false>("ConcatInt8Args", kRunArgs, "input_data", output_tensor_, "&concat_param", axis_,
before_axis_size, count_unit_);
if (support_parallel_) {
code.CodeFunction(kParallelLaunch, "ConcatInt8Run", kRunArgsAddr, gThreadNum);
code.CodeFunction(kParallelLaunch, "ConcatInt8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
} else {
code.CodeFunction("ConcatInt8Run", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("ConcatInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
}
context->AppendCode(code.str());
return RET_OK;

View File

@ -88,9 +88,9 @@ int Conv2D1x1Int8Coder::DoCode(CoderContext *const context) {
/* input transpose and input sum */
code << "if (GetSupportOptFlag()) {\n";
if (support_parallel_) {
code.CodeFunction(kParallelLaunch, "OcOptPre", kRunArgsAddr, "args.thread_count_hw");
code.CodeFunction(kParallelLaunch, "OcOptPre", kRunArgsAddr, "args.thread_count_hw", kLhsScale, kRhsScale);
} else {
code.CodeFunction("OcOptPre", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("OcOptPre", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
}
code << "} else {\n";
code << "RowMajor2Row16x4MajorInt8(args.input_ptr_, args.packed_input_, args.matmul_param_->row_, "
@ -107,30 +107,30 @@ int Conv2D1x1Int8Coder::DoCode(CoderContext *const context) {
/* matmul parallel by oc */
code << "if (GetSupportOptFlag()) {\n";
if (support_parallel_) {
code.CodeFunction(kParallelLaunch, "RunArm64OptOc", kRunArgsAddr, "args.thread_count_oc");
code.CodeFunction(kParallelLaunch, "RunArm64OptOc", kRunArgsAddr, "args.thread_count_oc", kLhsScale, kRhsScale);
} else {
code.CodeFunction("RunArm64OptOc", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("RunArm64OptOc", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
}
code << "} else {\n";
if (support_parallel_) {
code.CodeFunction(kParallelLaunch, "RunArmOc", kRunArgsAddr, "args.thread_count_oc");
code.CodeFunction(kParallelLaunch, "RunArmOc", kRunArgsAddr, "args.thread_count_oc", kLhsScale, kRhsScale);
} else {
code.CodeFunction("RunArmOc", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("RunArmOc", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
}
code << "}\n";
code << "} else {\n";
/* matmul parallel by hw */
code << "if (GetSupportOptFlag()) {\n";
if (support_parallel_) {
code.CodeFunction(kParallelLaunch, "RunArm64OptHw", kRunArgsAddr, "args.thread_count_hw");
code.CodeFunction(kParallelLaunch, "RunArm64OptHw", kRunArgsAddr, "args.thread_count_hw, kLhsScale, kRhsScale");
} else {
code.CodeFunction("RunArm64OptHw", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("RunArm64OptHw", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
}
code << "} else {\n";
if (support_parallel_) {
code.CodeFunction(kParallelLaunch, "RunArmHw", kRunArgsAddr, "args.thread_count_hw");
code.CodeFunction(kParallelLaunch, "RunArmHw", kRunArgsAddr, "args.thread_count_hw", kLhsScale, kRhsScale);
} else {
code.CodeFunction("RunArmHw", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("RunArmHw", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
}
code << "}\n";
code << "}\n";

View File

@ -163,10 +163,11 @@ int Conv2D3x3Int8Coder::DoCode(CoderContext *const context) {
if (thread_num_ > 1) {
code.CodeBaseStruct("Conv3x3Int8Args", kRunArgs, c8_input_, transformed_filter_addr_, new_bias_addr_,
output_tensor_, tile_buffer_, block_unit_buffer_, tmp_dst_buffer_, tmp_out_, "&conv_param_");
code.CodeFunction(kParallelLaunch, "Conv3x3Int8Run", kRunArgsAddr, gThreadNum);
code.CodeFunction(kParallelLaunch, "Conv3x3Int8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
} else {
code.CodeFunction("Conv3x3Int8", c8_input_, transformed_filter_addr_, new_bias_addr_, output_tensor_, tile_buffer_,
block_unit_buffer_, tmp_dst_buffer_, tmp_out_, kDefaultTaskId, "&conv_param_");
block_unit_buffer_, tmp_dst_buffer_, tmp_out_, kDefaultTaskId, "&conv_param_", kLhsScale,
kRhsScale);
}
code.CodeFunction("PackNC4HW4ToNHWCInt8", tmp_out_, output_tensor_, conv_param_->output_batch_,
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);

View File

@ -237,9 +237,9 @@ int Conv2DINT8Coder::DoCode(CoderContext *const context) {
}
if (support_parallel_) {
code.CodeFunction(kParallelLaunch, "ConvolutionInt8Run", kRunArgsAddr, gThreadNum);
code.CodeFunction(kParallelLaunch, "ConvolutionInt8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
} else {
code.CodeFunction("ConvolutionInt8Run", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("ConvolutionInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
}
context->AppendCode(code.str());
return RET_OK;

View File

@ -122,9 +122,10 @@ int ConvolutionDepthwiseINT8Coder::DoCode(CoderContext *const context) {
code.CodeBaseStruct("ConvDepthwiseInt8Args", kRunArgs, output_tensor_, row_buffer_, input_tensor_, packed_weight_,
bias_data_, "&conv_param");
if (support_parallel_) {
code.CodeFunction(kParallelLaunch, "ConvDepthwiseInt8Run", kRunArgsAddr, "conv_param.thread_num_");
code.CodeFunction(kParallelLaunch, "ConvDepthwiseInt8Run", kRunArgsAddr, "conv_param.thread_num_", kLhsScale,
kRhsScale);
} else {
code.CodeFunction("ConvDepthwiseInt8Run", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("ConvDepthwiseInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
}
context->AppendCode(code.str());
return RET_OK;

View File

@ -92,9 +92,9 @@ int ResizeInt8Coder::DoCode(CoderContext *const context) {
code.CodeBaseStruct("ResizeInt8Args", kRunArgs, input_tensor_, output_tensor_, "input_shape", "output_shape",
align_corners, gThreadNum);
if (support_parallel_) {
code.CodeFunction(kParallelLaunch, "ResizeInt8Run", kRunArgsAddr, gThreadNum);
code.CodeFunction(kParallelLaunch, "ResizeInt8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
} else {
code.CodeFunction("ResizeInt8Run", kRunArgsAddr, kDefaultTaskId);
code.CodeFunction("ResizeInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
}
} else {
MS_LOG(WARNING) << "unsupported parallel launch currently";

View File

@ -35,6 +35,8 @@ constexpr auto gThreadNum = "g_thread_num";
constexpr auto kRunArgs = "args";
constexpr auto kRunArgsAddr = "(void *)&args";
constexpr float kLhsScale = 0;
constexpr float kRhsScale = 1;
} // namespace mindspore::lite::micro
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_PARALLEL_H_

View File

@ -53,7 +53,7 @@ int DoDeconvFp32(const float *packed_input, const float *packed_weight, const fl
return NNACL_OK;
}
int DeConvFp32Run(void *cdata, int task_id) {
int DeConvFp32Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
DeConvFp32Args *args = (DeConvFp32Args *)cdata;
const MatMulParameter *matmul_param = args->matmul_param_;
const ConvParameter *conv_param = args->conv_param_;

View File

@ -40,7 +40,7 @@ int DoDeconvFp32(const float *packed_input, const float *packed_weight, const fl
float *output, float *tmp_ori_buffer, const MatMulParameter *matmul_param,
const ConvParameter *conv_param, int task_id);
int DeConvFp32Run(void *cdata, int task_id);
int DeConvFp32Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
#ifdef __cplusplus
}

View File

@ -17,7 +17,7 @@
#include "wrapper/int8/add_int8_wrapper.h"
#include "nnacl/errorcode.h"
int AddBroadcastInt8Run(void *cdata, int task_id) {
int AddBroadcastInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
AddInt8Args *args = (AddInt8Args *)(cdata);
int stride = UP_DIV(args->out_size_, args->thread_count_);
int real_out_count = MSMIN(stride, args->out_size_ - stride * task_id);
@ -42,7 +42,7 @@ int AddBroadcastInt8Run(void *cdata, int task_id) {
return NNACL_OK;
}
int AddInt8Run(void *cdata, int task_id) {
int AddInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
AddInt8Args *args = (AddInt8Args *)(cdata);
/* no need broadcast */
int stride = UP_DIV(args->elements_num_, args->thread_count_);

View File

@ -38,9 +38,9 @@ typedef struct {
int8_t *output_data_;
} AddInt8Args;
int AddBroadcastInt8Run(void *cdata, int task_id);
int AddBroadcastInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
int AddInt8Run(void *cdata, int task_id);
int AddInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
#ifdef __cplusplus
}

View File

@ -18,7 +18,7 @@
#include "nnacl/int8/batchnorm_int8.h"
#include "nnacl/errorcode.h"
int BatchNormInt8Run(void *cdata, int task_id) {
int BatchNormInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
BatchNormArgs *args = (BatchNormArgs *)(cdata);
BatchNormInt8(args->out_addr_, args->in_addr_, args->alpha_addr_, args->beta_addr_, task_id, args->batchnorm_param_);
return NNACL_OK;

View File

@ -27,6 +27,6 @@ typedef struct BatchNormArgs {
BatchNormParameter *batchnorm_param_;
} BatchNormArgs;
int BatchNormInt8Run(void *cdata, int task_id);
int BatchNormInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_WRAPPER_INT8_BATCHNORM_INT8_WRAPPER_H_

View File

@ -16,7 +16,7 @@
#include "wrapper/int8/concat_int8_wrapper.h"
int ConcatInt8Run(void *cdata, int task_id) {
int ConcatInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
ConcatInt8Args *args = (ConcatInt8Args *)cdata;
int64_t real_dst_count = MSMIN(args->before_axis_size_ - task_id * args->count_unit_, args->count_unit_);
if (real_dst_count <= 0) {

View File

@ -30,6 +30,6 @@ typedef struct {
int64_t count_unit_;
} ConcatInt8Args;
int ConcatInt8Run(void *cdata, int task_id);
int ConcatInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
#endif // MINDSPORE_LITE_MICRO_INT8_CONCAT_WRAPPER_INT8_WRAPPER_H_

View File

@ -30,7 +30,7 @@ void Pre1x1Trans(Conv1x1Args *args, int8_t *src_input, int8_t *src_output) {
}
}
int OcOptPre(void *cdata, int task_id) {
int OcOptPre(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
Conv1x1Args *args = (Conv1x1Args *)(cdata);
int cur_stride = args->thread_stride_hw_ * C4NUM;
int res_stride = args->matmul_param_->row_ - task_id * args->thread_stride_hw_ * C4NUM;
@ -51,7 +51,7 @@ int OcOptPre(void *cdata, int task_id) {
return NNACL_OK;
}
int RunArm64OptOc(void *cdata, int task_id) {
int RunArm64OptOc(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
Conv1x1Args *args = (Conv1x1Args *)(cdata);
int stride = args->thread_stride_oc_ * C16NUM;
int cur_stride = task_id * stride;
@ -77,7 +77,7 @@ int RunArm64OptOc(void *cdata, int task_id) {
return NNACL_OK;
}
int RunArmOc(void *cdata, int task_id) {
int RunArmOc(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
Conv1x1Args *args = (Conv1x1Args *)(cdata);
#ifdef ENABLE_ARM32
int col_tile = C2NUM;
@ -108,7 +108,7 @@ int RunArmOc(void *cdata, int task_id) {
return NNACL_OK;
}
int RunArm64OptHw(void *cdata, int task_id) {
int RunArm64OptHw(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
Conv1x1Args *args = (Conv1x1Args *)(cdata);
int cur_stride = args->thread_stride_hw_ * C4NUM;
int res_stride = args->matmul_param_->row_ - task_id * args->thread_stride_hw_ * C4NUM;
@ -134,7 +134,7 @@ int RunArm64OptHw(void *cdata, int task_id) {
return NNACL_OK;
}
int RunArmHw(void *cdata, int task_id) {
int RunArmHw(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
Conv1x1Args *args = (Conv1x1Args *)(cdata);
int cur_stride = args->thread_stride_hw_ * C4NUM;
int res_stride = args->matmul_param_->row_ - task_id * args->thread_stride_hw_ * C4NUM;

View File

@ -48,10 +48,10 @@ typedef struct {
void Conv1x1PreRun(Conv1x1Args *args, int thread_num);
void Pre1x1Trans(Conv1x1Args *args, int8_t *src_input, int8_t *src_output);
int OcOptPre(void *cdata, int task_id);
int RunArm64OptOc(void *cdata, int task_id);
int RunArmOc(void *cdata, int task_id);
int RunArm64OptHw(void *cdata, int task_id);
int RunArmHw(void *cdata, int task_id);
int OcOptPre(void *cdata, int task_id, float lhs_scale, float rhs_scale);
int RunArm64OptOc(void *cdata, int task_id, float lhs_scale, float rhs_scale);
int RunArmOc(void *cdata, int task_id, float lhs_scale, float rhs_scale);
int RunArm64OptHw(void *cdata, int task_id, float lhs_scale, float rhs_scale);
int RunArmHw(void *cdata, int task_id, float lhs_scale, float rhs_scale);
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_WRAPPER_INT8_CONV1X1_RUN_H_

View File

@ -16,7 +16,7 @@
#include "wrapper/int8/conv3x3_run_int8_wrapper.h"
int Conv3x3Int8Run(void *cdata, int task_id) {
int Conv3x3Int8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
Conv3x3Int8Args *args = (Conv3x3Int8Args *)cdata;
Conv3x3Int8(args->input_data, args->transed_weight, args->bias_data, args->output_data, args->tile_buffer,
args->block_unit_buffer, args->tmp_dst_buffer, args->tmp_out, task_id, args->conv_param);

View File

@ -33,6 +33,6 @@ typedef struct {
ConvParameter *conv_param;
} Conv3x3Int8Args;
int Conv3x3Int8Run(void *cdata, int task_id);
int Conv3x3Int8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
#endif // MINDSPORE_LITE_MICRO_INT8_CONV3x3_WRAPPER_INT8_WRAPPER_H_

View File

@ -16,7 +16,7 @@
#include "wrapper/int8/convolution_depthwise_int8_wrapper.h"
int ConvDepthwiseInt8Run(void *cdata, int task_id) {
int ConvDepthwiseInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
ConvDepthwiseInt8Args *args = (ConvDepthwiseInt8Args *)cdata;
int32_t *buffer = args->row_buffer_ + args->conv_param_->output_w_ * args->conv_param_->output_channel_ * task_id;
ConvDwInt8(args->output_data_, buffer, args->input_data_, args->weight_data_, args->bias_data_, args->conv_param_,

View File

@ -30,6 +30,6 @@ typedef struct {
const ConvParameter *conv_param_;
} ConvDepthwiseInt8Args;
int ConvDepthwiseInt8Run(void *cdata, int task_id);
int ConvDepthwiseInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
#endif // MINDSPORE_LITE_MICRO_INT8_CONVOLUTION_DEPTHWISE_WRAPPER_INT8_WRAPPER_H_

View File

@ -16,7 +16,7 @@
#include "wrapper/int8/convolution_int8_wrapper.h"
int ConvolutionInt8Run(void *cdata, int task_id) {
int ConvolutionInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
ConvolutionInt8Args *args = (ConvolutionInt8Args *)cdata;
ConvInt8(args->input_data_, args->packed_input_, args->matmul_input_, args->packed_weight_, args->bias_data_,
args->output_data_, args->filter_zp_, args->input_sum_, task_id, args->conv_param_, args->matmul_func_,

View File

@ -36,6 +36,6 @@ typedef struct {
bool is_optimize_;
} ConvolutionInt8Args;
int ConvolutionInt8Run(void *cdata, int task_id);
int ConvolutionInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
#endif // MINDSPORE_LITE_MICRO_INT8_CONVOLUTION_WRAPPER_INT8_WRAPPER_H_

View File

@ -17,7 +17,7 @@
#include "wrapper/int8/resize_int8_wrapper.h"
#include "nnacl/errorcode.h"
int ResizeInt8Run(void *cdata, int task_id) {
int ResizeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
ResizeInt8Args *args = (ResizeInt8Args *)cdata;
ResizeNearestNeighborInt8Simple(args->input_data_, args->output_data_, args->input_shape_, args->output_shape_,
args->align_corners_, task_id, args->thread_num_);

View File

@ -32,7 +32,7 @@ typedef struct {
int thread_num_;
} ResizeInt8Args;
int ResizeInt8Run(void *cdata, int task_id);
int ResizeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
#ifdef __cplusplus
}

View File

@ -17,7 +17,7 @@
#include "wrapper/int8/slice_int8_wrapper.h"
#include "nnacl/int8/slice_int8.h"
int SliceInt8Run(void *cdata, int task_id) {
int SliceInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
SliceArgs *args = (SliceArgs *)(cdata);
int ret = SliceInt8(args->input_data_, args->output_data_, args->param_, task_id);
return ret;

View File

@ -26,6 +26,6 @@ typedef struct SliceArgs {
SliceParameter *param_;
} SliceArgs;
int SliceInt8Run(void *cdata, int task_id);
int SliceInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_WRAPPER_INT8_SLICE_INT8_WRAPPER_H_

View File

@ -45,7 +45,7 @@ int GetCurrentThreadNum() {
return g_pool->thread_num();
}
int ParallelLaunch(int (*func)(void *, int), void *content, int task_num) {
int ParallelLaunch(int (*func)(void *, int, float, float), void *content, int task_num) {
if (g_pool == nullptr) {
return mindspore::THREAD_ERROR;
}

View File

@ -25,7 +25,7 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_ConstantOfShape;
namespace mindspore::kernel {
int ConstantOfShapeRun(void *cdata, int task_id) {
int ConstantOfShapeRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto g_kernel = reinterpret_cast<ConstantOfShapeCPUKernel *>(cdata);
auto ret = g_kernel->DoExecute(task_id);
if (ret != RET_OK) {

View File

@ -88,7 +88,7 @@ DetectionPostProcessBaseCPUKernel::~DetectionPostProcessBaseCPUKernel() { delete
int DetectionPostProcessBaseCPUKernel::ReSize() { return RET_OK; }
int NmsMultiClassesFastCoreRun(void *cdata, int task_id) {
int NmsMultiClassesFastCoreRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto KernelData = reinterpret_cast<DetectionPostProcessBaseCPUKernel *>(cdata);
int ret = NmsMultiClassesFastCore(KernelData->num_boxes_, KernelData->num_classes_with_bg_, KernelData->input_scores_,
PartialArgSort, KernelData->params_, task_id, KernelData->thread_num_);

View File

@ -153,7 +153,7 @@ int PriorBoxCPUKernel::PriorBoxImpl(int task_id) {
return ret;
}
int RunPriorBox(void *cdata, int task_id) {
int RunPriorBox(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto prior_box = reinterpret_cast<PriorBoxCPUKernel *>(cdata);
auto error_code = prior_box->PriorBoxImpl(task_id);

View File

@ -125,7 +125,7 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) {
return RET_OK;
}
int QuantDTypeCastRun(void *cdata, int task_id) {
int QuantDTypeCastRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto g_kernel = reinterpret_cast<QuantDTypeCastCPUKernel *>(cdata);
auto ret = g_kernel->QuantDTypeCast(task_id);
if (ret != RET_OK) {

View File

@ -53,7 +53,7 @@ int ReshapeBaseCPUKernel::RunImpl(int task_id) {
return RET_OK;
}
int ReshapeRun(void *cdata, int task_id) {
int ReshapeRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto reshape = reinterpret_cast<ReshapeBaseCPUKernel *>(cdata);
auto ret = reshape->RunImpl(task_id);
if (ret != RET_OK) {

View File

@ -24,7 +24,7 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_SliceFusion;
namespace mindspore::kernel {
int SliceLaunch(void *cdata, int task_id) {
int SliceLaunch(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
if (cdata == nullptr) {
MS_LOG(ERROR) << "Input cdata is nullptr!";
return RET_ERROR;

View File

@ -106,7 +106,7 @@ int SplitBaseCPUKernel::Split(int task_id) {
return RET_OK;
}
static int SplitRun(void *cdata, int task_id) {
static int SplitRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto g_kernel = reinterpret_cast<SplitBaseCPUKernel *>(cdata);
auto ret = g_kernel->Split(task_id);
if (ret != RET_OK) {

View File

@ -72,7 +72,7 @@ int SplitWithOverlapBaseCPUKernel::Split(int task_id) {
return RET_OK;
}
int SplitWithOverlapRun(void *cdata, int task_id) {
int SplitWithOverlapRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto g_kernel = reinterpret_cast<SplitWithOverlapBaseCPUKernel *>(cdata);
auto ret = g_kernel->Split(task_id);
if (ret != RET_OK) {

View File

@ -81,7 +81,7 @@ void StackBaseCPUKernel::Execute(int task_id) {
Stack(all_inputs_, output_data + input_num * start * copy_size_, input_num, copy_size_, start, end);
}
static int StackRun(void *cdata, int task_id) {
static int StackRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto stack = reinterpret_cast<StackBaseCPUKernel *>(cdata);
stack->Execute(task_id);
return RET_OK;

View File

@ -129,7 +129,7 @@ int StridedSliceCPUKernel::FastRunImpl(int task_id) {
return RET_OK;
}
int StrideRun(void *cdata, int task_id) {
int StrideRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto stride = reinterpret_cast<StridedSliceCPUKernel *>(cdata);
auto ret = stride->FastRunImpl(task_id);
if (ret != RET_OK) {

View File

@ -83,7 +83,7 @@ int TileCPUKernel::ReSize() {
return RET_OK;
}
int SimpleTile(void *cdata, int task_id) {
int SimpleTile(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto kernel = reinterpret_cast<TileCPUKernel *>(cdata);
auto ret = kernel->SimpleTileImpl(task_id);
if (ret != RET_OK) {

View File

@ -82,7 +82,7 @@ int ActivationFp16CPUKernel::DoActivation(int task_id) {
return error_code;
}
int ActivationFp16Run(void *cdata, int task_id) {
int ActivationFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto activation_kernel = reinterpret_cast<ActivationFp16CPUKernel *>(cdata);
auto error_code = activation_kernel->DoActivation(task_id);
if (error_code != RET_OK) {

View File

@ -145,7 +145,7 @@ int ArithmeticCompareFP16CPUKernel::DoArithmetic(int task_id) {
return ret;
}
static int ArithmeticsRunFp16(void *cdata, int task_id) {
static int ArithmeticsRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto arithmetic_kernel = reinterpret_cast<ArithmeticCompareFP16CPUKernel *>(cdata);
auto ret = arithmetic_kernel->DoArithmetic(task_id);
if (ret != RET_OK) {

View File

@ -26,7 +26,7 @@ using mindspore::schema::PrimitiveType_Cast;
namespace mindspore::kernel {
namespace {
int CastFp16Run(void *cdata, int task_id) {
int CastFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
if (cdata == nullptr) {
MS_LOG(ERROR) << "input cdata is nullptr!";
return RET_ERROR;

View File

@ -189,7 +189,7 @@ int Convolution1x1FP16CPUKernel::RunHw(int task_id) {
return RET_OK;
}
static int Convolution1x1Fp16RunOc(void *cdata, int task_id) {
static int Convolution1x1Fp16RunOc(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv = reinterpret_cast<Convolution1x1FP16CPUKernel *>(cdata);
auto error_code = conv->RunOc(task_id);
if (error_code != RET_OK) {
@ -199,7 +199,7 @@ static int Convolution1x1Fp16RunOc(void *cdata, int task_id) {
return RET_OK;
}
static int Convolution1x1Fp16RunHw(void *cdata, int task_id) {
static int Convolution1x1Fp16RunHw(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv = reinterpret_cast<Convolution1x1FP16CPUKernel *>(cdata);
auto error_code = conv->RunHw(task_id);
if (error_code != RET_OK) {

View File

@ -92,7 +92,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
return RET_OK;
}
static int ConvDwFp16Run(void *cdata, int task_id) {
static int ConvDwFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv_dw_fp16 = reinterpret_cast<ConvolutionDepthwiseFp16CPUKernel *>(cdata);
auto ret = conv_dw_fp16->Execute(task_id);
if (ret != RET_OK) {

View File

@ -121,7 +121,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Execute(int task_id) {
return RET_OK;
}
static int ConvDwSWFp16Run(void *cdata, int task_id) {
static int ConvDwSWFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv_dw_fp16 = reinterpret_cast<ConvolutionDepthwiseSWFp16CPUKernel *>(cdata);
auto ret = conv_dw_fp16->Execute(task_id);
if (ret != RET_OK) {

View File

@ -125,7 +125,7 @@ int ConvolutionFP16CPUKernel::RunImpl(int task_id) {
return RET_OK;
}
static int ConvolutionFp16Impl(void *cdata, int task_id) {
static int ConvolutionFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv = reinterpret_cast<ConvolutionFP16CPUKernel *>(cdata);
auto error_code = conv->RunImpl(task_id);
if (error_code != RET_OK) {

View File

@ -194,7 +194,7 @@ int ConvolutionWinogradFP16CPUKernel::RunImpl(int task_id) {
return RET_OK;
}
static int ConvolutionWinogradFp16Impl(void *cdata, int task_id) {
static int ConvolutionWinogradFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv = reinterpret_cast<ConvolutionWinogradFP16CPUKernel *>(cdata);
auto error_code = conv->RunImpl(task_id);
if (error_code != RET_OK) {

View File

@ -35,7 +35,7 @@ int CropFp16CPUKernel::DoExecute(int task_id) {
return RET_OK;
}
static int CropFp16Run(void *cdata, int task_id) {
static int CropFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto g_kernel = reinterpret_cast<CropFp16CPUKernel *>(cdata);
auto ret = g_kernel->DoExecute(task_id);
if (ret != RET_OK) {

View File

@ -132,7 +132,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
return RET_OK;
}
static int DeconvDwFp16Run(void *cdata, int task_id) {
static int DeconvDwFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto deconv_dw_fp16 = reinterpret_cast<DeconvolutionDepthwiseFp16CPUKernel *>(cdata);
auto ret = deconv_dw_fp16->Execute(task_id);
if (ret != RET_OK) {

View File

@ -149,7 +149,7 @@ void DeConvolutionFp16CPUKernel::FreeRunBuf() {
return;
}
static int DeConvFp16Run(void *cdata, int task_id) {
static int DeConvFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto deconv = reinterpret_cast<DeConvolutionFp16CPUKernel *>(cdata);
auto error_code = deconv->DoDeconv(task_id);
if (error_code != RET_OK) {

View File

@ -220,13 +220,13 @@ int DeConvWinogradFp16CPUKernel::DeDeconvPost(int task_id) {
return RET_OK;
}
int DeConvWgFp16Run(void *cdata, int task_id) {
// Task callback for the fp16 winograd deconvolution kernel.
// cdata is the opaque pointer to the DeConvWinogradFp16CPUKernel instance; task_id is forwarded to DoDeconv.
// lhs_scale and rhs_scale are part of the callback signature and are not read in this function.
// NOTE(review): the result of DoDeconv is not inspected and RET_OK is returned unconditionally — confirm intended.
int DeConvWgFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
  auto *kernel = reinterpret_cast<DeConvWinogradFp16CPUKernel *>(cdata);
  kernel->DoDeconv(task_id);
  return RET_OK;
}
int DeConvWgPostFp16Run(void *cdata, int task_id) {
int DeConvWgPostFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto deconvWg = reinterpret_cast<DeConvWinogradFp16CPUKernel *>(cdata);
deconvWg->DeDeconvPost(task_id);
return RET_OK;

View File

@ -121,7 +121,7 @@ int GatherFp16CPUKernel::DoGather(int task_id) {
return error_code;
}
int GatherRunFp16(void *cdata, int task_id) {
int GatherRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto gather_kernel = reinterpret_cast<GatherFp16CPUKernel *>(cdata);
auto error_code = gather_kernel->DoGather(task_id);
if (error_code != RET_OK) {

View File

@ -96,7 +96,7 @@ int InstanceNormFp16CPUKernel::DoInstanceNorm(int task_id) {
return RET_OK;
}
int InstanceNormFp16Run(void *cdata, int task_id) {
int InstanceNormFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto kernel = reinterpret_cast<InstanceNormFp16CPUKernel *>(cdata);
auto ret = kernel->DoInstanceNorm(task_id);
if (ret != RET_OK) {

View File

@ -70,7 +70,7 @@ int LayerNormFp16CPUKernel::DoLayerNormFp16(int thread_id) {
return RET_OK;
}
int LayerNormFp16Run(void *cdata, int task_id) {
int LayerNormFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto kernel = reinterpret_cast<LayerNormFp16CPUKernel *>(cdata);
auto ret = kernel->DoLayerNormFp16(task_id);
if (ret != RET_OK) {

View File

@ -84,7 +84,7 @@ int LogSoftmaxFp16CPUKernel::DoLogSoftmaxLastAxis(int task_id) {
return RET_OK;
}
int LogSoftmaxLastAxisFp16Run(void *cdata, int task_id) {
int LogSoftmaxLastAxisFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto kernel = reinterpret_cast<LogSoftmaxFp16CPUKernel *>(cdata);
auto ret = kernel->DoLogSoftmaxLastAxis(task_id);
if (ret != RET_OK) {

View File

@ -25,7 +25,7 @@ using mindspore::lite::RET_MEMORY_FAILED;
using mindspore::lite::RET_OK;
namespace mindspore::kernel {
int MatmulBaseFP16Run(void *cdata, int task_id) {
int MatmulBaseFP16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto op = reinterpret_cast<MatmulBaseFP16CPUKernel *>(cdata);
auto error_code = op->RunImpl(task_id);
if (error_code != RET_OK) {

View File

@ -72,7 +72,7 @@ int PoolingFp16CPUKernel::RunImpl(int task_id) {
return RET_OK;
}
static int PoolingFp16Impl(void *cdata, int task_id) {
static int PoolingFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto pooling = reinterpret_cast<PoolingFp16CPUKernel *>(cdata);
auto error_code = pooling->RunImpl(task_id);
if (error_code != RET_OK) {

View File

@ -68,7 +68,7 @@ int PowerFp16CPUKernel::GetExpData() {
return RET_OK;
}
int PowerImplFp16(void *cdata, int task_id) {
int PowerImplFp16(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto kernel = reinterpret_cast<PowerFp16CPUKernel *>(cdata);
auto ret = kernel->RunImpl(task_id);
if (ret != RET_OK) {

View File

@ -131,7 +131,7 @@ int QuantDTypeCastFp16CPUKernel::QuantDTypeCast(int task_id) {
return RET_OK;
}
int QuantDTypeCastFP16Run(void *cdata, int task_id) {
int QuantDTypeCastFP16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto g_kernel = reinterpret_cast<QuantDTypeCastFp16CPUKernel *>(cdata);
auto ret = g_kernel->QuantDTypeCast(task_id);
if (ret != RET_OK) {

View File

@ -67,7 +67,7 @@ int ReduceFp16CPUKernel::CallReduceUnit(int task_id) {
return ret;
}
static int ReduceFp16Impl(void *cdata, int task_id) {
static int ReduceFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto reduce = reinterpret_cast<ReduceFp16CPUKernel *>(cdata);
auto error_code = reduce->CallReduceUnit(task_id);
if (error_code != RET_OK) {

View File

@ -84,7 +84,7 @@ int ScaleFp16CPUKernel::Scale(int task_id) {
return RET_OK;
}
int ScaleFp16Run(void *cdata, int task_id) {
int ScaleFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto scale = reinterpret_cast<ScaleFp16CPUKernel *>(cdata);
auto ret = scale->Scale(task_id);
if (ret != RET_OK) {

View File

@ -25,7 +25,7 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_SliceFusion;
namespace mindspore::kernel {
int SliceFp16Launch(void *cdata, int task_id) {
int SliceFp16Launch(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
if (cdata == nullptr) {
MS_LOG(ERROR) << "Input cdata is nullptr!";
return RET_ERROR;

View File

@ -84,7 +84,7 @@ int SoftmaxFp16CPUKernel::DoSoftmaxLastAxis(int task_id) {
return RET_OK;
}
int SoftmaxLastAxisFp16Run(void *cdata, int task_id) {
int SoftmaxLastAxisFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto kernel = reinterpret_cast<SoftmaxFp16CPUKernel *>(cdata);
auto ret = kernel->DoSoftmaxLastAxis(task_id);
if (ret != RET_OK) {

View File

@ -86,7 +86,7 @@ void StackFp16CPUKernel::Execute(int task_id) {
Stack(inputs, output + input_num * start * copy_size_, input_num, copy_size_, start, end);
}
static int StackRun(void *cdata, int task_id) {
static int StackRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto stack = reinterpret_cast<StackFp16CPUKernel *>(cdata);
stack->Execute(task_id);
return RET_OK;

View File

@ -66,7 +66,7 @@ int ActivationGradCPUKernelFp16::DoActivation(int task_id) {
return RET_OK;
}
int ActivationGradRunFp16(void *cdata, int task_id) {
int ActivationGradRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
MS_ASSERT(cdata != nullptr);
auto activationGrad_kernel = reinterpret_cast<ActivationGradCPUKernelFp16 *>(cdata);
auto error_code = activationGrad_kernel->DoActivation(task_id);

View File

@ -60,7 +60,7 @@ int ArithmeticSelfGradFp16CPUKernel::DoActivation(int task_id) {
return RET_OK;
}
int ArithmeticSelfGradFp16Run(void *cdata, int task_id) {
int ArithmeticSelfGradFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
MS_ASSERT(cdata != nullptr);
auto activationGrad_kernel = reinterpret_cast<ArithmeticSelfGradFp16CPUKernel *>(cdata);
auto error_code = activationGrad_kernel->DoActivation(task_id);

View File

@ -93,7 +93,7 @@ int ActivationCPUKernel::DoActivation(int task_id) {
return ret;
}
int ActivationRun(void *cdata, int task_id) {
int ActivationRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto activation_kernel = reinterpret_cast<ActivationCPUKernel *>(cdata);
auto error_code = activation_kernel->DoActivation(task_id);
if (error_code != RET_OK) {

View File

@ -104,7 +104,7 @@ int AdderCPUKernel::RunImpl(int task_id) {
return RET_OK;
}
int AdderImpl(void *cdata, int task_id) {
int AdderImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto adder = reinterpret_cast<AdderCPUKernel *>(cdata);
auto error_code = adder->RunImpl(task_id);
if (error_code != RET_OK) {

View File

@ -27,13 +27,13 @@ using mindspore::schema::PrimitiveType_AddN;
namespace mindspore::kernel {
namespace {
int AddNLaunch(void *cdata, int task_id) {
int AddNLaunch(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
if (cdata == nullptr) {
MS_LOG(ERROR) << "Input cdata is nullptr!";
return RET_NULL_PTR;
}
auto kernel = reinterpret_cast<AddNCPUKernel *>(cdata);
return kernel->AddNParallelRun(task_id);
return kernel->AddNParallelRun(task_id, lhs_scale, rhs_scale);
}
} // namespace
@ -41,7 +41,7 @@ int AddNCPUKernel::Init() { return RET_OK; }
int AddNCPUKernel::ReSize() { return RET_OK; }
int AddNCPUKernel::AddNParallelRun(int thread_id) {
int AddNCPUKernel::AddNParallelRun(int thread_id, float lhs_scale, float rhs_scale) {
int count_per_thread = UP_DIV(elements_num_, op_parameter_->thread_num_);
int count = MSMIN(count_per_thread, elements_num_ - thread_id * count_per_thread);
auto stride = count_per_thread * thread_id;

View File

@ -32,7 +32,7 @@ class AddNCPUKernel : public InnerKernel {
int Init() override;
int ReSize() override;
int Run() override;
int AddNParallelRun(int thread_id);
int AddNParallelRun(int thread_id, float lhs_scale, float rhs_scale);
private:
float *in1_addr_;

View File

@ -69,7 +69,7 @@ class ArithmeticCompareCPUKernel : public ArithmeticCPUKernel {
ArithmeticCompareFp32Func func_fp32_ = nullptr;
ArithmeticCompareIntFunc func_int32_ = nullptr;
};
int ArithmeticCompareRun(void *cdata, int task_id);
// Task callback declaration: takes an opaque cdata pointer, the task index and two scale factors.
// NOTE(review): presumably cdata points at an ArithmeticCompareCPUKernel — confirm against the definition.
int ArithmeticCompareRun(void *cdata, int task_id, float lhs_scale, float rhs_scale);
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_COMPARE_H_

View File

@ -398,7 +398,7 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) {
static_cast<uint8_t *>(output_ptr_) + offset, count, false);
}
int ArithmeticsRun(void *cdata, int task_id) {
int ArithmeticsRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto kernel = reinterpret_cast<ArithmeticCPUKernel *>(cdata);
auto ret = kernel->DoArithmetic(task_id);
if (ret != RET_OK) {

View File

@ -120,6 +120,6 @@ class ArithmeticCPUKernel : public InnerKernel {
ArithmeticOptIntRun arithmetic_opt_run_int_ = nullptr;
ArithmeticBoolRun arithmetic_run_bool_ = nullptr;
};
int ArithmeticsRun(void *cdata, int task_id);
// Task callback for ArithmeticCPUKernel: the definition reinterprets cdata as the kernel and
// calls DoArithmetic(task_id), checking the result against RET_OK.
// lhs_scale / rhs_scale are part of the callback signature.
int ArithmeticsRun(void *cdata, int task_id, float lhs_scale, float rhs_scale);
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_FP32_H_

View File

@ -104,7 +104,7 @@ int ArithmeticSelfCPUKernel::DoExecute(int task_id) {
return ret;
}
int ArithmeticSelfRun(void *cdata, int task_id) {
int ArithmeticSelfRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto kernel = reinterpret_cast<ArithmeticSelfCPUKernel *>(cdata);
auto ret = kernel->DoExecute(task_id);
if (ret != RET_OK) {

View File

@ -58,7 +58,7 @@ class ArithmeticSelfCPUKernel : public InnerKernel {
ArithmeticSelfFunc func_;
ArithmeticSelfBoolFunc func_bool_;
};
int ArithmeticSelfRun(void *cdata, int task_id);
// Task callback for ArithmeticSelfCPUKernel: the definition reinterprets cdata as the kernel and
// calls DoExecute(task_id), checking the result against RET_OK.
// lhs_scale / rhs_scale are part of the callback signature.
int ArithmeticSelfRun(void *cdata, int task_id, float lhs_scale, float rhs_scale);
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_SELF_H_

View File

@ -89,7 +89,7 @@ int BatchnormCPUKernel::DoExecute(int task_id) {
return RET_OK;
}
int BatchNormRun(void *cdata, int task_id) {
int BatchNormRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto kernel = reinterpret_cast<BatchnormCPUKernel *>(cdata);
auto ret = kernel->DoExecute(task_id);
if (ret != RET_OK) {

View File

@ -50,7 +50,7 @@ class BatchnormCPUKernel : public InnerKernel {
float default_momentum_ = -1.0f;
};
int BatchNormRun(void *cdata, int task_id);
// Task callback for BatchnormCPUKernel: the definition reinterprets cdata as the kernel and
// calls DoExecute(task_id), checking the result against RET_OK.
// lhs_scale / rhs_scale are part of the callback signature.
int BatchNormRun(void *cdata, int task_id, float lhs_scale, float rhs_scale);
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BATCHNORM_FP32_H_

View File

@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_Cast;
namespace mindspore::kernel {
namespace {
int CastRun(void *cdata, int task_id) {
int CastRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
if (cdata == nullptr) {
MS_LOG(ERROR) << "input cdata is nullptr!";
return RET_ERROR;

View File

@ -58,7 +58,7 @@ int ConcatCPUKernel::DoConcat(int task_id) {
return RET_OK;
}
int ConcatRun(void *cdata, int task_id) {
int ConcatRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto concat_kernel = reinterpret_cast<ConcatCPUKernel *>(cdata);
auto error_code = concat_kernel->DoConcat(task_id);
if (error_code != RET_OK) {

View File

@ -177,7 +177,7 @@ int Convolution1x1CPUKernel::DoConv1x1(int task_id) {
return RET_OK;
}
int Convolution1x1Run(void *cdata, int task_id) {
int Convolution1x1Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv1x1 = reinterpret_cast<Convolution1x1CPUKernel *>(cdata);
auto error_code = conv1x1->DoConv1x1(task_id);
if (error_code != RET_OK) {
@ -212,7 +212,7 @@ int Convolution1x1CPUKernel::DoConv1x1Hw(int task_id) {
return RET_OK;
}
int Convolution1x1RunHw(void *cdata, int task_id) {
int Convolution1x1RunHw(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv1x1 = reinterpret_cast<Convolution1x1CPUKernel *>(cdata);
auto error_code = conv1x1->DoConv1x1Hw(task_id);
if (error_code != RET_OK) {

View File

@ -95,7 +95,7 @@ int ConvolutionDepthwise3x3CPUKernel::Execute(int task_id) {
return RET_OK;
}
int ConvDw3x3Run(void *cdata, int task_id) {
int ConvDw3x3Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv_dw = reinterpret_cast<ConvolutionDepthwise3x3CPUKernel *>(cdata);
auto ret = conv_dw->Execute(task_id);
if (ret != RET_OK) {

View File

@ -85,7 +85,7 @@ int ConvolutionDepthwiseCPUKernel::Execute(int task_id) {
return RET_OK;
}
int ConvDwRun(void *cdata, int task_id) {
int ConvDwRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv_dw = reinterpret_cast<ConvolutionDepthwiseCPUKernel *>(cdata);
auto ret = conv_dw->Execute(task_id);
if (ret != RET_OK) {

View File

@ -133,7 +133,7 @@ int ConvolutionDepthwiseIndirectCPUKernel::Execute(int task_id) {
return RET_OK;
}
int ConvDwIndirectRun(void *cdata, int task_id) {
int ConvDwIndirectRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv_dw = reinterpret_cast<ConvolutionDepthwiseIndirectCPUKernel *>(cdata);
auto ret = conv_dw->Execute(task_id);
if (ret != RET_OK) {

View File

@ -123,7 +123,7 @@ int ConvolutionDepthwiseSWCPUKernel::Execute(int task_id) {
return RET_OK;
}
int ConvDwSWRun(void *cdata, int task_id) {
int ConvDwSWRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv_dw = reinterpret_cast<ConvolutionDepthwiseSWCPUKernel *>(cdata);
auto ret = conv_dw->Execute(task_id);
if (ret != RET_OK) {

View File

@ -126,7 +126,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) {
return RET_OK;
}
int ConvDwSWAvxRun(void *cdata, int task_id) {
int ConvDwSWAvxRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv_dw = reinterpret_cast<ConvolutionDepthwiseSWCPUKernelX86 *>(cdata);
auto ret = conv_dw->Execute(task_id);
if (ret != RET_OK) {

View File

@ -130,7 +130,7 @@ int ConvolutionCPUKernel::RunImpl(int task_id) {
return RET_OK;
}
int ConvolutionImpl(void *cdata, int task_id) {
int ConvolutionImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv = reinterpret_cast<ConvolutionCPUKernel *>(cdata);
auto error_code = conv->RunImpl(task_id);
if (error_code != RET_OK) {

View File

@ -122,7 +122,7 @@ int ConvolutionSWCPUKernel::RunImpl(int task_id) {
return RET_OK;
}
int ConvolutionSWImpl(void *cdata, int task_id) {
int ConvolutionSWImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv = reinterpret_cast<ConvolutionSWCPUKernel *>(cdata);
auto error_code = conv->RunImpl(task_id);
if (error_code != RET_OK) {

View File

@ -197,7 +197,7 @@ int ConvolutionWinogradCPUKernel::RunImpl(int task_id) {
return RET_OK;
}
int ConvolutionWinogradImpl(void *cdata, int task_id) {
int ConvolutionWinogradImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto conv = reinterpret_cast<ConvolutionWinogradCPUKernel *>(cdata);
auto error_code = conv->RunImpl(task_id);
if (error_code != RET_OK) {

View File

@ -96,7 +96,7 @@ void CropAndResizeCPUKernel::FreeTmpBuffer() {
context_->allocator->Free(line_buffer_);
}
int CropAndResizeImpl(void *cdata, int task_id) {
int CropAndResizeImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto resize = reinterpret_cast<CropAndResizeCPUKernel *>(cdata);
auto error_code = resize->RunImpl(task_id);
if (error_code != RET_OK) {

Some files were not shown because too many files have changed in this diff. [Show More]