forked from mindspore-Ecosystem/mindspore
feature, ThreadPool support unequal Task
This commit is contained in:
parent
b0a1963aa2
commit
a1062ab942
|
@ -35,6 +35,7 @@
|
|||
|
||||
#define MSMIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
#define MSMAX(x, y) ((x) > (y) ? (x) : (y))
|
||||
#define MSCEIL(x) (int)((x) + (((x) - (int)(x)) > 0 ? 1 : 0))
|
||||
|
||||
#define UP_DIV(x, y) (((x) + (y) - (1)) / (y))
|
||||
#define UP_ROUND(x, y) (((x) + (y) - (1)) / (y) * (y))
|
||||
|
|
|
@ -27,7 +27,6 @@
|
|||
#include "thread/threadpool.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
||||
#define MAX_PATH_SIZE (256)
|
||||
|
||||
enum Arch {
|
||||
|
@ -215,8 +214,10 @@ int CoreAffinity::InitHardwareCoreInfo() {
|
|||
core_num_ = std::thread::hardware_concurrency();
|
||||
std::vector<CpuInfo> freq_set;
|
||||
freq_set.resize(core_num_);
|
||||
core_freq_.resize(core_num_);
|
||||
for (size_t i = 0; i < core_num_; ++i) {
|
||||
int max_freq = GetMaxFrequency(i);
|
||||
core_freq_[i] = max_freq;
|
||||
freq_set[i].core_id = i;
|
||||
freq_set[i].max_freq = max_freq;
|
||||
freq_set[i].arch = UnKnown_Arch;
|
||||
|
@ -329,6 +330,7 @@ int CoreAffinity::BindThreadsToCoreList(const std::vector<Worker *> &workers) co
|
|||
return THREAD_ERROR;
|
||||
}
|
||||
THREAD_INFO("set thread[%zu] affinity to core[%d] success", i, bind_id_[i % window]);
|
||||
// Record the frequency of the core reported in the affinity log message above.
// bind_id_ is reused cyclically, so it must be indexed with i % window here as
// well; indexing with plain i reads past the end of bind_id_ whenever there
// are more workers than entries in the bind list.
workers[i]->frequency = core_freq_[bind_id_[i % window]];
|
||||
}
|
||||
#endif // BIND_CORE
|
||||
return THREAD_OK;
|
||||
|
|
|
@ -26,7 +26,6 @@
|
|||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
|
||||
enum BindMode {
|
||||
Power_NoBind = 0, // free schedule
|
||||
Power_Higher = 1,
|
||||
|
@ -61,6 +60,9 @@ class CoreAffinity {
|
|||
// sorted_id contains the ordered CPU core id
|
||||
// the size of sorted_id is equal to the size of hardware_concurrency
|
||||
std::vector<int> sorted_id_;
|
||||
// used to store the frequency of core
|
||||
// the core id corresponds to the index
|
||||
std::vector<int> core_freq_;
|
||||
size_t core_num_{0};
|
||||
size_t higher_num_{0};
|
||||
};
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
#include "thread/core_affinity.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
||||
InterThreadPool::~InterThreadPool() {
|
||||
{
|
||||
THREAD_INFO("wait util actor queue is empty");
|
||||
|
@ -31,6 +30,17 @@ InterThreadPool::~InterThreadPool() {
|
|||
DestructThreads();
|
||||
}
|
||||
|
||||
// Thread entry loop for a worker of the inter-op pool.
// While the pool is alive, each iteration dispatches on the worker's type:
// kernel threads call KernelThreadRun(worker), actor threads call
// ActorThreadRun(). A null worker makes the function return immediately.
void InterThreadPool::ThreadAsyncRun(Worker *worker) {
  if (worker == nullptr) {
    return;
  }
  while (alive_) {
    if (worker->type == kKernelThread) {
      KernelThreadRun(worker);
      continue;
    }
    // the type is re-read here, so a worker whose type changed is picked up
    if (worker->type == kActorThread) {
      ActorThreadRun();
    }
  }
}
|
||||
|
||||
void InterThreadPool::ActorThreadRun() {
|
||||
ActorReference actor;
|
||||
{
|
||||
|
@ -46,17 +56,6 @@ void InterThreadPool::ActorThreadRun() {
|
|||
finish_cond_var_.notify_one();
|
||||
}
|
||||
|
||||
void InterThreadPool::ThreadAsyncRun(Worker *worker) {
|
||||
THREAD_RETURN_IF_NULL(worker);
|
||||
while (alive_) {
|
||||
if (worker->type == kKernelThread) {
|
||||
KernelThreadRun(worker);
|
||||
} else if (worker->type == kActorThread) {
|
||||
ActorThreadRun();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InterThreadPool::EnqueReadyActor(const ActorReference &actor) {
|
||||
{
|
||||
std::lock_guard<std::mutex> _l(actor_mutex_);
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
#include "actor/actor.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
||||
class InterThreadPool : public ThreadPool {
|
||||
public:
|
||||
// create ThreadPool that contains inter thread and intra thread
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_LOG_H_
#define MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_LOG_H_

// The C library header must be included at global scope. Including it inside
// namespace mindspore would declare printf and friends in that namespace.
#ifdef THREAD_POOL_DEBUG
#include <stdio.h>
#endif

namespace mindspore {
// Logging helpers. They print "[LEVEL] function|line: message" through printf
// when THREAD_POOL_DEBUG is defined and expand to nothing otherwise.
// Note: `content` is stringified with '#', so the quotes of the format literal
// appear in the printed text.
#ifdef THREAD_POOL_DEBUG
#define THREAD_INFO(content, args...) \
  { printf("[INFO] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
#define THREAD_ERROR(content, args...) \
  { printf("[ERROR] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
#else
#define THREAD_INFO(content, args...)
#define THREAD_ERROR(content, args...)
#endif

// Returns THREAD_ERROR from the enclosing function when `ptr` is null.
// Only usable in functions whose return type accepts a ThreadRet value.
#define THREAD_ERROR_IF_NULL(ptr) \
  do {                            \
    if ((ptr) == nullptr) {       \
      return THREAD_ERROR;        \
    }                             \
  } while (0)

// Returns from the enclosing void function when `ptr` is null.
#define THREAD_RETURN_IF_NULL(ptr) \
  do {                             \
    if ((ptr) == nullptr) {        \
      return;                      \
    }                              \
  } while (0)

// Status codes used by the thread pool. THREAD_ERROR the enumerator and
// THREAD_ERROR(...) the function-like macro coexist because the macro is only
// expanded when the name is followed by '('.
enum ThreadRet { THREAD_OK = 0, THREAD_ERROR = 1 };
}  // namespace mindspore

#endif  // MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_LOG_H_
|
|
@ -19,9 +19,10 @@
|
|||
#include "thread/core_affinity.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
||||
constexpr int kDefaultSpinCount = 300000;
|
||||
|
||||
// Returns the fraction partial / total as a float.
// The quotient is evaluated in double precision (multiplied by 10, divided by
// total, then divided by 10 again) and narrowed to float on return.
// No check is made for total == 0.
float PartialScale(int partial, int total) {
  const double tenfold = partial * 10.0 / total;
  return tenfold / 10.0;
}
|
||||
|
||||
ThreadPool::~ThreadPool() {
|
||||
alive_.store(false);
|
||||
DestructThreads();
|
||||
|
@ -37,10 +38,8 @@ void ThreadPool::DestructThreads() {
|
|||
worker = nullptr;
|
||||
}
|
||||
workers_.clear();
|
||||
if (affinity_ != nullptr) {
|
||||
delete affinity_;
|
||||
affinity_ = nullptr;
|
||||
}
|
||||
delete affinity_;
|
||||
affinity_ = nullptr;
|
||||
THREAD_INFO("deconstruct threads success");
|
||||
}
|
||||
|
||||
|
@ -65,11 +64,18 @@ int ThreadPool::CreateThreads(size_t thread_num) {
|
|||
return THREAD_OK;
|
||||
}
|
||||
|
||||
// Thread entry loop for a pool worker: keeps calling KernelThreadRun(worker)
// for as long as the pool's alive_ flag is set. Returns immediately when
// `worker` is null.
void ThreadPool::ThreadAsyncRun(Worker *worker) {
  if (worker == nullptr) {
    return;
  }
  for (; alive_;) {
    KernelThreadRun(worker);
  }
}
|
||||
|
||||
void ThreadPool::KernelThreadRun(Worker *worker) {
|
||||
if (worker->active) {
|
||||
Task *task = worker->task;
|
||||
THREAD_RETURN_IF_NULL(task);
|
||||
task->status |= task->func(task->content, ++task->task_id);
|
||||
task->status |= task->func(task->content, worker->task_id, worker->lhs_scale, worker->rhs_scale);
|
||||
{
|
||||
std::lock_guard<std::mutex> _l(worker->mutex);
|
||||
worker->task = nullptr;
|
||||
|
@ -91,21 +97,18 @@ void ThreadPool::KernelThreadRun(Worker *worker) {
|
|||
}
|
||||
}
|
||||
|
||||
void ThreadPool::ThreadAsyncRun(Worker *worker) {
|
||||
THREAD_RETURN_IF_NULL(worker);
|
||||
while (alive_) {
|
||||
KernelThreadRun(worker);
|
||||
}
|
||||
}
|
||||
|
||||
int ThreadPool::ParallelLaunch(const Func &func, Contend contend, int task_num) {
|
||||
int ThreadPool::ParallelLaunch(const Func &func, Content content, int task_num) {
|
||||
// distribute task to the KernelThread and the free ActorThread,
|
||||
// if the task num is greater than the KernelThread num
|
||||
Task task = Task(func, contend);
|
||||
DistributeTask(&task, task_num);
|
||||
|
||||
task.status |= task.func(task.content, 0);
|
||||
++task.finished;
|
||||
Task task = Task(func, content);
|
||||
Worker *curr = CurrentWorker();
|
||||
if (inter_thread_num_ == thread_num_ || curr == nullptr) {
|
||||
SyncRunTask(&task, task_num);
|
||||
} else {
|
||||
DistributeTask(&task, task_num);
|
||||
task.status |= task.func(task.content, 0, curr->lhs_scale, curr->rhs_scale);
|
||||
++task.finished;
|
||||
}
|
||||
// synchronization
|
||||
// wait until the finished is equal to task_num
|
||||
while (task.finished != task_num) {
|
||||
|
@ -118,10 +121,28 @@ int ThreadPool::ParallelLaunch(const Func &func, Contend contend, int task_num)
|
|||
return THREAD_OK;
|
||||
}
|
||||
|
||||
// Runs all task_num slices of `task` one after another on the calling thread.
// Slice i is handed the scale interval [i * step, (i + 1) * step) with
// step = kMaxScale / task_num; the upper bound of the last slice is set to
// exactly kMaxScale. Each slice's return code is OR-ed into task->status and
// task->finished is incremented once per slice.
void ThreadPool::SyncRunTask(Task *task, int task_num) const {
  const float step = kMaxScale / task_num;
  int idx = 0;
  while (idx < task_num) {
    const float lower = idx * step;
    // the final slice always ends at kMaxScale instead of a computed product
    const float upper = (idx + 1 == task_num) ? kMaxScale : (idx + 1) * step;
    task->status |= task->func(task->content, idx, lower, upper);
    ++task->finished;
    ++idx;
  }
}
|
||||
|
||||
void ThreadPool::DistributeTask(Task *task, int task_num) {
|
||||
int count = 0;
|
||||
int count = 1;
|
||||
int sum_frequency = 0;
|
||||
std::vector<Worker *> assigned;
|
||||
Worker *curr = CurrentWorker();
|
||||
THREAD_RETURN_IF_NULL(curr);
|
||||
assigned.push_back(curr);
|
||||
sum_frequency += curr->frequency;
|
||||
|
||||
Worker *worker;
|
||||
while (count < task_num - 1) {
|
||||
while (count < task_num) {
|
||||
{
|
||||
std::lock_guard<std::mutex> _l(pool_mutex_);
|
||||
if (freelist_.empty()) {
|
||||
|
@ -130,14 +151,46 @@ void ThreadPool::DistributeTask(Task *task, int task_num) {
|
|||
worker = freelist_.back();
|
||||
freelist_.pop_back();
|
||||
}
|
||||
{
|
||||
std::lock_guard<std::mutex> _l(worker->mutex);
|
||||
worker->task = task;
|
||||
worker->active = true;
|
||||
}
|
||||
worker->cond_var.notify_one();
|
||||
assigned.push_back(worker);
|
||||
sum_frequency += worker->frequency;
|
||||
count++;
|
||||
}
|
||||
|
||||
CalculateScales(assigned, sum_frequency);
|
||||
ActiveWorkers(assigned, task, task_num);
|
||||
}
|
||||
|
||||
// Assigns every worker in `assigned` a consecutive [lhs_scale, rhs_scale]
// interval whose width is the worker's frequency divided by sum_frequency
// (via PartialScale). The running upper bound is capped at 1. Processing stops
// at the first null entry, leaving later workers untouched.
void ThreadPool::CalculateScales(const std::vector<Worker *> &assigned, int sum_frequency) const {
  float cursor = 0.;
  for (auto it = assigned.begin(); it != assigned.end(); ++it) {
    Worker *current = *it;
    if (current == nullptr) {
      return;
    }
    current->lhs_scale = cursor;
    const float advanced = cursor + PartialScale(current->frequency, sum_frequency);
    // cap the accumulated share at 1
    cursor = advanced < 1 ? advanced : 1;
    current->rhs_scale = cursor;
  }
}
|
||||
|
||||
// Hands `task` to workers[1] .. workers[task_num - 1] and wakes each of them.
// Index 0 is skipped by the loop. For every activated worker the task pointer,
// its task id (its index in `workers`) and the active flag are written under
// the worker's mutex before its condition variable is notified.
//
// The loop bound is clamped to workers.size(): indexing purely by task_num
// reads past the end of the vector if fewer than task_num workers were
// collected. Processing stops at the first null entry.
void ThreadPool::ActiveWorkers(const std::vector<Worker *> &workers, Task *task, int task_num) const {
  const size_t wanted = task_num > 0 ? static_cast<size_t>(task_num) : 0;
  const size_t limit = wanted < workers.size() ? wanted : workers.size();
  for (size_t i = 1; i < limit; ++i) {
    Worker *worker = workers[i];
    THREAD_RETURN_IF_NULL(worker);
    std::lock_guard<std::mutex> _l(worker->mutex);
    worker->task = task;
    worker->task_id = static_cast<int>(i);
    worker->active = true;
    worker->cond_var.notify_one();
  }
}
|
||||
|
||||
// Looks up the pool worker whose std::thread has the same id as the calling
// thread. Returns that worker, or nullptr when the caller is not one of the
// threads stored in workers_.
Worker *ThreadPool::CurrentWorker() const {
  const auto self = std::this_thread::get_id();
  for (auto it = workers_.begin(); it != workers_.end(); ++it) {
    Worker *candidate = *it;
    if (candidate->thread.get_id() == self) {
      return candidate;
    }
  }
  return nullptr;
}
|
||||
|
||||
int ThreadPool::InitAffinityInfo() {
|
||||
|
|
|
@ -24,46 +24,24 @@
|
|||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <new>
|
||||
#include "thread/threadlog.h"
|
||||
#include "thread/core_affinity.h"
|
||||
|
||||
namespace mindspore {
|
||||
constexpr int kDefaultFrequency = 1;
|
||||
constexpr float kMaxScale = 1.;
|
||||
|
||||
#ifdef THREAD_POOL_DEBUG
|
||||
#include <stdio.h>
|
||||
#define THREAD_INFO(content, args...) \
|
||||
{ printf("[INFO] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
|
||||
#define THREAD_ERROR(content, args...) \
|
||||
{ printf("[ERROR] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
|
||||
#else
|
||||
#define THREAD_INFO(content, ...)
|
||||
#define THREAD_ERROR(content, ...)
|
||||
#endif
|
||||
|
||||
#define THREAD_ERROR_IF_NULL(ptr) \
|
||||
do { \
|
||||
if ((ptr) == nullptr) { \
|
||||
return THREAD_ERROR; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define THREAD_RETURN_IF_NULL(ptr) \
|
||||
do { \
|
||||
if ((ptr) == nullptr) { \
|
||||
return; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
enum ThreadRet { THREAD_OK = 0, THREAD_ERROR = 1 };
|
||||
enum ThreadType { kActorThread = 0, kKernelThread = 1 };
|
||||
|
||||
using Func = int (*)(void *arg, int);
|
||||
using Contend = void *;
|
||||
// used in scenarios with unequal division of task
|
||||
// the parameters indicate the start and end coefficients
|
||||
using Func = int (*)(void *, int, float, float);
|
||||
using Content = void *;
|
||||
|
||||
typedef struct Task {
|
||||
Task(Func f, Contend c) : func(f), content(c) {}
|
||||
Task(Func f, Content c) : func(f), content(c) {}
|
||||
Func func;
|
||||
Contend content;
|
||||
std::atomic_int task_id{0};
|
||||
Content content;
|
||||
std::atomic_int finished{0};
|
||||
std::atomic_int status{THREAD_OK}; // return status, RET_OK
|
||||
} Task;
|
||||
|
@ -72,9 +50,13 @@ typedef struct Worker {
|
|||
std::thread thread;
|
||||
std::atomic_int type{kActorThread};
|
||||
std::atomic_bool active{false};
|
||||
Task *task{nullptr};
|
||||
std::mutex mutex;
|
||||
std::condition_variable cond_var;
|
||||
Task *task{nullptr};
|
||||
int task_id{0};
|
||||
float lhs_scale{0.};
|
||||
float rhs_scale{kMaxScale};
|
||||
int frequency{kDefaultFrequency};
|
||||
int spin{0};
|
||||
} Worker;
|
||||
|
||||
|
@ -90,7 +72,7 @@ class ThreadPool {
|
|||
|
||||
int SetProcessAffinity(BindMode bind_mode) const;
|
||||
|
||||
int ParallelLaunch(const Func &func, Contend contend, int task_num);
|
||||
int ParallelLaunch(const Func &func, Content content, int task_num);
|
||||
|
||||
protected:
|
||||
ThreadPool() = default;
|
||||
|
@ -103,7 +85,13 @@ class ThreadPool {
|
|||
virtual void ThreadAsyncRun(Worker *worker);
|
||||
void KernelThreadRun(Worker *worker);
|
||||
|
||||
void SyncRunTask(Task *task, int task_num) const;
|
||||
|
||||
void DistributeTask(Task *task, int task_num);
|
||||
void CalculateScales(const std::vector<Worker *> &workers, int sum_frequency) const;
|
||||
void ActiveWorkers(const std::vector<Worker *> &workers, Task *task, int task_num) const;
|
||||
|
||||
Worker *CurrentWorker() const;
|
||||
|
||||
std::mutex pool_mutex_;
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ int SetCoreAffinity(int bind_mode);
|
|||
|
||||
int GetCurrentThreadNum();
|
||||
|
||||
int ParallelLaunch(int (*func)(void *, int), void *content, int task_num);
|
||||
int ParallelLaunch(int (*func)(void *, int, float, float), void *content, int task_num);
|
||||
|
||||
void ClearThreadPool();
|
||||
|
||||
|
|
|
@ -184,9 +184,10 @@ int DeConvolutionFP32Coder::DoCode(CoderContext *const context) {
|
|||
code.CodeBaseStruct("DeConvFp32Args", kRunArgs, packed_input_, packed_weight_, packed_bias_, packed_output_,
|
||||
output_ptr_, tmp_buffer_, "&matmul_parameter", "&conv_parameter");
|
||||
if (!support_parallel_) {
|
||||
code.CodeFunction("DeConvFp32Run", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("DeConvFp32Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction(kParallelLaunch, "DeConvFp32Run", kRunArgsAddr, "conv_parameter.thread_num_");
|
||||
code.CodeFunction(kParallelLaunch, "DeConvFp32Run", kRunArgsAddr, "conv_parameter.thread_num_", kLhsScale,
|
||||
kRhsScale);
|
||||
}
|
||||
}
|
||||
context->AppendCode(code.str());
|
||||
|
|
|
@ -156,15 +156,15 @@ int AddInt8Coder::DoCode(CoderContext *const context) {
|
|||
support_opt_add_, input0, input1, output_tensor_);
|
||||
if (support_parallel_) {
|
||||
if (arith_para_->broadcasting_) {
|
||||
code.CodeFunction(kParallelLaunch, "AddBroadcastInt8Run", kRunArgsAddr, gThreadNum);
|
||||
code.CodeFunction(kParallelLaunch, "AddBroadcastInt8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction(kParallelLaunch, "AddInt8Run", kRunArgsAddr, gThreadNum);
|
||||
code.CodeFunction(kParallelLaunch, "AddInt8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
|
||||
}
|
||||
} else {
|
||||
if (arith_para_->broadcasting_) {
|
||||
code.CodeFunction("AddBroadcastInt8Run", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("AddBroadcastInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction("AddInt8Run", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("AddInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
}
|
||||
}
|
||||
context->AppendCode(code.str());
|
||||
|
|
|
@ -113,9 +113,9 @@ int ConcatInt8Coder::DoCode(CoderContext *const context) {
|
|||
code.CodeBaseStruct<false>("ConcatInt8Args", kRunArgs, "input_data", output_tensor_, "&concat_param", axis_,
|
||||
before_axis_size, count_unit_);
|
||||
if (support_parallel_) {
|
||||
code.CodeFunction(kParallelLaunch, "ConcatInt8Run", kRunArgsAddr, gThreadNum);
|
||||
code.CodeFunction(kParallelLaunch, "ConcatInt8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction("ConcatInt8Run", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("ConcatInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
}
|
||||
context->AppendCode(code.str());
|
||||
return RET_OK;
|
||||
|
|
|
@ -88,9 +88,9 @@ int Conv2D1x1Int8Coder::DoCode(CoderContext *const context) {
|
|||
/* input transpose and input sum */
|
||||
code << "if (GetSupportOptFlag()) {\n";
|
||||
if (support_parallel_) {
|
||||
code.CodeFunction(kParallelLaunch, "OcOptPre", kRunArgsAddr, "args.thread_count_hw");
|
||||
code.CodeFunction(kParallelLaunch, "OcOptPre", kRunArgsAddr, "args.thread_count_hw", kLhsScale, kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction("OcOptPre", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("OcOptPre", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
}
|
||||
code << "} else {\n";
|
||||
code << "RowMajor2Row16x4MajorInt8(args.input_ptr_, args.packed_input_, args.matmul_param_->row_, "
|
||||
|
@ -107,30 +107,30 @@ int Conv2D1x1Int8Coder::DoCode(CoderContext *const context) {
|
|||
/* matmul parallel by oc */
|
||||
code << "if (GetSupportOptFlag()) {\n";
|
||||
if (support_parallel_) {
|
||||
code.CodeFunction(kParallelLaunch, "RunArm64OptOc", kRunArgsAddr, "args.thread_count_oc");
|
||||
code.CodeFunction(kParallelLaunch, "RunArm64OptOc", kRunArgsAddr, "args.thread_count_oc", kLhsScale, kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction("RunArm64OptOc", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("RunArm64OptOc", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
}
|
||||
code << "} else {\n";
|
||||
if (support_parallel_) {
|
||||
code.CodeFunction(kParallelLaunch, "RunArmOc", kRunArgsAddr, "args.thread_count_oc");
|
||||
code.CodeFunction(kParallelLaunch, "RunArmOc", kRunArgsAddr, "args.thread_count_oc", kLhsScale, kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction("RunArmOc", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("RunArmOc", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
}
|
||||
code << "}\n";
|
||||
code << "} else {\n";
|
||||
/* matmul parallel by hw */
|
||||
code << "if (GetSupportOptFlag()) {\n";
|
||||
if (support_parallel_) {
|
||||
code.CodeFunction(kParallelLaunch, "RunArm64OptHw", kRunArgsAddr, "args.thread_count_hw");
|
||||
code.CodeFunction(kParallelLaunch, "RunArm64OptHw", kRunArgsAddr, "args.thread_count_hw, kLhsScale, kRhsScale");
|
||||
} else {
|
||||
code.CodeFunction("RunArm64OptHw", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("RunArm64OptHw", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
}
|
||||
code << "} else {\n";
|
||||
if (support_parallel_) {
|
||||
code.CodeFunction(kParallelLaunch, "RunArmHw", kRunArgsAddr, "args.thread_count_hw");
|
||||
code.CodeFunction(kParallelLaunch, "RunArmHw", kRunArgsAddr, "args.thread_count_hw", kLhsScale, kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction("RunArmHw", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("RunArmHw", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
}
|
||||
code << "}\n";
|
||||
code << "}\n";
|
||||
|
|
|
@ -163,10 +163,11 @@ int Conv2D3x3Int8Coder::DoCode(CoderContext *const context) {
|
|||
if (thread_num_ > 1) {
|
||||
code.CodeBaseStruct("Conv3x3Int8Args", kRunArgs, c8_input_, transformed_filter_addr_, new_bias_addr_,
|
||||
output_tensor_, tile_buffer_, block_unit_buffer_, tmp_dst_buffer_, tmp_out_, "&conv_param_");
|
||||
code.CodeFunction(kParallelLaunch, "Conv3x3Int8Run", kRunArgsAddr, gThreadNum);
|
||||
code.CodeFunction(kParallelLaunch, "Conv3x3Int8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction("Conv3x3Int8", c8_input_, transformed_filter_addr_, new_bias_addr_, output_tensor_, tile_buffer_,
|
||||
block_unit_buffer_, tmp_dst_buffer_, tmp_out_, kDefaultTaskId, "&conv_param_");
|
||||
block_unit_buffer_, tmp_dst_buffer_, tmp_out_, kDefaultTaskId, "&conv_param_", kLhsScale,
|
||||
kRhsScale);
|
||||
}
|
||||
code.CodeFunction("PackNC4HW4ToNHWCInt8", tmp_out_, output_tensor_, conv_param_->output_batch_,
|
||||
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
|
||||
|
|
|
@ -237,9 +237,9 @@ int Conv2DINT8Coder::DoCode(CoderContext *const context) {
|
|||
}
|
||||
|
||||
if (support_parallel_) {
|
||||
code.CodeFunction(kParallelLaunch, "ConvolutionInt8Run", kRunArgsAddr, gThreadNum);
|
||||
code.CodeFunction(kParallelLaunch, "ConvolutionInt8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction("ConvolutionInt8Run", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("ConvolutionInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
}
|
||||
context->AppendCode(code.str());
|
||||
return RET_OK;
|
||||
|
|
|
@ -122,9 +122,10 @@ int ConvolutionDepthwiseINT8Coder::DoCode(CoderContext *const context) {
|
|||
code.CodeBaseStruct("ConvDepthwiseInt8Args", kRunArgs, output_tensor_, row_buffer_, input_tensor_, packed_weight_,
|
||||
bias_data_, "&conv_param");
|
||||
if (support_parallel_) {
|
||||
code.CodeFunction(kParallelLaunch, "ConvDepthwiseInt8Run", kRunArgsAddr, "conv_param.thread_num_");
|
||||
code.CodeFunction(kParallelLaunch, "ConvDepthwiseInt8Run", kRunArgsAddr, "conv_param.thread_num_", kLhsScale,
|
||||
kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction("ConvDepthwiseInt8Run", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("ConvDepthwiseInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
}
|
||||
context->AppendCode(code.str());
|
||||
return RET_OK;
|
||||
|
|
|
@ -92,9 +92,9 @@ int ResizeInt8Coder::DoCode(CoderContext *const context) {
|
|||
code.CodeBaseStruct("ResizeInt8Args", kRunArgs, input_tensor_, output_tensor_, "input_shape", "output_shape",
|
||||
align_corners, gThreadNum);
|
||||
if (support_parallel_) {
|
||||
code.CodeFunction(kParallelLaunch, "ResizeInt8Run", kRunArgsAddr, gThreadNum);
|
||||
code.CodeFunction(kParallelLaunch, "ResizeInt8Run", kRunArgsAddr, gThreadNum, kLhsScale, kRhsScale);
|
||||
} else {
|
||||
code.CodeFunction("ResizeInt8Run", kRunArgsAddr, kDefaultTaskId);
|
||||
code.CodeFunction("ResizeInt8Run", kRunArgsAddr, kDefaultTaskId, kLhsScale, kRhsScale);
|
||||
}
|
||||
} else {
|
||||
MS_LOG(WARNING) << "unsupported parallel launch currently";
|
||||
|
|
|
@ -35,6 +35,8 @@ constexpr auto gThreadNum = "g_thread_num";
|
|||
constexpr auto kRunArgs = "args";
|
||||
constexpr auto kRunArgsAddr = "(void *)&args";
|
||||
|
||||
constexpr float kLhsScale = 0;
|
||||
constexpr float kRhsScale = 1;
|
||||
} // namespace mindspore::lite::micro
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_PARALLEL_H_
|
||||
|
|
|
@ -53,7 +53,7 @@ int DoDeconvFp32(const float *packed_input, const float *packed_weight, const fl
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int DeConvFp32Run(void *cdata, int task_id) {
|
||||
int DeConvFp32Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
DeConvFp32Args *args = (DeConvFp32Args *)cdata;
|
||||
const MatMulParameter *matmul_param = args->matmul_param_;
|
||||
const ConvParameter *conv_param = args->conv_param_;
|
||||
|
|
|
@ -40,7 +40,7 @@ int DoDeconvFp32(const float *packed_input, const float *packed_weight, const fl
|
|||
float *output, float *tmp_ori_buffer, const MatMulParameter *matmul_param,
|
||||
const ConvParameter *conv_param, int task_id);
|
||||
|
||||
int DeConvFp32Run(void *cdata, int task_id);
|
||||
int DeConvFp32Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
#include "wrapper/int8/add_int8_wrapper.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
int AddBroadcastInt8Run(void *cdata, int task_id) {
|
||||
int AddBroadcastInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
AddInt8Args *args = (AddInt8Args *)(cdata);
|
||||
int stride = UP_DIV(args->out_size_, args->thread_count_);
|
||||
int real_out_count = MSMIN(stride, args->out_size_ - stride * task_id);
|
||||
|
@ -42,7 +42,7 @@ int AddBroadcastInt8Run(void *cdata, int task_id) {
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int AddInt8Run(void *cdata, int task_id) {
|
||||
int AddInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
AddInt8Args *args = (AddInt8Args *)(cdata);
|
||||
/* no need broadcast */
|
||||
int stride = UP_DIV(args->elements_num_, args->thread_count_);
|
||||
|
|
|
@ -38,9 +38,9 @@ typedef struct {
|
|||
int8_t *output_data_;
|
||||
} AddInt8Args;
|
||||
|
||||
int AddBroadcastInt8Run(void *cdata, int task_id);
|
||||
int AddBroadcastInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
int AddInt8Run(void *cdata, int task_id);
|
||||
int AddInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
#include "nnacl/int8/batchnorm_int8.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
int BatchNormInt8Run(void *cdata, int task_id) {
|
||||
int BatchNormInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
BatchNormArgs *args = (BatchNormArgs *)(cdata);
|
||||
BatchNormInt8(args->out_addr_, args->in_addr_, args->alpha_addr_, args->beta_addr_, task_id, args->batchnorm_param_);
|
||||
return NNACL_OK;
|
||||
|
|
|
@ -27,6 +27,6 @@ typedef struct BatchNormArgs {
|
|||
BatchNormParameter *batchnorm_param_;
|
||||
} BatchNormArgs;
|
||||
|
||||
int BatchNormInt8Run(void *cdata, int task_id);
|
||||
int BatchNormInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_WRAPPER_INT8_BATCHNORM_INT8_WRAPPER_H_
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
#include "wrapper/int8/concat_int8_wrapper.h"
|
||||
|
||||
int ConcatInt8Run(void *cdata, int task_id) {
|
||||
int ConcatInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
ConcatInt8Args *args = (ConcatInt8Args *)cdata;
|
||||
int64_t real_dst_count = MSMIN(args->before_axis_size_ - task_id * args->count_unit_, args->count_unit_);
|
||||
if (real_dst_count <= 0) {
|
||||
|
|
|
@ -30,6 +30,6 @@ typedef struct {
|
|||
int64_t count_unit_;
|
||||
} ConcatInt8Args;
|
||||
|
||||
int ConcatInt8Run(void *cdata, int task_id);
|
||||
int ConcatInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_INT8_CONCAT_WRAPPER_INT8_WRAPPER_H_
|
||||
|
|
|
@ -30,7 +30,7 @@ void Pre1x1Trans(Conv1x1Args *args, int8_t *src_input, int8_t *src_output) {
|
|||
}
|
||||
}
|
||||
|
||||
int OcOptPre(void *cdata, int task_id) {
|
||||
int OcOptPre(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
Conv1x1Args *args = (Conv1x1Args *)(cdata);
|
||||
int cur_stride = args->thread_stride_hw_ * C4NUM;
|
||||
int res_stride = args->matmul_param_->row_ - task_id * args->thread_stride_hw_ * C4NUM;
|
||||
|
@ -51,7 +51,7 @@ int OcOptPre(void *cdata, int task_id) {
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int RunArm64OptOc(void *cdata, int task_id) {
|
||||
int RunArm64OptOc(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
Conv1x1Args *args = (Conv1x1Args *)(cdata);
|
||||
int stride = args->thread_stride_oc_ * C16NUM;
|
||||
int cur_stride = task_id * stride;
|
||||
|
@ -77,7 +77,7 @@ int RunArm64OptOc(void *cdata, int task_id) {
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int RunArmOc(void *cdata, int task_id) {
|
||||
int RunArmOc(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
Conv1x1Args *args = (Conv1x1Args *)(cdata);
|
||||
#ifdef ENABLE_ARM32
|
||||
int col_tile = C2NUM;
|
||||
|
@ -108,7 +108,7 @@ int RunArmOc(void *cdata, int task_id) {
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int RunArm64OptHw(void *cdata, int task_id) {
|
||||
int RunArm64OptHw(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
Conv1x1Args *args = (Conv1x1Args *)(cdata);
|
||||
int cur_stride = args->thread_stride_hw_ * C4NUM;
|
||||
int res_stride = args->matmul_param_->row_ - task_id * args->thread_stride_hw_ * C4NUM;
|
||||
|
@ -134,7 +134,7 @@ int RunArm64OptHw(void *cdata, int task_id) {
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int RunArmHw(void *cdata, int task_id) {
|
||||
int RunArmHw(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
Conv1x1Args *args = (Conv1x1Args *)(cdata);
|
||||
int cur_stride = args->thread_stride_hw_ * C4NUM;
|
||||
int res_stride = args->matmul_param_->row_ - task_id * args->thread_stride_hw_ * C4NUM;
|
||||
|
|
|
@ -48,10 +48,10 @@ typedef struct {
|
|||
|
||||
void Conv1x1PreRun(Conv1x1Args *args, int thread_num);
|
||||
void Pre1x1Trans(Conv1x1Args *args, int8_t *src_input, int8_t *src_output);
|
||||
int OcOptPre(void *cdata, int task_id);
|
||||
int RunArm64OptOc(void *cdata, int task_id);
|
||||
int RunArmOc(void *cdata, int task_id);
|
||||
int RunArm64OptHw(void *cdata, int task_id);
|
||||
int RunArmHw(void *cdata, int task_id);
|
||||
int OcOptPre(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
int RunArm64OptOc(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
int RunArmOc(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
int RunArm64OptHw(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
int RunArmHw(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_WRAPPER_INT8_CONV1X1_RUN_H_
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
#include "wrapper/int8/conv3x3_run_int8_wrapper.h"
|
||||
|
||||
int Conv3x3Int8Run(void *cdata, int task_id) {
|
||||
int Conv3x3Int8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
Conv3x3Int8Args *args = (Conv3x3Int8Args *)cdata;
|
||||
Conv3x3Int8(args->input_data, args->transed_weight, args->bias_data, args->output_data, args->tile_buffer,
|
||||
args->block_unit_buffer, args->tmp_dst_buffer, args->tmp_out, task_id, args->conv_param);
|
||||
|
|
|
@ -33,6 +33,6 @@ typedef struct {
|
|||
ConvParameter *conv_param;
|
||||
} Conv3x3Int8Args;
|
||||
|
||||
int Conv3x3Int8Run(void *cdata, int task_id);
|
||||
int Conv3x3Int8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_INT8_CONV3x3_WRAPPER_INT8_WRAPPER_H_
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
#include "wrapper/int8/convolution_depthwise_int8_wrapper.h"
|
||||
|
||||
int ConvDepthwiseInt8Run(void *cdata, int task_id) {
|
||||
int ConvDepthwiseInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
ConvDepthwiseInt8Args *args = (ConvDepthwiseInt8Args *)cdata;
|
||||
int32_t *buffer = args->row_buffer_ + args->conv_param_->output_w_ * args->conv_param_->output_channel_ * task_id;
|
||||
ConvDwInt8(args->output_data_, buffer, args->input_data_, args->weight_data_, args->bias_data_, args->conv_param_,
|
||||
|
|
|
@ -30,6 +30,6 @@ typedef struct {
|
|||
const ConvParameter *conv_param_;
|
||||
} ConvDepthwiseInt8Args;
|
||||
|
||||
int ConvDepthwiseInt8Run(void *cdata, int task_id);
|
||||
int ConvDepthwiseInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_INT8_CONVOLUTION_DEPTHWISE_WRAPPER_INT8_WRAPPER_H_
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
#include "wrapper/int8/convolution_int8_wrapper.h"
|
||||
|
||||
int ConvolutionInt8Run(void *cdata, int task_id) {
|
||||
int ConvolutionInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
ConvolutionInt8Args *args = (ConvolutionInt8Args *)cdata;
|
||||
ConvInt8(args->input_data_, args->packed_input_, args->matmul_input_, args->packed_weight_, args->bias_data_,
|
||||
args->output_data_, args->filter_zp_, args->input_sum_, task_id, args->conv_param_, args->matmul_func_,
|
||||
|
|
|
@ -36,6 +36,6 @@ typedef struct {
|
|||
bool is_optimize_;
|
||||
} ConvolutionInt8Args;
|
||||
|
||||
int ConvolutionInt8Run(void *cdata, int task_id);
|
||||
int ConvolutionInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_INT8_CONVOLUTION_WRAPPER_INT8_WRAPPER_H_
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
#include "wrapper/int8/resize_int8_wrapper.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
int ResizeInt8Run(void *cdata, int task_id) {
|
||||
int ResizeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
ResizeInt8Args *args = (ResizeInt8Args *)cdata;
|
||||
ResizeNearestNeighborInt8Simple(args->input_data_, args->output_data_, args->input_shape_, args->output_shape_,
|
||||
args->align_corners_, task_id, args->thread_num_);
|
||||
|
|
|
@ -32,7 +32,7 @@ typedef struct {
|
|||
int thread_num_;
|
||||
} ResizeInt8Args;
|
||||
|
||||
int ResizeInt8Run(void *cdata, int task_id);
|
||||
int ResizeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
#include "wrapper/int8/slice_int8_wrapper.h"
|
||||
#include "nnacl/int8/slice_int8.h"
|
||||
|
||||
int SliceInt8Run(void *cdata, int task_id) {
|
||||
int SliceInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
SliceArgs *args = (SliceArgs *)(cdata);
|
||||
int ret = SliceInt8(args->input_data_, args->output_data_, args->param_, task_id);
|
||||
return ret;
|
||||
|
|
|
@ -26,6 +26,6 @@ typedef struct SliceArgs {
|
|||
SliceParameter *param_;
|
||||
} SliceArgs;
|
||||
|
||||
int SliceInt8Run(void *cdata, int task_id);
|
||||
int SliceInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_WRAPPER_INT8_SLICE_INT8_WRAPPER_H_
|
||||
|
|
|
@ -45,7 +45,7 @@ int GetCurrentThreadNum() {
|
|||
return g_pool->thread_num();
|
||||
}
|
||||
|
||||
int ParallelLaunch(int (*func)(void *, int), void *content, int task_num) {
|
||||
int ParallelLaunch(int (*func)(void *, int, float, float), void *content, int task_num) {
|
||||
if (g_pool == nullptr) {
|
||||
return mindspore::THREAD_ERROR;
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ using mindspore::lite::RET_OK;
|
|||
using mindspore::schema::PrimitiveType_ConstantOfShape;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
int ConstantOfShapeRun(void *cdata, int task_id) {
|
||||
int ConstantOfShapeRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto g_kernel = reinterpret_cast<ConstantOfShapeCPUKernel *>(cdata);
|
||||
auto ret = g_kernel->DoExecute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -88,7 +88,7 @@ DetectionPostProcessBaseCPUKernel::~DetectionPostProcessBaseCPUKernel() { delete
|
|||
|
||||
int DetectionPostProcessBaseCPUKernel::ReSize() { return RET_OK; }
|
||||
|
||||
int NmsMultiClassesFastCoreRun(void *cdata, int task_id) {
|
||||
int NmsMultiClassesFastCoreRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto KernelData = reinterpret_cast<DetectionPostProcessBaseCPUKernel *>(cdata);
|
||||
int ret = NmsMultiClassesFastCore(KernelData->num_boxes_, KernelData->num_classes_with_bg_, KernelData->input_scores_,
|
||||
PartialArgSort, KernelData->params_, task_id, KernelData->thread_num_);
|
||||
|
|
|
@ -153,7 +153,7 @@ int PriorBoxCPUKernel::PriorBoxImpl(int task_id) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
int RunPriorBox(void *cdata, int task_id) {
|
||||
int RunPriorBox(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto prior_box = reinterpret_cast<PriorBoxCPUKernel *>(cdata);
|
||||
|
||||
auto error_code = prior_box->PriorBoxImpl(task_id);
|
||||
|
|
|
@ -125,7 +125,7 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int QuantDTypeCastRun(void *cdata, int task_id) {
|
||||
int QuantDTypeCastRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto g_kernel = reinterpret_cast<QuantDTypeCastCPUKernel *>(cdata);
|
||||
auto ret = g_kernel->QuantDTypeCast(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -53,7 +53,7 @@ int ReshapeBaseCPUKernel::RunImpl(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ReshapeRun(void *cdata, int task_id) {
|
||||
int ReshapeRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto reshape = reinterpret_cast<ReshapeBaseCPUKernel *>(cdata);
|
||||
auto ret = reshape->RunImpl(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -24,7 +24,7 @@ using mindspore::lite::RET_OK;
|
|||
using mindspore::schema::PrimitiveType_SliceFusion;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
int SliceLaunch(void *cdata, int task_id) {
|
||||
int SliceLaunch(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
if (cdata == nullptr) {
|
||||
MS_LOG(ERROR) << "Input cdata is nullptr!";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -106,7 +106,7 @@ int SplitBaseCPUKernel::Split(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
static int SplitRun(void *cdata, int task_id) {
|
||||
static int SplitRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto g_kernel = reinterpret_cast<SplitBaseCPUKernel *>(cdata);
|
||||
auto ret = g_kernel->Split(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -72,7 +72,7 @@ int SplitWithOverlapBaseCPUKernel::Split(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int SplitWithOverlapRun(void *cdata, int task_id) {
|
||||
int SplitWithOverlapRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto g_kernel = reinterpret_cast<SplitWithOverlapBaseCPUKernel *>(cdata);
|
||||
auto ret = g_kernel->Split(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -81,7 +81,7 @@ void StackBaseCPUKernel::Execute(int task_id) {
|
|||
Stack(all_inputs_, output_data + input_num * start * copy_size_, input_num, copy_size_, start, end);
|
||||
}
|
||||
|
||||
static int StackRun(void *cdata, int task_id) {
|
||||
static int StackRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto stack = reinterpret_cast<StackBaseCPUKernel *>(cdata);
|
||||
stack->Execute(task_id);
|
||||
return RET_OK;
|
||||
|
|
|
@ -129,7 +129,7 @@ int StridedSliceCPUKernel::FastRunImpl(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int StrideRun(void *cdata, int task_id) {
|
||||
int StrideRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto stride = reinterpret_cast<StridedSliceCPUKernel *>(cdata);
|
||||
auto ret = stride->FastRunImpl(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -83,7 +83,7 @@ int TileCPUKernel::ReSize() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int SimpleTile(void *cdata, int task_id) {
|
||||
int SimpleTile(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto kernel = reinterpret_cast<TileCPUKernel *>(cdata);
|
||||
auto ret = kernel->SimpleTileImpl(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -82,7 +82,7 @@ int ActivationFp16CPUKernel::DoActivation(int task_id) {
|
|||
return error_code;
|
||||
}
|
||||
|
||||
int ActivationFp16Run(void *cdata, int task_id) {
|
||||
int ActivationFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto activation_kernel = reinterpret_cast<ActivationFp16CPUKernel *>(cdata);
|
||||
auto error_code = activation_kernel->DoActivation(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -145,7 +145,7 @@ int ArithmeticCompareFP16CPUKernel::DoArithmetic(int task_id) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int ArithmeticsRunFp16(void *cdata, int task_id) {
|
||||
static int ArithmeticsRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto arithmetic_kernel = reinterpret_cast<ArithmeticCompareFP16CPUKernel *>(cdata);
|
||||
auto ret = arithmetic_kernel->DoArithmetic(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -26,7 +26,7 @@ using mindspore::schema::PrimitiveType_Cast;
|
|||
|
||||
namespace mindspore::kernel {
|
||||
namespace {
|
||||
int CastFp16Run(void *cdata, int task_id) {
|
||||
int CastFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
if (cdata == nullptr) {
|
||||
MS_LOG(ERROR) << "input cdata is nullptr!";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -189,7 +189,7 @@ int Convolution1x1FP16CPUKernel::RunHw(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
static int Convolution1x1Fp16RunOc(void *cdata, int task_id) {
|
||||
static int Convolution1x1Fp16RunOc(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv = reinterpret_cast<Convolution1x1FP16CPUKernel *>(cdata);
|
||||
auto error_code = conv->RunOc(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
@ -199,7 +199,7 @@ static int Convolution1x1Fp16RunOc(void *cdata, int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
static int Convolution1x1Fp16RunHw(void *cdata, int task_id) {
|
||||
static int Convolution1x1Fp16RunHw(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv = reinterpret_cast<Convolution1x1FP16CPUKernel *>(cdata);
|
||||
auto error_code = conv->RunHw(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -92,7 +92,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
static int ConvDwFp16Run(void *cdata, int task_id) {
|
||||
static int ConvDwFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv_dw_fp16 = reinterpret_cast<ConvolutionDepthwiseFp16CPUKernel *>(cdata);
|
||||
auto ret = conv_dw_fp16->Execute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -121,7 +121,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Execute(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
static int ConvDwSWFp16Run(void *cdata, int task_id) {
|
||||
static int ConvDwSWFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv_dw_fp16 = reinterpret_cast<ConvolutionDepthwiseSWFp16CPUKernel *>(cdata);
|
||||
auto ret = conv_dw_fp16->Execute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -125,7 +125,7 @@ int ConvolutionFP16CPUKernel::RunImpl(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
static int ConvolutionFp16Impl(void *cdata, int task_id) {
|
||||
static int ConvolutionFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv = reinterpret_cast<ConvolutionFP16CPUKernel *>(cdata);
|
||||
auto error_code = conv->RunImpl(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -194,7 +194,7 @@ int ConvolutionWinogradFP16CPUKernel::RunImpl(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
static int ConvolutionWinogradFp16Impl(void *cdata, int task_id) {
|
||||
static int ConvolutionWinogradFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv = reinterpret_cast<ConvolutionWinogradFP16CPUKernel *>(cdata);
|
||||
auto error_code = conv->RunImpl(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -35,7 +35,7 @@ int CropFp16CPUKernel::DoExecute(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
static int CropFp16Run(void *cdata, int task_id) {
|
||||
static int CropFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto g_kernel = reinterpret_cast<CropFp16CPUKernel *>(cdata);
|
||||
auto ret = g_kernel->DoExecute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -132,7 +132,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
static int DeconvDwFp16Run(void *cdata, int task_id) {
|
||||
static int DeconvDwFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto deconv_dw_fp16 = reinterpret_cast<DeconvolutionDepthwiseFp16CPUKernel *>(cdata);
|
||||
auto ret = deconv_dw_fp16->Execute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -149,7 +149,7 @@ void DeConvolutionFp16CPUKernel::FreeRunBuf() {
|
|||
return;
|
||||
}
|
||||
|
||||
static int DeConvFp16Run(void *cdata, int task_id) {
|
||||
static int DeConvFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto deconv = reinterpret_cast<DeConvolutionFp16CPUKernel *>(cdata);
|
||||
auto error_code = deconv->DoDeconv(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -220,13 +220,13 @@ int DeConvWinogradFp16CPUKernel::DeDeconvPost(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int DeConvWgFp16Run(void *cdata, int task_id) {
|
||||
int DeConvWgFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto deconvWg = reinterpret_cast<DeConvWinogradFp16CPUKernel *>(cdata);
|
||||
deconvWg->DoDeconv(task_id);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int DeConvWgPostFp16Run(void *cdata, int task_id) {
|
||||
int DeConvWgPostFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto deconvWg = reinterpret_cast<DeConvWinogradFp16CPUKernel *>(cdata);
|
||||
deconvWg->DeDeconvPost(task_id);
|
||||
return RET_OK;
|
||||
|
|
|
@ -121,7 +121,7 @@ int GatherFp16CPUKernel::DoGather(int task_id) {
|
|||
return error_code;
|
||||
}
|
||||
|
||||
int GatherRunFp16(void *cdata, int task_id) {
|
||||
int GatherRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto gather_kernel = reinterpret_cast<GatherFp16CPUKernel *>(cdata);
|
||||
auto error_code = gather_kernel->DoGather(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -96,7 +96,7 @@ int InstanceNormFp16CPUKernel::DoInstanceNorm(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int InstanceNormFp16Run(void *cdata, int task_id) {
|
||||
int InstanceNormFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto kernel = reinterpret_cast<InstanceNormFp16CPUKernel *>(cdata);
|
||||
auto ret = kernel->DoInstanceNorm(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -70,7 +70,7 @@ int LayerNormFp16CPUKernel::DoLayerNormFp16(int thread_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int LayerNormFp16Run(void *cdata, int task_id) {
|
||||
int LayerNormFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto kernel = reinterpret_cast<LayerNormFp16CPUKernel *>(cdata);
|
||||
auto ret = kernel->DoLayerNormFp16(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -84,7 +84,7 @@ int LogSoftmaxFp16CPUKernel::DoLogSoftmaxLastAxis(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int LogSoftmaxLastAxisFp16Run(void *cdata, int task_id) {
|
||||
int LogSoftmaxLastAxisFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto kernel = reinterpret_cast<LogSoftmaxFp16CPUKernel *>(cdata);
|
||||
auto ret = kernel->DoLogSoftmaxLastAxis(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -25,7 +25,7 @@ using mindspore::lite::RET_MEMORY_FAILED;
|
|||
using mindspore::lite::RET_OK;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
int MatmulBaseFP16Run(void *cdata, int task_id) {
|
||||
int MatmulBaseFP16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto op = reinterpret_cast<MatmulBaseFP16CPUKernel *>(cdata);
|
||||
auto error_code = op->RunImpl(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -72,7 +72,7 @@ int PoolingFp16CPUKernel::RunImpl(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
static int PoolingFp16Impl(void *cdata, int task_id) {
|
||||
static int PoolingFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto pooling = reinterpret_cast<PoolingFp16CPUKernel *>(cdata);
|
||||
auto error_code = pooling->RunImpl(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -68,7 +68,7 @@ int PowerFp16CPUKernel::GetExpData() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int PowerImplFp16(void *cdata, int task_id) {
|
||||
int PowerImplFp16(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto kernel = reinterpret_cast<PowerFp16CPUKernel *>(cdata);
|
||||
auto ret = kernel->RunImpl(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -131,7 +131,7 @@ int QuantDTypeCastFp16CPUKernel::QuantDTypeCast(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int QuantDTypeCastFP16Run(void *cdata, int task_id) {
|
||||
int QuantDTypeCastFP16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto g_kernel = reinterpret_cast<QuantDTypeCastFp16CPUKernel *>(cdata);
|
||||
auto ret = g_kernel->QuantDTypeCast(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -67,7 +67,7 @@ int ReduceFp16CPUKernel::CallReduceUnit(int task_id) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int ReduceFp16Impl(void *cdata, int task_id) {
|
||||
static int ReduceFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto reduce = reinterpret_cast<ReduceFp16CPUKernel *>(cdata);
|
||||
auto error_code = reduce->CallReduceUnit(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -84,7 +84,7 @@ int ScaleFp16CPUKernel::Scale(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ScaleFp16Run(void *cdata, int task_id) {
|
||||
int ScaleFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto scale = reinterpret_cast<ScaleFp16CPUKernel *>(cdata);
|
||||
auto ret = scale->Scale(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -25,7 +25,7 @@ using mindspore::lite::RET_OK;
|
|||
using mindspore::schema::PrimitiveType_SliceFusion;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
int SliceFp16Launch(void *cdata, int task_id) {
|
||||
int SliceFp16Launch(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
if (cdata == nullptr) {
|
||||
MS_LOG(ERROR) << "Input cdata is nullptr!";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -84,7 +84,7 @@ int SoftmaxFp16CPUKernel::DoSoftmaxLastAxis(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int SoftmaxLastAxisFp16Run(void *cdata, int task_id) {
|
||||
int SoftmaxLastAxisFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto kernel = reinterpret_cast<SoftmaxFp16CPUKernel *>(cdata);
|
||||
auto ret = kernel->DoSoftmaxLastAxis(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -86,7 +86,7 @@ void StackFp16CPUKernel::Execute(int task_id) {
|
|||
Stack(inputs, output + input_num * start * copy_size_, input_num, copy_size_, start, end);
|
||||
}
|
||||
|
||||
static int StackRun(void *cdata, int task_id) {
|
||||
static int StackRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto stack = reinterpret_cast<StackFp16CPUKernel *>(cdata);
|
||||
stack->Execute(task_id);
|
||||
return RET_OK;
|
||||
|
|
|
@ -66,7 +66,7 @@ int ActivationGradCPUKernelFp16::DoActivation(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ActivationGradRunFp16(void *cdata, int task_id) {
|
||||
int ActivationGradRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
MS_ASSERT(cdata != nullptr);
|
||||
auto activationGrad_kernel = reinterpret_cast<ActivationGradCPUKernelFp16 *>(cdata);
|
||||
auto error_code = activationGrad_kernel->DoActivation(task_id);
|
||||
|
|
|
@ -60,7 +60,7 @@ int ArithmeticSelfGradFp16CPUKernel::DoActivation(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ArithmeticSelfGradFp16Run(void *cdata, int task_id) {
|
||||
int ArithmeticSelfGradFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
MS_ASSERT(cdata != nullptr);
|
||||
auto activationGrad_kernel = reinterpret_cast<ArithmeticSelfGradFp16CPUKernel *>(cdata);
|
||||
auto error_code = activationGrad_kernel->DoActivation(task_id);
|
||||
|
|
|
@ -93,7 +93,7 @@ int ActivationCPUKernel::DoActivation(int task_id) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
int ActivationRun(void *cdata, int task_id) {
|
||||
int ActivationRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto activation_kernel = reinterpret_cast<ActivationCPUKernel *>(cdata);
|
||||
auto error_code = activation_kernel->DoActivation(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -104,7 +104,7 @@ int AdderCPUKernel::RunImpl(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int AdderImpl(void *cdata, int task_id) {
|
||||
int AdderImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto adder = reinterpret_cast<AdderCPUKernel *>(cdata);
|
||||
auto error_code = adder->RunImpl(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -27,13 +27,13 @@ using mindspore::schema::PrimitiveType_AddN;
|
|||
|
||||
namespace mindspore::kernel {
|
||||
namespace {
|
||||
int AddNLaunch(void *cdata, int task_id) {
|
||||
int AddNLaunch(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
if (cdata == nullptr) {
|
||||
MS_LOG(ERROR) << "Input cdata is nullptr!";
|
||||
return RET_NULL_PTR;
|
||||
}
|
||||
auto kernel = reinterpret_cast<AddNCPUKernel *>(cdata);
|
||||
return kernel->AddNParallelRun(task_id);
|
||||
return kernel->AddNParallelRun(task_id, lhs_scale, rhs_scale);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
|
@ -41,7 +41,7 @@ int AddNCPUKernel::Init() { return RET_OK; }
|
|||
|
||||
int AddNCPUKernel::ReSize() { return RET_OK; }
|
||||
|
||||
int AddNCPUKernel::AddNParallelRun(int thread_id) {
|
||||
int AddNCPUKernel::AddNParallelRun(int thread_id, float lhs_scale, float rhs_scale) {
|
||||
int count_per_thread = UP_DIV(elements_num_, op_parameter_->thread_num_);
|
||||
int count = MSMIN(count_per_thread, elements_num_ - thread_id * count_per_thread);
|
||||
auto stride = count_per_thread * thread_id;
|
||||
|
|
|
@ -32,7 +32,7 @@ class AddNCPUKernel : public InnerKernel {
|
|||
int Init() override;
|
||||
int ReSize() override;
|
||||
int Run() override;
|
||||
int AddNParallelRun(int thread_id);
|
||||
int AddNParallelRun(int thread_id, float lhs_scale, float rhs_scale);
|
||||
|
||||
private:
|
||||
float *in1_addr_;
|
||||
|
|
|
@ -69,7 +69,7 @@ class ArithmeticCompareCPUKernel : public ArithmeticCPUKernel {
|
|||
ArithmeticCompareFp32Func func_fp32_ = nullptr;
|
||||
ArithmeticCompareIntFunc func_int32_ = nullptr;
|
||||
};
|
||||
int ArithmeticCompareRun(void *cdata, int task_id);
|
||||
int ArithmeticCompareRun(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_COMPARE_H_
|
||||
|
|
|
@ -398,7 +398,7 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) {
|
|||
static_cast<uint8_t *>(output_ptr_) + offset, count, false);
|
||||
}
|
||||
|
||||
int ArithmeticsRun(void *cdata, int task_id) {
|
||||
int ArithmeticsRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto kernel = reinterpret_cast<ArithmeticCPUKernel *>(cdata);
|
||||
auto ret = kernel->DoArithmetic(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -120,6 +120,6 @@ class ArithmeticCPUKernel : public InnerKernel {
|
|||
ArithmeticOptIntRun arithmetic_opt_run_int_ = nullptr;
|
||||
ArithmeticBoolRun arithmetic_run_bool_ = nullptr;
|
||||
};
|
||||
int ArithmeticsRun(void *cdata, int task_id);
|
||||
int ArithmeticsRun(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_FP32_H_
|
||||
|
|
|
@ -104,7 +104,7 @@ int ArithmeticSelfCPUKernel::DoExecute(int task_id) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
int ArithmeticSelfRun(void *cdata, int task_id) {
|
||||
int ArithmeticSelfRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto kernel = reinterpret_cast<ArithmeticSelfCPUKernel *>(cdata);
|
||||
auto ret = kernel->DoExecute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -58,7 +58,7 @@ class ArithmeticSelfCPUKernel : public InnerKernel {
|
|||
ArithmeticSelfFunc func_;
|
||||
ArithmeticSelfBoolFunc func_bool_;
|
||||
};
|
||||
int ArithmeticSelfRun(void *cdata, int task_id);
|
||||
int ArithmeticSelfRun(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_SELF_H_
|
||||
|
|
|
@ -89,7 +89,7 @@ int BatchnormCPUKernel::DoExecute(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int BatchNormRun(void *cdata, int task_id) {
|
||||
int BatchNormRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto kernel = reinterpret_cast<BatchnormCPUKernel *>(cdata);
|
||||
auto ret = kernel->DoExecute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -50,7 +50,7 @@ class BatchnormCPUKernel : public InnerKernel {
|
|||
float default_momentum_ = -1.0f;
|
||||
};
|
||||
|
||||
int BatchNormRun(void *cdata, int task_id);
|
||||
int BatchNormRun(void *cdata, int task_id, float lhs_scale, float rhs_scale);
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BATCHNORM_FP32_H_
|
||||
|
|
|
@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_Cast;
|
|||
|
||||
namespace mindspore::kernel {
|
||||
namespace {
|
||||
int CastRun(void *cdata, int task_id) {
|
||||
int CastRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
if (cdata == nullptr) {
|
||||
MS_LOG(ERROR) << "input cdata is nullptr!";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -58,7 +58,7 @@ int ConcatCPUKernel::DoConcat(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConcatRun(void *cdata, int task_id) {
|
||||
int ConcatRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto concat_kernel = reinterpret_cast<ConcatCPUKernel *>(cdata);
|
||||
auto error_code = concat_kernel->DoConcat(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -177,7 +177,7 @@ int Convolution1x1CPUKernel::DoConv1x1(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int Convolution1x1Run(void *cdata, int task_id) {
|
||||
int Convolution1x1Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv1x1 = reinterpret_cast<Convolution1x1CPUKernel *>(cdata);
|
||||
auto error_code = conv1x1->DoConv1x1(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
@ -212,7 +212,7 @@ int Convolution1x1CPUKernel::DoConv1x1Hw(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int Convolution1x1RunHw(void *cdata, int task_id) {
|
||||
int Convolution1x1RunHw(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv1x1 = reinterpret_cast<Convolution1x1CPUKernel *>(cdata);
|
||||
auto error_code = conv1x1->DoConv1x1Hw(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -95,7 +95,7 @@ int ConvolutionDepthwise3x3CPUKernel::Execute(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvDw3x3Run(void *cdata, int task_id) {
|
||||
int ConvDw3x3Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv_dw = reinterpret_cast<ConvolutionDepthwise3x3CPUKernel *>(cdata);
|
||||
auto ret = conv_dw->Execute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -85,7 +85,7 @@ int ConvolutionDepthwiseCPUKernel::Execute(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvDwRun(void *cdata, int task_id) {
|
||||
int ConvDwRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv_dw = reinterpret_cast<ConvolutionDepthwiseCPUKernel *>(cdata);
|
||||
auto ret = conv_dw->Execute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -133,7 +133,7 @@ int ConvolutionDepthwiseIndirectCPUKernel::Execute(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvDwIndirectRun(void *cdata, int task_id) {
|
||||
int ConvDwIndirectRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv_dw = reinterpret_cast<ConvolutionDepthwiseIndirectCPUKernel *>(cdata);
|
||||
auto ret = conv_dw->Execute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -123,7 +123,7 @@ int ConvolutionDepthwiseSWCPUKernel::Execute(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvDwSWRun(void *cdata, int task_id) {
|
||||
int ConvDwSWRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv_dw = reinterpret_cast<ConvolutionDepthwiseSWCPUKernel *>(cdata);
|
||||
auto ret = conv_dw->Execute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -126,7 +126,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvDwSWAvxRun(void *cdata, int task_id) {
|
||||
int ConvDwSWAvxRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv_dw = reinterpret_cast<ConvolutionDepthwiseSWCPUKernelX86 *>(cdata);
|
||||
auto ret = conv_dw->Execute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
|
|
|
@ -130,7 +130,7 @@ int ConvolutionCPUKernel::RunImpl(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionImpl(void *cdata, int task_id) {
|
||||
int ConvolutionImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv = reinterpret_cast<ConvolutionCPUKernel *>(cdata);
|
||||
auto error_code = conv->RunImpl(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -122,7 +122,7 @@ int ConvolutionSWCPUKernel::RunImpl(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionSWImpl(void *cdata, int task_id) {
|
||||
int ConvolutionSWImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv = reinterpret_cast<ConvolutionSWCPUKernel *>(cdata);
|
||||
auto error_code = conv->RunImpl(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -197,7 +197,7 @@ int ConvolutionWinogradCPUKernel::RunImpl(int task_id) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionWinogradImpl(void *cdata, int task_id) {
|
||||
int ConvolutionWinogradImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto conv = reinterpret_cast<ConvolutionWinogradCPUKernel *>(cdata);
|
||||
auto error_code = conv->RunImpl(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
|
@ -96,7 +96,7 @@ void CropAndResizeCPUKernel::FreeTmpBuffer() {
|
|||
context_->allocator->Free(line_buffer_);
|
||||
}
|
||||
|
||||
int CropAndResizeImpl(void *cdata, int task_id) {
|
||||
int CropAndResizeImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
auto resize = reinterpret_cast<CropAndResizeCPUKernel *>(cdata);
|
||||
auto error_code = resize->RunImpl(task_id);
|
||||
if (error_code != RET_OK) {
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue