!26853 Mindspore support aicpu ops compile.

Merge pull request !26853 from linqingke/aicpu
This commit is contained in:
i-robot 2021-12-03 01:21:01 +00:00 committed by Gitee
commit 973b8ffec9
22 changed files with 2379 additions and 0 deletions

View File

@ -13,6 +13,11 @@ set(FBS_FILES
)
ms_build_flatbuffers(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}../../schema generated_fbs_files ${SERVER_FLATBUFFER_OUTPUT})
if(ENABLE_D)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/backend/kernel_compiler/aicpu/aicpu_ops)
add_subdirectory(backend/kernel_compiler/aicpu/aicpu_ops)
endif()
if(ENABLE_CPU)
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64")
set(PLATFORM_ARM64 "on")

View File

@ -18,6 +18,10 @@ if(ENABLE_D)
"rts/*.cc"
"hccl/*.cc"
)
file(GLOB_RECURSE AICPU_OPS_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"aicpu/aicpu_ops/*.cc"
)
list(REMOVE_ITEM D_SRC_LIST ${AICPU_OPS_SRC})
add_compile_definitions(ENABLE_D)
endif()

View File

@ -0,0 +1,64 @@
set(NORMAL_CMAKE_C_COMPILER ${CMAKE_C_COMPILER})
set(NORMAL_CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
if(DEFINED ENV{ASCEND_CUSTOM_PATH})
set(TOOLCHAIN_PATH $ENV{ASCEND_CUSTOM_PATH}/toolkit/toolchain)
else()
set(TOOLCHAIN_PATH /usr/local/Ascend/toolkit/toolchain)
endif()
set(CMAKE_C_COMPILER ${TOOLCHAIN_PATH}/hcc/bin/aarch64-target-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER ${TOOLCHAIN_PATH}/hcc/bin/aarch64-target-linux-gnu-g++)
if(EXISTS ${CMAKE_C_COMPILER} AND EXISTS ${CMAKE_CXX_COMPILER})
set(AICPU_PROTO_SRC
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_op_proto/aicpu_tensor.proto
)
ms_protobuf_generate(PROTO_SRCS PROTO_HDRS ${AICPU_PROTO_SRC})
set(AICPU_SRC
${PROTO_SRCS}
${CMAKE_CURRENT_SOURCE_DIR}/common/kernel_base.cc
${CMAKE_CURRENT_SOURCE_DIR}/common/kernel_log.cc
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_async_event.cc
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_context.cc
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_pulse.cc
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_sharder.cc
${CMAKE_CURRENT_SOURCE_DIR}/random_choice_with_mask_kernels.cc
)
add_library(aicpu_kernels SHARED
${AICPU_SRC}
)
target_compile_options(aicpu_kernels PRIVATE
-march=armv8-a
-O2
-fvisibility-inlines-hidden
-fvisibility=hidden
-fno-strict-aliasing
-fno-common
)
target_link_libraries(aicpu_kernels PRIVATE
-ldl
-shared
PUBLIC
${SECUREC_LIBRARY}
-Wl,--whole-archive
-Wl,--no-whole-archive
-Wl,-Bsymbolic
-rdynamic
mindspore::protobuf
-pthread
)
set(INSTALL_LIBRARY_DIR lib)
install(TARGETS aicpu_kernels OPTIONAL
EXPORT aicpu_kernels-targets
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
)
endif()
set(CMAKE_C_COMPILER ${NORMAL_CMAKE_C_COMPILER})
set(CMAKE_CXX_COMPILER ${NORMAL_CMAKE_CXX_COMPILER})

View File

@ -0,0 +1,118 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
syntax = "proto3";
package aicpuops;
message AttrValue {
message ArrayValue {
repeated bytes s = 2; //"array(string)"
repeated int64 i = 3 [ packed = true ]; //"array(int)"
repeated float f = 4 [ packed = true ]; //"array(float)"
repeated bool b = 5 [ packed = true ]; //"array(bool)"
repeated int32 type = 6 [ packed = true ]; //"array(type)"
repeated TensorShape shape = 7; //"array(shape)"
repeated Tensor tensor = 8; //"array(tensor)"
}
oneof value {
ArrayValue array = 1;
bytes s = 2; //"string"
int64 i = 3; //"int"
float f = 4; //"float"
bool b = 5; //"bool"
int32 type = 6; //"type"
TensorShape shape = 7; //"shape"
Tensor tensor = 8; //"tensor"
}
}
message DynamicIdx {
int32 idx = 1;
int32 num = 2;
}
message NodeDef {
string op = 2;
map<string, AttrValue> attrs = 3;
repeated Tensor inputs = 4;
repeated Tensor outputs = 5;
map<string, DynamicIdx> dym_inputs = 6;
map<string, DynamicIdx> dym_outputs = 7;
}
message TensorShape {
// One dimension of the tensor.
message Dim {
// size must >=0
int64 size = 1;
};
// group dim info
repeated Dim dim = 2;
// If true, the number of dimensions in the shape is unknown.
// If true, "dim.size()" must be 0.
bool unknown_rank = 3;
// data format "NHWC" "NCHW" "NC1HWC0" OR "NONE"
int32 data_format = 4;
};
message Tensor {
// tensor shape info
TensorShape tensor_shape = 1;
// tensor content data type
int32 tensor_type = 2;
// tensor memory device
// data located memory device , "DDR" "HBM" OR "NONE"
string mem_device = 3;
string name = 4;
uint64 data_ptr = 5;
uint64 data_size = 6;
}
enum DataType {
MS_FLOAT32 = 0;
MS_FLOAT16 = 1;
MS_INT8 = 2;
MS_INT32 = 3;
MS_UINT8 = 4;
MS_INT16 = 6;
MS_UINT16 = 7;
MS_UINT32 = 8;
MS_INT64 = 9;
MS_UINT64 = 10;
MS_FLOAT64 = 11;
MS_BOOL = 12;
MS_STRING = 13;
MS_DUAL_SUB_INT8 = 14;
MS_DUAL_SUB_UINT8 = 15;
MS_COMPLEX64 = 16;
MS_COMPLEX128 = 17;
MS_QINT8 = 18;
MS_QINT16 = 19;
MS_QINT32 = 20;
MS_QUINT8 = 21;
MS_QUINT16 = 22;
MS_RESOURCE = 23;
MS_STRING_REF = 24;
MS_DUAL = 25;
MS_UNKNOWN = 26;
}

View File

@ -0,0 +1,137 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "aicpu_sharder/aicpu_async_event.h"
#include <string>
#include "common/kernel_log.h"
#include "aicpu_sharder/aicpu_context.h"
namespace aicpu {
AsyncEventManager &AsyncEventManager::GetInstance() {
static AsyncEventManager async_event_manager;
return async_event_manager;
}
void AsyncEventManager::Register(const NotifyFunc &notify) { notify_func_ = notify; }
void AsyncEventManager::NotifyWait(void *notify_param, const uint32_t param_len) {
if (notify_func_ != nullptr) {
notify_func_(notify_param, param_len);
}
}
bool AsyncEventManager::GenTaskInfoFromCtx(AsyncTaskInfo *task_info) {
if (task_info == nullptr) {
AICPU_LOGE("AsyncEventManager GenTaskInfoFromCtx failed, task_info is nullptr.");
return false;
}
(void)aicpu::GetTaskAndStreamId(&task_info->task_id, &task_info->stream_id);
std::string wait_id_value;
std::string ker_wait_id(aicpu::kContextKeyWaitId);
auto status = aicpu::GetThreadLocalCtx(ker_wait_id, &wait_id_value);
if (status != aicpu::AICPU_ERROR_NONE) {
AICPU_LOGE("GetThreadLocalCtx failed, ret=%d, key=%s.", status, ker_wait_id.c_str());
return false;
}
task_info->wait_id = atoi(wait_id_value.c_str());
std::string wait_type_value;
std::string key_wait_type(aicpu::kContextKeyWaitType);
status = aicpu::GetThreadLocalCtx(key_wait_type, &wait_type_value);
if (status != aicpu::AICPU_ERROR_NONE) {
AICPU_LOGE("GetThreadLocalCtx failed, ret=%d, key=%s.", status, key_wait_type.c_str());
return false;
}
task_info->wait_type = atoi(wait_type_value.c_str());
std::string start_tick_value;
std::string key_start_tick(aicpu::kContextKeyStartTick);
status = aicpu::GetThreadLocalCtx(key_start_tick, &start_tick_value);
if (status != aicpu::AICPU_ERROR_NONE) {
AICPU_LOGE("GetThreadLocalCtx failed, ret=%d, key=%s.", status, key_start_tick.c_str());
return false;
}
task_info->start_tick = atol(start_tick_value.c_str());
status = aicpu::GetOpname(aicpu::GetAicpuThreadIndex(), &task_info->op_name);
if (status != aicpu::AICPU_ERROR_NONE) {
AICPU_LOGE("GetOpname failed, ret=%d.", status);
return false;
}
return true;
}
bool AsyncEventManager::RegEventCb(const uint32_t event_id, const uint32_t sub_event_id,
const EventProcessCallBack &cb) {
if (cb == nullptr) {
AICPU_LOGE("AsyncEventManager RegEventCb failed, cb is nullptr.");
return false;
}
AsyncTaskInfo task_info;
task_info.task_cb = cb;
if (!GenTaskInfoFromCtx(&task_info)) {
AICPU_LOGE("AsyncEventManager GenTaskInfoFromCtx failed.");
return false;
}
AsyncEventInfo info;
info.event_id = event_id;
info.sub_event_id = sub_event_id;
{
std::unique_lock<std::mutex> lk(map_mutex_);
auto iter = asyncTaskMap_.find(info);
if (iter != asyncTaskMap_.end()) {
AICPU_LOGE("AsyncEventManager RegEventCb failed.");
return false;
}
asyncTaskMap_[info] = task_info;
}
AICPU_LOGI(
"AsyncEventManager RegEventCb success, event_id[%u], subeventId[%u], taskId[%lu],"
" streamId[%u], waitType[%u], waitId[%u], opName[%s], startTick[%lu].",
event_id, sub_event_id, task_info.task_id, task_info.stream_id, task_info.wait_type, task_info.wait_id,
task_info.op_name.c_str(), task_info.start_tick);
return true;
}
void AsyncEventManager::ProcessEvent(const uint32_t event_id, const uint32_t sub_event_id, void *param) {
AICPU_LOGI("AsyncEventManager proc event_id = %d, sub_event_id = %d", event_id, sub_event_id);
AsyncEventInfo info;
info.event_id = event_id;
info.sub_event_id = sub_event_id;
EventProcessCallBack taskCb = nullptr;
{
std::unique_lock<std::mutex> lk(map_mutex_);
auto iter = asyncTaskMap_.find(info);
if (iter == asyncTaskMap_.end()) {
AICPU_LOGW("AsyncEventManager no async task to deal with.");
return;
}
taskCb = iter->second.task_cb;
asyncTaskMap_.erase(iter);
}
if (taskCb != nullptr) {
taskCb(param);
}
AICPU_LOGI("AsyncEventManager proc end!");
return;
}
} // namespace aicpu
void AicpuNotifyWait(void *notify_param, const uint32_t param_len) {
aicpu::AsyncEventManager::GetInstance().NotifyWait(notify_param, param_len);
return;
}
bool AicpuRegEventCb(const uint32_t event_id, const uint32_t sub_event_id, const aicpu::EventProcessCallBack &cb) {
return aicpu::AsyncEventManager::GetInstance().RegEventCb(event_id, sub_event_id, cb);
}

View File

@ -0,0 +1,144 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_ASYNC_EVENT_H_
#define AICPU_OPS_AICPU_ASYNC_EVENT_H_
#include <functional>
#include <vector>
#include <string>
#include <map>
#include <mutex>
#include "aicpu_sharder/aicpu_context.h"
namespace aicpu {
using NotifyFunc = std::function<void(void *param, const uint32_t param_len)>;
using EventProcessCallBack = std::function<void(void *param)>;
struct AsyncEventInfo {
uint32_t event_id;
uint32_t sub_event_id;
bool operator==(const AsyncEventInfo &info) {
return (event_id == info.event_id) && (sub_event_id == info.sub_event_id);
}
};
inline bool operator<(const AsyncEventInfo &info1, const AsyncEventInfo &info2) {
return (info1.event_id < info2.event_id) ||
((info1.event_id == info2.event_id) && (info1.sub_event_id < info2.sub_event_id));
}
struct AsyncTaskInfo {
uint64_t start_tick;
std::string op_name;
uint8_t wait_type;
uint32_t wait_id;
uint64_t task_id;
uint32_t stream_id;
EventProcessCallBack task_cb;
};
struct AsyncNotifyInfo {
uint8_t wait_type;
uint32_t wait_id;
uint64_t task_id;
uint32_t stream_id;
uint32_t ret_code;
aicpu::aicpuContext_t ctx;
};
class AsyncEventManager {
public:
/**
* Get the unique object of this class
*/
static AsyncEventManager &GetInstance();
/**
* Register notify callback function
* @param notify wait notify callback function
*/
void Register(const NotifyFunc &notify);
/**
* Notify wait task
* @param notify_param notify param info
* @param param_len notify_param len
*/
void NotifyWait(void *notify_param, const uint32_t param_len);
/**
* Register Event callback function, async op call
* @param eventID EventId
* @param sub_event_id queue id
* @param cb Event callback function
* @return whether register success
*/
bool RegEventCb(const uint32_t event_id, const uint32_t sub_event_id, const EventProcessCallBack &cb);
/**
* Process event
* @param event_id EventId
* @param sub_event_id queue id
* @param param event param
*/
void ProcessEvent(const uint32_t event_id, const uint32_t sub_event_id, void *param = nullptr);
private:
AsyncEventManager() : notify_func_(nullptr) {}
~AsyncEventManager() = default;
AsyncEventManager(const AsyncEventManager &) = delete;
AsyncEventManager &operator=(const AsyncEventManager &) = delete;
AsyncEventManager(AsyncEventManager &&) = delete;
AsyncEventManager &operator=(AsyncEventManager &&) = delete;
// generate task info from ctx
bool GenTaskInfoFromCtx(AsyncTaskInfo *task_info);
// wait notify function
NotifyFunc notify_func_;
std::mutex map_mutex_;
std::map<AsyncEventInfo, AsyncTaskInfo> asyncTaskMap_;
};
} // namespace aicpu
#ifdef __cplusplus
extern "C" {
#endif
/**
* Notify wait task
* @param notify_param notify info
* @param param_len
*/
__attribute__((weak)) void AicpuNotifyWait(void *notify_param, const uint32_t param_len);
/**
* Register Event callback function, async op call
* @param info Registered event information
* @param cb Event callback function
* @return whether register success
*/
__attribute__((weak)) bool AicpuRegEventCb(const uint32_t event_id, const uint32_t sub_event_id,
const aicpu::EventProcessCallBack &cb);
#ifdef __cplusplus
}
#endif
#endif // AICPU_OPS_AICPU_ASYNC_EVENT_H_

View File

@ -0,0 +1,308 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "aicpu_sharder/aicpu_context.h"
#include <map>
#include <vector>
#include <memory>
#include <mutex>
#include <thread>
#include <utility>
#include "common/kernel_log.h"
namespace {
// current thread context
aicpu::aicpuContext_t g_cur_ctx;
// task monitor context
std::unique_ptr<std::string[]> g_opsname(nullptr);
thread_local uint32_t g_thread_index = UINT32_MAX;
uint32_t g_aicpu_core_cnt = 0;
thread_local std::map<std::string, std::string> g_thread_local_ctx;
thread_local aicpu::streamAndTaskId_t g_stream_and_task_id;
// aicpu run mode
uint32_t g_run_mode = aicpu::AicpuRunMode::THREAD_MODE;
// context info
std::mutex default_mutex;
std::vector<std::map<std::string, std::string>> g_default_thread_ctx;
std::mutex prof_mutex;
std::vector<std::map<std::string, std::string>> g_prof_thread_ctx;
std::mutex debug_mutex;
std::vector<std::map<std::string, std::string>> g_debug_thread_ctx;
std::mutex func_map_mutex;
std::map<uint32_t, std::map<uint32_t, std::function<void(void *)>>> g_func_map;
std::map<std::string, std::string> &GetThreadCtx(aicpu::CtxType type, uint32_t thread_index) {
if (type == aicpu::CTX_DEBUG) {
std::unique_lock<std::mutex> mutex(default_mutex);
if (thread_index >= g_debug_thread_ctx.size()) {
g_debug_thread_ctx.resize(thread_index + 1);
}
return g_debug_thread_ctx[thread_index];
} else if (type == aicpu::CTX_PROF) {
std::unique_lock<std::mutex> mutex(prof_mutex);
if (thread_index >= g_prof_thread_ctx.size()) {
g_prof_thread_ctx.resize(thread_index + 1);
}
return g_prof_thread_ctx[thread_index];
} else {
std::unique_lock<std::mutex> mutex(debug_mutex);
if (thread_index >= g_default_thread_ctx.size()) {
g_default_thread_ctx.resize(thread_index + 1);
}
return g_default_thread_ctx[thread_index];
}
}
} // namespace
namespace aicpu {
status_t aicpuSetContext(aicpuContext_t *ctx) {
g_cur_ctx = *ctx;
return AICPU_ERROR_NONE;
}
status_t aicpuGetContext(aicpuContext_t *ctx) {
*ctx = g_cur_ctx;
return AICPU_ERROR_NONE;
}
status_t InitTaskMonitorContext(uint32_t aicpu_core_cnt) {
if (aicpu_core_cnt == 0) {
AICPU_LOGE("invalid aicpu core count[%u]", aicpu_core_cnt);
return AICPU_ERROR_FAILED;
}
g_aicpu_core_cnt = aicpu_core_cnt;
AICPU_LOGI("aicpu core count[%u]", aicpu_core_cnt);
g_opsname.reset(new (std::nothrow) std::string[aicpu_core_cnt]);
if (g_opsname == nullptr) {
AICPU_LOGE("malloc ops name memory for task monitor failed");
return AICPU_ERROR_FAILED;
}
for (uint32_t index = 0; index < aicpu_core_cnt; ++index) {
g_opsname[index] = "null";
}
return AICPU_ERROR_NONE;
}
status_t SetAicpuThreadIndex(uint32_t thread_index) {
g_thread_index = thread_index;
return AICPU_ERROR_NONE;
}
uint32_t GetAicpuThreadIndex() { return g_thread_index; }
status_t SetOpname(const std::string &opname) {
if (g_opsname != nullptr && g_thread_index < g_aicpu_core_cnt) {
AICPU_LOGI("set op name to %s for thread[%u]", opname.c_str(), g_thread_index);
g_opsname[g_thread_index] = opname;
return AICPU_ERROR_NONE;
}
// maintenance function, if failed just print event log
AICPU_LOGEVENT(
"set op name[%s] failed, thread index[%u] should be less than total aicpu core count[%u],"
" and ops name array addr[%p] cannot null",
opname.c_str(), g_thread_index, g_aicpu_core_cnt, g_opsname.get());
return AICPU_ERROR_NONE;
}
status_t GetOpname(uint32_t thread_index, std::string *opname) {
*opname = "null";
if (g_opsname != nullptr && thread_index < g_aicpu_core_cnt) {
*opname = g_opsname[thread_index];
return AICPU_ERROR_NONE;
}
// maintenance function, if failed just print event log
AICPU_LOGEVENT(
"get op name failed, thread index[%u] should be less than total aicpu core count[%u],"
" and ops name array addr[%p] cannot null",
g_thread_index, g_aicpu_core_cnt, g_opsname.get());
return AICPU_ERROR_NONE;
}
status_t SetTaskAndStreamId(uint64_t task_id, uint32_t stream_id) {
g_stream_and_task_id.task_id = task_id;
g_stream_and_task_id.stream_id = stream_id;
AICPU_LOGI("Set task_id:[%lu] and stream_id:[%u] success.", task_id, stream_id);
return AICPU_ERROR_NONE;
}
status_t GetTaskAndStreamId(uint64_t *task_id, uint32_t *stream_id) {
*task_id = g_stream_and_task_id.task_id;
*stream_id = g_stream_and_task_id.stream_id;
AICPU_LOGI("Get task_id:[%lu] and stream_id:[%u] success.", *task_id, *stream_id);
return AICPU_ERROR_NONE;
}
status_t SetAicpuRunMode(uint32_t run_mode) {
g_run_mode = run_mode;
AICPU_LOGI("Set run_mode:[%u] success.", run_mode);
return AICPU_ERROR_NONE;
}
status_t GetAicpuRunMode(uint32_t *run_mode) {
*run_mode = g_run_mode;
AICPU_LOGI("Get run_mode:[%u] success.", *run_mode);
return AICPU_ERROR_NONE;
}
status_t SetThreadLocalCtx(const std::string &key, const std::string &value) {
if (key.empty()) {
AICPU_LOGE("set thread local context failed, key is empty");
return AICPU_ERROR_FAILED;
}
try {
g_thread_local_ctx[key] = value;
} catch (std::exception &e) {
AICPU_LOGE("set thread local context failed, %s", e.what());
return AICPU_ERROR_FAILED;
}
return AICPU_ERROR_NONE;
}
status_t GetThreadLocalCtx(const std::string &key, std::string *value) {
if (key.empty()) {
AICPU_LOGE("get thread local context failed, key is empty");
return AICPU_ERROR_FAILED;
}
auto iter = g_thread_local_ctx.find(key);
if (iter != g_thread_local_ctx.end()) {
*value = iter->second;
return AICPU_ERROR_NONE;
}
AICPU_LOGW("get thread local context failed, no such key[%s]", key.c_str());
return AICPU_ERROR_FAILED;
}
status_t RemoveThreadLocalCtx(const std::string &key) {
auto iter = g_thread_local_ctx.find(key);
if (iter != g_thread_local_ctx.end()) {
g_thread_local_ctx.erase(iter);
return AICPU_ERROR_NONE;
}
AICPU_LOGE("remove thread local context failed, no such key[%s]", key.c_str());
return AICPU_ERROR_FAILED;
}
const std::map<std::string, std::string> &GetAllThreadCtxInfo(aicpu::CtxType type, uint32_t thread_index) {
AICPU_LOGI("Get all thread ctx info begin, thread index:%u", thread_index);
auto &ctx = GetThreadCtx(type, thread_index);
return ctx;
}
status_t RegisterEventCallback(uint32_t event_id, uint32_t subevent_id, std::function<void(void *)> func) {
std::lock_guard<std::mutex> lock(func_map_mutex);
std::map<uint32_t, std::function<void(void *)>> &sub_map = g_func_map[event_id];
auto it = sub_map.insert(std::make_pair(subevent_id, func));
if (it.second == false) {
AICPU_LOGE(
"register event call function failed, repulicate register callback function by event_id[%u] "
"subevent_id[%u]",
event_id, subevent_id);
return AICPU_ERROR_FAILED;
}
return AICPU_ERROR_NONE;
}
status_t DoEventCallback(uint32_t event_id, uint32_t subevent_id, void *param) {
std::lock_guard<std::mutex> lock(func_map_mutex);
auto iter = g_func_map.find(event_id);
if (iter == g_func_map.end()) {
AICPU_LOGE("do event callback function failed, cannot find callback function by event_id[%u] subevent_id[%u]",
event_id, event_id);
return AICPU_ERROR_FAILED;
}
std::map<uint32_t, std::function<void(void *)>> &sub_map = iter->second;
auto sub_iter = sub_map.find(subevent_id);
if (sub_iter == sub_map.end()) {
AICPU_LOGE("do event callback function failed, cannot find callback function by event_id[%u] subevent_id[%u]",
event_id, event_id);
return AICPU_ERROR_FAILED;
}
(sub_iter->second)(param);
// erase func after call
sub_map.erase(sub_iter);
return AICPU_ERROR_NONE;
}
status_t UnRegisterCallback(uint32_t event_id, uint32_t subevent_id) {
std::lock_guard<std::mutex> lock(func_map_mutex);
auto iter = g_func_map.find(event_id);
if (iter == g_func_map.end()) {
AICPU_LOGEVENT(
"skip unregister event callback function, cannot find callback function by event_id[%u] "
"subevent_id[%u]",
event_id, event_id);
return AICPU_ERROR_NONE;
}
std::map<uint32_t, std::function<void(void *)>> &sub_map = iter->second;
auto sub_iter = sub_map.find(subevent_id);
if (sub_iter == sub_map.end()) {
AICPU_LOGEVENT(
"skip unregister event callback function, cannot find callback function by event_id[%u] "
"subevent_id[%u]",
event_id, event_id);
return AICPU_ERROR_NONE;
}
sub_map.erase(sub_iter);
return AICPU_ERROR_NONE;
}
} // namespace aicpu
aicpu::status_t SetThreadCtxInfo(aicpu::CtxType type, const std::string &key, const std::string &value) {
if (key.empty()) {
AICPU_LOGE("Set thread context failed, context type[%d], key is empty", type);
return aicpu::AICPU_ERROR_FAILED;
}
auto &ctx = GetThreadCtx(type, g_thread_index);
try {
ctx[key] = value;
} catch (std::exception &e) {
AICPU_LOGE("Set thread context failed, context type[%d], %s", type, e.what());
return aicpu::AICPU_ERROR_FAILED;
}
return aicpu::AICPU_ERROR_NONE;
}
aicpu::status_t GetThreadCtxInfo(aicpu::CtxType type, const std::string &key, std::string *value) {
if (key.empty()) {
AICPU_LOGE("Get thread context failed, context type[%d], key is empty", type);
return aicpu::AICPU_ERROR_FAILED;
}
auto &ctx = GetThreadCtx(type, g_thread_index);
auto iter = ctx.find(key);
if (iter != ctx.end()) {
*value = iter->second;
return aicpu::AICPU_ERROR_NONE;
}
AICPU_LOGE("Get thread context failed, context type[%d], no such key[%s]", type, key.c_str());
return aicpu::AICPU_ERROR_FAILED;
}
aicpu::status_t RemoveThreadCtxInfo(aicpu::CtxType type, const std::string &key) {
auto &ctx = GetThreadCtx(type, g_thread_index);
auto iter = ctx.find(key);
if (iter != ctx.end()) {
ctx.erase(iter);
return aicpu::AICPU_ERROR_NONE;
}
AICPU_LOGE("Remove thread context failed, context type[%d], no such key[%s]", type, key.c_str());
return aicpu::AICPU_ERROR_FAILED;
}

View File

@ -0,0 +1,231 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_CONTEXT_H_
#define AICPU_OPS_AICPU_CONTEXT_H_
#include <sys/types.h>
#include <cstdint>
#include <string>
#include <map>
#include <functional>
#include "common/kernel_util.h"
namespace aicpu {
typedef struct {
uint32_t device_id; // device id
uint32_t tsId; // ts id
pid_t host_pid; // host pid
uint32_t vf_id; // vf id
} aicpuContext_t;
enum AicpuRunMode : uint32_t {
PROCESS_PCIE_MODE = 0, // 1910/1980/1951 dc, with host mode
PROCESS_SOCKET_MODE = 1, // MDC
THREAD_MODE = 2, // ctrlcpu/minirc/lhisi
INVALID_MODE,
};
typedef struct {
uint32_t stream_id;
uint64_t task_id;
} streamAndTaskId_t;
typedef enum {
AICPU_ERROR_NONE = 0, // success
AICPU_ERROR_FAILED = 1, // failed
} status_t;
enum CtxType : int32_t { CTX_DEFAULT = 0, CTX_PROF, CTX_DEBUG };
constexpr auto kContextKeyOpName = "opname";
constexpr auto kContextKeyPhaseOneFlag = "phaseOne";
constexpr auto kContextKeyWaitType = "waitType";
constexpr auto kContextKeyWaitId = "waitId";
constexpr auto kContextKeyStartTick = "startTick";
constexpr auto kContextKeyDrvSubmitTick = "drvSubmitTick";
constexpr auto kContextKeyDrvSchedTick = "drvSchedTick";
constexpr auto kContextKeyKernelType = "kernelType";
/**
* set aicpu context for current thread.
* @param [in]ctx aicpu context
* @return status whether this operation success
*/
AICPU_VISIBILITY_API status_t aicpuSetContext(aicpuContext_t *ctx);
/**
* get aicpu context from current thread.
* @param [out]ctx aicpu context
* @return status whether this operation success
*/
AICPU_VISIBILITY_API status_t aicpuGetContext(aicpuContext_t *ctx);
/**
* init context for task monitor, called in compute process start.
* @param [in]aicpu_core_cnt aicpu core number
* @return status whether this operation success
*/
status_t InitTaskMonitorContext(uint32_t aicpu_core_cnt);
/**
* set aicpu thread index for task monitor, called in thread callback function.
* @param [in]thread_index aicpu thread index
* @return status whether this operation success
*/
status_t SetAicpuThreadIndex(uint32_t thread_index);
/**
* get aicpu thread index.
* @return uint32
*/
uint32_t GetAicpuThreadIndex();
/**
* set op name for task monitor.
* called in libtf_kernels.so(tf op) or libaicpu_processer.so(others) or cpu kernel framework.
* @param [in]opname op name
* @return status whether this operation success
*/
status_t __attribute__((weak)) SetOpname(const std::string &opname);
/**
* get op name for task monitor
* @param [in]thread_index thread index
* @param [out]opname op name
* @return status whether this operation success
*/
status_t GetOpname(uint32_t thread_index, std::string *opname);
/**
* get task and stream id.
* @param [in]task_id task id.
* @param [in]stream_id stream id.
* @return status whether this operation success
*/
status_t __attribute__((weak)) GetTaskAndStreamId(uint64_t *task_id, uint32_t *stream_id);
/**
* set task and stream id.
* @param [in]task_id task id.
* @param [in]stream_id stream id.
* @return status whether this operation success
*/
status_t __attribute__((weak)) SetTaskAndStreamId(uint64_t task_id, uint32_t stream_id);
/**
* set thread local context of key
* @param [in]key context key
* @param [in]value context value
* @return status whether this operation success
* @note Deprecated from 20201216, Replaced by SetThreadCtxInfo
*/
status_t __attribute__((weak)) SetThreadLocalCtx(const std::string &key, const std::string &value);
/**
* get thread local context of key
* @param [in]key context key
* @param [out]value context value
* @return status whether this operation success
* @note Deprecated from 20201216, Replaced by GetThreadCtxInfo
*/
status_t GetThreadLocalCtx(const std::string &key, std::string *value);
/**
* remove local context of key
* @param [in]key context key
* @return status whether this operation success
* @note Deprecated from 20201216, Replaced by RemoveThreadCtxInfo
*/
status_t RemoveThreadLocalCtx(const std::string &key);
/**
* get all thread context info of type
* @param [in]type: ctx type
* @param [in]thread_index: thread index
* @return const std::map<std::string, std::string> &: all thread context info
*/
const std::map<std::string, std::string> &GetAllThreadCtxInfo(aicpu::CtxType type, uint32_t thread_index);
/**
* set run mode.
* @param [in]run_mode: run mode.
* @return status whether this operation success
*/
status_t __attribute__((weak)) SetAicpuRunMode(uint32_t run_mode);
/**
* get run mode.
* @param [out]run_mode: run mode.
* @return status whether this operation success
*/
status_t __attribute__((weak)) GetAicpuRunMode(uint32_t *run_mode);
/**
* Register callback function by event_id and subevent_id
* @param event_id event id
* @param subevent_id subevent id
* @param func call back function
*/
status_t __attribute__((weak))
RegisterEventCallback(uint32_t event_id, uint32_t subevent_id, std::function<void(void *)> func);
/**
* Do callback function by event_id and subevent_id
* @param event_id event id
* @param subevent_id subevent id
* @param param event param
*/
status_t __attribute__((weak)) DoEventCallback(uint32_t event_id, uint32_t subevent_id, void *param);
/**
* Unregister callback function by event_id and subevent_id
* @param event_id event id
* @param subevent_id subevent id
*/
status_t __attribute__((weak)) UnRegisterCallback(uint32_t event_id, uint32_t subevent_id);
} // namespace aicpu
extern "C" {
/**
* set thread context info of type
* @param [in]type: ctx type
* @param [in]key: key of context info
* @param [in]value: value of context info
* @return status whether this operation success
*/
AICPU_VISIBILITY_API aicpu::status_t SetThreadCtxInfo(aicpu::CtxType type, const std::string &key,
const std::string &value);
/**
* get thread context info of type
* @param [in]type: ctx type
* @param [in]key: key of context info
* @param [out]value: value of context info
* @return status whether this operation success
*/
AICPU_VISIBILITY_API aicpu::status_t GetThreadCtxInfo(aicpu::CtxType type, const std::string &key, std::string *value);
/**
* remove thread context info of type
* @param [in]type: ctx type
* @param [in]key: key of context info
* @return status whether this operation success
*/
AICPU_VISIBILITY_API aicpu::status_t RemoveThreadCtxInfo(aicpu::CtxType type, const std::string &key);
}
#endif // AICPU_OPS_AICPU_CONTEXT_H_

View File

@ -0,0 +1,58 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "aicpu_sharder/aicpu_pulse.h"
#include <unordered_map>
#include <mutex>
#include <string>
#include "common/kernel_log.h"
namespace {
static std::unordered_map<std::string, PulseNotifyFunc> pulse_notify_func_map;
static std::mutex mtx;
} // namespace
__attribute__((visibility("default"))) void AicpuPulseNotify() {
std::unique_lock<std::mutex> lck(mtx);
AICPU_LOGD("Aicpu pulse notify start, notify func num=%zu.", pulse_notify_func_map.size());
for (auto &notify_func : pulse_notify_func_map) {
AICPU_LOGD("Aicpu pulse notify %s start.", notify_func.first.c_str());
notify_func.second();
AICPU_LOGD("Aicpu pulse notify %s end.", notify_func.first.c_str());
}
AICPU_LOGD("Aicpu pulse notify end.");
}
__attribute__((visibility("default"))) int32_t RegisterPulseNotifyFunc(const char *name, PulseNotifyFunc func) {
if (name == nullptr) {
AICPU_LOGE("Register pulse notify func failed as param name is null");
return -1;
}
if (func == nullptr) {
AICPU_LOGE("Register pulse notify func for %s failed as param func is null", name);
return -1;
}
std::unique_lock<std::mutex> lck(mtx);
auto ret = pulse_notify_func_map.emplace(name, func);
if (!ret.second) {
AICPU_LOGE("Register pulse notify func for %s failed.", name);
return -1;
}
AICPU_LOGI("Register pulse notify func for %s success.", name);
return 0;
}

View File

@ -0,0 +1,46 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_PULSE_H_
#define AICPU_OPS_AICPU_PULSE_H_
#include <cstdint>
#ifdef __cplusplus
extern "C" {
#endif
typedef void (*PulseNotifyFunc)(void);
/**
* aicpu pulse notify.
* timer will call this method per second.
*/
void AicpuPulseNotify(void);
/**
* Register kernel pulse notify func.
* @param name name of kernel lib, must end with '\0' and unique.
* @param func pulse notify function.
* @return 0:success, other:failed.
*/
int32_t RegisterPulseNotifyFunc(const char *name, PulseNotifyFunc func);
#ifdef __cplusplus
}
#endif
#endif // AICPU_OPS_AICPU_PULSE_H_

View File

@ -0,0 +1,218 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "aicpu_sharder/aicpu_sharder.h"
#include <semaphore.h>
#include <unistd.h>
#include <error.h>
#include <atomic>
#include <algorithm>
#include <cerrno>
#include <cstring>
#include "common/kernel_log.h"
namespace aicpu {
#define AICPU_SHARDER_IF_TRUE_RUN(expr, run) \
do { \
if (expr) { \
run; \
} \
} while (0)
void SharderNonBlock::Register(const RunnerBool &schedule, const ClosureBool &do_task, uint32_t cpu_core_num) {
schedule_ = schedule;
do_task_ = do_task;
cpu_core_num_ = cpu_core_num;
}
bool SharderNonBlock::Enqueue(const Closure &closure, bool submit_topic) {
if (schedule_ != nullptr) {
return schedule_(closure, submit_topic);
}
return false;
}
void SharderNonBlock::Schedule(const Closure &closure) {
if (!Enqueue(closure)) {
closure();
}
}
uint32_t SharderNonBlock::GetCPUNum() { return cpu_core_num_; }
SharderNonBlock &SharderNonBlock::GetInstance() {
static SharderNonBlock sharder_non_block;
return sharder_non_block;
}
int64_t SharderNonBlock::CeilMultiple(int64_t x, int64_t base) {
if (base == 0) {
return 0;
}
int64_t ret = x / base;
if ((x % base) != 0) {
ret++;
}
return ret;
}
void SharderNonBlock::ParallelFor(int64_t total, int64_t per_unit_size, const SharderWork &work) {
AICPU_LOGI("total: %lld, per_unit_size: %lld", total, per_unit_size);
if ((total <= 0) || (work == nullptr)) {
AICPU_LOGE("invalid param: total<=0 or work is nullptr");
return;
}
// work itself
if ((schedule_ == nullptr) || (cpu_core_num_ <= 1)) {
AICPU_LOGI("work itself all");
work(0, total);
return;
}
// In order to ensure a smaller scheduling delay, the maximum number of slices is twice the number of CPU cores
const int64_t max_shard_num = static_cast<int64_t>(cpu_core_num_) * 2;
// calculate shard number and block size
// i.e., if total is 118, perUintSize is 2, and cpu_core_num_ is 13
// then shard_num is 24, block_size is 5
int64_t block_size = std::max(int64_t{1}, std::min(total, per_unit_size));
int64_t shard_num = CeilMultiple(total, block_size);
shard_num = std::min(max_shard_num, shard_num);
block_size = CeilMultiple(total, shard_num);
shard_num = CeilMultiple(total, block_size);
AICPU_LOGI("shard number: %lld, block size: %lld", shard_num, block_size);
// There is no need to submit an event if shard_num is 1
if (shard_num == 1) {
AICPU_LOGI("executes on the current thread");
work(0, total);
return;
}
std::atomic<int64_t> count(shard_num); // a counter
sem_t sem;
int32_t sem_init_ret = sem_init(&sem, 0, 0);
if (sem_init_ret == -1) {
AICPU_LOGE("sem_init error with message: %s", strerror(errno));
work(0, total);
return;
}
for (int64_t start = 0; start < total; start += block_size) {
auto limit = std::min(start + block_size, total);
Closure closure = [&sem, &work, &count, start, limit]() {
count--;
// In order to ensure that user's work function exception does not affect multithread services,
// exception capture is needed. Exception type is not cared here, and error log is printed.
try {
work(start, limit);
} catch (...) {
AICPU_LOGE("exception occurred in work function with start: %lld, limit: %lld", start, limit);
}
int32_t sem_post_ret = sem_post(&sem);
AICPU_SHARDER_IF_TRUE_RUN(sem_post_ret == -1, AICPU_LOGE("sem_post error with message: %s", strerror(errno)));
};
// if enqueue fail, work itself
if (!Enqueue(closure, true)) {
AICPU_LOGI("Enqueue fail, [%lld, %lld), work itself", start, limit);
closure();
}
}
if (do_task_ != nullptr) {
bool ret = true;
while ((count > 0) && ret) {
AICPU_LOGI("Main thread do task begin.");
ret = do_task_();
AICPU_LOGI("Main thread do task end.");
}
}
for (int64_t i = 0; i < shard_num; ++i) {
int sem_wait_ret = sem_wait(&sem);
AICPU_SHARDER_IF_TRUE_RUN(sem_wait_ret == -1, AICPU_LOGE("sem_wait error with message: %s", strerror(errno)));
}
int32_t sem_des_ret = sem_destroy(&sem);
AICPU_SHARDER_IF_TRUE_RUN(sem_des_ret == -1, AICPU_LOGE("sem_destroy error with message: %s", strerror(errno)));
}
void SharderNonBlock::ParallelForHash(int64_t total, int64_t cpu_nums, const SharderWork &work) {
AICPU_LOGI("total: %lld, cpu_nums: %d", total, cpu_nums);
if (total <= 0 || work == nullptr) {
AICPU_LOGE("invalid param: total<=0 or work is nullptr");
return;
}
if ((schedule_ == nullptr) || (cpu_core_num_ <= 1)) {
AICPU_LOGE("schedule is nullptr or cpu core num is not enough");
return;
}
std::atomic<int64_t> count(cpu_nums); // a counter
sem_t sem;
int32_t sem_init_ret = sem_init(&sem, 0, 0);
if (sem_init_ret == -1) {
AICPU_LOGE("sem_init error with message: %s", strerror(errno));
return;
}
for (int64_t cur = 0; cur < cpu_nums; cur++) {
Closure closure = [&sem, &work, &count, total, cur]() {
work(total, cur);
count--;
int32_t sem_post_ret = sem_post(&sem);
AICPU_SHARDER_IF_TRUE_RUN(sem_post_ret == -1, AICPU_LOGE("sem_post error with message: %s", strerror(errno)));
};
// if enqueue fail, work itself
if (!Enqueue(closure, true)) {
closure();
}
}
if (do_task_ != nullptr) {
bool ret = true;
while ((count > 0) && ret) {
ret = do_task_();
}
}
for (int64_t i = 0; i < cpu_nums; i++) {
int sem_wait_ret = sem_wait(&sem);
AICPU_SHARDER_IF_TRUE_RUN(sem_wait_ret == -1, AICPU_LOGE("sem_wait error with message: %s", strerror(errno)));
}
int32_t sem_des_ret = sem_destroy(&sem);
AICPU_SHARDER_IF_TRUE_RUN(sem_des_ret == -1, AICPU_LOGE("sem_destroy error with message: %s", strerror(errno)));
}
} // namespace aicpu
/**
* Shards the "total" unit of work refer "perUintSize"
*/
void ParallelFor(int64_t total, int64_t per_unit_size, const aicpu::SharderWork &work) {
aicpu::SharderNonBlock::GetInstance().ParallelFor(total, per_unit_size, work);
}
/**
* Get CPU number
*/
uint32_t GetCPUNum() { return aicpu::SharderNonBlock::GetInstance().GetCPUNum(); }

View File

@ -0,0 +1,132 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_SHARDER_H_
#define AICPU_OPS_AICPU_SHARDER_H_
#include <functional>
#include <vector>
#include "common/kernel_util.h"
namespace aicpu {
using Closure = std::function<void()>;
using ClosureBool = std::function<bool()>;
using RunnerBool = std::function<bool(Closure, bool)>;
using SharderWork = std::function<void(int64_t, int64_t)>;
class SharderNonBlock {
public:
/**
* Get the unique object of this class
*/
static SharderNonBlock &GetInstance();
/**
* Register schedule callback function, do_task function and cpu core number
* called by compute process
* @param schedule Schedule callback function
* @param do_task Callback function for itself schedule
* @param cpu_core_num aicpu core number
*/
void Register(const RunnerBool &schedule, const ClosureBool &do_task, uint32_t cpu_core_num);
/**
* Shards the "total" unit of work refer "perUintSize"
* @param total Total unit of work
* @param per_unit_size Minimum shard unit
* @param work should be a callable taking (int64, int64) arguments.
work(start, limit) computes the work units from [start, limit),
i.e., [start, limit) is a shard.
*/
void ParallelFor(int64_t total, int64_t per_unit_size, const SharderWork &work);
/**
* Shards the unit of work refer for hash
* @param total, Total unit of work
* @param cpu_nums Number of cpu cores
* @param work should be a callable taking (int64, int64) arguments.
work(cur, cpu_nums) computes the work units with input hash with (cpu_nums-1) equals cur,
i.e. specially used by parallel unique op
*/
void ParallelForHash(int64_t total, int64_t cpu_nums, const SharderWork &work);
/**
* Schedule a task use schedule function registered by compute process,
* note that the task will actually executed asynchronously
* @param closure Closure function with nothrow
*/
void Schedule(const Closure &closure);
/**
* Get CPU number
* @param None
* @return CPU number
*/
uint32_t GetCPUNum();
private:
SharderNonBlock() : schedule_(nullptr), do_task_(nullptr), cpu_core_num_(0) {}
~SharderNonBlock() = default;
SharderNonBlock(const SharderNonBlock &) = delete;
SharderNonBlock &operator=(const SharderNonBlock &) = delete;
SharderNonBlock(SharderNonBlock &&) = delete;
SharderNonBlock &operator=(SharderNonBlock &&) = delete;
/**
* Closure function enqueue
* @param closure Closure function can be called
* @param submit_topic whether submit topic, true means submit topic
* @return whether enqueue of closure success
*/
bool Enqueue(const Closure &closure, bool submit_topic = false);
/**
* Calculate how many times, which ceiled, "x" is "base".
* i.e., x is 1, base is 2, this function will return 1
* @param x An integral
* @param base An integral as base when cal multiple
* @return ceiled multiple
*/
inline int64_t CeilMultiple(int64_t x, int64_t base);
private:
RunnerBool schedule_; // enqueue runner
ClosureBool do_task_; // a callback, do task from task queue
uint32_t cpu_core_num_; // aicpu core number
}; // SharderNonBlock
} // namespace aicpu
extern "C" {
/**
* Shards the "total" unit of work refer "perUintSize"
* @param total Total unit of work
* @param per_unit_size Minimum shard unit
* @param work should be a callable taking (int64, int64) arguments.
work(start, limit) computes the work units from [start, limit),
i.e., [start, limit) is a shard.
*/
AICPU_VISIBILITY_API void ParallelFor(int64_t total, int64_t per_unit_size, const aicpu::SharderWork &work);
/**
* Get CPU number
* @param None
* @return CPU number
*/
AICPU_VISIBILITY_API uint32_t GetCPUNum();
}
#endif // AICPU_OPS_AICPU_SHARDER_H_

View File

@ -0,0 +1,62 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_DISTINCT_UNIFORM_INT_DISTRIBUTION_H_
#define AICPU_OPS_AICPU_DISTINCT_UNIFORM_INT_DISTRIBUTION_H_
#include <random>
#include <unordered_set>
namespace aicpu {
template <typename IntType = int>
class distinct_uniform_int_distribution {
public:
using result_type = IntType;
private:
using set_type = std::unordered_set<result_type>;
using distr_type = std::uniform_int_distribution<result_type>;
public:
distinct_uniform_int_distribution(result_type inf, result_type sup)
: inf_(inf), sup_(sup), range_(sup_ - inf_ + 1), distr_(inf_, sup_) {}
~distinct_uniform_int_distribution() = default;
void reset() {
uset_.clear();
distr_.reset();
}
template <typename Generator>
result_type exec(Generator *engine) {
if (!(uset_.size() < range_)) {
std::terminate();
}
result_type res;
do {
res = distr_(*engine);
} while (uset_.count(res) > 0);
uset_.insert(res);
return res;
}
private:
const result_type inf_;
const result_type sup_;
const size_t range_;
distr_type distr_;
set_type uset_;
};
} // namespace aicpu
#endif // AICPU_OPS_AICPU_DISTINCT_UNIFORM_INT_DISTRIBUTION_H_

View File

@ -0,0 +1,255 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <map>
#include "common/kernel_base.h"
#include "common/kernel_errcode.h"
#include "common/tensor.h"
namespace aicpu {
namespace {
// max param len limit 10k.
constexpr uint32_t MAX_PARAM_LEN = 10240;
// max io address num limit 1024
constexpr uint32_t MAX_IO_ADDR_NUMPARAM_LEN = 1024;
} // namespace
static const std::map<const ::aicpuops::DataType, size_t> kKernelBaseDataTypeSize = {
{aicpuops::MS_BOOL, sizeof(bool)}, {aicpuops::MS_INT8, sizeof(int8_t)},
{aicpuops::MS_UINT8, sizeof(uint8_t)}, {aicpuops::MS_INT16, sizeof(int16_t)},
{aicpuops::MS_UINT16, sizeof(uint16_t)}, {aicpuops::MS_INT32, sizeof(int32_t)},
{aicpuops::MS_UINT32, sizeof(uint32_t)}, {aicpuops::MS_INT64, sizeof(int64_t)},
{aicpuops::MS_UINT64, sizeof(uint64_t)}, {aicpuops::MS_FLOAT16, sizeof(float) / 2},
{aicpuops::MS_FLOAT32, sizeof(float)}, {aicpuops::MS_FLOAT64, sizeof(double)}};
KernelBase::KernelBase(const std::string &kernel_name)
: kernel_name_(kernel_name),
extend_param_len_(0),
extend_param_base_(nullptr),
param_head_(nullptr),
unknow_shape_(false) {}
uint32_t KernelBase::ParseParam(void *param) {
if (param == nullptr) {
AICPU_LOGE("Kernel:%s ParseParam param is null.", kernel_name_.c_str());
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
// parse param_len
param_head_ = static_cast<AicpuParamHead *>(param);
if (param_head_->length < sizeof(AicpuParamHead) || param_head_->length > MAX_PARAM_LEN) {
AICPU_LOGE("Kernel:%s param length=%u not in [%zu, %u].", kernel_name_.c_str(), param_head_->length,
sizeof(AicpuParamHead), MAX_PARAM_LEN);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
auto param_base = static_cast<uint8_t *>(param);
extend_param_base_ = param_base + sizeof(AicpuParamHead);
extend_param_len_ = param_head_->length - sizeof(AicpuParamHead);
if (param_head_->ioAddrNum > 0) {
if (param_head_->ioAddrNum > MAX_IO_ADDR_NUMPARAM_LEN) {
AICPU_LOGE("Kernel:%s param ioAddrNum=%u is over %u.", kernel_name_.c_str(), param_head_->ioAddrNum,
MAX_IO_ADDR_NUMPARAM_LEN);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
uint32_t addr_len = param_head_->ioAddrNum * sizeof(uint64_t);
if (extend_param_len_ < addr_len) {
AICPU_LOGE("Kernel:%s extend param is not enough for io addr, ioAddrNum=%u, extendParamLen=%u.",
kernel_name_.c_str(), param_head_->ioAddrNum, extend_param_len_);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
auto io_addr_base = reinterpret_cast<uint64_t *>(extend_param_base_);
for (uint32_t i = 0; i < param_head_->ioAddrNum; ++i) {
io_addrs_.push_back(static_cast<uintptr_t>(io_addr_base[i]));
}
extend_param_base_ = extend_param_base_ + addr_len;
extend_param_len_ -= addr_len;
}
AICPU_CHK_STATUS_RET(ParseNodeDef())
AICPU_CHK_STATUS_RET(ParseExtInfo())
if (unknow_shape_) {
AICPU_LOGI("Unknown shape op: %s", kernel_name_.c_str());
AICPU_CHK_STATUS_RET(UpdateInputShape())
AICPU_CHK_STATUS_RET(UpdateOutputShape())
}
return ParseKernelParam();
}
uint32_t KernelBase::Compute(void *param) {
uint32_t ret = ParseParam(param);
if (ret != AICPU_KERNEL_STATE_SUCCESS) {
AICPU_LOGE("Kernel:%s ParseParam failed, ret=%u.", kernel_name_.c_str(), ret);
return ret;
}
return DoCompute();
}
size_t KernelBase::GetDataTypeSize(::aicpuops::DataType data_type) {
auto it = kKernelBaseDataTypeSize.find(data_type);
if (it == kKernelBaseDataTypeSize.end()) {
AICPU_LOGE("don't support input tensor types");
return 0;
}
return it->second;
}
template <typename T>
uint32_t KernelBase::ParseExtendParam(T *param_var, std::string param_name) {
if (extend_param_len_ < sizeof(T)) {
AICPU_LOGE("Kernel:%s extend param is not enough for [%s] addr, need_len=%u, extendParamLen=%u.",
kernel_name_.c_str(), param_name.c_str(), sizeof(T), extend_param_len_);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
T *param = reinterpret_cast<T *>(extend_param_base_);
if (param != nullptr) {
*param_var = *param;
extend_param_base_ += sizeof(T);
extend_param_len_ -= sizeof(T);
return AICPU_KERNEL_STATE_SUCCESS;
}
AICPU_LOGE("Kernel:%s extend param for [%s] addr is invalid.", kernel_name_.c_str(), param_name.c_str());
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
uint32_t KernelBase::ParseNodeDef() {
uint32_t node_def_len;
AICPU_CHK_STATUS_RET(ParseExtendParam(&node_def_len, "node_def_len"))
if (extend_param_len_ < node_def_len) {
AICPU_LOGE("Kernel:%s extend param is not enough for customizeAttr addr, node_def_len=%u, extendParamLen=%u.",
kernel_name_.c_str(), node_def_len, extend_param_len_);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
std::string std_data(reinterpret_cast<char *>(extend_param_base_), node_def_len);
if (!node_def_.ParseFromString(std_data)) {
AICPU_LOGE("parse %s KernelBase proto failed, nodeDef=%s.", kernel_name_.c_str(), std_data.c_str());
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
extend_param_base_ += node_def_len;
extend_param_len_ -= node_def_len;
return AICPU_KERNEL_STATE_SUCCESS;
}
uint32_t KernelBase::ParseExtShapeType(FWKAdapter::ExtInfo *ext_info) {
if (ext_info->infoLen != sizeof(int32_t)) {
AICPU_LOGE("Kernel:%s parse ext shape type failed as infoLen must be %zu but %u.", kernel_name_.c_str(),
sizeof(int32_t), ext_info->infoLen);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
unknow_shape_ = true;
return AICPU_KERNEL_STATE_SUCCESS;
}
uint32_t KernelBase::ParseExtInputShape(FWKAdapter::ExtInfo *ext_info) {
// no overflow
auto need_len = node_def_.inputs_size() * sizeof(FWKAdapter::ShapeAndType);
if (ext_info->infoLen != need_len) {
AICPU_LOGE(
"Kernel:%s parse ext input shape failed as infoLen must be "
"input_num[%d]*sizeof(ShapeAndType)[%zu], but %u.",
kernel_name_.c_str(), node_def_.inputs_size(), sizeof(FWKAdapter::ShapeAndType), ext_info->infoLen);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
input_shape_and_type_.clear();
auto input = reinterpret_cast<FWKAdapter::ShapeAndType *>(ext_info->infoMsg);
for (int index = 0; index < node_def_.inputs_size(); ++index) {
input_shape_and_type_.emplace_back(&input[index]);
}
return AICPU_KERNEL_STATE_SUCCESS;
}
uint32_t KernelBase::ParseExtOutputShape(FWKAdapter::ExtInfo *ext_info) {
// no overflow
auto need_len = node_def_.outputs_size() * sizeof(FWKAdapter::ShapeAndType);
if (ext_info->infoLen != need_len) {
AICPU_LOGE(
"Kernel:%s parse ext output shape failed as infoLen must be "
"output_num[%d]*sizeof(ShapeAndType)[%zu], but %u.",
kernel_name_.c_str(), node_def_.outputs_size(), sizeof(FWKAdapter::ShapeAndType), ext_info->infoLen);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
output_shape_and_type_.clear();
auto output = reinterpret_cast<FWKAdapter::ShapeAndType *>(ext_info->infoMsg);
for (int index = 0; index < node_def_.outputs_size(); ++index) {
output_shape_and_type_.emplace_back(&output[index]);
}
return AICPU_KERNEL_STATE_SUCCESS;
}
uint32_t KernelBase::ParseExtInfo() {
uint32_t offset = 0;
FWKAdapter::ExtInfo *ext_info_ptr = nullptr;
char *ext_info_buf = reinterpret_cast<char *>(static_cast<uintptr_t>(param_head_->extInfoAddr));
while (offset + sizeof(FWKAdapter::ExtInfo) <= param_head_->extInfoLength) {
ext_info_ptr = reinterpret_cast<FWKAdapter::ExtInfo *>(ext_info_buf + offset);
if (ext_info_ptr == nullptr) {
AICPU_LOGE("Kernel:%s ext_info is nullptr, extInfoLength=%u, extInfoAddr=%p, offset=%zu.", kernel_name_.c_str(),
param_head_->extInfoLength, param_head_->extInfoAddr, offset);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
switch (ext_info_ptr->infoType) {
case FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE:
AICPU_CHK_STATUS_RET(ParseExtShapeType(ext_info_ptr))
break;
case FWKAdapter::FWK_ADPT_EXT_INPUT_SHAPE:
AICPU_CHK_STATUS_RET(ParseExtInputShape(ext_info_ptr))
break;
case FWKAdapter::FWK_ADPT_EXT_OUTPUT_SHAPE:
AICPU_CHK_STATUS_RET(ParseExtOutputShape(ext_info_ptr))
break;
default:
AICPU_LOGI("Kernel:%s ignore infoType=%d, infoLen=%u.", kernel_name_.c_str(), ext_info_ptr->infoType,
ext_info_ptr->infoLen);
break;
}
// not overflow
offset += FWKAdapter::kExtInfoHeadSize;
offset += ext_info_ptr->infoLen;
}
return AICPU_KERNEL_STATE_SUCCESS;
}
uint32_t KernelBase::UpdateInputShape() {
for (int i = 0; i < node_def_.inputs_size(); ++i) {
aicpuops::Tensor *input_tensor = node_def_.mutable_inputs(i);
aicpuops::TensorShape *input_tensor_shape = input_tensor->mutable_tensor_shape();
input_tensor_shape->clear_dim();
for (uint32_t index = 0; index < FWKAdapter::kMaxShapeDims; ++index) {
// LLONG_MIN for dim end flag
if (input_shape_and_type_[i]->dims[index] == LLONG_MIN) {
break;
}
input_tensor_shape->add_dim()->set_size(input_shape_and_type_[i]->dims[index]);
}
}
return AICPU_KERNEL_STATE_SUCCESS;
}
uint32_t KernelBase::UpdateOutputShape() {
for (int i = 0; i < node_def_.outputs_size(); ++i) {
aicpuops::Tensor *output_tensor = node_def_.mutable_outputs(i);
aicpuops::TensorShape *output_tensor_shape = output_tensor->mutable_tensor_shape();
output_tensor_shape->clear_dim();
for (uint32_t index = 0; index < FWKAdapter::kMaxShapeDims; ++index) {
// LLONG_MIN for dim end flag
if (output_shape_and_type_[i]->dims[index] == LLONG_MIN) {
break;
}
output_tensor_shape->add_dim()->set_size(output_shape_and_type_[i]->dims[index]);
}
}
return AICPU_KERNEL_STATE_SUCCESS;
}
} // namespace aicpu

View File

@ -0,0 +1,82 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_COMMON_KERNEL_BASE_H_
#define AICPU_OPS_AICPU_COMMON_KERNEL_BASE_H_
#include <cstdint>
#include <vector>
#include <string>
#include "common/kernel_util.h"
#include "aicpu/common/aicpu_task_struct.h"
#include "securec/include/securec.h"
#include "common/tensor.h"
#include "cce/fwk_adpt_struct.h"
#include "common/kernel_log.h"
#include "proto/aicpu_tensor.pb.h"
namespace aicpu {
class KernelBase {
public:
explicit KernelBase(const std::string &kernel_name);
~KernelBase() = default;
uint32_t Compute(void *param);
size_t GetDataTypeSize(::aicpuops::DataType data_type);
protected:
virtual uint32_t ParseKernelParam() = 0;
virtual uint32_t DoCompute() = 0;
template <typename T>
uint32_t ParseExtendParam(T *param_var, std::string param_name);
uint32_t ParseNodeDef();
uint32_t ParseExtInfo();
uint32_t ParseExtShapeType(FWKAdapter::ExtInfo *ext_info);
uint32_t ParseExtInputShape(FWKAdapter::ExtInfo *ext_info);
uint32_t ParseExtOutputShape(FWKAdapter::ExtInfo *ext_info);
uint32_t UpdateInputShape();
uint32_t UpdateOutputShape();
private:
KernelBase(const KernelBase &) = delete;
KernelBase &operator=(const KernelBase &) = delete;
KernelBase(KernelBase &&) = delete;
KernelBase &operator=(KernelBase &&) = delete;
uint32_t ParseParam(void *param);
protected:
std::string kernel_name_;
std::vector<uintptr_t> io_addrs_;
uint32_t extend_param_len_;
uint8_t *extend_param_base_;
AicpuParamHead *param_head_;
bool unknow_shape_;
aicpuops::NodeDef node_def_;
std::vector<FWKAdapter::ShapeAndType *> input_shape_and_type_;
std::vector<FWKAdapter::ShapeAndType *> output_shape_and_type_;
};
} // namespace aicpu
#endif // AICPU_OPS_AICPU_COMMON_KERNEL_BASE_H_

View File

@ -0,0 +1,30 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_COMMON_KENERL_ERRCODE_H_
#define AICPU_OPS_AICPU_COMMON_KENERL_ERRCODE_H_
namespace aicpu {
enum AicpuKernelErrCode {
// 0-3 is fixed error code, runtime need interpret 0-3 error codes
AICPU_KERNEL_STATE_SUCCESS = 0,
AICPU_KERNEL_STATE_PARAM_INVALID = 1,
AICPU_KERNEL_STATE_FAILED = 2,
AICPU_KERNEL_STATE_EXECUTE_TIMEOUT = 3,
AICPU_KERNEL_STATE_INTERNAL_ERROR = 4,
AICPU_KERNEL_STATE_END_OF_SEQUENCE = 201,
};
} // namespace aicpu
#endif // AICPU_OPS_AICPU_COMMON_KENERL_ERRCODE_H_

View File

@ -0,0 +1,29 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/kernel_log.h"
namespace aicpu {
static int log_level = AICPU_LOG_ERROR;
int LogSetLevel(int level) {
log_level = level;
return log_level;
}
int LogGetLevel(void) { return log_level; }
bool CheckLogLevel(int log_level_check) { return log_level >= log_level_check; }
} // namespace aicpu

View File

@ -0,0 +1,77 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_
#define AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_
#include <unistd.h>
#include <sys/syscall.h>
#include <iostream>
#include <utility>
#include "common/kernel_errcode.h"
inline int GetTid(void) {
thread_local static int tid = syscall(__NR_gettid);
return tid;
}
static const int LOG_COUNT = 0;
namespace aicpu {
#define AICPU_LOG_DEBUG 0
#define AICPU_LOG_INFO 1
#define AICPU_LOG_WARN 2
#define AICPU_LOG_ERROR 3
#define AICPU_LOG_EVENT 0x10
inline void PrintLog(const int level) { std::cerr << level << std::endl; }
template <typename T, typename... Args>
inline void PrintLog(const int level, T &&head, Args &&... tail) {
std::cerr << std::forward<T>(head) << " ";
PrintLog(level, std::forward<Args>(tail)...);
}
int LogSetLevel(int level);
int LogGetLevel(void);
bool CheckLogLevel(int log_level_check);
#define AICPU_LOGD(fmt, ...) \
AICPU_LOG(AICPU_LOG_DEBUG, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGI(fmt, ...) \
AICPU_LOG(AICPU_LOG_INFO, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGW(fmt, ...) \
AICPU_LOG(AICPU_LOG_WARN, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGE(fmt, ...) \
AICPU_LOG(AICPU_LOG_ERROR, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGEVENT(fmt, ...) \
AICPU_LOG(AICPU_LOG_EVENT, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOG(level, fmt, ...) \
do { \
if (aicpu::CheckLogLevel(level)) { \
aicpu::PrintLog(level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (LOG_COUNT != 0)
#define AICPU_CHK_STATUS_RET(expr...) \
do { \
const uint32_t status = (expr); \
if (status != AICPU_KERNEL_STATE_SUCCESS) { \
return status; \
} \
} while (0);
} // namespace aicpu
#endif // AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_

View File

@ -0,0 +1,22 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_COMMON_KERNEL_UTIL_H_
#define AICPU_OPS_AICPU_COMMON_KERNEL_UTIL_H_
#ifndef AICPU_VISIBILITY_API
#define AICPU_VISIBILITY_API __attribute__((visibility("default")))
#endif
#endif // AICPU_OPS_AICPU_COMMON_KERNEL_UTIL_H_

View File

@ -0,0 +1,41 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_COMMON_TENSOR_H_
#define AICPU_OPS_COMMON_TENSOR_H_
#include <atomic>
#include <memory>
#include <string>
#include <vector>
#include <map>
namespace aicpu {
namespace ms {
class Tensor {
public:
Tensor() = default;
~Tensor() = default;
const uint8_t *GetData() const;
size_t GetSize() const;
void SetData(uint8_t *data, size_t size);
private:
uint8_t *tensor_ptr_;
size_t tensor_len_;
};
} // namespace ms
} // namespace aicpu
#endif // AICPU_OPS_COMMON_TENSOR_H_

View File

@ -0,0 +1,280 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "./random_choice_with_mask_kernels.h"
#include <random>
#include <climits>
#include <vector>
#include <algorithm>
#include <string>
#include "aicpu_sharder/aicpu_sharder.h"
#include "proto/aicpu_tensor.pb.h"
#include "common/distinct_uniform_int_distribution.h"
#include "common/tensor.h"
namespace aicpu {
static void ParseOutputCoordinate(std::vector<int64_t> dims_, int32_t output_length, int32_t input_dim_size,
int32_t input_total_count, const int *tmp_output, int *output) {
int it = 0;
int column = input_total_count / dims_[0];
for (int i = 0; i < output_length; i++) {
int32_t tmp_output_number = tmp_output[i];
int tmp_column = column;
for (int j = 0; j < input_dim_size; j++) {
if (j == input_dim_size - 1) {
output[it++] = tmp_output_number;
continue;
}
output[it++] = tmp_output_number / column;
tmp_output_number = tmp_output_number % column;
tmp_column = tmp_column / dims_[j + 1];
}
}
}
static void GetOutputLength(bool *padding_flag, int32_t *output_length, int32_t *output_non_zero_length, int32_t count,
int32_t non_zero_num) {
if (count == 0) {
*padding_flag = false;
*output_length = non_zero_num;
*output_non_zero_length = non_zero_num;
} else if (count > 0 && count <= non_zero_num) {
*padding_flag = false;
*output_length = count;
*output_non_zero_length = count;
} else if (count > non_zero_num) {
*padding_flag = true;
*output_length = count;
*output_non_zero_length = non_zero_num;
} else {
AICPU_LOGI("input count must greater or equal to 0 but instead is %d", count);
}
}
static bool GetInputTotalCount(const std::vector<int64_t> &dims_, int32_t *input_total_count,
const int32_t &input_dim_size) {
const int32_t max_inpu_dim = 5;
if (input_dim_size < 1 || input_dim_size > max_inpu_dim) {
AICPU_LOGE(
"input dim size is %d, it must greater or equal to 1 channels "
"and less than or equal to 5 channels!",
input_dim_size);
return false;
}
for (int32_t i = 0; i < input_dim_size; i++) {
*input_total_count *= dims_[i];
}
if (*input_total_count <= 0) {
AICPU_LOGE("input_total_count is %d, please check setting.", *input_total_count);
return false;
}
return true;
}
static void UpdateOutput(const std::vector<int64_t> &dims_, const int32_t &non_zero_num, const int32_t &count_,
const int32_t &output_length, const int *mask_dim, int32_t *output_coordinate, bool *mask) {
for (int32_t i = non_zero_num * dims_.size(); i < static_cast<int32_t>(count_ * dims_.size()); i++) {
output_coordinate[i] = 0;
}
for (int32_t i = 0; i < output_length; i++) {
mask[i] = static_cast<bool>(mask_dim[i]);
}
for (int32_t i = non_zero_num; i < count_; i++) {
mask[i] = false;
}
}
static bool GenerateRandomMask(const int32_t &output_length, const int32_t &non_zero_num,
const int32_t &output_non_zero_length, int **input_dim, int **tmp_output,
int **mask_dim) {
*tmp_output = reinterpret_cast<int *>(malloc(output_length * sizeof(int)));
if (*tmp_output == nullptr) {
AICPU_LOGE("malloc memory failed!");
free(*input_dim);
return false;
}
std::random_device rd;
std::mt19937 gen(rd());
aicpu::distinct_uniform_int_distribution<> dis(0, non_zero_num - 1);
*mask_dim = reinterpret_cast<int *>(malloc(output_length * sizeof(int)));
if (*mask_dim == nullptr) {
AICPU_LOGE("malloc memory failed!");
free(*input_dim);
free(*tmp_output);
return false;
}
if (memset_s(*mask_dim, output_length, 0x00, output_length) != EOK) {
AICPU_LOGE("memset_s to mask_dim failed!");
free(*input_dim);
free(*tmp_output);
free(*mask_dim);
return false;
}
if (memset_s(*tmp_output, output_length, 0x00, output_length) != EOK) {
AICPU_LOGE("memset_s to tmp_output failed!");
free(*input_dim);
free(*tmp_output);
free(*mask_dim);
return false;
}
if (output_non_zero_length > output_length) {
AICPU_LOGE("output_non_zero_length size is too long!");
free(*input_dim);
free(*tmp_output);
free(*mask_dim);
return false;
}
for (int32_t i = 0; i < output_non_zero_length; i++) {
int32_t mean = dis.exec(&gen);
*((*tmp_output) + i) = *((*input_dim) + mean);
*((*mask_dim) + i) = 1;
}
return true;
}
uint32_t RandomChoiceWithMaskKernel::DoCompute() {
auto *input = reinterpret_cast<bool *>(io_addrs_[0]);
auto *output_coordinate = reinterpret_cast<int32_t *>(io_addrs_[1]);
auto *mask = reinterpret_cast<bool *>(io_addrs_[2]);
int32_t input_dim_size = dims_.size();
int32_t non_zero_num = 0;
int32_t input_total_count = 1;
bool ret = GetInputTotalCount(dims_, &input_total_count, input_dim_size);
if (!ret) {
AICPU_LOGE("Get input total count failed!");
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
}
int *input_dim = reinterpret_cast<int *>(malloc(input_total_count * sizeof(int)));
if (input_dim == nullptr) {
AICPU_LOGE("Malloc memory failed!");
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
}
for (int32_t i = 0; i < input_total_count; i++) {
if (input[i] != 0) {
input_dim[non_zero_num] = i;
non_zero_num++;
}
}
bool padding_flag = false;
int32_t output_length = 0;
int32_t output_non_zero_length = 0;
GetOutputLength(&padding_flag, &output_length, &output_non_zero_length, count_, non_zero_num);
int *tmp_output = nullptr;
int *mask_dim = nullptr;
ret = GenerateRandomMask(output_length, non_zero_num, output_non_zero_length, &input_dim, &tmp_output, &mask_dim);
if (!ret) {
AICPU_LOGE("Generate random mask failed!");
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
}
if (padding_flag) {
int32_t index = 0;
for (int32_t i = output_length - 1; i > non_zero_num; i--) {
tmp_output[non_zero_num + index] = 0;
mask_dim[non_zero_num + index] = 0;
index++;
}
}
int32_t copy_output_length = 0;
if (output_length * input_dim_size >= INT_MAX || output_length * input_dim_size < 0) {
AICPU_LOGE("Output size exceed INT_MAX");
free(input_dim);
free(tmp_output);
free(mask_dim);
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
}
copy_output_length = output_length * input_dim_size;
int *output = reinterpret_cast<int *>(malloc(copy_output_length * sizeof(int)));
if (output == nullptr) {
AICPU_LOGE("malloc memory failed!");
free(input_dim);
free(tmp_output);
free(mask_dim);
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
}
if (memset_s(output, copy_output_length, 0x00, copy_output_length) != EOK) {
AICPU_LOGE("memset_s memory failed!");
free(input_dim);
free(mask_dim);
free(tmp_output);
free(output);
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
}
ParseOutputCoordinate(dims_, output_length, input_dim_size, input_total_count, tmp_output, output);
int32_t actual_output_length = count_ * dims_.size();
copy_output_length = std::min(actual_output_length, copy_output_length);
int32_t copy_output_bytes = 0;
if (INT_MAX / static_cast<int>(sizeof(int32_t)) < copy_output_length) {
AICPU_LOGE("The output length is out of range!");
free(input_dim);
free(mask_dim);
free(tmp_output);
free(output);
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
}
copy_output_bytes = copy_output_length * sizeof(int32_t);
memcpy_s(output_coordinate, copy_output_bytes, output, copy_output_bytes);
UpdateOutput(dims_, non_zero_num, count_, output_length, mask_dim, output_coordinate, mask);
AICPU_LOGI("no zero num is %d, output_length is %d ", non_zero_num, output_length);
UpdateOutputShapeValue(non_zero_num, output_length);
free(input_dim);
free(mask_dim);
free(tmp_output);
free(output);
return AICPU_KERNEL_STATE_SUCCESS;
}
void RandomChoiceWithMaskKernel::UpdateOutputShapeValue(int32_t non_zero_num, int32_t output_length) {
if (unknow_shape_) {
output_shape_and_type_[0]->dims[0] = non_zero_num;
output_shape_and_type_[1]->dims[0] = output_length;
}
}
uint32_t RandomChoiceWithMaskKernel::ParseKernelParam() {
::google::protobuf::Map<::std::string, ::aicpuops::AttrValue> nodedef_map = node_def_.attrs();
aicpuops::AttrValue random_choice_count_attrs = nodedef_map["count"];
count_ = random_choice_count_attrs.i();
AICPU_LOGI("This op attr count is %d", count_);
if ((count_ == 0) && (!unknow_shape_)) {
AICPU_LOGE("This op attr count is 0, but the shapetype is %d", unknow_shape_);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
size_t inputs_size = node_def_.inputs_size();
for (size_t i = 0; i < inputs_size; i++) {
aicpuops::Tensor input_tensor = node_def_.inputs(i);
aicpuops::TensorShape input_shape = input_tensor.tensor_shape();
for (int j = 0; j < input_shape.dim_size(); j++) {
dims_.push_back(input_shape.dim(j).size());
}
}
return AICPU_KERNEL_STATE_SUCCESS;
}
} // namespace aicpu
extern "C" {
__attribute__((visibility("default"))) uint32_t RandomChoiceWithMask(void *param) {
aicpu::RandomChoiceWithMaskKernel randomChoiceWithMaskKernel;
return randomChoiceWithMaskKernel.Compute(param);
}
}

View File

@ -0,0 +1,36 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_RANDOM_CHOICE_WITH_MASK_KERNELS_H_
#define AICPU_OPS_AICPU_RANDOM_CHOICE_WITH_MASK_KERNELS_H_
#include <vector>
#include "common/kernel_base.h"
namespace aicpu {
class RandomChoiceWithMaskKernel : public KernelBase {
public:
RandomChoiceWithMaskKernel() : KernelBase("RandomChoiceWithMask") {}
~RandomChoiceWithMaskKernel() = default;
protected:
int32_t count_ = 0;
std::vector<int64_t> dims_;
uint32_t DoCompute() override;
uint32_t ParseKernelParam() override;
void UpdateOutputShapeValue(int32_t non_zero_num, int32_t output_length);
};
} // namespace aicpu
#endif // AICPU_OPS_AICPU_RANDOM_CHOICE_WITH_MASK_KERNELS_H_