!26853 Mindspore support aicpu ops compile.
Merge pull request !26853 from linqingke/aicpu
This commit is contained in:
commit
973b8ffec9
|
@ -13,6 +13,11 @@ set(FBS_FILES
|
|||
)
|
||||
ms_build_flatbuffers(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}../../schema generated_fbs_files ${SERVER_FLATBUFFER_OUTPUT})
|
||||
|
||||
if(ENABLE_D)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/backend/kernel_compiler/aicpu/aicpu_ops)
|
||||
add_subdirectory(backend/kernel_compiler/aicpu/aicpu_ops)
|
||||
endif()
|
||||
|
||||
if(ENABLE_CPU)
|
||||
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
||||
set(PLATFORM_ARM64 "on")
|
||||
|
|
|
@ -18,6 +18,10 @@ if(ENABLE_D)
|
|||
"rts/*.cc"
|
||||
"hccl/*.cc"
|
||||
)
|
||||
file(GLOB_RECURSE AICPU_OPS_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"aicpu/aicpu_ops/*.cc"
|
||||
)
|
||||
list(REMOVE_ITEM D_SRC_LIST ${AICPU_OPS_SRC})
|
||||
add_compile_definitions(ENABLE_D)
|
||||
endif()
|
||||
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
set(NORMAL_CMAKE_C_COMPILER ${CMAKE_C_COMPILER})
|
||||
set(NORMAL_CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
|
||||
if(DEFINED ENV{ASCEND_CUSTOM_PATH})
|
||||
set(TOOLCHAIN_PATH $ENV{ASCEND_CUSTOM_PATH}/toolkit/toolchain)
|
||||
else()
|
||||
set(TOOLCHAIN_PATH /usr/local/Ascend/toolkit/toolchain)
|
||||
endif()
|
||||
set(CMAKE_C_COMPILER ${TOOLCHAIN_PATH}/hcc/bin/aarch64-target-linux-gnu-gcc)
|
||||
set(CMAKE_CXX_COMPILER ${TOOLCHAIN_PATH}/hcc/bin/aarch64-target-linux-gnu-g++)
|
||||
|
||||
if(EXISTS ${CMAKE_C_COMPILER} AND EXISTS ${CMAKE_CXX_COMPILER})
|
||||
set(AICPU_PROTO_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_op_proto/aicpu_tensor.proto
|
||||
)
|
||||
|
||||
ms_protobuf_generate(PROTO_SRCS PROTO_HDRS ${AICPU_PROTO_SRC})
|
||||
|
||||
set(AICPU_SRC
|
||||
${PROTO_SRCS}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common/kernel_base.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common/kernel_log.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_async_event.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_context.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_pulse.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_sharder.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/random_choice_with_mask_kernels.cc
|
||||
)
|
||||
|
||||
add_library(aicpu_kernels SHARED
|
||||
${AICPU_SRC}
|
||||
)
|
||||
|
||||
target_compile_options(aicpu_kernels PRIVATE
|
||||
-march=armv8-a
|
||||
-O2
|
||||
-fvisibility-inlines-hidden
|
||||
-fvisibility=hidden
|
||||
-fno-strict-aliasing
|
||||
-fno-common
|
||||
)
|
||||
|
||||
target_link_libraries(aicpu_kernels PRIVATE
|
||||
-ldl
|
||||
-shared
|
||||
PUBLIC
|
||||
${SECUREC_LIBRARY}
|
||||
-Wl,--whole-archive
|
||||
-Wl,--no-whole-archive
|
||||
-Wl,-Bsymbolic
|
||||
-rdynamic
|
||||
mindspore::protobuf
|
||||
-pthread
|
||||
)
|
||||
|
||||
set(INSTALL_LIBRARY_DIR lib)
|
||||
install(TARGETS aicpu_kernels OPTIONAL
|
||||
EXPORT aicpu_kernels-targets
|
||||
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
|
||||
)
|
||||
|
||||
endif()
|
||||
|
||||
set(CMAKE_C_COMPILER ${NORMAL_CMAKE_C_COMPILER})
|
||||
set(CMAKE_CXX_COMPILER ${NORMAL_CMAKE_CXX_COMPILER})
|
|
@ -0,0 +1,118 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
syntax = "proto3";
|
||||
package aicpuops;
|
||||
|
||||
message AttrValue {
|
||||
|
||||
message ArrayValue {
|
||||
repeated bytes s = 2; //"array(string)"
|
||||
repeated int64 i = 3 [ packed = true ]; //"array(int)"
|
||||
repeated float f = 4 [ packed = true ]; //"array(float)"
|
||||
repeated bool b = 5 [ packed = true ]; //"array(bool)"
|
||||
repeated int32 type = 6 [ packed = true ]; //"array(type)"
|
||||
repeated TensorShape shape = 7; //"array(shape)"
|
||||
repeated Tensor tensor = 8; //"array(tensor)"
|
||||
}
|
||||
|
||||
oneof value {
|
||||
ArrayValue array = 1;
|
||||
bytes s = 2; //"string"
|
||||
int64 i = 3; //"int"
|
||||
float f = 4; //"float"
|
||||
bool b = 5; //"bool"
|
||||
int32 type = 6; //"type"
|
||||
TensorShape shape = 7; //"shape"
|
||||
Tensor tensor = 8; //"tensor"
|
||||
}
|
||||
}
|
||||
|
||||
message DynamicIdx {
|
||||
int32 idx = 1;
|
||||
int32 num = 2;
|
||||
}
|
||||
|
||||
message NodeDef {
|
||||
string op = 2;
|
||||
map<string, AttrValue> attrs = 3;
|
||||
repeated Tensor inputs = 4;
|
||||
repeated Tensor outputs = 5;
|
||||
map<string, DynamicIdx> dym_inputs = 6;
|
||||
map<string, DynamicIdx> dym_outputs = 7;
|
||||
}
|
||||
|
||||
message TensorShape {
|
||||
// One dimension of the tensor.
|
||||
message Dim {
|
||||
// size must >=0
|
||||
int64 size = 1;
|
||||
};
|
||||
|
||||
// group dim info
|
||||
repeated Dim dim = 2;
|
||||
|
||||
// If true, the number of dimensions in the shape is unknown.
|
||||
// If true, "dim.size()" must be 0.
|
||||
bool unknown_rank = 3;
|
||||
|
||||
// data format "NHWC" "NCHW" "NC1HWC0" OR "NONE"
|
||||
int32 data_format = 4;
|
||||
};
|
||||
|
||||
message Tensor {
|
||||
|
||||
// tensor shape info
|
||||
TensorShape tensor_shape = 1;
|
||||
|
||||
// tensor content data type
|
||||
int32 tensor_type = 2;
|
||||
|
||||
// tensor memory device
|
||||
// data located memory device , "DDR" "HBM" OR "NONE"
|
||||
string mem_device = 3;
|
||||
string name = 4;
|
||||
uint64 data_ptr = 5;
|
||||
uint64 data_size = 6;
|
||||
}
|
||||
|
||||
enum DataType {
|
||||
MS_FLOAT32 = 0;
|
||||
MS_FLOAT16 = 1;
|
||||
MS_INT8 = 2;
|
||||
MS_INT32 = 3;
|
||||
MS_UINT8 = 4;
|
||||
MS_INT16 = 6;
|
||||
MS_UINT16 = 7;
|
||||
MS_UINT32 = 8;
|
||||
MS_INT64 = 9;
|
||||
MS_UINT64 = 10;
|
||||
MS_FLOAT64 = 11;
|
||||
MS_BOOL = 12;
|
||||
MS_STRING = 13;
|
||||
MS_DUAL_SUB_INT8 = 14;
|
||||
MS_DUAL_SUB_UINT8 = 15;
|
||||
MS_COMPLEX64 = 16;
|
||||
MS_COMPLEX128 = 17;
|
||||
MS_QINT8 = 18;
|
||||
MS_QINT16 = 19;
|
||||
MS_QINT32 = 20;
|
||||
MS_QUINT8 = 21;
|
||||
MS_QUINT16 = 22;
|
||||
MS_RESOURCE = 23;
|
||||
MS_STRING_REF = 24;
|
||||
MS_DUAL = 25;
|
||||
MS_UNKNOWN = 26;
|
||||
}
|
|
@ -0,0 +1,137 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "aicpu_sharder/aicpu_async_event.h"
|
||||
#include <string>
|
||||
#include "common/kernel_log.h"
|
||||
#include "aicpu_sharder/aicpu_context.h"
|
||||
|
||||
namespace aicpu {
|
||||
AsyncEventManager &AsyncEventManager::GetInstance() {
|
||||
static AsyncEventManager async_event_manager;
|
||||
return async_event_manager;
|
||||
}
|
||||
|
||||
void AsyncEventManager::Register(const NotifyFunc ¬ify) { notify_func_ = notify; }
|
||||
|
||||
void AsyncEventManager::NotifyWait(void *notify_param, const uint32_t param_len) {
|
||||
if (notify_func_ != nullptr) {
|
||||
notify_func_(notify_param, param_len);
|
||||
}
|
||||
}
|
||||
|
||||
bool AsyncEventManager::GenTaskInfoFromCtx(AsyncTaskInfo *task_info) {
|
||||
if (task_info == nullptr) {
|
||||
AICPU_LOGE("AsyncEventManager GenTaskInfoFromCtx failed, task_info is nullptr.");
|
||||
return false;
|
||||
}
|
||||
(void)aicpu::GetTaskAndStreamId(&task_info->task_id, &task_info->stream_id);
|
||||
std::string wait_id_value;
|
||||
std::string ker_wait_id(aicpu::kContextKeyWaitId);
|
||||
auto status = aicpu::GetThreadLocalCtx(ker_wait_id, &wait_id_value);
|
||||
if (status != aicpu::AICPU_ERROR_NONE) {
|
||||
AICPU_LOGE("GetThreadLocalCtx failed, ret=%d, key=%s.", status, ker_wait_id.c_str());
|
||||
return false;
|
||||
}
|
||||
task_info->wait_id = atoi(wait_id_value.c_str());
|
||||
std::string wait_type_value;
|
||||
std::string key_wait_type(aicpu::kContextKeyWaitType);
|
||||
status = aicpu::GetThreadLocalCtx(key_wait_type, &wait_type_value);
|
||||
if (status != aicpu::AICPU_ERROR_NONE) {
|
||||
AICPU_LOGE("GetThreadLocalCtx failed, ret=%d, key=%s.", status, key_wait_type.c_str());
|
||||
return false;
|
||||
}
|
||||
task_info->wait_type = atoi(wait_type_value.c_str());
|
||||
std::string start_tick_value;
|
||||
std::string key_start_tick(aicpu::kContextKeyStartTick);
|
||||
status = aicpu::GetThreadLocalCtx(key_start_tick, &start_tick_value);
|
||||
if (status != aicpu::AICPU_ERROR_NONE) {
|
||||
AICPU_LOGE("GetThreadLocalCtx failed, ret=%d, key=%s.", status, key_start_tick.c_str());
|
||||
return false;
|
||||
}
|
||||
task_info->start_tick = atol(start_tick_value.c_str());
|
||||
status = aicpu::GetOpname(aicpu::GetAicpuThreadIndex(), &task_info->op_name);
|
||||
if (status != aicpu::AICPU_ERROR_NONE) {
|
||||
AICPU_LOGE("GetOpname failed, ret=%d.", status);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AsyncEventManager::RegEventCb(const uint32_t event_id, const uint32_t sub_event_id,
|
||||
const EventProcessCallBack &cb) {
|
||||
if (cb == nullptr) {
|
||||
AICPU_LOGE("AsyncEventManager RegEventCb failed, cb is nullptr.");
|
||||
return false;
|
||||
}
|
||||
AsyncTaskInfo task_info;
|
||||
task_info.task_cb = cb;
|
||||
if (!GenTaskInfoFromCtx(&task_info)) {
|
||||
AICPU_LOGE("AsyncEventManager GenTaskInfoFromCtx failed.");
|
||||
return false;
|
||||
}
|
||||
AsyncEventInfo info;
|
||||
info.event_id = event_id;
|
||||
info.sub_event_id = sub_event_id;
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(map_mutex_);
|
||||
auto iter = asyncTaskMap_.find(info);
|
||||
if (iter != asyncTaskMap_.end()) {
|
||||
AICPU_LOGE("AsyncEventManager RegEventCb failed.");
|
||||
return false;
|
||||
}
|
||||
asyncTaskMap_[info] = task_info;
|
||||
}
|
||||
|
||||
AICPU_LOGI(
|
||||
"AsyncEventManager RegEventCb success, event_id[%u], subeventId[%u], taskId[%lu],"
|
||||
" streamId[%u], waitType[%u], waitId[%u], opName[%s], startTick[%lu].",
|
||||
event_id, sub_event_id, task_info.task_id, task_info.stream_id, task_info.wait_type, task_info.wait_id,
|
||||
task_info.op_name.c_str(), task_info.start_tick);
|
||||
return true;
|
||||
}
|
||||
|
||||
void AsyncEventManager::ProcessEvent(const uint32_t event_id, const uint32_t sub_event_id, void *param) {
|
||||
AICPU_LOGI("AsyncEventManager proc event_id = %d, sub_event_id = %d", event_id, sub_event_id);
|
||||
AsyncEventInfo info;
|
||||
info.event_id = event_id;
|
||||
info.sub_event_id = sub_event_id;
|
||||
EventProcessCallBack taskCb = nullptr;
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(map_mutex_);
|
||||
auto iter = asyncTaskMap_.find(info);
|
||||
if (iter == asyncTaskMap_.end()) {
|
||||
AICPU_LOGW("AsyncEventManager no async task to deal with.");
|
||||
return;
|
||||
}
|
||||
taskCb = iter->second.task_cb;
|
||||
asyncTaskMap_.erase(iter);
|
||||
}
|
||||
if (taskCb != nullptr) {
|
||||
taskCb(param);
|
||||
}
|
||||
AICPU_LOGI("AsyncEventManager proc end!");
|
||||
return;
|
||||
}
|
||||
} // namespace aicpu
|
||||
|
||||
void AicpuNotifyWait(void *notify_param, const uint32_t param_len) {
|
||||
aicpu::AsyncEventManager::GetInstance().NotifyWait(notify_param, param_len);
|
||||
return;
|
||||
}
|
||||
|
||||
bool AicpuRegEventCb(const uint32_t event_id, const uint32_t sub_event_id, const aicpu::EventProcessCallBack &cb) {
|
||||
return aicpu::AsyncEventManager::GetInstance().RegEventCb(event_id, sub_event_id, cb);
|
||||
}
|
|
@ -0,0 +1,144 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef AICPU_OPS_AICPU_ASYNC_EVENT_H_
|
||||
#define AICPU_OPS_AICPU_ASYNC_EVENT_H_
|
||||
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include "aicpu_sharder/aicpu_context.h"
|
||||
|
||||
namespace aicpu {
|
||||
using NotifyFunc = std::function<void(void *param, const uint32_t param_len)>;
|
||||
using EventProcessCallBack = std::function<void(void *param)>;
|
||||
|
||||
struct AsyncEventInfo {
|
||||
uint32_t event_id;
|
||||
uint32_t sub_event_id;
|
||||
|
||||
bool operator==(const AsyncEventInfo &info) {
|
||||
return (event_id == info.event_id) && (sub_event_id == info.sub_event_id);
|
||||
}
|
||||
};
|
||||
|
||||
inline bool operator<(const AsyncEventInfo &info1, const AsyncEventInfo &info2) {
|
||||
return (info1.event_id < info2.event_id) ||
|
||||
((info1.event_id == info2.event_id) && (info1.sub_event_id < info2.sub_event_id));
|
||||
}
|
||||
|
||||
struct AsyncTaskInfo {
|
||||
uint64_t start_tick;
|
||||
std::string op_name;
|
||||
uint8_t wait_type;
|
||||
uint32_t wait_id;
|
||||
uint64_t task_id;
|
||||
uint32_t stream_id;
|
||||
EventProcessCallBack task_cb;
|
||||
};
|
||||
|
||||
struct AsyncNotifyInfo {
|
||||
uint8_t wait_type;
|
||||
uint32_t wait_id;
|
||||
uint64_t task_id;
|
||||
uint32_t stream_id;
|
||||
uint32_t ret_code;
|
||||
aicpu::aicpuContext_t ctx;
|
||||
};
|
||||
|
||||
class AsyncEventManager {
|
||||
public:
|
||||
/**
|
||||
* Get the unique object of this class
|
||||
*/
|
||||
static AsyncEventManager &GetInstance();
|
||||
|
||||
/**
|
||||
* Register notify callback function
|
||||
* @param notify wait notify callback function
|
||||
*/
|
||||
void Register(const NotifyFunc ¬ify);
|
||||
|
||||
/**
|
||||
* Notify wait task
|
||||
* @param notify_param notify param info
|
||||
* @param param_len notify_param len
|
||||
*/
|
||||
void NotifyWait(void *notify_param, const uint32_t param_len);
|
||||
|
||||
/**
|
||||
* Register Event callback function, async op call
|
||||
* @param eventID EventId
|
||||
* @param sub_event_id queue id
|
||||
* @param cb Event callback function
|
||||
* @return whether register success
|
||||
*/
|
||||
bool RegEventCb(const uint32_t event_id, const uint32_t sub_event_id, const EventProcessCallBack &cb);
|
||||
|
||||
/**
|
||||
* Process event
|
||||
* @param event_id EventId
|
||||
* @param sub_event_id queue id
|
||||
* @param param event param
|
||||
*/
|
||||
void ProcessEvent(const uint32_t event_id, const uint32_t sub_event_id, void *param = nullptr);
|
||||
|
||||
private:
|
||||
AsyncEventManager() : notify_func_(nullptr) {}
|
||||
~AsyncEventManager() = default;
|
||||
|
||||
AsyncEventManager(const AsyncEventManager &) = delete;
|
||||
AsyncEventManager &operator=(const AsyncEventManager &) = delete;
|
||||
AsyncEventManager(AsyncEventManager &&) = delete;
|
||||
AsyncEventManager &operator=(AsyncEventManager &&) = delete;
|
||||
|
||||
// generate task info from ctx
|
||||
bool GenTaskInfoFromCtx(AsyncTaskInfo *task_info);
|
||||
|
||||
// wait notify function
|
||||
NotifyFunc notify_func_;
|
||||
|
||||
std::mutex map_mutex_;
|
||||
|
||||
std::map<AsyncEventInfo, AsyncTaskInfo> asyncTaskMap_;
|
||||
};
|
||||
} // namespace aicpu
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/**
|
||||
* Notify wait task
|
||||
* @param notify_param notify info
|
||||
* @param param_len
|
||||
*/
|
||||
__attribute__((weak)) void AicpuNotifyWait(void *notify_param, const uint32_t param_len);
|
||||
|
||||
/**
|
||||
* Register Event callback function, async op call
|
||||
* @param info Registered event information
|
||||
* @param cb Event callback function
|
||||
* @return whether register success
|
||||
*/
|
||||
__attribute__((weak)) bool AicpuRegEventCb(const uint32_t event_id, const uint32_t sub_event_id,
|
||||
const aicpu::EventProcessCallBack &cb);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // AICPU_OPS_AICPU_ASYNC_EVENT_H_
|
|
@ -0,0 +1,308 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "aicpu_sharder/aicpu_context.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
||||
#include "common/kernel_log.h"
|
||||
|
||||
namespace {
|
||||
// current thread context
|
||||
aicpu::aicpuContext_t g_cur_ctx;
|
||||
// task monitor context
|
||||
std::unique_ptr<std::string[]> g_opsname(nullptr);
|
||||
thread_local uint32_t g_thread_index = UINT32_MAX;
|
||||
uint32_t g_aicpu_core_cnt = 0;
|
||||
thread_local std::map<std::string, std::string> g_thread_local_ctx;
|
||||
thread_local aicpu::streamAndTaskId_t g_stream_and_task_id;
|
||||
// aicpu run mode
|
||||
uint32_t g_run_mode = aicpu::AicpuRunMode::THREAD_MODE;
|
||||
|
||||
// context info
|
||||
std::mutex default_mutex;
|
||||
std::vector<std::map<std::string, std::string>> g_default_thread_ctx;
|
||||
std::mutex prof_mutex;
|
||||
std::vector<std::map<std::string, std::string>> g_prof_thread_ctx;
|
||||
std::mutex debug_mutex;
|
||||
std::vector<std::map<std::string, std::string>> g_debug_thread_ctx;
|
||||
std::mutex func_map_mutex;
|
||||
std::map<uint32_t, std::map<uint32_t, std::function<void(void *)>>> g_func_map;
|
||||
|
||||
std::map<std::string, std::string> &GetThreadCtx(aicpu::CtxType type, uint32_t thread_index) {
|
||||
if (type == aicpu::CTX_DEBUG) {
|
||||
std::unique_lock<std::mutex> mutex(default_mutex);
|
||||
if (thread_index >= g_debug_thread_ctx.size()) {
|
||||
g_debug_thread_ctx.resize(thread_index + 1);
|
||||
}
|
||||
return g_debug_thread_ctx[thread_index];
|
||||
} else if (type == aicpu::CTX_PROF) {
|
||||
std::unique_lock<std::mutex> mutex(prof_mutex);
|
||||
if (thread_index >= g_prof_thread_ctx.size()) {
|
||||
g_prof_thread_ctx.resize(thread_index + 1);
|
||||
}
|
||||
return g_prof_thread_ctx[thread_index];
|
||||
} else {
|
||||
std::unique_lock<std::mutex> mutex(debug_mutex);
|
||||
if (thread_index >= g_default_thread_ctx.size()) {
|
||||
g_default_thread_ctx.resize(thread_index + 1);
|
||||
}
|
||||
return g_default_thread_ctx[thread_index];
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace aicpu {
|
||||
status_t aicpuSetContext(aicpuContext_t *ctx) {
|
||||
g_cur_ctx = *ctx;
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t aicpuGetContext(aicpuContext_t *ctx) {
|
||||
*ctx = g_cur_ctx;
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t InitTaskMonitorContext(uint32_t aicpu_core_cnt) {
|
||||
if (aicpu_core_cnt == 0) {
|
||||
AICPU_LOGE("invalid aicpu core count[%u]", aicpu_core_cnt);
|
||||
return AICPU_ERROR_FAILED;
|
||||
}
|
||||
g_aicpu_core_cnt = aicpu_core_cnt;
|
||||
AICPU_LOGI("aicpu core count[%u]", aicpu_core_cnt);
|
||||
g_opsname.reset(new (std::nothrow) std::string[aicpu_core_cnt]);
|
||||
if (g_opsname == nullptr) {
|
||||
AICPU_LOGE("malloc ops name memory for task monitor failed");
|
||||
return AICPU_ERROR_FAILED;
|
||||
}
|
||||
for (uint32_t index = 0; index < aicpu_core_cnt; ++index) {
|
||||
g_opsname[index] = "null";
|
||||
}
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t SetAicpuThreadIndex(uint32_t thread_index) {
|
||||
g_thread_index = thread_index;
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
uint32_t GetAicpuThreadIndex() { return g_thread_index; }
|
||||
|
||||
status_t SetOpname(const std::string &opname) {
|
||||
if (g_opsname != nullptr && g_thread_index < g_aicpu_core_cnt) {
|
||||
AICPU_LOGI("set op name to %s for thread[%u]", opname.c_str(), g_thread_index);
|
||||
g_opsname[g_thread_index] = opname;
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
// maintenance function, if failed just print event log
|
||||
AICPU_LOGEVENT(
|
||||
"set op name[%s] failed, thread index[%u] should be less than total aicpu core count[%u],"
|
||||
" and ops name array addr[%p] cannot null",
|
||||
opname.c_str(), g_thread_index, g_aicpu_core_cnt, g_opsname.get());
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t GetOpname(uint32_t thread_index, std::string *opname) {
|
||||
*opname = "null";
|
||||
if (g_opsname != nullptr && thread_index < g_aicpu_core_cnt) {
|
||||
*opname = g_opsname[thread_index];
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
// maintenance function, if failed just print event log
|
||||
AICPU_LOGEVENT(
|
||||
"get op name failed, thread index[%u] should be less than total aicpu core count[%u],"
|
||||
" and ops name array addr[%p] cannot null",
|
||||
g_thread_index, g_aicpu_core_cnt, g_opsname.get());
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t SetTaskAndStreamId(uint64_t task_id, uint32_t stream_id) {
|
||||
g_stream_and_task_id.task_id = task_id;
|
||||
g_stream_and_task_id.stream_id = stream_id;
|
||||
AICPU_LOGI("Set task_id:[%lu] and stream_id:[%u] success.", task_id, stream_id);
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t GetTaskAndStreamId(uint64_t *task_id, uint32_t *stream_id) {
|
||||
*task_id = g_stream_and_task_id.task_id;
|
||||
*stream_id = g_stream_and_task_id.stream_id;
|
||||
AICPU_LOGI("Get task_id:[%lu] and stream_id:[%u] success.", *task_id, *stream_id);
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t SetAicpuRunMode(uint32_t run_mode) {
|
||||
g_run_mode = run_mode;
|
||||
AICPU_LOGI("Set run_mode:[%u] success.", run_mode);
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t GetAicpuRunMode(uint32_t *run_mode) {
|
||||
*run_mode = g_run_mode;
|
||||
AICPU_LOGI("Get run_mode:[%u] success.", *run_mode);
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t SetThreadLocalCtx(const std::string &key, const std::string &value) {
|
||||
if (key.empty()) {
|
||||
AICPU_LOGE("set thread local context failed, key is empty");
|
||||
return AICPU_ERROR_FAILED;
|
||||
}
|
||||
try {
|
||||
g_thread_local_ctx[key] = value;
|
||||
} catch (std::exception &e) {
|
||||
AICPU_LOGE("set thread local context failed, %s", e.what());
|
||||
return AICPU_ERROR_FAILED;
|
||||
}
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t GetThreadLocalCtx(const std::string &key, std::string *value) {
|
||||
if (key.empty()) {
|
||||
AICPU_LOGE("get thread local context failed, key is empty");
|
||||
return AICPU_ERROR_FAILED;
|
||||
}
|
||||
auto iter = g_thread_local_ctx.find(key);
|
||||
if (iter != g_thread_local_ctx.end()) {
|
||||
*value = iter->second;
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
AICPU_LOGW("get thread local context failed, no such key[%s]", key.c_str());
|
||||
return AICPU_ERROR_FAILED;
|
||||
}
|
||||
|
||||
status_t RemoveThreadLocalCtx(const std::string &key) {
|
||||
auto iter = g_thread_local_ctx.find(key);
|
||||
if (iter != g_thread_local_ctx.end()) {
|
||||
g_thread_local_ctx.erase(iter);
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
AICPU_LOGE("remove thread local context failed, no such key[%s]", key.c_str());
|
||||
return AICPU_ERROR_FAILED;
|
||||
}
|
||||
|
||||
const std::map<std::string, std::string> &GetAllThreadCtxInfo(aicpu::CtxType type, uint32_t thread_index) {
|
||||
AICPU_LOGI("Get all thread ctx info begin, thread index:%u", thread_index);
|
||||
auto &ctx = GetThreadCtx(type, thread_index);
|
||||
return ctx;
|
||||
}
|
||||
|
||||
status_t RegisterEventCallback(uint32_t event_id, uint32_t subevent_id, std::function<void(void *)> func) {
|
||||
std::lock_guard<std::mutex> lock(func_map_mutex);
|
||||
std::map<uint32_t, std::function<void(void *)>> &sub_map = g_func_map[event_id];
|
||||
auto it = sub_map.insert(std::make_pair(subevent_id, func));
|
||||
if (it.second == false) {
|
||||
AICPU_LOGE(
|
||||
"register event call function failed, repulicate register callback function by event_id[%u] "
|
||||
"subevent_id[%u]",
|
||||
event_id, subevent_id);
|
||||
return AICPU_ERROR_FAILED;
|
||||
}
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t DoEventCallback(uint32_t event_id, uint32_t subevent_id, void *param) {
|
||||
std::lock_guard<std::mutex> lock(func_map_mutex);
|
||||
auto iter = g_func_map.find(event_id);
|
||||
if (iter == g_func_map.end()) {
|
||||
AICPU_LOGE("do event callback function failed, cannot find callback function by event_id[%u] subevent_id[%u]",
|
||||
event_id, event_id);
|
||||
return AICPU_ERROR_FAILED;
|
||||
}
|
||||
|
||||
std::map<uint32_t, std::function<void(void *)>> &sub_map = iter->second;
|
||||
auto sub_iter = sub_map.find(subevent_id);
|
||||
if (sub_iter == sub_map.end()) {
|
||||
AICPU_LOGE("do event callback function failed, cannot find callback function by event_id[%u] subevent_id[%u]",
|
||||
event_id, event_id);
|
||||
return AICPU_ERROR_FAILED;
|
||||
}
|
||||
(sub_iter->second)(param);
|
||||
// erase func after call
|
||||
sub_map.erase(sub_iter);
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
status_t UnRegisterCallback(uint32_t event_id, uint32_t subevent_id) {
|
||||
std::lock_guard<std::mutex> lock(func_map_mutex);
|
||||
auto iter = g_func_map.find(event_id);
|
||||
if (iter == g_func_map.end()) {
|
||||
AICPU_LOGEVENT(
|
||||
"skip unregister event callback function, cannot find callback function by event_id[%u] "
|
||||
"subevent_id[%u]",
|
||||
event_id, event_id);
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
std::map<uint32_t, std::function<void(void *)>> &sub_map = iter->second;
|
||||
auto sub_iter = sub_map.find(subevent_id);
|
||||
if (sub_iter == sub_map.end()) {
|
||||
AICPU_LOGEVENT(
|
||||
"skip unregister event callback function, cannot find callback function by event_id[%u] "
|
||||
"subevent_id[%u]",
|
||||
event_id, event_id);
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
sub_map.erase(sub_iter);
|
||||
return AICPU_ERROR_NONE;
|
||||
}
|
||||
} // namespace aicpu
|
||||
|
||||
aicpu::status_t SetThreadCtxInfo(aicpu::CtxType type, const std::string &key, const std::string &value) {
|
||||
if (key.empty()) {
|
||||
AICPU_LOGE("Set thread context failed, context type[%d], key is empty", type);
|
||||
return aicpu::AICPU_ERROR_FAILED;
|
||||
}
|
||||
|
||||
auto &ctx = GetThreadCtx(type, g_thread_index);
|
||||
try {
|
||||
ctx[key] = value;
|
||||
} catch (std::exception &e) {
|
||||
AICPU_LOGE("Set thread context failed, context type[%d], %s", type, e.what());
|
||||
return aicpu::AICPU_ERROR_FAILED;
|
||||
}
|
||||
return aicpu::AICPU_ERROR_NONE;
|
||||
}
|
||||
|
||||
aicpu::status_t GetThreadCtxInfo(aicpu::CtxType type, const std::string &key, std::string *value) {
|
||||
if (key.empty()) {
|
||||
AICPU_LOGE("Get thread context failed, context type[%d], key is empty", type);
|
||||
return aicpu::AICPU_ERROR_FAILED;
|
||||
}
|
||||
|
||||
auto &ctx = GetThreadCtx(type, g_thread_index);
|
||||
auto iter = ctx.find(key);
|
||||
if (iter != ctx.end()) {
|
||||
*value = iter->second;
|
||||
return aicpu::AICPU_ERROR_NONE;
|
||||
}
|
||||
AICPU_LOGE("Get thread context failed, context type[%d], no such key[%s]", type, key.c_str());
|
||||
return aicpu::AICPU_ERROR_FAILED;
|
||||
}
|
||||
|
||||
aicpu::status_t RemoveThreadCtxInfo(aicpu::CtxType type, const std::string &key) {
|
||||
auto &ctx = GetThreadCtx(type, g_thread_index);
|
||||
auto iter = ctx.find(key);
|
||||
if (iter != ctx.end()) {
|
||||
ctx.erase(iter);
|
||||
return aicpu::AICPU_ERROR_NONE;
|
||||
}
|
||||
AICPU_LOGE("Remove thread context failed, context type[%d], no such key[%s]", type, key.c_str());
|
||||
return aicpu::AICPU_ERROR_FAILED;
|
||||
}
|
|
@ -0,0 +1,231 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef AICPU_OPS_AICPU_CONTEXT_H_
|
||||
#define AICPU_OPS_AICPU_CONTEXT_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <functional>
|
||||
#include "common/kernel_util.h"
|
||||
|
||||
namespace aicpu {
|
||||
typedef struct {
|
||||
uint32_t device_id; // device id
|
||||
uint32_t tsId; // ts id
|
||||
pid_t host_pid; // host pid
|
||||
uint32_t vf_id; // vf id
|
||||
} aicpuContext_t;
|
||||
|
||||
enum AicpuRunMode : uint32_t {
|
||||
PROCESS_PCIE_MODE = 0, // 1910/1980/1951 dc, with host mode
|
||||
PROCESS_SOCKET_MODE = 1, // MDC
|
||||
THREAD_MODE = 2, // ctrlcpu/minirc/lhisi
|
||||
INVALID_MODE,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint32_t stream_id;
|
||||
uint64_t task_id;
|
||||
} streamAndTaskId_t;
|
||||
|
||||
typedef enum {
|
||||
AICPU_ERROR_NONE = 0, // success
|
||||
AICPU_ERROR_FAILED = 1, // failed
|
||||
} status_t;
|
||||
|
||||
enum CtxType : int32_t { CTX_DEFAULT = 0, CTX_PROF, CTX_DEBUG };
|
||||
|
||||
constexpr auto kContextKeyOpName = "opname";
|
||||
constexpr auto kContextKeyPhaseOneFlag = "phaseOne";
|
||||
constexpr auto kContextKeyWaitType = "waitType";
|
||||
constexpr auto kContextKeyWaitId = "waitId";
|
||||
constexpr auto kContextKeyStartTick = "startTick";
|
||||
constexpr auto kContextKeyDrvSubmitTick = "drvSubmitTick";
|
||||
constexpr auto kContextKeyDrvSchedTick = "drvSchedTick";
|
||||
constexpr auto kContextKeyKernelType = "kernelType";
|
||||
|
||||
/**
|
||||
* set aicpu context for current thread.
|
||||
* @param [in]ctx aicpu context
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
AICPU_VISIBILITY_API status_t aicpuSetContext(aicpuContext_t *ctx);
|
||||
|
||||
/**
|
||||
* get aicpu context from current thread.
|
||||
* @param [out]ctx aicpu context
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
AICPU_VISIBILITY_API status_t aicpuGetContext(aicpuContext_t *ctx);
|
||||
|
||||
/**
|
||||
* init context for task monitor, called in compute process start.
|
||||
* @param [in]aicpu_core_cnt aicpu core number
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
status_t InitTaskMonitorContext(uint32_t aicpu_core_cnt);
|
||||
|
||||
/**
|
||||
* set aicpu thread index for task monitor, called in thread callback function.
|
||||
* @param [in]thread_index aicpu thread index
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
status_t SetAicpuThreadIndex(uint32_t thread_index);
|
||||
|
||||
/**
|
||||
* get aicpu thread index.
|
||||
* @return uint32
|
||||
*/
|
||||
uint32_t GetAicpuThreadIndex();
|
||||
|
||||
/**
|
||||
* set op name for task monitor.
|
||||
* called in libtf_kernels.so(tf op) or libaicpu_processer.so(others) or cpu kernel framework.
|
||||
* @param [in]opname op name
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
status_t __attribute__((weak)) SetOpname(const std::string &opname);
|
||||
|
||||
/**
|
||||
* get op name for task monitor
|
||||
* @param [in]thread_index thread index
|
||||
* @param [out]opname op name
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
status_t GetOpname(uint32_t thread_index, std::string *opname);
|
||||
|
||||
/**
|
||||
* get task and stream id.
|
||||
* @param [in]task_id task id.
|
||||
* @param [in]stream_id stream id.
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
status_t __attribute__((weak)) GetTaskAndStreamId(uint64_t *task_id, uint32_t *stream_id);
|
||||
|
||||
/**
|
||||
* set task and stream id.
|
||||
* @param [in]task_id task id.
|
||||
* @param [in]stream_id stream id.
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
status_t __attribute__((weak)) SetTaskAndStreamId(uint64_t task_id, uint32_t stream_id);
|
||||
|
||||
/**
|
||||
* set thread local context of key
|
||||
* @param [in]key context key
|
||||
* @param [in]value context value
|
||||
* @return status whether this operation success
|
||||
* @note Deprecated from 20201216, Replaced by SetThreadCtxInfo
|
||||
*/
|
||||
status_t __attribute__((weak)) SetThreadLocalCtx(const std::string &key, const std::string &value);
|
||||
|
||||
/**
|
||||
* get thread local context of key
|
||||
* @param [in]key context key
|
||||
* @param [out]value context value
|
||||
* @return status whether this operation success
|
||||
* @note Deprecated from 20201216, Replaced by GetThreadCtxInfo
|
||||
*/
|
||||
status_t GetThreadLocalCtx(const std::string &key, std::string *value);
|
||||
|
||||
/**
|
||||
* remove local context of key
|
||||
* @param [in]key context key
|
||||
* @return status whether this operation success
|
||||
* @note Deprecated from 20201216, Replaced by RemoveThreadCtxInfo
|
||||
*/
|
||||
status_t RemoveThreadLocalCtx(const std::string &key);
|
||||
|
||||
/**
|
||||
* get all thread context info of type
|
||||
* @param [in]type: ctx type
|
||||
* @param [in]thread_index: thread index
|
||||
* @return const std::map<std::string, std::string> &: all thread context info
|
||||
*/
|
||||
const std::map<std::string, std::string> &GetAllThreadCtxInfo(aicpu::CtxType type, uint32_t thread_index);
|
||||
|
||||
/**
|
||||
* set run mode.
|
||||
* @param [in]run_mode: run mode.
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
status_t __attribute__((weak)) SetAicpuRunMode(uint32_t run_mode);
|
||||
|
||||
/**
|
||||
* get run mode.
|
||||
* @param [out]run_mode: run mode.
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
status_t __attribute__((weak)) GetAicpuRunMode(uint32_t *run_mode);
|
||||
|
||||
/**
|
||||
* Register callback function by event_id and subevent_id
|
||||
* @param event_id event id
|
||||
* @param subevent_id subevent id
|
||||
* @param func call back function
|
||||
*/
|
||||
status_t __attribute__((weak))
|
||||
RegisterEventCallback(uint32_t event_id, uint32_t subevent_id, std::function<void(void *)> func);
|
||||
|
||||
/**
|
||||
* Do callback function by event_id and subevent_id
|
||||
* @param event_id event id
|
||||
* @param subevent_id subevent id
|
||||
* @param param event param
|
||||
*/
|
||||
status_t __attribute__((weak)) DoEventCallback(uint32_t event_id, uint32_t subevent_id, void *param);
|
||||
|
||||
/**
|
||||
* Unregister callback function by event_id and subevent_id
|
||||
* @param event_id event id
|
||||
* @param subevent_id subevent id
|
||||
*/
|
||||
status_t __attribute__((weak)) UnRegisterCallback(uint32_t event_id, uint32_t subevent_id);
|
||||
} // namespace aicpu
|
||||
|
||||
extern "C" {
|
||||
/**
|
||||
* set thread context info of type
|
||||
* @param [in]type: ctx type
|
||||
* @param [in]key: key of context info
|
||||
* @param [in]value: value of context info
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
AICPU_VISIBILITY_API aicpu::status_t SetThreadCtxInfo(aicpu::CtxType type, const std::string &key,
|
||||
const std::string &value);
|
||||
|
||||
/**
|
||||
* get thread context info of type
|
||||
* @param [in]type: ctx type
|
||||
* @param [in]key: key of context info
|
||||
* @param [out]value: value of context info
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
AICPU_VISIBILITY_API aicpu::status_t GetThreadCtxInfo(aicpu::CtxType type, const std::string &key, std::string *value);
|
||||
|
||||
/**
|
||||
* remove thread context info of type
|
||||
* @param [in]type: ctx type
|
||||
* @param [in]key: key of context info
|
||||
* @return status whether this operation success
|
||||
*/
|
||||
AICPU_VISIBILITY_API aicpu::status_t RemoveThreadCtxInfo(aicpu::CtxType type, const std::string &key);
|
||||
}
|
||||
|
||||
#endif // AICPU_OPS_AICPU_CONTEXT_H_
|
|
@ -0,0 +1,58 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "aicpu_sharder/aicpu_pulse.h"
|
||||
#include <unordered_map>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include "common/kernel_log.h"
|
||||
|
||||
namespace {
|
||||
static std::unordered_map<std::string, PulseNotifyFunc> pulse_notify_func_map;
|
||||
static std::mutex mtx;
|
||||
} // namespace
|
||||
|
||||
__attribute__((visibility("default"))) void AicpuPulseNotify() {
|
||||
std::unique_lock<std::mutex> lck(mtx);
|
||||
AICPU_LOGD("Aicpu pulse notify start, notify func num=%zu.", pulse_notify_func_map.size());
|
||||
for (auto ¬ify_func : pulse_notify_func_map) {
|
||||
AICPU_LOGD("Aicpu pulse notify %s start.", notify_func.first.c_str());
|
||||
notify_func.second();
|
||||
AICPU_LOGD("Aicpu pulse notify %s end.", notify_func.first.c_str());
|
||||
}
|
||||
AICPU_LOGD("Aicpu pulse notify end.");
|
||||
}
|
||||
|
||||
__attribute__((visibility("default"))) int32_t RegisterPulseNotifyFunc(const char *name, PulseNotifyFunc func) {
|
||||
if (name == nullptr) {
|
||||
AICPU_LOGE("Register pulse notify func failed as param name is null");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (func == nullptr) {
|
||||
AICPU_LOGE("Register pulse notify func for %s failed as param func is null", name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> lck(mtx);
|
||||
auto ret = pulse_notify_func_map.emplace(name, func);
|
||||
if (!ret.second) {
|
||||
AICPU_LOGE("Register pulse notify func for %s failed.", name);
|
||||
return -1;
|
||||
}
|
||||
AICPU_LOGI("Register pulse notify func for %s success.", name);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef AICPU_OPS_AICPU_PULSE_H_
|
||||
#define AICPU_OPS_AICPU_PULSE_H_
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef void (*PulseNotifyFunc)(void);
|
||||
|
||||
/**
|
||||
* aicpu pulse notify.
|
||||
* timer will call this method per second.
|
||||
*/
|
||||
void AicpuPulseNotify(void);
|
||||
|
||||
/**
|
||||
* Register kernel pulse notify func.
|
||||
* @param name name of kernel lib, must end with '\0' and unique.
|
||||
* @param func pulse notify function.
|
||||
* @return 0:success, other:failed.
|
||||
*/
|
||||
int32_t RegisterPulseNotifyFunc(const char *name, PulseNotifyFunc func);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // AICPU_OPS_AICPU_PULSE_H_
|
|
@ -0,0 +1,218 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "aicpu_sharder/aicpu_sharder.h"
|
||||
|
||||
#include <semaphore.h>
|
||||
#include <unistd.h>
|
||||
#include <error.h>
|
||||
#include <atomic>
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <cstring>
|
||||
|
||||
#include "common/kernel_log.h"
|
||||
|
||||
namespace aicpu {
|
||||
#define AICPU_SHARDER_IF_TRUE_RUN(expr, run) \
|
||||
do { \
|
||||
if (expr) { \
|
||||
run; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
void SharderNonBlock::Register(const RunnerBool &schedule, const ClosureBool &do_task, uint32_t cpu_core_num) {
|
||||
schedule_ = schedule;
|
||||
do_task_ = do_task;
|
||||
cpu_core_num_ = cpu_core_num;
|
||||
}
|
||||
|
||||
bool SharderNonBlock::Enqueue(const Closure &closure, bool submit_topic) {
|
||||
if (schedule_ != nullptr) {
|
||||
return schedule_(closure, submit_topic);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void SharderNonBlock::Schedule(const Closure &closure) {
|
||||
if (!Enqueue(closure)) {
|
||||
closure();
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t SharderNonBlock::GetCPUNum() { return cpu_core_num_; }
|
||||
|
||||
SharderNonBlock &SharderNonBlock::GetInstance() {
|
||||
static SharderNonBlock sharder_non_block;
|
||||
return sharder_non_block;
|
||||
}
|
||||
|
||||
int64_t SharderNonBlock::CeilMultiple(int64_t x, int64_t base) {
|
||||
if (base == 0) {
|
||||
return 0;
|
||||
}
|
||||
int64_t ret = x / base;
|
||||
if ((x % base) != 0) {
|
||||
ret++;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void SharderNonBlock::ParallelFor(int64_t total, int64_t per_unit_size, const SharderWork &work) {
|
||||
AICPU_LOGI("total: %lld, per_unit_size: %lld", total, per_unit_size);
|
||||
if ((total <= 0) || (work == nullptr)) {
|
||||
AICPU_LOGE("invalid param: total<=0 or work is nullptr");
|
||||
return;
|
||||
}
|
||||
|
||||
// work itself
|
||||
if ((schedule_ == nullptr) || (cpu_core_num_ <= 1)) {
|
||||
AICPU_LOGI("work itself all");
|
||||
work(0, total);
|
||||
return;
|
||||
}
|
||||
|
||||
// In order to ensure a smaller scheduling delay, the maximum number of slices is twice the number of CPU cores
|
||||
const int64_t max_shard_num = static_cast<int64_t>(cpu_core_num_) * 2;
|
||||
|
||||
// calculate shard number and block size
|
||||
// i.e., if total is 118, perUintSize is 2, and cpu_core_num_ is 13
|
||||
// then shard_num is 24, block_size is 5
|
||||
int64_t block_size = std::max(int64_t{1}, std::min(total, per_unit_size));
|
||||
int64_t shard_num = CeilMultiple(total, block_size);
|
||||
shard_num = std::min(max_shard_num, shard_num);
|
||||
block_size = CeilMultiple(total, shard_num);
|
||||
shard_num = CeilMultiple(total, block_size);
|
||||
AICPU_LOGI("shard number: %lld, block size: %lld", shard_num, block_size);
|
||||
|
||||
// There is no need to submit an event if shard_num is 1
|
||||
if (shard_num == 1) {
|
||||
AICPU_LOGI("executes on the current thread");
|
||||
work(0, total);
|
||||
return;
|
||||
}
|
||||
|
||||
std::atomic<int64_t> count(shard_num); // a counter
|
||||
sem_t sem;
|
||||
int32_t sem_init_ret = sem_init(&sem, 0, 0);
|
||||
if (sem_init_ret == -1) {
|
||||
AICPU_LOGE("sem_init error with message: %s", strerror(errno));
|
||||
work(0, total);
|
||||
return;
|
||||
}
|
||||
|
||||
for (int64_t start = 0; start < total; start += block_size) {
|
||||
auto limit = std::min(start + block_size, total);
|
||||
Closure closure = [&sem, &work, &count, start, limit]() {
|
||||
count--;
|
||||
// In order to ensure that user's work function exception does not affect multithread services,
|
||||
// exception capture is needed. Exception type is not cared here, and error log is printed.
|
||||
try {
|
||||
work(start, limit);
|
||||
} catch (...) {
|
||||
AICPU_LOGE("exception occurred in work function with start: %lld, limit: %lld", start, limit);
|
||||
}
|
||||
|
||||
int32_t sem_post_ret = sem_post(&sem);
|
||||
AICPU_SHARDER_IF_TRUE_RUN(sem_post_ret == -1, AICPU_LOGE("sem_post error with message: %s", strerror(errno)));
|
||||
};
|
||||
|
||||
// if enqueue fail, work itself
|
||||
if (!Enqueue(closure, true)) {
|
||||
AICPU_LOGI("Enqueue fail, [%lld, %lld), work itself", start, limit);
|
||||
closure();
|
||||
}
|
||||
}
|
||||
|
||||
if (do_task_ != nullptr) {
|
||||
bool ret = true;
|
||||
while ((count > 0) && ret) {
|
||||
AICPU_LOGI("Main thread do task begin.");
|
||||
ret = do_task_();
|
||||
AICPU_LOGI("Main thread do task end.");
|
||||
}
|
||||
}
|
||||
|
||||
for (int64_t i = 0; i < shard_num; ++i) {
|
||||
int sem_wait_ret = sem_wait(&sem);
|
||||
AICPU_SHARDER_IF_TRUE_RUN(sem_wait_ret == -1, AICPU_LOGE("sem_wait error with message: %s", strerror(errno)));
|
||||
}
|
||||
int32_t sem_des_ret = sem_destroy(&sem);
|
||||
AICPU_SHARDER_IF_TRUE_RUN(sem_des_ret == -1, AICPU_LOGE("sem_destroy error with message: %s", strerror(errno)));
|
||||
}
|
||||
|
||||
void SharderNonBlock::ParallelForHash(int64_t total, int64_t cpu_nums, const SharderWork &work) {
|
||||
AICPU_LOGI("total: %lld, cpu_nums: %d", total, cpu_nums);
|
||||
if (total <= 0 || work == nullptr) {
|
||||
AICPU_LOGE("invalid param: total<=0 or work is nullptr");
|
||||
return;
|
||||
}
|
||||
|
||||
if ((schedule_ == nullptr) || (cpu_core_num_ <= 1)) {
|
||||
AICPU_LOGE("schedule is nullptr or cpu core num is not enough");
|
||||
return;
|
||||
}
|
||||
|
||||
std::atomic<int64_t> count(cpu_nums); // a counter
|
||||
|
||||
sem_t sem;
|
||||
int32_t sem_init_ret = sem_init(&sem, 0, 0);
|
||||
if (sem_init_ret == -1) {
|
||||
AICPU_LOGE("sem_init error with message: %s", strerror(errno));
|
||||
return;
|
||||
}
|
||||
|
||||
for (int64_t cur = 0; cur < cpu_nums; cur++) {
|
||||
Closure closure = [&sem, &work, &count, total, cur]() {
|
||||
work(total, cur);
|
||||
count--;
|
||||
int32_t sem_post_ret = sem_post(&sem);
|
||||
AICPU_SHARDER_IF_TRUE_RUN(sem_post_ret == -1, AICPU_LOGE("sem_post error with message: %s", strerror(errno)));
|
||||
};
|
||||
|
||||
// if enqueue fail, work itself
|
||||
if (!Enqueue(closure, true)) {
|
||||
closure();
|
||||
}
|
||||
}
|
||||
|
||||
if (do_task_ != nullptr) {
|
||||
bool ret = true;
|
||||
while ((count > 0) && ret) {
|
||||
ret = do_task_();
|
||||
}
|
||||
}
|
||||
|
||||
for (int64_t i = 0; i < cpu_nums; i++) {
|
||||
int sem_wait_ret = sem_wait(&sem);
|
||||
AICPU_SHARDER_IF_TRUE_RUN(sem_wait_ret == -1, AICPU_LOGE("sem_wait error with message: %s", strerror(errno)));
|
||||
}
|
||||
int32_t sem_des_ret = sem_destroy(&sem);
|
||||
AICPU_SHARDER_IF_TRUE_RUN(sem_des_ret == -1, AICPU_LOGE("sem_destroy error with message: %s", strerror(errno)));
|
||||
}
|
||||
} // namespace aicpu
|
||||
|
||||
/**
|
||||
* Shards the "total" unit of work refer "perUintSize"
|
||||
*/
|
||||
void ParallelFor(int64_t total, int64_t per_unit_size, const aicpu::SharderWork &work) {
|
||||
aicpu::SharderNonBlock::GetInstance().ParallelFor(total, per_unit_size, work);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get CPU number
|
||||
*/
|
||||
uint32_t GetCPUNum() { return aicpu::SharderNonBlock::GetInstance().GetCPUNum(); }
|
|
@ -0,0 +1,132 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef AICPU_OPS_AICPU_SHARDER_H_
|
||||
#define AICPU_OPS_AICPU_SHARDER_H_
|
||||
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
#include "common/kernel_util.h"
|
||||
|
||||
namespace aicpu {
|
||||
using Closure = std::function<void()>;
|
||||
using ClosureBool = std::function<bool()>;
|
||||
using RunnerBool = std::function<bool(Closure, bool)>;
|
||||
using SharderWork = std::function<void(int64_t, int64_t)>;
|
||||
|
||||
class SharderNonBlock {
|
||||
public:
|
||||
/**
|
||||
* Get the unique object of this class
|
||||
*/
|
||||
static SharderNonBlock &GetInstance();
|
||||
|
||||
/**
|
||||
* Register schedule callback function, do_task function and cpu core number
|
||||
* called by compute process
|
||||
* @param schedule Schedule callback function
|
||||
* @param do_task Callback function for itself schedule
|
||||
* @param cpu_core_num aicpu core number
|
||||
*/
|
||||
void Register(const RunnerBool &schedule, const ClosureBool &do_task, uint32_t cpu_core_num);
|
||||
|
||||
/**
|
||||
* Shards the "total" unit of work refer "perUintSize"
|
||||
* @param total Total unit of work
|
||||
* @param per_unit_size Minimum shard unit
|
||||
* @param work should be a callable taking (int64, int64) arguments.
|
||||
work(start, limit) computes the work units from [start, limit),
|
||||
i.e., [start, limit) is a shard.
|
||||
*/
|
||||
void ParallelFor(int64_t total, int64_t per_unit_size, const SharderWork &work);
|
||||
|
||||
/**
|
||||
* Shards the unit of work refer for hash
|
||||
* @param total, Total unit of work
|
||||
* @param cpu_nums Number of cpu cores
|
||||
* @param work should be a callable taking (int64, int64) arguments.
|
||||
work(cur, cpu_nums) computes the work units with input hash with (cpu_nums-1) equals cur,
|
||||
i.e. specially used by parallel unique op
|
||||
*/
|
||||
void ParallelForHash(int64_t total, int64_t cpu_nums, const SharderWork &work);
|
||||
|
||||
/**
|
||||
* Schedule a task use schedule function registered by compute process,
|
||||
* note that the task will actually executed asynchronously
|
||||
* @param closure Closure function with nothrow
|
||||
*/
|
||||
void Schedule(const Closure &closure);
|
||||
|
||||
/**
|
||||
* Get CPU number
|
||||
* @param None
|
||||
* @return CPU number
|
||||
*/
|
||||
uint32_t GetCPUNum();
|
||||
|
||||
private:
|
||||
SharderNonBlock() : schedule_(nullptr), do_task_(nullptr), cpu_core_num_(0) {}
|
||||
~SharderNonBlock() = default;
|
||||
|
||||
SharderNonBlock(const SharderNonBlock &) = delete;
|
||||
SharderNonBlock &operator=(const SharderNonBlock &) = delete;
|
||||
SharderNonBlock(SharderNonBlock &&) = delete;
|
||||
SharderNonBlock &operator=(SharderNonBlock &&) = delete;
|
||||
|
||||
/**
|
||||
* Closure function enqueue
|
||||
* @param closure Closure function can be called
|
||||
* @param submit_topic whether submit topic, true means submit topic
|
||||
* @return whether enqueue of closure success
|
||||
*/
|
||||
bool Enqueue(const Closure &closure, bool submit_topic = false);
|
||||
|
||||
/**
|
||||
* Calculate how many times, which ceiled, "x" is "base".
|
||||
* i.e., x is 1, base is 2, this function will return 1
|
||||
* @param x An integral
|
||||
* @param base An integral as base when cal multiple
|
||||
* @return ceiled multiple
|
||||
*/
|
||||
inline int64_t CeilMultiple(int64_t x, int64_t base);
|
||||
|
||||
private:
|
||||
RunnerBool schedule_; // enqueue runner
|
||||
ClosureBool do_task_; // a callback, do task from task queue
|
||||
uint32_t cpu_core_num_; // aicpu core number
|
||||
}; // SharderNonBlock
|
||||
} // namespace aicpu
|
||||
|
||||
extern "C" {
|
||||
/**
|
||||
* Shards the "total" unit of work refer "perUintSize"
|
||||
* @param total Total unit of work
|
||||
* @param per_unit_size Minimum shard unit
|
||||
* @param work should be a callable taking (int64, int64) arguments.
|
||||
work(start, limit) computes the work units from [start, limit),
|
||||
i.e., [start, limit) is a shard.
|
||||
*/
|
||||
AICPU_VISIBILITY_API void ParallelFor(int64_t total, int64_t per_unit_size, const aicpu::SharderWork &work);
|
||||
|
||||
/**
|
||||
* Get CPU number
|
||||
* @param None
|
||||
* @return CPU number
|
||||
*/
|
||||
AICPU_VISIBILITY_API uint32_t GetCPUNum();
|
||||
}
|
||||
|
||||
#endif // AICPU_OPS_AICPU_SHARDER_H_
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef AICPU_OPS_AICPU_DISTINCT_UNIFORM_INT_DISTRIBUTION_H_
|
||||
#define AICPU_OPS_AICPU_DISTINCT_UNIFORM_INT_DISTRIBUTION_H_
|
||||
|
||||
#include <random>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace aicpu {
|
||||
template <typename IntType = int>
|
||||
class distinct_uniform_int_distribution {
|
||||
public:
|
||||
using result_type = IntType;
|
||||
|
||||
private:
|
||||
using set_type = std::unordered_set<result_type>;
|
||||
using distr_type = std::uniform_int_distribution<result_type>;
|
||||
|
||||
public:
|
||||
distinct_uniform_int_distribution(result_type inf, result_type sup)
|
||||
: inf_(inf), sup_(sup), range_(sup_ - inf_ + 1), distr_(inf_, sup_) {}
|
||||
~distinct_uniform_int_distribution() = default;
|
||||
void reset() {
|
||||
uset_.clear();
|
||||
distr_.reset();
|
||||
}
|
||||
|
||||
template <typename Generator>
|
||||
result_type exec(Generator *engine) {
|
||||
if (!(uset_.size() < range_)) {
|
||||
std::terminate();
|
||||
}
|
||||
result_type res;
|
||||
do {
|
||||
res = distr_(*engine);
|
||||
} while (uset_.count(res) > 0);
|
||||
uset_.insert(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
private:
|
||||
const result_type inf_;
|
||||
const result_type sup_;
|
||||
const size_t range_;
|
||||
distr_type distr_;
|
||||
set_type uset_;
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif // AICPU_OPS_AICPU_DISTINCT_UNIFORM_INT_DISTRIBUTION_H_
|
|
@ -0,0 +1,255 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <map>
|
||||
#include "common/kernel_base.h"
|
||||
#include "common/kernel_errcode.h"
|
||||
#include "common/tensor.h"
|
||||
|
||||
namespace aicpu {
|
||||
namespace {
|
||||
// max param len limit 10k.
|
||||
constexpr uint32_t MAX_PARAM_LEN = 10240;
|
||||
// max io address num limit 1024
|
||||
constexpr uint32_t MAX_IO_ADDR_NUMPARAM_LEN = 1024;
|
||||
} // namespace
|
||||
static const std::map<const ::aicpuops::DataType, size_t> kKernelBaseDataTypeSize = {
|
||||
{aicpuops::MS_BOOL, sizeof(bool)}, {aicpuops::MS_INT8, sizeof(int8_t)},
|
||||
{aicpuops::MS_UINT8, sizeof(uint8_t)}, {aicpuops::MS_INT16, sizeof(int16_t)},
|
||||
{aicpuops::MS_UINT16, sizeof(uint16_t)}, {aicpuops::MS_INT32, sizeof(int32_t)},
|
||||
{aicpuops::MS_UINT32, sizeof(uint32_t)}, {aicpuops::MS_INT64, sizeof(int64_t)},
|
||||
{aicpuops::MS_UINT64, sizeof(uint64_t)}, {aicpuops::MS_FLOAT16, sizeof(float) / 2},
|
||||
{aicpuops::MS_FLOAT32, sizeof(float)}, {aicpuops::MS_FLOAT64, sizeof(double)}};
|
||||
|
||||
KernelBase::KernelBase(const std::string &kernel_name)
|
||||
: kernel_name_(kernel_name),
|
||||
extend_param_len_(0),
|
||||
extend_param_base_(nullptr),
|
||||
param_head_(nullptr),
|
||||
unknow_shape_(false) {}
|
||||
|
||||
uint32_t KernelBase::ParseParam(void *param) {
|
||||
if (param == nullptr) {
|
||||
AICPU_LOGE("Kernel:%s ParseParam param is null.", kernel_name_.c_str());
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
|
||||
// parse param_len
|
||||
param_head_ = static_cast<AicpuParamHead *>(param);
|
||||
if (param_head_->length < sizeof(AicpuParamHead) || param_head_->length > MAX_PARAM_LEN) {
|
||||
AICPU_LOGE("Kernel:%s param length=%u not in [%zu, %u].", kernel_name_.c_str(), param_head_->length,
|
||||
sizeof(AicpuParamHead), MAX_PARAM_LEN);
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
|
||||
auto param_base = static_cast<uint8_t *>(param);
|
||||
extend_param_base_ = param_base + sizeof(AicpuParamHead);
|
||||
extend_param_len_ = param_head_->length - sizeof(AicpuParamHead);
|
||||
|
||||
if (param_head_->ioAddrNum > 0) {
|
||||
if (param_head_->ioAddrNum > MAX_IO_ADDR_NUMPARAM_LEN) {
|
||||
AICPU_LOGE("Kernel:%s param ioAddrNum=%u is over %u.", kernel_name_.c_str(), param_head_->ioAddrNum,
|
||||
MAX_IO_ADDR_NUMPARAM_LEN);
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
uint32_t addr_len = param_head_->ioAddrNum * sizeof(uint64_t);
|
||||
if (extend_param_len_ < addr_len) {
|
||||
AICPU_LOGE("Kernel:%s extend param is not enough for io addr, ioAddrNum=%u, extendParamLen=%u.",
|
||||
kernel_name_.c_str(), param_head_->ioAddrNum, extend_param_len_);
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
auto io_addr_base = reinterpret_cast<uint64_t *>(extend_param_base_);
|
||||
for (uint32_t i = 0; i < param_head_->ioAddrNum; ++i) {
|
||||
io_addrs_.push_back(static_cast<uintptr_t>(io_addr_base[i]));
|
||||
}
|
||||
extend_param_base_ = extend_param_base_ + addr_len;
|
||||
extend_param_len_ -= addr_len;
|
||||
}
|
||||
AICPU_CHK_STATUS_RET(ParseNodeDef())
|
||||
AICPU_CHK_STATUS_RET(ParseExtInfo())
|
||||
if (unknow_shape_) {
|
||||
AICPU_LOGI("Unknown shape op: %s", kernel_name_.c_str());
|
||||
AICPU_CHK_STATUS_RET(UpdateInputShape())
|
||||
AICPU_CHK_STATUS_RET(UpdateOutputShape())
|
||||
}
|
||||
return ParseKernelParam();
|
||||
}
|
||||
|
||||
uint32_t KernelBase::Compute(void *param) {
|
||||
uint32_t ret = ParseParam(param);
|
||||
if (ret != AICPU_KERNEL_STATE_SUCCESS) {
|
||||
AICPU_LOGE("Kernel:%s ParseParam failed, ret=%u.", kernel_name_.c_str(), ret);
|
||||
return ret;
|
||||
}
|
||||
return DoCompute();
|
||||
}
|
||||
|
||||
size_t KernelBase::GetDataTypeSize(::aicpuops::DataType data_type) {
|
||||
auto it = kKernelBaseDataTypeSize.find(data_type);
|
||||
if (it == kKernelBaseDataTypeSize.end()) {
|
||||
AICPU_LOGE("don't support input tensor types");
|
||||
return 0;
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
uint32_t KernelBase::ParseExtendParam(T *param_var, std::string param_name) {
|
||||
if (extend_param_len_ < sizeof(T)) {
|
||||
AICPU_LOGE("Kernel:%s extend param is not enough for [%s] addr, need_len=%u, extendParamLen=%u.",
|
||||
kernel_name_.c_str(), param_name.c_str(), sizeof(T), extend_param_len_);
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
T *param = reinterpret_cast<T *>(extend_param_base_);
|
||||
if (param != nullptr) {
|
||||
*param_var = *param;
|
||||
extend_param_base_ += sizeof(T);
|
||||
extend_param_len_ -= sizeof(T);
|
||||
return AICPU_KERNEL_STATE_SUCCESS;
|
||||
}
|
||||
AICPU_LOGE("Kernel:%s extend param for [%s] addr is invalid.", kernel_name_.c_str(), param_name.c_str());
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
|
||||
uint32_t KernelBase::ParseNodeDef() {
|
||||
uint32_t node_def_len;
|
||||
AICPU_CHK_STATUS_RET(ParseExtendParam(&node_def_len, "node_def_len"))
|
||||
|
||||
if (extend_param_len_ < node_def_len) {
|
||||
AICPU_LOGE("Kernel:%s extend param is not enough for customizeAttr addr, node_def_len=%u, extendParamLen=%u.",
|
||||
kernel_name_.c_str(), node_def_len, extend_param_len_);
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
std::string std_data(reinterpret_cast<char *>(extend_param_base_), node_def_len);
|
||||
if (!node_def_.ParseFromString(std_data)) {
|
||||
AICPU_LOGE("parse %s KernelBase proto failed, nodeDef=%s.", kernel_name_.c_str(), std_data.c_str());
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
extend_param_base_ += node_def_len;
|
||||
extend_param_len_ -= node_def_len;
|
||||
return AICPU_KERNEL_STATE_SUCCESS;
|
||||
}
|
||||
|
||||
uint32_t KernelBase::ParseExtShapeType(FWKAdapter::ExtInfo *ext_info) {
|
||||
if (ext_info->infoLen != sizeof(int32_t)) {
|
||||
AICPU_LOGE("Kernel:%s parse ext shape type failed as infoLen must be %zu but %u.", kernel_name_.c_str(),
|
||||
sizeof(int32_t), ext_info->infoLen);
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
unknow_shape_ = true;
|
||||
return AICPU_KERNEL_STATE_SUCCESS;
|
||||
}
|
||||
|
||||
uint32_t KernelBase::ParseExtInputShape(FWKAdapter::ExtInfo *ext_info) {
|
||||
// no overflow
|
||||
auto need_len = node_def_.inputs_size() * sizeof(FWKAdapter::ShapeAndType);
|
||||
if (ext_info->infoLen != need_len) {
|
||||
AICPU_LOGE(
|
||||
"Kernel:%s parse ext input shape failed as infoLen must be "
|
||||
"input_num[%d]*sizeof(ShapeAndType)[%zu], but %u.",
|
||||
kernel_name_.c_str(), node_def_.inputs_size(), sizeof(FWKAdapter::ShapeAndType), ext_info->infoLen);
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
input_shape_and_type_.clear();
|
||||
auto input = reinterpret_cast<FWKAdapter::ShapeAndType *>(ext_info->infoMsg);
|
||||
for (int index = 0; index < node_def_.inputs_size(); ++index) {
|
||||
input_shape_and_type_.emplace_back(&input[index]);
|
||||
}
|
||||
return AICPU_KERNEL_STATE_SUCCESS;
|
||||
}
|
||||
|
||||
uint32_t KernelBase::ParseExtOutputShape(FWKAdapter::ExtInfo *ext_info) {
|
||||
// no overflow
|
||||
auto need_len = node_def_.outputs_size() * sizeof(FWKAdapter::ShapeAndType);
|
||||
if (ext_info->infoLen != need_len) {
|
||||
AICPU_LOGE(
|
||||
"Kernel:%s parse ext output shape failed as infoLen must be "
|
||||
"output_num[%d]*sizeof(ShapeAndType)[%zu], but %u.",
|
||||
kernel_name_.c_str(), node_def_.outputs_size(), sizeof(FWKAdapter::ShapeAndType), ext_info->infoLen);
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
output_shape_and_type_.clear();
|
||||
auto output = reinterpret_cast<FWKAdapter::ShapeAndType *>(ext_info->infoMsg);
|
||||
for (int index = 0; index < node_def_.outputs_size(); ++index) {
|
||||
output_shape_and_type_.emplace_back(&output[index]);
|
||||
}
|
||||
return AICPU_KERNEL_STATE_SUCCESS;
|
||||
}
|
||||
|
||||
uint32_t KernelBase::ParseExtInfo() {
|
||||
uint32_t offset = 0;
|
||||
FWKAdapter::ExtInfo *ext_info_ptr = nullptr;
|
||||
char *ext_info_buf = reinterpret_cast<char *>(static_cast<uintptr_t>(param_head_->extInfoAddr));
|
||||
while (offset + sizeof(FWKAdapter::ExtInfo) <= param_head_->extInfoLength) {
|
||||
ext_info_ptr = reinterpret_cast<FWKAdapter::ExtInfo *>(ext_info_buf + offset);
|
||||
if (ext_info_ptr == nullptr) {
|
||||
AICPU_LOGE("Kernel:%s ext_info is nullptr, extInfoLength=%u, extInfoAddr=%p, offset=%zu.", kernel_name_.c_str(),
|
||||
param_head_->extInfoLength, param_head_->extInfoAddr, offset);
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
switch (ext_info_ptr->infoType) {
|
||||
case FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE:
|
||||
AICPU_CHK_STATUS_RET(ParseExtShapeType(ext_info_ptr))
|
||||
break;
|
||||
case FWKAdapter::FWK_ADPT_EXT_INPUT_SHAPE:
|
||||
AICPU_CHK_STATUS_RET(ParseExtInputShape(ext_info_ptr))
|
||||
break;
|
||||
case FWKAdapter::FWK_ADPT_EXT_OUTPUT_SHAPE:
|
||||
AICPU_CHK_STATUS_RET(ParseExtOutputShape(ext_info_ptr))
|
||||
break;
|
||||
default:
|
||||
AICPU_LOGI("Kernel:%s ignore infoType=%d, infoLen=%u.", kernel_name_.c_str(), ext_info_ptr->infoType,
|
||||
ext_info_ptr->infoLen);
|
||||
break;
|
||||
}
|
||||
// not overflow
|
||||
offset += FWKAdapter::kExtInfoHeadSize;
|
||||
offset += ext_info_ptr->infoLen;
|
||||
}
|
||||
return AICPU_KERNEL_STATE_SUCCESS;
|
||||
}
|
||||
|
||||
uint32_t KernelBase::UpdateInputShape() {
|
||||
for (int i = 0; i < node_def_.inputs_size(); ++i) {
|
||||
aicpuops::Tensor *input_tensor = node_def_.mutable_inputs(i);
|
||||
aicpuops::TensorShape *input_tensor_shape = input_tensor->mutable_tensor_shape();
|
||||
input_tensor_shape->clear_dim();
|
||||
for (uint32_t index = 0; index < FWKAdapter::kMaxShapeDims; ++index) {
|
||||
// LLONG_MIN for dim end flag
|
||||
if (input_shape_and_type_[i]->dims[index] == LLONG_MIN) {
|
||||
break;
|
||||
}
|
||||
input_tensor_shape->add_dim()->set_size(input_shape_and_type_[i]->dims[index]);
|
||||
}
|
||||
}
|
||||
return AICPU_KERNEL_STATE_SUCCESS;
|
||||
}
|
||||
|
||||
uint32_t KernelBase::UpdateOutputShape() {
|
||||
for (int i = 0; i < node_def_.outputs_size(); ++i) {
|
||||
aicpuops::Tensor *output_tensor = node_def_.mutable_outputs(i);
|
||||
aicpuops::TensorShape *output_tensor_shape = output_tensor->mutable_tensor_shape();
|
||||
output_tensor_shape->clear_dim();
|
||||
for (uint32_t index = 0; index < FWKAdapter::kMaxShapeDims; ++index) {
|
||||
// LLONG_MIN for dim end flag
|
||||
if (output_shape_and_type_[i]->dims[index] == LLONG_MIN) {
|
||||
break;
|
||||
}
|
||||
output_tensor_shape->add_dim()->set_size(output_shape_and_type_[i]->dims[index]);
|
||||
}
|
||||
}
|
||||
return AICPU_KERNEL_STATE_SUCCESS;
|
||||
}
|
||||
} // namespace aicpu
|
|
@ -0,0 +1,82 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef AICPU_OPS_AICPU_COMMON_KERNEL_BASE_H_
|
||||
#define AICPU_OPS_AICPU_COMMON_KERNEL_BASE_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "common/kernel_util.h"
|
||||
#include "aicpu/common/aicpu_task_struct.h"
|
||||
#include "securec/include/securec.h"
|
||||
#include "common/tensor.h"
|
||||
#include "cce/fwk_adpt_struct.h"
|
||||
#include "common/kernel_log.h"
|
||||
#include "proto/aicpu_tensor.pb.h"
|
||||
|
||||
namespace aicpu {
|
||||
class KernelBase {
|
||||
public:
|
||||
explicit KernelBase(const std::string &kernel_name);
|
||||
|
||||
~KernelBase() = default;
|
||||
|
||||
uint32_t Compute(void *param);
|
||||
size_t GetDataTypeSize(::aicpuops::DataType data_type);
|
||||
|
||||
protected:
|
||||
virtual uint32_t ParseKernelParam() = 0;
|
||||
virtual uint32_t DoCompute() = 0;
|
||||
|
||||
template <typename T>
|
||||
uint32_t ParseExtendParam(T *param_var, std::string param_name);
|
||||
|
||||
uint32_t ParseNodeDef();
|
||||
|
||||
uint32_t ParseExtInfo();
|
||||
|
||||
uint32_t ParseExtShapeType(FWKAdapter::ExtInfo *ext_info);
|
||||
|
||||
uint32_t ParseExtInputShape(FWKAdapter::ExtInfo *ext_info);
|
||||
|
||||
uint32_t ParseExtOutputShape(FWKAdapter::ExtInfo *ext_info);
|
||||
|
||||
uint32_t UpdateInputShape();
|
||||
|
||||
uint32_t UpdateOutputShape();
|
||||
|
||||
private:
|
||||
KernelBase(const KernelBase &) = delete;
|
||||
KernelBase &operator=(const KernelBase &) = delete;
|
||||
KernelBase(KernelBase &&) = delete;
|
||||
KernelBase &operator=(KernelBase &&) = delete;
|
||||
|
||||
uint32_t ParseParam(void *param);
|
||||
|
||||
protected:
|
||||
std::string kernel_name_;
|
||||
std::vector<uintptr_t> io_addrs_;
|
||||
uint32_t extend_param_len_;
|
||||
uint8_t *extend_param_base_;
|
||||
AicpuParamHead *param_head_;
|
||||
bool unknow_shape_;
|
||||
aicpuops::NodeDef node_def_;
|
||||
std::vector<FWKAdapter::ShapeAndType *> input_shape_and_type_;
|
||||
std::vector<FWKAdapter::ShapeAndType *> output_shape_and_type_;
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif // AICPU_OPS_AICPU_COMMON_KERNEL_BASE_H_
|
|
@ -0,0 +1,30 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef AICPU_OPS_AICPU_COMMON_KENERL_ERRCODE_H_
|
||||
#define AICPU_OPS_AICPU_COMMON_KENERL_ERRCODE_H_
|
||||
|
||||
namespace aicpu {
|
||||
enum AicpuKernelErrCode {
|
||||
// 0-3 is fixed error code, runtime need interpret 0-3 error codes
|
||||
AICPU_KERNEL_STATE_SUCCESS = 0,
|
||||
AICPU_KERNEL_STATE_PARAM_INVALID = 1,
|
||||
AICPU_KERNEL_STATE_FAILED = 2,
|
||||
AICPU_KERNEL_STATE_EXECUTE_TIMEOUT = 3,
|
||||
AICPU_KERNEL_STATE_INTERNAL_ERROR = 4,
|
||||
AICPU_KERNEL_STATE_END_OF_SEQUENCE = 201,
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif // AICPU_OPS_AICPU_COMMON_KENERL_ERRCODE_H_
|
|
@ -0,0 +1,29 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "common/kernel_log.h"
|
||||
|
||||
namespace aicpu {
|
||||
static int log_level = AICPU_LOG_ERROR;
|
||||
|
||||
int LogSetLevel(int level) {
|
||||
log_level = level;
|
||||
return log_level;
|
||||
}
|
||||
|
||||
int LogGetLevel(void) { return log_level; }
|
||||
|
||||
bool CheckLogLevel(int log_level_check) { return log_level >= log_level_check; }
|
||||
} // namespace aicpu
|
|
@ -0,0 +1,77 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_
|
||||
#define AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
#include "common/kernel_errcode.h"
|
||||
|
||||
inline int GetTid(void) {
|
||||
thread_local static int tid = syscall(__NR_gettid);
|
||||
return tid;
|
||||
}
|
||||
static const int LOG_COUNT = 0;
|
||||
|
||||
namespace aicpu {
|
||||
#define AICPU_LOG_DEBUG 0
|
||||
#define AICPU_LOG_INFO 1
|
||||
#define AICPU_LOG_WARN 2
|
||||
#define AICPU_LOG_ERROR 3
|
||||
#define AICPU_LOG_EVENT 0x10
|
||||
|
||||
inline void PrintLog(const int level) { std::cerr << level << std::endl; }
|
||||
|
||||
template <typename T, typename... Args>
|
||||
inline void PrintLog(const int level, T &&head, Args &&... tail) {
|
||||
std::cerr << std::forward<T>(head) << " ";
|
||||
PrintLog(level, std::forward<Args>(tail)...);
|
||||
}
|
||||
|
||||
int LogSetLevel(int level);
|
||||
|
||||
int LogGetLevel(void);
|
||||
|
||||
bool CheckLogLevel(int log_level_check);
|
||||
|
||||
#define AICPU_LOGD(fmt, ...) \
|
||||
AICPU_LOG(AICPU_LOG_DEBUG, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
|
||||
#define AICPU_LOGI(fmt, ...) \
|
||||
AICPU_LOG(AICPU_LOG_INFO, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
|
||||
#define AICPU_LOGW(fmt, ...) \
|
||||
AICPU_LOG(AICPU_LOG_WARN, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
|
||||
#define AICPU_LOGE(fmt, ...) \
|
||||
AICPU_LOG(AICPU_LOG_ERROR, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
|
||||
#define AICPU_LOGEVENT(fmt, ...) \
|
||||
AICPU_LOG(AICPU_LOG_EVENT, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
|
||||
#define AICPU_LOG(level, fmt, ...) \
|
||||
do { \
|
||||
if (aicpu::CheckLogLevel(level)) { \
|
||||
aicpu::PrintLog(level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
|
||||
} \
|
||||
} while (LOG_COUNT != 0)
|
||||
|
||||
#define AICPU_CHK_STATUS_RET(expr...) \
|
||||
do { \
|
||||
const uint32_t status = (expr); \
|
||||
if (status != AICPU_KERNEL_STATE_SUCCESS) { \
|
||||
return status; \
|
||||
} \
|
||||
} while (0);
|
||||
} // namespace aicpu
|
||||
#endif // AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_
|
|
@ -0,0 +1,22 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef AICPU_OPS_AICPU_COMMON_KERNEL_UTIL_H_
|
||||
#define AICPU_OPS_AICPU_COMMON_KERNEL_UTIL_H_
|
||||
|
||||
#ifndef AICPU_VISIBILITY_API
|
||||
#define AICPU_VISIBILITY_API __attribute__((visibility("default")))
|
||||
#endif
|
||||
#endif // AICPU_OPS_AICPU_COMMON_KERNEL_UTIL_H_
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef AICPU_OPS_COMMON_TENSOR_H_
|
||||
#define AICPU_OPS_COMMON_TENSOR_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
namespace aicpu {
|
||||
namespace ms {
|
||||
class Tensor {
|
||||
public:
|
||||
Tensor() = default;
|
||||
~Tensor() = default;
|
||||
const uint8_t *GetData() const;
|
||||
size_t GetSize() const;
|
||||
void SetData(uint8_t *data, size_t size);
|
||||
|
||||
private:
|
||||
uint8_t *tensor_ptr_;
|
||||
size_t tensor_len_;
|
||||
};
|
||||
} // namespace ms
|
||||
} // namespace aicpu
|
||||
#endif // AICPU_OPS_COMMON_TENSOR_H_
|
|
@ -0,0 +1,280 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "./random_choice_with_mask_kernels.h"
|
||||
#include <random>
|
||||
#include <climits>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include "aicpu_sharder/aicpu_sharder.h"
|
||||
#include "proto/aicpu_tensor.pb.h"
|
||||
#include "common/distinct_uniform_int_distribution.h"
|
||||
#include "common/tensor.h"
|
||||
|
||||
namespace aicpu {
|
||||
static void ParseOutputCoordinate(std::vector<int64_t> dims_, int32_t output_length, int32_t input_dim_size,
|
||||
int32_t input_total_count, const int *tmp_output, int *output) {
|
||||
int it = 0;
|
||||
int column = input_total_count / dims_[0];
|
||||
for (int i = 0; i < output_length; i++) {
|
||||
int32_t tmp_output_number = tmp_output[i];
|
||||
int tmp_column = column;
|
||||
for (int j = 0; j < input_dim_size; j++) {
|
||||
if (j == input_dim_size - 1) {
|
||||
output[it++] = tmp_output_number;
|
||||
continue;
|
||||
}
|
||||
output[it++] = tmp_output_number / column;
|
||||
tmp_output_number = tmp_output_number % column;
|
||||
tmp_column = tmp_column / dims_[j + 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void GetOutputLength(bool *padding_flag, int32_t *output_length, int32_t *output_non_zero_length, int32_t count,
|
||||
int32_t non_zero_num) {
|
||||
if (count == 0) {
|
||||
*padding_flag = false;
|
||||
*output_length = non_zero_num;
|
||||
*output_non_zero_length = non_zero_num;
|
||||
} else if (count > 0 && count <= non_zero_num) {
|
||||
*padding_flag = false;
|
||||
*output_length = count;
|
||||
*output_non_zero_length = count;
|
||||
} else if (count > non_zero_num) {
|
||||
*padding_flag = true;
|
||||
*output_length = count;
|
||||
*output_non_zero_length = non_zero_num;
|
||||
} else {
|
||||
AICPU_LOGI("input count must greater or equal to 0 but instead is %d", count);
|
||||
}
|
||||
}
|
||||
|
||||
static bool GetInputTotalCount(const std::vector<int64_t> &dims_, int32_t *input_total_count,
|
||||
const int32_t &input_dim_size) {
|
||||
const int32_t max_inpu_dim = 5;
|
||||
if (input_dim_size < 1 || input_dim_size > max_inpu_dim) {
|
||||
AICPU_LOGE(
|
||||
"input dim size is %d, it must greater or equal to 1 channels "
|
||||
"and less than or equal to 5 channels!",
|
||||
input_dim_size);
|
||||
return false;
|
||||
}
|
||||
for (int32_t i = 0; i < input_dim_size; i++) {
|
||||
*input_total_count *= dims_[i];
|
||||
}
|
||||
if (*input_total_count <= 0) {
|
||||
AICPU_LOGE("input_total_count is %d, please check setting.", *input_total_count);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void UpdateOutput(const std::vector<int64_t> &dims_, const int32_t &non_zero_num, const int32_t &count_,
|
||||
const int32_t &output_length, const int *mask_dim, int32_t *output_coordinate, bool *mask) {
|
||||
for (int32_t i = non_zero_num * dims_.size(); i < static_cast<int32_t>(count_ * dims_.size()); i++) {
|
||||
output_coordinate[i] = 0;
|
||||
}
|
||||
for (int32_t i = 0; i < output_length; i++) {
|
||||
mask[i] = static_cast<bool>(mask_dim[i]);
|
||||
}
|
||||
for (int32_t i = non_zero_num; i < count_; i++) {
|
||||
mask[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool GenerateRandomMask(const int32_t &output_length, const int32_t &non_zero_num,
|
||||
const int32_t &output_non_zero_length, int **input_dim, int **tmp_output,
|
||||
int **mask_dim) {
|
||||
*tmp_output = reinterpret_cast<int *>(malloc(output_length * sizeof(int)));
|
||||
if (*tmp_output == nullptr) {
|
||||
AICPU_LOGE("malloc memory failed!");
|
||||
free(*input_dim);
|
||||
return false;
|
||||
}
|
||||
std::random_device rd;
|
||||
std::mt19937 gen(rd());
|
||||
aicpu::distinct_uniform_int_distribution<> dis(0, non_zero_num - 1);
|
||||
*mask_dim = reinterpret_cast<int *>(malloc(output_length * sizeof(int)));
|
||||
if (*mask_dim == nullptr) {
|
||||
AICPU_LOGE("malloc memory failed!");
|
||||
free(*input_dim);
|
||||
free(*tmp_output);
|
||||
return false;
|
||||
}
|
||||
if (memset_s(*mask_dim, output_length, 0x00, output_length) != EOK) {
|
||||
AICPU_LOGE("memset_s to mask_dim failed!");
|
||||
free(*input_dim);
|
||||
free(*tmp_output);
|
||||
free(*mask_dim);
|
||||
return false;
|
||||
}
|
||||
if (memset_s(*tmp_output, output_length, 0x00, output_length) != EOK) {
|
||||
AICPU_LOGE("memset_s to tmp_output failed!");
|
||||
free(*input_dim);
|
||||
free(*tmp_output);
|
||||
free(*mask_dim);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (output_non_zero_length > output_length) {
|
||||
AICPU_LOGE("output_non_zero_length size is too long!");
|
||||
free(*input_dim);
|
||||
free(*tmp_output);
|
||||
free(*mask_dim);
|
||||
return false;
|
||||
}
|
||||
for (int32_t i = 0; i < output_non_zero_length; i++) {
|
||||
int32_t mean = dis.exec(&gen);
|
||||
*((*tmp_output) + i) = *((*input_dim) + mean);
|
||||
*((*mask_dim) + i) = 1;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t RandomChoiceWithMaskKernel::DoCompute() {
|
||||
auto *input = reinterpret_cast<bool *>(io_addrs_[0]);
|
||||
auto *output_coordinate = reinterpret_cast<int32_t *>(io_addrs_[1]);
|
||||
auto *mask = reinterpret_cast<bool *>(io_addrs_[2]);
|
||||
int32_t input_dim_size = dims_.size();
|
||||
int32_t non_zero_num = 0;
|
||||
int32_t input_total_count = 1;
|
||||
|
||||
bool ret = GetInputTotalCount(dims_, &input_total_count, input_dim_size);
|
||||
if (!ret) {
|
||||
AICPU_LOGE("Get input total count failed!");
|
||||
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
|
||||
}
|
||||
|
||||
int *input_dim = reinterpret_cast<int *>(malloc(input_total_count * sizeof(int)));
|
||||
if (input_dim == nullptr) {
|
||||
AICPU_LOGE("Malloc memory failed!");
|
||||
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
|
||||
}
|
||||
for (int32_t i = 0; i < input_total_count; i++) {
|
||||
if (input[i] != 0) {
|
||||
input_dim[non_zero_num] = i;
|
||||
non_zero_num++;
|
||||
}
|
||||
}
|
||||
bool padding_flag = false;
|
||||
int32_t output_length = 0;
|
||||
int32_t output_non_zero_length = 0;
|
||||
GetOutputLength(&padding_flag, &output_length, &output_non_zero_length, count_, non_zero_num);
|
||||
|
||||
int *tmp_output = nullptr;
|
||||
int *mask_dim = nullptr;
|
||||
ret = GenerateRandomMask(output_length, non_zero_num, output_non_zero_length, &input_dim, &tmp_output, &mask_dim);
|
||||
if (!ret) {
|
||||
AICPU_LOGE("Generate random mask failed!");
|
||||
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
|
||||
}
|
||||
|
||||
if (padding_flag) {
|
||||
int32_t index = 0;
|
||||
for (int32_t i = output_length - 1; i > non_zero_num; i--) {
|
||||
tmp_output[non_zero_num + index] = 0;
|
||||
mask_dim[non_zero_num + index] = 0;
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t copy_output_length = 0;
|
||||
if (output_length * input_dim_size >= INT_MAX || output_length * input_dim_size < 0) {
|
||||
AICPU_LOGE("Output size exceed INT_MAX");
|
||||
free(input_dim);
|
||||
free(tmp_output);
|
||||
free(mask_dim);
|
||||
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
|
||||
}
|
||||
copy_output_length = output_length * input_dim_size;
|
||||
int *output = reinterpret_cast<int *>(malloc(copy_output_length * sizeof(int)));
|
||||
if (output == nullptr) {
|
||||
AICPU_LOGE("malloc memory failed!");
|
||||
free(input_dim);
|
||||
free(tmp_output);
|
||||
free(mask_dim);
|
||||
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
|
||||
}
|
||||
if (memset_s(output, copy_output_length, 0x00, copy_output_length) != EOK) {
|
||||
AICPU_LOGE("memset_s memory failed!");
|
||||
free(input_dim);
|
||||
free(mask_dim);
|
||||
free(tmp_output);
|
||||
free(output);
|
||||
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
|
||||
}
|
||||
ParseOutputCoordinate(dims_, output_length, input_dim_size, input_total_count, tmp_output, output);
|
||||
|
||||
int32_t actual_output_length = count_ * dims_.size();
|
||||
copy_output_length = std::min(actual_output_length, copy_output_length);
|
||||
int32_t copy_output_bytes = 0;
|
||||
if (INT_MAX / static_cast<int>(sizeof(int32_t)) < copy_output_length) {
|
||||
AICPU_LOGE("The output length is out of range!");
|
||||
free(input_dim);
|
||||
free(mask_dim);
|
||||
free(tmp_output);
|
||||
free(output);
|
||||
return AICPU_KERNEL_STATE_INTERNAL_ERROR;
|
||||
}
|
||||
copy_output_bytes = copy_output_length * sizeof(int32_t);
|
||||
memcpy_s(output_coordinate, copy_output_bytes, output, copy_output_bytes);
|
||||
UpdateOutput(dims_, non_zero_num, count_, output_length, mask_dim, output_coordinate, mask);
|
||||
AICPU_LOGI("no zero num is %d, output_length is %d ", non_zero_num, output_length);
|
||||
UpdateOutputShapeValue(non_zero_num, output_length);
|
||||
free(input_dim);
|
||||
free(mask_dim);
|
||||
free(tmp_output);
|
||||
free(output);
|
||||
return AICPU_KERNEL_STATE_SUCCESS;
|
||||
}
|
||||
|
||||
void RandomChoiceWithMaskKernel::UpdateOutputShapeValue(int32_t non_zero_num, int32_t output_length) {
|
||||
if (unknow_shape_) {
|
||||
output_shape_and_type_[0]->dims[0] = non_zero_num;
|
||||
output_shape_and_type_[1]->dims[0] = output_length;
|
||||
}
|
||||
}
|
||||
uint32_t RandomChoiceWithMaskKernel::ParseKernelParam() {
|
||||
::google::protobuf::Map<::std::string, ::aicpuops::AttrValue> nodedef_map = node_def_.attrs();
|
||||
aicpuops::AttrValue random_choice_count_attrs = nodedef_map["count"];
|
||||
count_ = random_choice_count_attrs.i();
|
||||
AICPU_LOGI("This op attr count is %d", count_);
|
||||
|
||||
if ((count_ == 0) && (!unknow_shape_)) {
|
||||
AICPU_LOGE("This op attr count is 0, but the shapetype is %d", unknow_shape_);
|
||||
return AICPU_KERNEL_STATE_PARAM_INVALID;
|
||||
}
|
||||
|
||||
size_t inputs_size = node_def_.inputs_size();
|
||||
for (size_t i = 0; i < inputs_size; i++) {
|
||||
aicpuops::Tensor input_tensor = node_def_.inputs(i);
|
||||
aicpuops::TensorShape input_shape = input_tensor.tensor_shape();
|
||||
for (int j = 0; j < input_shape.dim_size(); j++) {
|
||||
dims_.push_back(input_shape.dim(j).size());
|
||||
}
|
||||
}
|
||||
|
||||
return AICPU_KERNEL_STATE_SUCCESS;
|
||||
}
|
||||
} // namespace aicpu
|
||||
|
||||
extern "C" {
|
||||
__attribute__((visibility("default"))) uint32_t RandomChoiceWithMask(void *param) {
|
||||
aicpu::RandomChoiceWithMaskKernel randomChoiceWithMaskKernel;
|
||||
return randomChoiceWithMaskKernel.Compute(param);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef AICPU_OPS_AICPU_RANDOM_CHOICE_WITH_MASK_KERNELS_H_
|
||||
#define AICPU_OPS_AICPU_RANDOM_CHOICE_WITH_MASK_KERNELS_H_
|
||||
|
||||
#include <vector>
|
||||
#include "common/kernel_base.h"
|
||||
|
||||
namespace aicpu {
|
||||
class RandomChoiceWithMaskKernel : public KernelBase {
|
||||
public:
|
||||
RandomChoiceWithMaskKernel() : KernelBase("RandomChoiceWithMask") {}
|
||||
~RandomChoiceWithMaskKernel() = default;
|
||||
|
||||
protected:
|
||||
int32_t count_ = 0;
|
||||
std::vector<int64_t> dims_;
|
||||
uint32_t DoCompute() override;
|
||||
uint32_t ParseKernelParam() override;
|
||||
void UpdateOutputShapeValue(int32_t non_zero_num, int32_t output_length);
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif // AICPU_OPS_AICPU_RANDOM_CHOICE_WITH_MASK_KERNELS_H_
|
Loading…
Reference in New Issue