!37251 [MS][LITE][STABLE] Copy TensorRT impl to src/runtime
Merge pull request !37251 from chenjianping/master_dev1
commit e67997ff67
@@ -498,14 +498,14 @@ if(SUPPORT_TENSORRT)
         set(CUDA_LIB_PATH ${CUDA_PATH}/lib64)
         include_directories(${TENSORRT_PATH}/include)
         include_directories(${CUDA_PATH}/include)
-        add_subdirectory(extendrt/delegate/tensorrt)
+        add_subdirectory(runtime/delegate/tensorrt)
     endif()
     target_link_libraries(mindspore-lite tensorrt_kernel_mid cuda_kernel_mid gpu_distribution_collective)
     target_link_libraries(mindspore-lite_static tensorrt_kernel_mid cuda_kernel_mid gpu_distribution_collective)
 else()
     if(NOT MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
         set(TENSORRT_STUB
-            ${CMAKE_CURRENT_SOURCE_DIR}/extendrt/delegate/tensorrt/distribution/distribution_base.cc
+            ${CMAKE_CURRENT_SOURCE_DIR}/runtime/delegate/tensorrt/distribution/distribution_base.cc
         )
         add_library(tensorrt_stub OBJECT ${TENSORRT_STUB})
     endif()
@@ -381,6 +381,10 @@ int TensorRTSubGraph::Prepare() {
     return RET_ERROR;
   }
   int binding_num = this->engine_->getNbBindings();
+  if (binding_num < 0) {
+    MS_LOG(ERROR) << "invalid binding_num " << binding_num;
+    return RET_ERROR;
+  }
   tensor_bindings_ = new (std::nothrow) void *[binding_num];
   if (tensor_bindings_ == nullptr) {
     MS_LOG(ERROR) << "malloc tensor binding array failed.";
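This hunk appears to add a range check on `getNbBindings()` before the bindings array is allocated with `new (std::nothrow)`. Below is a standalone sketch of that validate-then-allocate pattern, not the TensorRT code itself:

```cpp
// Minimal sketch: validate a count reported by an external API before using it
// as an allocation size, and check the nothrow allocation result explicitly.
#include <iostream>
#include <new>

int *AllocBindings(int binding_num) {
  if (binding_num < 0) {  // reject invalid counts reported by the engine
    std::cerr << "invalid binding_num " << binding_num << std::endl;
    return nullptr;
  }
  int *bindings = new (std::nothrow) int[binding_num];
  if (bindings == nullptr) {  // nothrow new returns nullptr instead of throwing
    std::cerr << "malloc binding array failed" << std::endl;
    return nullptr;
  }
  return bindings;
}

int main() {
  int *ok = AllocBindings(4);
  delete[] ok;
  return AllocBindings(-1) == nullptr ? 0 : 1;
}
```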
@@ -0,0 +1,43 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_ALGORITHM_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_ALGORITHM_H_

#include <vector>
#include "include/api/status.h"

namespace mindspore {
namespace cache {
struct CacheNoe {
  CacheNoe(int _index, int _frequency, int _value) : key(_index), frequency(_frequency), value(_value) {}
  int key;  // host input index
  int frequency;
  int value;  // cache index
};

class CacheAlgorithm {
 public:
  virtual ~CacheAlgorithm() {}
  virtual int Get(int key) = 0;
  virtual void Put(int key, int value) = 0;
  virtual Status Init(size_t cache_size, int min_host_index, int max_host_index) = 0;
  virtual Status CheckCacheHit(const int *batch_ids, const size_t batch_ids_len, int *cache_index,
                               std::vector<int> *need_swap_indies, std::vector<int> *need_swap_indies_cache_index) = 0;
};
}  // namespace cache
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_ALGORITHM_H_
@@ -0,0 +1,41 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_MEM_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_MEM_BASE_H_
#include <utility>
#include <memory>

namespace mindspore {
namespace cache {
class CacheMemBase {
 public:
  CacheMemBase() = default;
  virtual ~CacheMemBase() = default;
  virtual bool InitDevice(uint32_t device_id, const void *context) = 0;
  virtual void *MallocMemory(size_t size) = 0;
  virtual void FreeMemory(void *buf) = 0;
  virtual bool SynchronizeStream() = 0;
  virtual bool CopyHostMemToDevice(void *dst, const void *src, size_t size) = 0;
  virtual bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) = 0;
  virtual bool HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr,
                           size_t cache_vocab_size, size_t embedding_size, size_t swap_out_size) = 0;
  virtual bool HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr,
                          size_t cache_vocab_size, size_t embedding_size, size_t swap_in_size) = 0;
};
}  // namespace cache
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_MEM_BASE_H_
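CacheMemBase abstracts device memory management plus the two hash-swap operations used to stage embedding rows in and out of the device cache. The hedged, host-only mock below (not part of this PR; the real implementation is GPUCacheMem later in this diff) illustrates the expected contract, assuming a float table laid out as contiguous rows of `embedding_size`:

```cpp
// Hypothetical host-only CacheMemBase used only to illustrate the interface.
#include <cstdint>
#include <cstdlib>
#include <cstring>

#include "src/runtime/delegate/parameter_cache/cache_mem_base.h"

namespace mindspore {
namespace cache {
class HostMockCacheMem : public CacheMemBase {
 public:
  bool InitDevice(uint32_t, const void *) override { return true; }
  void *MallocMemory(size_t size) override { return malloc(size); }
  void FreeMemory(void *buf) override { free(buf); }
  bool SynchronizeStream() override { return true; }
  bool CopyHostMemToDevice(void *dst, const void *src, size_t size) override {
    return memcpy(dst, src, size) != nullptr;
  }
  bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) override {
    return memcpy(dst, src, size) != nullptr;
  }
  // Swap out: gather the rows listed in swap_out_index_addr from the table.
  bool HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr, size_t,
                   size_t embedding_size, size_t swap_out_size) override {
    auto *table = static_cast<float *>(hash_table_addr);
    auto *values = static_cast<float *>(swap_out_value_addr);
    auto *indices = static_cast<int *>(swap_out_index_addr);
    for (size_t i = 0; i < swap_out_size; ++i) {
      memcpy(values + i * embedding_size, table + indices[i] * embedding_size, embedding_size * sizeof(float));
    }
    return true;
  }
  // Swap in: scatter the staged rows back into the table at the given slots.
  bool HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr, size_t,
                  size_t embedding_size, size_t swap_in_size) override {
    auto *table = static_cast<float *>(hash_table_addr);
    auto *values = static_cast<float *>(swap_in_value_addr);
    auto *indices = static_cast<int *>(swap_in_index_addr);
    for (size_t i = 0; i < swap_in_size; ++i) {
      memcpy(table + indices[i] * embedding_size, values + i * embedding_size, embedding_size * sizeof(float));
    }
    return true;
  }
};
}  // namespace cache
}  // namespace mindspore
```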
@@ -0,0 +1,237 @@
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/delegate/parameter_cache/embedding_cache.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include "src/common/log_adapter.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "src/runtime/delegate/parameter_cache/gpu/gpu_cache_mem.h"
|
||||
#include "src/runtime/delegate/parameter_cache/lfu_cache.h"
|
||||
#include "src/runtime/delegate/parameter_cache/factory_mgr_base.h"
|
||||
|
||||
namespace {
|
||||
constexpr size_t kEmbeddingTensorShapeSize = 2;
|
||||
}
|
||||
namespace mindspore {
|
||||
namespace cache {
|
||||
void LookUpTableTask(size_t indices_lens, size_t first_dim_size, const char *input_addr, const int *indices_addr,
|
||||
char *output_addr, size_t embedding_len, int min_host_index) {
|
||||
for (size_t i = 0; i < indices_lens; ++i) {
|
||||
int index = indices_addr[i] - min_host_index;
|
||||
if (index >= 0 && index < static_cast<int>(first_dim_size)) {
|
||||
size_t pos = index * embedding_len;
|
||||
std::memcpy(output_addr, input_addr + pos, embedding_len);
|
||||
} else {
|
||||
memset(output_addr, 0, embedding_len);
|
||||
}
|
||||
output_addr += embedding_len;
|
||||
}
|
||||
}
|
||||
|
||||
EmbeddingCache::~EmbeddingCache() {
|
||||
if (hash_swap_value_device_addr_ != nullptr) {
|
||||
device_cache_->FreeMemory(hash_swap_value_device_addr_);
|
||||
hash_swap_value_device_addr_ = nullptr;
|
||||
}
|
||||
if (hash_swap_value_addr_ != nullptr) {
|
||||
free(hash_swap_value_addr_);
|
||||
hash_swap_value_addr_ = nullptr;
|
||||
}
|
||||
if (hash_swap_index_addr_ != nullptr) {
|
||||
device_cache_->FreeMemory(hash_swap_index_addr_);
|
||||
hash_swap_index_addr_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
Status EmbeddingCache::Init(mindspore::MSTensor host_cache_tensor, mindspore::MSTensor device_tensor) {
|
||||
MS_ASSERT(device_tensor.Shape().size() == kEmbeddingTensorShapeSize);
|
||||
MS_ASSERT(host_cache_tensor.Shape().size() == kEmbeddingTensorShapeSize);
|
||||
MS_ASSERT(device_tensor.DataType() == host_cache_tensor.DataType());
|
||||
MS_ASSERT(host_cache_tensor.Data() != nullptr);
|
||||
|
||||
if (device_tensor.Shape()[1] != host_cache_tensor.Shape()[1]) {
|
||||
MS_LOG(ERROR) << device_tensor.Name() << " embedding_size is invalid, device size is " << device_tensor.Shape()[1]
|
||||
<< ", host size is " << host_cache_tensor.Shape()[1];
|
||||
return kLiteError;
|
||||
}
|
||||
if (host_cache_size_ != host_cache_tensor.Shape()[0]) {
|
||||
MS_LOG(ERROR) << device_tensor.Name() << " host_cache_size is invalid, host_cache_size"
|
||||
<< host_cache_tensor.Shape()[0] << ", index begin:" << min_host_index_
|
||||
<< ", index end:" << max_host_index_ << "rank_group_size_ num:" << rank_group_size_
|
||||
<< ", rank id:" << rank_id_ << ", vocab_size_:" << vocab_size_;
|
||||
return kLiteError;
|
||||
}
|
||||
|
||||
data_type_ = device_tensor.DataType();
|
||||
switch (data_type_) {
|
||||
case DataType::kNumberTypeFloat32:
|
||||
sizeof_data_type_ = sizeof(float);
|
||||
break;
|
||||
default:
|
||||
MS_LOG(ERROR) << device_tensor.Name() << " unsupported data type " << static_cast<int>(data_type_);
|
||||
return kLiteError;
|
||||
}
|
||||
host_addr_ = host_cache_tensor.MutableData();
|
||||
embedding_size_ = device_tensor.Shape()[1];
|
||||
device_start_index_ = device_cache_size_ * rank_id_;
|
||||
// host cache tensor is device tensor
|
||||
if (device_tensor.Shape()[0] == host_cache_tensor.Shape()[0]) {
|
||||
device_start_index_ = min_host_index_;
|
||||
}
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status EmbeddingCache::MallocCacheMemory() {
|
||||
auto hash_swap_value_size = embedding_size_ * batch_elements_ * sizeof_data_type_;
|
||||
hash_swap_value_device_addr_ = device_cache_->MallocMemory(hash_swap_value_size);
|
||||
if (hash_swap_value_device_addr_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc hash_swap_value_device failed, malloc size " << hash_swap_value_size;
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
|
||||
hash_swap_value_addr_ = malloc(hash_swap_value_size);
|
||||
if (hash_swap_value_addr_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc hash_swap_value failed, malloc size " << hash_swap_value_size;
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
|
||||
// data type of index
|
||||
hash_swap_index_addr_ = static_cast<int *>(device_cache_->MallocMemory(batch_elements_ * sizeof(int)));
|
||||
if (hash_swap_index_addr_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc hash_swap_index failed, malloc size " << batch_elements_ * sizeof(int);
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status EmbeddingCache::Init(uint32_t device_id, const void *context, mindspore::MSTensor host_cache_tensor,
|
||||
mindspore::MSTensor device_tensor) {
|
||||
auto ret = Init(host_cache_tensor, device_tensor);
|
||||
if (ret != kSuccess) {
|
||||
return ret;
|
||||
}
|
||||
cache_ = lite::FactoryManagerBase<std::string, cache::CacheAlgorithm>::Instance().GetProduct("lfu");
|
||||
if (cache_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc LFUCacheAlgorithm failed";
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
ret = cache_->Init(device_cache_size_, min_host_index_, max_host_index_);
|
||||
if (ret != kSuccess) {
|
||||
MS_LOG(ERROR) << "init cache failed," << ret.CodeAsString;
|
||||
return kLiteError;
|
||||
}
|
||||
|
||||
device_cache_ = lite::FactoryManagerBase<std::string, cache::CacheMemBase>::Instance().GetProduct("gpu");
|
||||
if (device_cache_ == nullptr) {
|
||||
MS_LOG(ERROR) << "get cache failed";
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
if (!device_cache_->InitDevice(device_id, context)) {
|
||||
MS_LOG(ERROR) << "init device failed";
|
||||
return kLiteError;
|
||||
}
|
||||
ret = MallocCacheMemory();
|
||||
if (ret != kSuccess) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "init succ, rank_group_size_ num:" << rank_group_size_ << ", rank id:" << rank_id_
|
||||
<< ", vocab_size_:" << vocab_size_ << ", host_cache_size_:" << host_cache_size_
|
||||
<< ", device_cache_size_:" << device_cache_size_ << ", embedding_size_:" << embedding_size_
|
||||
<< ", batch_elements_:" << batch_elements_ << ", index begin:" << min_host_index_
|
||||
<< ", index end:" << max_host_index_;
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status EmbeddingCache::SetHostCacheAddr(void *addr, size_t size) {
|
||||
if (sizeof_data_type_ * host_cache_size_ * embedding_size_ != size) {
|
||||
return kLiteParamInvalid;
|
||||
}
|
||||
host_addr_ = addr;
|
||||
|
||||
// copy part of host mem to device
|
||||
auto ret =
|
||||
device_cache_->CopyHostMemToDevice(device_addr_, addr, sizeof_data_type_ * device_cache_size_ * embedding_size_);
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "CopyHostMemToDevice failed, copy size "
|
||||
<< sizeof_data_type_ * device_cache_size_ * embedding_size_;
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
|
||||
// init cache
|
||||
auto index_num = device_cache_size_;
|
||||
for (size_t i = 0; i < index_num; i++) {
|
||||
cache_->Put(min_host_index_ + i, i);
|
||||
}
|
||||
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status EmbeddingCache::SetDeviceCacheAddr(void *device_mem_addr, size_t size) {
|
||||
if (sizeof_data_type_ * device_cache_size_ * embedding_size_ != size) {
|
||||
return kLiteParamInvalid;
|
||||
}
|
||||
|
||||
device_addr_ = device_mem_addr;
|
||||
SetHostCacheAddr(host_addr_, sizeof_data_type_ * host_cache_size_ * embedding_size_);
|
||||
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status EmbeddingCache::CheckCacheHit(const int *batch_ids, const size_t batch_ids_len, int *cache_index) {
|
||||
std::vector<int> need_swap_indies;
|
||||
std::vector<int> need_swap_indies_cache_index;
|
||||
auto ret =
|
||||
cache_->CheckCacheHit(batch_ids, batch_ids_len, cache_index, &need_swap_indies, &need_swap_indies_cache_index);
|
||||
if (ret != kSuccess) {
|
||||
MS_LOG(ERROR) << "CheckCacheHit failed";
|
||||
return ret;
|
||||
}
|
||||
auto swap_indices_size = need_swap_indies.size();
|
||||
if (swap_indices_size > 0) {
|
||||
LookUpTableTask(swap_indices_size, host_cache_size_, static_cast<char *>(host_addr_), need_swap_indies.data(),
|
||||
static_cast<char *>(hash_swap_value_addr_), embedding_size_ * sizeof_data_type_, min_host_index_);
|
||||
|
||||
auto device_cache_ret = device_cache_->CopyHostMemToDevice(hash_swap_value_device_addr_, hash_swap_value_addr_,
|
||||
swap_indices_size * embedding_size_ * sizeof_data_type_);
|
||||
if (!device_cache_ret) {
|
||||
MS_LOG(ERROR) << "copy swap value to device failed";
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
|
||||
device_cache_ret = device_cache_->CopyHostMemToDevice(hash_swap_index_addr_, need_swap_indies_cache_index.data(),
|
||||
swap_indices_size * sizeof(int));
|
||||
if (!device_cache_ret) {
|
||||
MS_LOG(ERROR) << "copy swap indies to device failed";
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
|
||||
device_cache_ret = device_cache_->HashSwapIn(device_addr_, hash_swap_value_device_addr_, hash_swap_index_addr_,
|
||||
device_cache_size_, embedding_size_, swap_indices_size);
|
||||
if (!device_cache_ret) {
|
||||
MS_LOG(ERROR) << "HashSwapIn failed";
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
}
|
||||
|
||||
return kSuccess;
|
||||
}
|
||||
} // namespace cache
|
||||
} // namespace mindspore
|
|
@@ -0,0 +1,89 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_H_
#include <cmath>
#include <algorithm>
#include <memory>
#include "include/api/status.h"
#include "include/api/data_type.h"
#include "src/common/log_adapter.h"
#include "src/runtime/delegate/parameter_cache/cache_algorithm.h"
#include "src/runtime/delegate/parameter_cache/cache_mem_base.h"

namespace mindspore {
namespace cache {
class EmbeddingCache {
 public:
  EmbeddingCache(size_t vocab_size, size_t device_cache_size, size_t batch_elements, int rank_id, int rank_group_size)
      : vocab_size_(vocab_size),
        device_cache_size_(device_cache_size),
        batch_elements_(batch_elements),
        rank_id_(rank_id),
        rank_group_size_(rank_group_size) {
    MS_ASSERT(rank_group_size_ != 0);
    auto local_shard_size = static_cast<int>(std::ceil(static_cast<float>(vocab_size_) / rank_group_size_));
    min_host_index_ = local_shard_size * rank_id_;
    max_host_index_ = std::min(min_host_index_ + local_shard_size, static_cast<int>(vocab_size_));
    host_cache_size_ = max_host_index_ - min_host_index_;

    MS_LOG(INFO) << "rank_group_size_ num:" << rank_group_size_ << ", rank id:" << rank_id_
                 << ", vocab_size_:" << vocab_size_ << ", host_cache_size_:" << host_cache_size_
                 << ", index begin:" << min_host_index_ << ", index end:" << max_host_index_;
  }

  ~EmbeddingCache();
  Status Init(uint32_t device_id, const void *context, mindspore::MSTensor host_cache_tensor,
              mindspore::MSTensor device_tensor);
  Status SetHostCacheAddr(void *addr, size_t size);
  Status SetDeviceCacheAddr(void *host_mem_addr, size_t size);
  Status CheckCacheHit(const int *batch_ids, const size_t batch_ids_len, int *hash_index);
  size_t GetDeviceStartIndex() { return device_start_index_; }

 private:
  Status Init(mindspore::MSTensor host_cache_tensor, mindspore::MSTensor device_tensor);
  Status MallocCacheMemory();

 private:
  std::shared_ptr<cache::CacheMemBase> device_cache_{nullptr};
  std::shared_ptr<CacheAlgorithm> cache_{nullptr};

  size_t vocab_size_{0};         // total size
  size_t host_cache_size_{0};    // local host size
  size_t device_cache_size_{0};  // local device cache size
  size_t device_start_index_{0};
  size_t embedding_size_{0};
  size_t batch_elements_{0};

  DataType data_type_{DataType::kNumberTypeFloat32};
  size_t sizeof_data_type_{0};

  void *device_addr_{nullptr};  // hash_info.device_address.addr
  void *host_addr_{nullptr};

  int *hash_swap_index_addr_{nullptr};  // embedding_device_cache_->hash_swap_index_addr_
  void *hash_swap_value_addr_{nullptr};
  void *hash_swap_value_device_addr_{nullptr};

  int rank_id_;
  int rank_group_size_;
  int min_host_index_{0};
  int max_host_index_{0};
};
}  // namespace cache
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_H_
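The constructor splits the vocabulary across ranks with `ceil(vocab_size / rank_group_size)` and clamps the last shard to the vocabulary end. A standalone worked example of that arithmetic (the values are illustrative):

```cpp
// Reproduces the shard-range arithmetic from the EmbeddingCache constructor
// above, with vocab_size = 10 split over 3 ranks.
#include <algorithm>
#include <cmath>
#include <iostream>

int main() {
  const size_t vocab_size = 10;
  const int rank_group_size = 3;
  for (int rank_id = 0; rank_id < rank_group_size; ++rank_id) {
    auto local_shard_size = static_cast<int>(std::ceil(static_cast<float>(vocab_size) / rank_group_size));
    int min_host_index = local_shard_size * rank_id;
    int max_host_index = std::min(min_host_index + local_shard_size, static_cast<int>(vocab_size));
    int host_cache_size = max_host_index - min_host_index;
    // rank 0 -> [0, 4), rank 1 -> [4, 8), rank 2 -> [8, 10)
    std::cout << "rank " << rank_id << ": [" << min_host_index << ", " << max_host_index
              << "), host_cache_size " << host_cache_size << std::endl;
  }
  return 0;
}
```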
@@ -0,0 +1,194 @@
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/delegate/parameter_cache/embedding_cache_manager.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include "src/common/log_adapter.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace {
|
||||
constexpr size_t kGatherInputsSize = 3;
|
||||
}
|
||||
namespace mindspore {
|
||||
namespace cache {
|
||||
Status EmbeddingCacheManager::Init(const std::string &cache_model_path, size_t vocab_size, size_t device_cache_size) {
|
||||
if (cache_model_path.empty() || vocab_size == 0 || device_cache_size >= vocab_size) {
|
||||
MS_LOG(INFO) << "no cache model , vocab_size " << vocab_size << ", device_cache_size " << device_cache_size;
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
host_cache_model_ = std::make_shared<HostCacheModel>();
|
||||
if (host_cache_model_ == nullptr) {
|
||||
MS_LOG(ERROR) << "HostCacheModel malloc failed";
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
auto ret = host_cache_model_->LoadCache(cache_model_path);
|
||||
if (ret != kSuccess) {
|
||||
MS_LOG(ERROR) << "load cache failed";
|
||||
return ret;
|
||||
}
|
||||
vocab_size_ = vocab_size;
|
||||
device_cache_size_ = device_cache_size;
|
||||
|
||||
MS_LOG(INFO) << "cache manager init succ, cache model" << cache_model_path << " , vocab_size " << vocab_size
|
||||
<< ", device_cache_size " << device_cache_size;
|
||||
return ret;
|
||||
}
|
||||
|
||||
Status EmbeddingCacheManager::Init(DelegateModel<schema::Primitive> *model, size_t vocab_size,
|
||||
size_t device_cache_size) {
|
||||
if (model == nullptr || vocab_size == 0 || device_cache_size >= vocab_size) {
|
||||
MS_LOG(INFO) << "no cache model , vocab_size " << vocab_size << ", device_cache_size " << device_cache_size;
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
host_cache_model_ = std::make_shared<HostCacheModel>();
|
||||
if (host_cache_model_ == nullptr) {
|
||||
MS_LOG(ERROR) << "HostCacheModel malloc failed";
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
auto ret = host_cache_model_->LoadCache(model);
|
||||
if (ret != kSuccess) {
|
||||
MS_LOG(ERROR) << "load cache failed";
|
||||
return ret;
|
||||
}
|
||||
vocab_size_ = vocab_size;
|
||||
device_cache_size_ = device_cache_size;
|
||||
|
||||
MS_LOG(INFO) << "cache manager init succ, vocab_size " << vocab_size << ", device_cache_size " << device_cache_size;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool EmbeddingCacheManager::CheckIsCacheKernel(kernel::Kernel *kernel) {
|
||||
if (host_cache_model_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
return host_cache_model_->CheckIsCacheKernel(kernel);
|
||||
}
|
||||
|
||||
Status EmbeddingCacheManager::InitCacheKernel(kernel::Kernel *kernel, uint32_t device_id, const void *context) {
|
||||
if (host_cache_model_ == nullptr) {
|
||||
MS_LOG(ERROR) << "cache model is nullptr, kernel " << kernel->name() << " init cache failed";
|
||||
return kLiteError;
|
||||
}
|
||||
auto host_cache_tensor = host_cache_model_->GetHostCacheTensor(kernel);
|
||||
if (host_cache_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << kernel->name() << ": invalid cache kernel";
|
||||
return kLiteError;
|
||||
}
|
||||
|
||||
// only support embedding cache
|
||||
if (kernel->type() != schema::PrimitiveType_Gather) {
|
||||
MS_LOG(ERROR) << kernel->name() << " is not embedding kernel";
|
||||
return kLiteError;
|
||||
}
|
||||
MS_ASSERT(kernel->inputs().size() == kGatherInputsSize);
|
||||
auto device_tensor = kernel->inputs()[0];
|
||||
size_t batch_elements = kernel->inputs()[1].ElementNum();
|
||||
auto cache =
|
||||
std::make_shared<EmbeddingCache>(vocab_size_, device_cache_size_, batch_elements, rank_id_, rank_group_size_);
|
||||
if (cache == nullptr) {
|
||||
MS_LOG(ERROR) << kernel->name() << ": malloc EmbeddingCache failed";
|
||||
return kLiteError;
|
||||
}
|
||||
|
||||
auto ret = cache->Init(device_id, context, host_cache_tensor, device_tensor);
|
||||
if (ret != kSuccess) {
|
||||
MS_LOG(ERROR) << kernel->name() << ": EmbeddingCache init failed";
|
||||
return kLiteError;
|
||||
}
|
||||
|
||||
caches_[device_tensor.Name()] = cache;
|
||||
MS_LOG(INFO) << kernel->name() << " is cache kernel, input tensor " << kernel->inputs()[1].Name() << ", cache tensor "
|
||||
<< device_tensor.Name();
|
||||
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
bool EmbeddingCacheManager::IsCacheTensor(mindspore::MSTensor tensor) {
|
||||
if (host_cache_model_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
auto cache = caches_.find(tensor.Name());
|
||||
if (cache != caches_.end()) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<int64_t> EmbeddingCacheManager::GetCacheShape(mindspore::MSTensor tensor) {
|
||||
std::vector<int64_t> shape = tensor.Shape();
|
||||
if (shape.size() > 0 && IsCacheTensor(tensor)) {
|
||||
shape[0] = device_cache_size_;
|
||||
}
|
||||
return shape;
|
||||
}
|
||||
|
||||
size_t EmbeddingCacheManager::GetCacheDataSize(mindspore::MSTensor tensor) {
|
||||
auto data_size = tensor.DataSize();
|
||||
auto &shape = tensor.Shape();
|
||||
if (shape.size() > 0 && IsCacheTensor(tensor) && shape[0] > 0) {
|
||||
data_size = data_size * device_cache_size_ / shape[0];
|
||||
}
|
||||
return data_size;
|
||||
}
|
||||
|
||||
Status EmbeddingCacheManager::SetDeviceCacheAddr(const std::string &tensor_name, void *device_mem_addr, size_t size) {
|
||||
auto cache_iter = caches_.find(tensor_name);
|
||||
if (cache_iter == caches_.end() || cache_iter->second == nullptr) {
|
||||
MS_LOG(ERROR) << "not find cache, " << tensor_name;
|
||||
return kLiteError;
|
||||
}
|
||||
auto cache = cache_iter->second;
|
||||
return cache->SetDeviceCacheAddr(device_mem_addr, size);
|
||||
}
|
||||
|
||||
// device_addr is model input device addr
|
||||
int EmbeddingCacheManager::CacheHandle(const std::string &tensor_name, mindspore::MSTensor model_input_tensor,
|
||||
void *model_input_device_addr) {
|
||||
auto cache_iter = caches_.find(tensor_name);
|
||||
if (cache_iter == caches_.end()) {
|
||||
MS_LOG(ERROR) << "not find cache, " << tensor_name;
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto cache = cache_iter->second;
|
||||
hash_indices_.resize(model_input_tensor.ElementNum());
|
||||
auto ret = cache->CheckCacheHit(static_cast<int *>(model_input_tensor.MutableData()), hash_indices_.size(),
|
||||
hash_indices_.data());
|
||||
if (ret != kSuccess) {
|
||||
MS_LOG(ERROR) << "CheckCacheHit failed, " << model_input_tensor.Name();
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < hash_indices_.size(); i++) {
|
||||
if (hash_indices_[i] != -1) {
|
||||
hash_indices_[i] += cache->GetDeviceStartIndex();
|
||||
}
|
||||
}
|
||||
|
||||
auto cuda_ret = cudaMemcpy(model_input_device_addr, hash_indices_.data(), hash_indices_.size() * sizeof(int),
|
||||
cudaMemcpyHostToDevice);
|
||||
if (cuda_ret != cudaSuccess) {
|
||||
MS_LOG(ERROR) << "copy mem failed, " << model_input_tensor.Name();
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
MS_LOG(INFO) << "cache handle succ, " << model_input_tensor.Name() << "," << tensor_name;
|
||||
|
||||
return lite::RET_OK;
|
||||
}
|
||||
} // namespace cache
|
||||
} // namespace mindspore
|
|
@@ -0,0 +1,60 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_MANAGER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_MANAGER_H_
#include <memory>
#include <map>
#include <string>
#include <vector>
#include "include/api/kernel.h"
#include "include/api/status.h"
#include "include/api/data_type.h"
#include "src/runtime/delegate/parameter_cache/embedding_cache.h"
#include "src/runtime/delegate/parameter_cache/load_host_cache_model.h"
#include "src/runtime/delegate/tensorrt/distribution/distribution_base.h"

namespace mindspore {
namespace cache {
class EmbeddingCacheManager {
 public:
  EmbeddingCacheManager() {
    rank_id_ = lite::GetRankID();
    rank_group_size_ = lite::GetGPUGroupSize();
  }
  Status Init(const std::string &cache_model_path, size_t vocab_size, size_t device_cache_size);
  Status Init(DelegateModel<schema::Primitive> *model, size_t vocab_size, size_t device_cache_size);
  bool CheckIsCacheKernel(kernel::Kernel *kernel);
  Status InitCacheKernel(kernel::Kernel *kernel, uint32_t device_id, const void *context);
  bool IsCacheTensor(mindspore::MSTensor tensor);
  int CacheHandle(const std::string &tensor_name, mindspore::MSTensor model_input_tensor, void *device_addr);
  Status SetDeviceCacheAddr(const std::string &tensor_name, void *device_mem_addr, size_t size);
  std::vector<int64_t> GetCacheShape(mindspore::MSTensor tensor);
  size_t GetCacheDataSize(mindspore::MSTensor tensor);

 private:
  std::map<std::string, std::shared_ptr<EmbeddingCache>> caches_;
  std::vector<int> hash_indices_;
  int rank_id_{0};
  int rank_group_size_{1};

  std::shared_ptr<HostCacheModel> host_cache_model_;
  size_t vocab_size_;
  size_t device_cache_size_;
};
}  // namespace cache
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_MANAGER_H_
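A sketch of how a delegate might drive EmbeddingCacheManager for a Gather kernel, assuming the headers added in this change are on the include path; `BuildWithCache` and its parameters are illustrative, not part of the PR:

```cpp
// Illustrative driver only: register a cache kernel with the manager and query
// the shrunken device-side shape/size for the embedding table.
#include "src/runtime/delegate/parameter_cache/embedding_cache_manager.h"

namespace {
mindspore::Status BuildWithCache(mindspore::cache::EmbeddingCacheManager *manager, mindspore::kernel::Kernel *kernel,
                                 uint32_t device_id, const void *stream) {
  if (!manager->CheckIsCacheKernel(kernel)) {
    return mindspore::kSuccess;  // nothing to do for non-cache kernels
  }
  // Register the kernel's embedding table (inputs()[0]) with the cache manager.
  auto ret = manager->InitCacheKernel(kernel, device_id, stream);
  if (ret != mindspore::kSuccess) {
    return ret;
  }
  // The device-side tensor only holds device_cache_size rows of the table.
  auto table = kernel->inputs()[0];
  auto device_shape = manager->GetCacheShape(table);
  auto device_bytes = manager->GetCacheDataSize(table);
  (void)device_shape;
  (void)device_bytes;  // a delegate would use these to allocate the device table
  return mindspore::kSuccess;
}
}  // namespace
```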
@@ -0,0 +1,81 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_FACTORY_MGR_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_FACTORY_MGR_BASE_H_
#include <map>
#include <memory>
#include "include/api/status.h"

namespace mindspore {
namespace lite {
template <typename KEY, typename PRODUCT>
class ProcductRegistrar {
 public:
  virtual std::shared_ptr<PRODUCT> Create() = 0;

 protected:
  ProcductRegistrar() {}
  virtual ~ProcductRegistrar() {}

 private:
  ProcductRegistrar(const ProcductRegistrar &);
  const ProcductRegistrar &operator=(const ProcductRegistrar &);
};

template <typename KEY, typename PRODUCT>
class FactoryManagerBase {
 public:
  static FactoryManagerBase &Instance() {
    static FactoryManagerBase<KEY, PRODUCT> instance;
    return instance;
  }
  void RegProduct(const KEY &key, ProcductRegistrar<KEY, PRODUCT> *registrar) { registrars[key] = registrar; }

  std::shared_ptr<PRODUCT> GetProduct(const KEY &key) {
    auto registrar_iter = registrars.find(key);
    if (registrar_iter != registrars.end()) {
      if (registrar_iter->second != nullptr) {
        return registrar_iter->second->Create();
      }
    }
    return nullptr;
  }

 private:
  FactoryManagerBase() = default;
  ~FactoryManagerBase() = default;
  FactoryManagerBase(const FactoryManagerBase &);
  const FactoryManagerBase &operator=(const FactoryManagerBase &);

 private:
  std::map<KEY, ProcductRegistrar<KEY, PRODUCT> *> registrars;
};

template <typename KEY, typename PRODUCT, typename PRODUCT_IMPL>
class CommonProcductRegistrar : public ProcductRegistrar<KEY, PRODUCT> {
 public:
  explicit CommonProcductRegistrar(const KEY &key) {
    FactoryManagerBase<KEY, PRODUCT>::Instance().RegProduct(key, this);
  }
  std::shared_ptr<PRODUCT> Create() { return std::make_shared<PRODUCT_IMPL>(); }
};

#define RET_COMMON_PRODUCT_REGISTRAR(KEY, PRODUCT, PRODUCT_IMPL, key, name) \
  static mindspore::lite::CommonProcductRegistrar<KEY, PRODUCT, PRODUCT_IMPL> g_commonProcductRegistrar##name(key);
}  // namespace lite
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_FACTORY_MGR_BASE_H_
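RET_COMMON_PRODUCT_REGISTRAR registers a creator under a string key at static-initialization time, and FactoryManagerBase::GetProduct returns a fresh shared_ptr from that creator. A minimal sketch with a hypothetical Codec product (the "gpu" and "lfu" registrations later in this diff use exactly this pattern):

```cpp
// Hypothetical usage of the registrar/factory template above; Codec/JsonCodec
// are illustrative and not part of this PR.
#include <iostream>
#include <memory>
#include <string>

#include "src/runtime/delegate/parameter_cache/factory_mgr_base.h"

class Codec {
 public:
  virtual ~Codec() = default;
  virtual std::string Name() = 0;
};

class JsonCodec : public Codec {
 public:
  std::string Name() override { return "json"; }
};

// Registers JsonCodec under the key "json" during static initialization.
RET_COMMON_PRODUCT_REGISTRAR(std::string, Codec, JsonCodec, "json", JsonCodec);

int main() {
  auto codec = mindspore::lite::FactoryManagerBase<std::string, Codec>::Instance().GetProduct("json");
  std::cout << (codec != nullptr ? codec->Name() : "not registered") << std::endl;
  return 0;
}
```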
@@ -0,0 +1,158 @@
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/parameter_cache/gpu/gpu_cache_mem.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <string>
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/hash.cuh"
|
||||
#include "plugin/device/gpu/hal/device/cuda_driver.h"
|
||||
#include "src/common/log_adapter.h"
|
||||
#include "src/runtime/delegate/parameter_cache/factory_mgr_base.h"
|
||||
namespace mindspore {
|
||||
namespace cache {
|
||||
namespace gpu {
|
||||
RET_COMMON_PRODUCT_REGISTRAR(std::string, cache::CacheMemBase, cache::gpu::GPUCacheMem, "gpu", GPUCacheMem);
|
||||
bool GPUCacheMem::InitDevice(uint32_t device_id, const void *context) {
|
||||
auto cuda_ret = cudaSetDevice(static_cast<int>(device_id));
|
||||
if (cuda_ret != cudaSuccess) {
|
||||
MS_LOG(ERROR) << "Failed to set device id " << device_id << ", cuda_ret " << cuda_ret << " "
|
||||
<< cudaGetErrorString(cuda_ret);
|
||||
return false;
|
||||
}
|
||||
if (context != nullptr) {
|
||||
stream_ = *(reinterpret_cast<const cudaStream_t *>(context));
|
||||
return true;
|
||||
}
|
||||
|
||||
cuda_ret = cudaStreamCreate(&stream_);
|
||||
if (cuda_ret != cudaSuccess) {
|
||||
MS_LOG(ERROR) << "Cuda create stream failed, cuda_ret " << cuda_ret << " " << cudaGetErrorString(cuda_ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void *GPUCacheMem::MallocMemory(size_t size) {
|
||||
void *device_ptr = nullptr;
|
||||
auto cuda_ret = cudaMalloc(&device_ptr, size);
|
||||
if (cuda_ret != cudaSuccess) {
|
||||
MS_LOG(ERROR) << "Cuda Malloc failed for size:" << size << ", cuda_ret " << cuda_ret << " "
|
||||
<< cudaGetErrorString(cuda_ret);
|
||||
return nullptr;
|
||||
}
|
||||
MS_LOG(DEBUG) << "cudaMalloc size: " << size;
|
||||
return device_ptr;
|
||||
}
|
||||
|
||||
void GPUCacheMem::FreeMemory(void *device_addr) {
|
||||
auto cuda_ret = cudaFree(device_addr);
|
||||
if (cuda_ret != cudaSuccess && cuda_ret != cudaErrorCudartUnloading) {
|
||||
MS_LOG(WARNING) << "free cuda memory failed, "
|
||||
<< ", cuda_ret " << cuda_ret << " " << cudaGetErrorString(cuda_ret);
|
||||
}
|
||||
}
|
||||
|
||||
bool GPUCacheMem::SynchronizeStream() {
|
||||
auto cuda_ret = cudaStreamSynchronize(stream_);
|
||||
if (cuda_ret != cudaSuccess) {
|
||||
MS_LOG(ERROR) << "Cuda sync stream failed, cuda_ret " << cuda_ret << " " << cudaGetErrorString(cuda_ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GPUCacheMem::CopyHostMemToDevice(void *dst, const void *src, size_t size) {
|
||||
if (dst == nullptr) {
|
||||
MS_LOG(ERROR) << "dst is nullptr";
|
||||
return false;
|
||||
}
|
||||
if (src == nullptr) {
|
||||
MS_LOG(ERROR) << "src is nullptr";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto cuda_ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, stream_);
|
||||
if (cuda_ret != cudaSuccess) {
|
||||
MS_LOG(ERROR) << "Cuda memcpy failed, cuda_ret " << cuda_ret << " " << cudaGetErrorString(cuda_ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GPUCacheMem::CopyDeviceMemToHost(void *dst, const void *src, size_t size) {
|
||||
if (dst == nullptr) {
|
||||
MS_LOG(ERROR) << "dst is nullptr";
|
||||
return false;
|
||||
}
|
||||
if (src == nullptr) {
|
||||
MS_LOG(ERROR) << "src is nullptr";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto cuda_ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, stream_);
|
||||
if (cuda_ret != cudaSuccess) {
|
||||
MS_LOG(ERROR) << "Cuda memcpy failed, cuda_ret " << cuda_ret << " " << cudaGetErrorString(cuda_ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GPUCacheMem::HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr, size_t,
|
||||
size_t embedding_size, size_t swap_out_size) {
|
||||
if (hash_table_addr == nullptr) {
|
||||
MS_LOG(ERROR) << "hash_table_addr is nullptr";
|
||||
return false;
|
||||
}
|
||||
if (swap_out_value_addr == nullptr) {
|
||||
MS_LOG(ERROR) << "swap_out_value_addr is nullptr";
|
||||
return false;
|
||||
}
|
||||
if (swap_out_index_addr == nullptr) {
|
||||
MS_LOG(ERROR) << "swap_out_index_addr is nullptr";
|
||||
return false;
|
||||
}
|
||||
|
||||
DoHashSwapOut(reinterpret_cast<float *>(hash_table_addr), reinterpret_cast<float *>(swap_out_value_addr),
|
||||
reinterpret_cast<int *>(swap_out_index_addr), swap_out_size, embedding_size, stream_);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GPUCacheMem::HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr, size_t,
|
||||
size_t embedding_size, size_t swap_in_size) {
|
||||
if (hash_table_addr == nullptr) {
|
||||
MS_LOG(ERROR) << "hash_table_addr is nullptr";
|
||||
return false;
|
||||
}
|
||||
if (swap_in_value_addr == nullptr) {
|
||||
MS_LOG(ERROR) << "swap_in_value_addr is nullptr";
|
||||
return false;
|
||||
}
|
||||
if (swap_in_index_addr == nullptr) {
|
||||
MS_LOG(ERROR) << "swap_in_index_addr is nullptr";
|
||||
return false;
|
||||
}
|
||||
|
||||
DoHashSwapIn(reinterpret_cast<float *>(hash_table_addr), reinterpret_cast<float *>(swap_in_value_addr),
|
||||
reinterpret_cast<int *>(swap_in_index_addr), swap_in_size, embedding_size, stream_);
|
||||
return true;
|
||||
}
|
||||
} // namespace gpu
|
||||
} // namespace cache
|
||||
} // namespace mindspore
|
|
@@ -0,0 +1,48 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_GPU_GPU_CACHE_MEM_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_GPU_GPU_CACHE_MEM_H_

#include <cuda_runtime_api.h>
#include <memory>
#include "src/runtime/delegate/parameter_cache/cache_mem_base.h"

namespace mindspore {
namespace cache {
namespace gpu {
class GPUCacheMem : public cache::CacheMemBase {
 public:
  GPUCacheMem() = default;
  ~GPUCacheMem() override = default;
  bool InitDevice(uint32_t device_id, const void *context) override;
  void *MallocMemory(size_t size) override;
  void FreeMemory(void *buf) override;
  bool SynchronizeStream() override;
  bool CopyHostMemToDevice(void *dst, const void *src, size_t size) override;
  bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) override;
  bool HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr, size_t cache_vocab_size,
                   size_t embedding_size, size_t swap_out_size) override;
  bool HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr, size_t cache_vocab_size,
                  size_t embedding_size, size_t swap_in_size) override;

 private:
  cudaStream_t stream_;
};
}  // namespace gpu
}  // namespace cache
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_GPU_GPU_CACHE_MEM_H_
@@ -0,0 +1,243 @@
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "src/common/log_adapter.h"
|
||||
#include "src/runtime/delegate/parameter_cache/lfu_cache.h"
|
||||
#include "src/runtime/delegate/parameter_cache/factory_mgr_base.h"
|
||||
namespace mindspore {
|
||||
namespace cache {
|
||||
RET_COMMON_PRODUCT_REGISTRAR(std::string, cache::CacheAlgorithm, cache::LFUCacheAlgorithm, "lfu", LFUCacheAlgorithm);
|
||||
|
||||
LFUCacheAlgorithm::~LFUCacheAlgorithm() {
|
||||
for (auto iter : key_table_) {
|
||||
delete *(iter.second);
|
||||
}
|
||||
key_table_.clear();
|
||||
frequency_table_.clear();
|
||||
}
|
||||
|
||||
Status LFUCacheAlgorithm::Init(size_t cache_size, int min_host_index, int max_host_index) {
|
||||
if (cache_size <= 0 || min_host_index < 0 || max_host_index <= 0) {
|
||||
return kLiteParamInvalid;
|
||||
}
|
||||
cache_size_ = cache_size;
|
||||
min_host_index_ = min_host_index;
|
||||
max_host_index_ = max_host_index;
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
CacheNoe *LFUCacheAlgorithm::GetNode(int key) {
|
||||
auto key_table_iter = key_table_.find(key);
|
||||
if (key_table_iter == key_table_.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
auto node_iter = key_table_iter->second;
|
||||
auto node = *node_iter;
|
||||
|
||||
auto node_list_iter = frequency_table_.find(key);
|
||||
if (node_list_iter == frequency_table_.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
auto &node_list = node_list_iter->second;
|
||||
node_list.erase(node_iter);
|
||||
|
||||
if (node_list.empty()) {
|
||||
frequency_table_.erase(node_list_iter);
|
||||
}
|
||||
|
||||
node->frequency += 1;
|
||||
frequency_table_[node->frequency].emplace_front(node);
|
||||
key_table_[key] = frequency_table_[node->frequency].begin();
|
||||
return node;
|
||||
}
|
||||
|
||||
int LFUCacheAlgorithm::Get(int key) {
|
||||
auto node = GetNode(key);
|
||||
if (node != nullptr) {
|
||||
return node->value;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void LFUCacheAlgorithm::Put(int key, int value) {
|
||||
auto node = GetNode(key);
|
||||
if (node != nullptr) {
|
||||
node->value = value;
|
||||
return;
|
||||
}
|
||||
|
||||
if (cache_size_ == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
CacheNoe *add_node = nullptr;
|
||||
if (key_table_.size() == cache_size_) {
|
||||
add_node = frequency_table_.begin()->second.back();
|
||||
key_table_.erase(add_node->key);
|
||||
frequency_table_.begin()->second.pop_back();
|
||||
if (frequency_table_.begin()->second.size() == 0) {
|
||||
frequency_table_.erase(frequency_table_.begin()->first);
|
||||
}
|
||||
add_node->value = value;
|
||||
add_node->key = key;
|
||||
add_node->frequency = 1;
|
||||
} else {
|
||||
add_node = new CacheNoe(key, 1, value);
|
||||
if (add_node == nullptr) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
frequency_table_[1].emplace_front(add_node);
|
||||
key_table_[key] = frequency_table_[1].begin();
|
||||
}
|
||||
|
||||
void LFUCacheAlgorithm::GetHitNodesAndSwapIndex(const int *batch_ids, const size_t batch_ids_len, int *cache_index,
|
||||
std::unordered_map<int, CacheNoe *> *hit_index_nodes,
|
||||
std::unordered_map<int, std::vector<int>> *need_swap_map) {
|
||||
// find the hit indices and the indices that missed the cache
|
||||
for (size_t i = 0; i < batch_ids_len; i++) {
|
||||
auto key = batch_ids[i];
|
||||
if (key < min_host_index_ || key >= max_host_index_) {
|
||||
cache_index[i] = -1;
|
||||
// out range
|
||||
continue;
|
||||
}
|
||||
|
||||
auto hit_iter = hit_index_nodes->find(key);
|
||||
if (hit_iter != hit_index_nodes->end()) {
|
||||
auto node = hit_iter->second;
|
||||
node->frequency += 1;
|
||||
cache_index[i] = node->value;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto swap_iter = need_swap_map->find(key);
|
||||
if (swap_iter != need_swap_map->end()) {
|
||||
swap_iter->second.push_back(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto node_iter_iter = key_table_.find(key);
|
||||
if (node_iter_iter == key_table_.end()) {
|
||||
(*need_swap_map)[key].push_back(i);
|
||||
continue;
|
||||
}
|
||||
auto node_iter = node_iter_iter->second;
|
||||
auto node = *node_iter;
|
||||
|
||||
auto node_list_iter = frequency_table_.find(node->frequency);
|
||||
if (node_list_iter == frequency_table_.end()) {
|
||||
continue;
|
||||
}
|
||||
auto &node_list = node_list_iter->second;
|
||||
node_list.erase(node_iter);
|
||||
|
||||
if (node_list.empty()) {
|
||||
frequency_table_.erase(node_list_iter);
|
||||
}
|
||||
// hit
|
||||
node->frequency += 1;
|
||||
cache_index[i] = node->value;
|
||||
(*hit_index_nodes)[key] = node;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
std::list<CacheNoe *> LFUCacheAlgorithm::GetSwapNodes(const std::unordered_map<int, std::vector<int>> &need_swap_map) {
|
||||
std::list<CacheNoe *> need_swap_nodes;
|
||||
auto swap_size = need_swap_map.size();
|
||||
|
||||
while (swap_size > 0 && !frequency_table_.empty()) {
|
||||
auto node_list_iter = frequency_table_.begin();
|
||||
if (node_list_iter->second.size() > swap_size) {
|
||||
auto iter = node_list_iter->second.begin();
|
||||
std::advance(iter, swap_size);
|
||||
need_swap_nodes.splice(need_swap_nodes.end(), node_list_iter->second, node_list_iter->second.begin(), iter);
|
||||
swap_size = 0;
|
||||
} else {
|
||||
swap_size -= node_list_iter->second.size();
|
||||
need_swap_nodes.splice(need_swap_nodes.end(), node_list_iter->second);
|
||||
frequency_table_.erase(node_list_iter);
|
||||
}
|
||||
}
|
||||
return need_swap_nodes;
|
||||
}
|
||||
|
||||
Status LFUCacheAlgorithm::CheckCacheHit(const int *batch_ids, const size_t batch_ids_len, int *cache_index,
|
||||
std::vector<int> *need_swap_indies,
|
||||
std::vector<int> *need_swap_indies_cache_index) {
|
||||
if (batch_ids == nullptr) {
|
||||
MS_LOG(ERROR) << "batch_ids is nullptr";
|
||||
return kLiteNullptr;
|
||||
}
|
||||
if (cache_index == nullptr) {
|
||||
MS_LOG(ERROR) << "cache_index is nullptr";
|
||||
return kLiteNullptr;
|
||||
}
|
||||
std::unordered_map<int, std::vector<int>> need_swap_map;
|
||||
std::unordered_map<int, CacheNoe *> hit_index_nodes;
|
||||
GetHitNodesAndSwapIndex(batch_ids, batch_ids_len, cache_index, &hit_index_nodes, &need_swap_map);
|
||||
|
||||
// get need_swap_indies.size() least recently used node
|
||||
std::list<CacheNoe *> need_swap_nodes = GetSwapNodes(need_swap_map);
|
||||
|
||||
// update the evicted (old) nodes with the new keys
|
||||
{
|
||||
if (need_swap_map.size() != need_swap_nodes.size()) {
|
||||
MS_LOG(ERROR) << " need_swap_map.size() " << need_swap_map.size() << " != need_swap_nodes.size() "
|
||||
<< need_swap_nodes.size();
|
||||
return kLiteError;
|
||||
}
|
||||
need_swap_indies_cache_index->reserve(need_swap_map.size());
|
||||
auto need_swap_map_iter = need_swap_map.begin();
|
||||
for (auto iter = need_swap_nodes.begin();
|
||||
iter != need_swap_nodes.end() && need_swap_map_iter != need_swap_map.end(); iter++, need_swap_map_iter++) {
|
||||
auto node = *iter;
|
||||
key_table_.erase(node->key);
|
||||
node->key = need_swap_map_iter->first;
|
||||
node->frequency = 1;
|
||||
for (auto index : need_swap_map_iter->second) {
|
||||
cache_index[index] = node->value;
|
||||
}
|
||||
need_swap_indies->push_back(need_swap_map_iter->first);
|
||||
need_swap_indies_cache_index->push_back(node->value);
|
||||
MS_LOG(INFO) << "device index " << node->value << ",for host index " << need_swap_map_iter->first;
|
||||
key_table_[(*iter)->key] = iter;
|
||||
}
|
||||
|
||||
auto node_list_iter = frequency_table_.begin();
|
||||
if (node_list_iter->second.size() > 0) {
|
||||
auto iter = node_list_iter->second.begin();
|
||||
if ((*iter)->frequency == 1) {
|
||||
node_list_iter->second.splice(node_list_iter->second.begin(), need_swap_nodes);
|
||||
} else {
|
||||
frequency_table_[1] = need_swap_nodes;
|
||||
}
|
||||
} else {
|
||||
frequency_table_[1] = need_swap_nodes;
|
||||
}
|
||||
}
|
||||
for (auto node_iter : hit_index_nodes) {
|
||||
auto node = node_iter.second;
|
||||
frequency_table_[node->frequency].emplace_front(node);
|
||||
key_table_[node->key] = frequency_table_[node->frequency].begin();
|
||||
}
|
||||
return kSuccess;
|
||||
}
|
||||
} // namespace cache
|
||||
} // namespace mindspore
|
|
@@ -0,0 +1,55 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LFU_CACHE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LFU_CACHE_H_

#include <map>
#include <unordered_map>
#include <list>
#include <vector>
#include "include/api/status.h"
#include "src/runtime/delegate/parameter_cache/cache_algorithm.h"
namespace mindspore {
namespace cache {
class LFUCacheAlgorithm : public CacheAlgorithm {
 public:
  LFUCacheAlgorithm() {}
  ~LFUCacheAlgorithm() override;

  int Get(int key) override;
  void Put(int key, int value) override;
  Status Init(size_t cache_size, int min_host_index, int max_host_index) override;
  Status CheckCacheHit(const int *batch_ids, const size_t batch_ids_len, int *cache_index,
                       std::vector<int> *need_swap_indies, std::vector<int> *need_swap_indies_cache_index) override;

 private:
  CacheNoe *GetNode(int key);
  void GetHitNodesAndSwapIndex(const int *batch_ids, const size_t batch_ids_len, int *cache_index,
                               std::unordered_map<int, CacheNoe *> *hit_index_nodes,
                               std::unordered_map<int, std::vector<int>> *need_swap_map);
  std::list<CacheNoe *> GetSwapNodes(const std::unordered_map<int, std::vector<int>> &need_swap_map);

  std::unordered_map<int, std::list<CacheNoe *>::iterator> key_table_;
  std::map<int, std::list<CacheNoe *>> frequency_table_;
  size_t cache_size_{0};

  int min_host_index_{0};
  int max_host_index_{1};
};
}  // namespace cache
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LFU_CACHE_H_
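A sketch of exercising the LFU policy through the factory, assuming the headers above are on the include path and lfu_cache.cc is linked in so the "lfu" registration runs. As in EmbeddingCache::SetHostCacheAddr, the device slots are pre-populated with Put before CheckCacheHit is called; which frequency-1 entry gets evicted for a missing id is an implementation detail:

```cpp
// Sketch only: a 3-slot LFU over host indices [0, 10) with one hit and one miss.
#include <iostream>
#include <vector>

#include "src/runtime/delegate/parameter_cache/cache_algorithm.h"
#include "src/runtime/delegate/parameter_cache/factory_mgr_base.h"

int main() {
  auto lfu =
    mindspore::lite::FactoryManagerBase<std::string, mindspore::cache::CacheAlgorithm>::Instance().GetProduct("lfu");
  if (lfu == nullptr || lfu->Init(3, 0, 10) != mindspore::kSuccess) {
    return 1;
  }
  // Pre-populate the device slots, mirroring EmbeddingCache::SetHostCacheAddr.
  for (int slot = 0; slot < 3; ++slot) {
    lfu->Put(slot, slot);
  }
  std::vector<int> batch_ids = {1, 7};              // host ids looked up by one batch
  std::vector<int> cache_index(batch_ids.size());   // device slot for each id (filled by the call)
  std::vector<int> swap_in_ids;                     // host ids whose rows must be copied to device
  std::vector<int> swap_in_slots;                   // device slots receiving those rows
  auto ret = lfu->CheckCacheHit(batch_ids.data(), batch_ids.size(), cache_index.data(), &swap_in_ids, &swap_in_slots);
  if (ret != mindspore::kSuccess) {
    return 1;
  }
  // id 1 is a hit; id 7 evicts one frequency-1 slot and is reported for swap-in.
  for (size_t i = 0; i < swap_in_ids.size(); ++i) {
    std::cout << "host id " << swap_in_ids[i] << " -> device slot " << swap_in_slots[i] << std::endl;
  }
  return 0;
}
```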
@@ -0,0 +1,148 @@
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/runtime/delegate/parameter_cache/load_host_cache_model.h"
|
||||
#include "src/common/log_adapter.h"
|
||||
#include "src/common/common.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "src/common/file_utils.h"
|
||||
|
||||
namespace {
|
||||
constexpr size_t kGatherInputsSize = 3;
|
||||
}
|
||||
namespace mindspore {
|
||||
namespace cache {
|
||||
HostCacheModel::~HostCacheModel() {
|
||||
if (cache_model_ != nullptr) {
|
||||
delete cache_model_;
|
||||
cache_model_ = nullptr;
|
||||
}
|
||||
}
|
||||
MSTensor *SchemaTensorToMSTensor(lite::SchemaTensorWrapper *schema_tensor_wrapper,
|
||||
mindspore::schema::Tensor *schema_tensor) {
|
||||
std::vector<int64_t> shape;
|
||||
for (size_t j = 0; j < schema_tensor->dims()->size(); j++) {
|
||||
shape.push_back(schema_tensor->dims()->data()[j]);
|
||||
}
|
||||
std::string tensor_name;
|
||||
if (schema_tensor->name() != nullptr) {
|
||||
tensor_name = schema_tensor->name()->str();
|
||||
}
|
||||
return MSTensor::CreateRefTensor(tensor_name, (DataType)schema_tensor->dataType(), shape,
|
||||
schema_tensor_wrapper->data(), schema_tensor_wrapper->length());
|
||||
}
|
||||
|
||||
Status HostCacheModel::LoadCache(const std::string &model_path) {
|
||||
cache_model_ = lite::LiteImportFromPath(model_path.c_str());
|
||||
if (cache_model_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Import model failed";
|
||||
return kLiteGraphFileError;
|
||||
}
|
||||
|
||||
auto allTensors = cache_model_->graph_.all_tensors_;
|
||||
for (auto node : cache_model_->graph_.all_nodes_) {
|
||||
// only support embedding cache
|
||||
if (node == nullptr || node->node_type_ != schema::PrimitiveType_Gather) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto input_index = node->input_indices_[0];
|
||||
if (input_index > allTensors.size() - 1) {
|
||||
MS_LOG(ERROR) << "invalid kernel input, input_index " << input_index << ",allTensors.size() "
|
||||
<< allTensors.size();
|
||||
return kLiteOutOfTensorRange;
|
||||
}
|
||||
auto schema_tensor_wrapper = cache_model_->GetSchemaTensor(input_index);
|
||||
if (schema_tensor_wrapper == nullptr) {
|
||||
MS_LOG(ERROR) << "invalid kernel input, input_index " << input_index;
|
||||
return kLiteOutOfTensorRange;
|
||||
}
|
||||
|
||||
auto schema_tensor = allTensors[input_index];
|
||||
if (schema_tensor != nullptr && schema_tensor_wrapper->data() != nullptr) {
|
||||
auto tensor = SchemaTensorToMSTensor(schema_tensor_wrapper, schema_tensor);
|
||||
if (tensor == nullptr) {
|
||||
return kLiteMemoryFailed;
|
||||
}
|
||||
cache_tensor_[tensor->Name()] = *tensor;
|
||||
MS_LOG(INFO) << tensor->Name() << " is cache tensor, and the node is [" << node->name_ << "]";
|
||||
delete tensor;
|
||||
}
|
||||
}
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
size_t GetVocabSize(kernel::Kernel *kernel) {
|
||||
size_t vocab_size = 0;
|
||||
auto cache_config = kernel->GetConfig(lite::kMSCache);
|
||||
auto vocab_size_iter = cache_config.find(lite::kMSCacheVocabSize);
|
||||
if (vocab_size_iter == cache_config.end()) {
|
||||
return vocab_size;
|
||||
}
|
||||
|
||||
auto vocab_size_opt = lite::GenericParseValue<size_t>(vocab_size_iter->second);
|
||||
if (!vocab_size_opt.IsNone()) {
|
||||
vocab_size = vocab_size_opt.Get();
|
||||
}
|
||||
return vocab_size;
|
||||
}
|
||||
|
||||
Status HostCacheModel::LoadCache(DelegateModel<schema::Primitive> *model) {
|
||||
KernelIter from, end;
|
||||
for (KernelIter iter = model->BeginKernelIterator(); iter != model->EndKernelIterator(); iter++) {
|
||||
kernel::Kernel *kernel = *iter;
|
||||
// only support embedding cache
|
||||
if (kernel->type() != schema::PrimitiveType_Gather) {
|
||||
continue;
|
||||
}
|
||||
MS_ASSERT(kernel->inputs().size() == kGatherInputsSize);
|
||||
auto tensor = kernel->inputs()[0];
|
||||
if (tensor.Data() == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t vocab_size = GetVocabSize(kernel);
|
||||
if (vocab_size == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cache_tensor_[tensor.Name()] = tensor;
|
||||
}
|
||||
return mindspore::kSuccess;
|
||||
}
|
||||
|
||||
bool HostCacheModel::CheckIsCacheKernel(kernel::Kernel *kernel) {
|
||||
if (GetHostCacheTensor(kernel) == nullptr) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
MSTensor HostCacheModel::GetHostCacheTensor(kernel::Kernel *kernel) {
|
||||
if (kernel != nullptr && kernel->inputs().size() > 0) {
|
||||
auto iter = cache_tensor_.find(kernel->inputs()[0].Name());
|
||||
if (iter != cache_tensor_.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
}
|
||||
return MSTensor(nullptr);
|
||||
}
|
||||
} // namespace cache
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LOAD_HOST_CACHE_MODEL_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LOAD_HOST_CACHE_MODEL_H_
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "include/api/status.h"
|
||||
#include "include/api/data_type.h"
|
||||
#include "include/api/types.h"
|
||||
#include "include/api/kernel.h"
|
||||
#include "include/api/delegate.h"
|
||||
#include "src/runtime/lite_model.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace cache {
|
||||
class HostCacheModel {
|
||||
public:
|
||||
HostCacheModel() = default;
|
||||
~HostCacheModel();
|
||||
Status LoadCache(const std::string &model_path);
|
||||
Status LoadCache(DelegateModel<schema::Primitive> *model);
|
||||
bool CheckIsCacheKernel(kernel::Kernel *kernel);
|
||||
MSTensor GetHostCacheTensor(kernel::Kernel *kernel);
|
||||
|
||||
private:
|
||||
std::map<std::string, MSTensor> cache_tensor_;
|
||||
mindspore::lite::LiteModel *cache_model_{nullptr};
|
||||
char *model_buf_{nullptr};
|
||||
size_t model_size_;
|
||||
};
|
||||
} // namespace cache
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LOAD_HOST_CACHE_MODEL_H_
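A minimal usage sketch of this class, assuming a valid cache-model file path and a delegate Gather kernel (both placeholders, not part of this change): load the cache model from disk, then ask whether the kernel's first input is a cached embedding table.

// Sketch only: the model path and the kernel pointer are hypothetical.
#include "src/runtime/delegate/parameter_cache/load_host_cache_model.h"

bool IsEmbeddingCacheKernel(mindspore::kernel::Kernel *gather_kernel) {
  mindspore::cache::HostCacheModel cache_model;
  if (cache_model.LoadCache("/path/to/cache_model.ms") != mindspore::kSuccess) {
    return false;
  }
  // True only when the first input of the kernel matches a cached host tensor by name.
  return cache_model.CheckIsCacheKernel(gather_kernel);
}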
|
|
@ -0,0 +1,95 @@
|
|||
include_directories(${TENSORRT_PATH}/include)
|
||||
include_directories(${CUDA_PATH}/include)
|
||||
include_directories(${CUDA_PATH})
|
||||
include_directories(${CCSRC_DIR}/plugin/device/cpu/kernel)
|
||||
include_directories(${CCSRC_DIR}/plugin/device/gpu/kernel/cuda_impl/cuda_ops)
|
||||
|
||||
if(DEFINED ENV{MS_ENABLE_CUDA_DISTRIBUTION})
|
||||
set(MS_ENABLE_CUDA_DISTRIBUTION $ENV{MS_ENABLE_CUDA_DISTRIBUTION})
|
||||
else()
|
||||
set(MS_ENABLE_CUDA_DISTRIBUTION "off")
|
||||
endif()
|
||||
|
||||
set(NCCL_MPI_SRC_STUB
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/distribution/distribution_collective.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/distribution/distribution_base.cc
|
||||
)
|
||||
|
||||
# nccl mpi
|
||||
if(MS_ENABLE_CUDA_DISTRIBUTION STREQUAL "on")
|
||||
message("enable cuda gpu distribution collective")
|
||||
file(GLOB NCCL_MPI_SRC LIST_DIRECTORIES false
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/distribution/*.cc
|
||||
${CCSRC_DIR}/plugin/device/gpu/hal/device/distribution/collective_wrapper.cc
|
||||
${CCSRC_DIR}/plugin/device/gpu/hal/device/distribution/mpi_wrapper.cc
|
||||
${CCSRC_DIR}/plugin/device/gpu/hal/device/distribution/nccl_wrapper.cc
|
||||
)
|
||||
list(REMOVE_ITEM NCCL_MPI_SRC ${NCCL_MPI_SRC_STUB})
|
||||
|
||||
add_compile_definitions(LITE_CUDA_DISTRIBUTION)
|
||||
include(${TOP_DIR}/cmake/external_libs/ompi.cmake)
|
||||
include(${TOP_DIR}/cmake/external_libs/nccl.cmake)
|
||||
|
||||
add_library(gpu_distribution_collective OBJECT ${NCCL_MPI_SRC})
|
||||
add_library(mindspore::nccl ALIAS nccl::nccl)
|
||||
add_library(mindspore::ompi ALIAS ompi::mpi)
|
||||
target_link_libraries(gpu_distribution_collective PRIVATE mindspore::ompi mindspore::nccl)
|
||||
else()
|
||||
add_library(gpu_distribution_collective OBJECT ${NCCL_MPI_SRC_STUB})
|
||||
endif()
|
||||
add_dependencies(gpu_distribution_collective fbs_src)
|
||||
|
||||
file(GLOB TENSORRT_RUNTIME_SRC LIST_DIRECTORIES false
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/*.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/op/*.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cuda_impl/*.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime/delegate/delegate_utils.cc
|
||||
${CCSRC_DIR}/plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.cc
|
||||
)
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache)
|
||||
|
||||
set(TENSORRT_RUNTIME_SRC
|
||||
${TENSORRT_RUNTIME_SRC}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/embedding_cache_manager.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/load_host_cache_model.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/lfu_cache.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/embedding_cache.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/gpu/gpu_cache_mem.cc
|
||||
)
|
||||
|
||||
link_libraries(${CUDA_LIB_PATH}/libcudnn.so)
|
||||
link_libraries(${CUDA_LIB_PATH}/libnvrtc.so)
|
||||
link_libraries(${CUDA_LIB_PATH}/libcublasLt.so)
|
||||
|
||||
add_library(libcudart SHARED IMPORTED)
|
||||
set_target_properties(libcudart PROPERTIES IMPORTED_LOCATION ${CUDA_LIB_PATH}/libcudart.so)
|
||||
|
||||
add_library(libnvinfer SHARED IMPORTED)
|
||||
set_target_properties(libnvinfer PROPERTIES IMPORTED_LOCATION ${TENSORRT_LIB_PATH}/libnvinfer.so)
|
||||
|
||||
add_library(libcublas SHARED IMPORTED)
|
||||
set_target_properties(libcublas PROPERTIES IMPORTED_LOCATION ${CUDA_LIB_PATH}/libcublas.so)
|
||||
add_library(tensorrt_kernel_mid OBJECT ${TENSORRT_RUNTIME_SRC})
|
||||
|
||||
add_dependencies(tensorrt_kernel_mid fbs_src)
|
||||
|
||||
target_link_libraries(
|
||||
tensorrt_kernel_mid
|
||||
libcudart
|
||||
libcublas
|
||||
libnvinfer
|
||||
)
|
||||
|
||||
# cuda
|
||||
find_package(CUDA)
|
||||
file(GLOB_RECURSE CUDA_KERNEL_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cuda_impl/*.cu
|
||||
${CCSRC_DIR}/plugin/device/gpu/kernel/cuda_impl/cuda_ops/gather.cu
|
||||
${CCSRC_DIR}/plugin/device/gpu/kernel/cuda_impl/cuda_ops/swish_impl.cu
|
||||
)
|
||||
|
||||
set_source_files_properties(${CUDA_KERNEL_SRC} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fPIC")
|
||||
SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-std=c++14;)
|
||||
cuda_add_library(cuda_kernel_mid STATIC ${CUDA_KERNEL_SRC})
|
|
@ -0,0 +1,56 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/activation.cuh"
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
|
||||
|
||||
template <typename T>
|
||||
__global__ void SigmoidKernel(const T *input1, T *output, int element_cnt) {
|
||||
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
|
||||
output[pos] = static_cast<T>(1) / (static_cast<T>(1) + exp(-input1[pos]));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void GeluKernel(const T *input_addr, T *output_addr, int size) {
|
||||
// formula:
|
||||
// gelu(x) = 0.5 * x * (1.0 + tanh(y))
|
||||
// tanh(y) = 2 / (1 + exp(-2y)) - 1
|
||||
// y = sqrt(2/pi) * (x + 0.044715 * x^3)
|
||||
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) {
|
||||
float x = input_addr[pos];
|
||||
float tanh_res = tanh(0.7978845608f * (x + 0.044715f * x * x * x));
|
||||
output_addr[pos] = 0.5f * x * (1.0f + tanh_res);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Sigmoid(const T *input1, T *output, int element_cnt, cudaStream_t stream) {
|
||||
SigmoidKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, output, element_cnt);
|
||||
return;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Gelu(const T *input1, T *output, int element_cnt, cudaStream_t stream) {
|
||||
GeluKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, output, element_cnt);
|
||||
return;
|
||||
}
|
||||
|
||||
template void Sigmoid(const float *input1, float *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
template void Gelu(const float *input1, float *output, int element_cnt, cudaStream_t stream);
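Only the float specializations are instantiated above. A hedged host-side sketch of how these wrappers are driven; the device pointers and stream are placeholders:

// Sketch: apply the float Sigmoid wrapper to a device buffer (all names are placeholders).
#include <cuda_runtime.h>
#include "src/runtime/delegate/tensorrt/cuda_impl/activation.cuh"

int RunSigmoid(const float *device_in, float *device_out, int element_cnt, cudaStream_t stream) {
  // Grid and block sizes are chosen inside the wrapper via GET_BLOCKS/GET_THREADS.
  Sigmoid(device_in, device_out, element_cnt, stream);
  return cudaStreamSynchronize(stream) == cudaSuccess ? 0 : -1;
}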
|
|
@ -0,0 +1,26 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_ACTIVATION_H_
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_ACTIVATION_H_
|
||||
|
||||
template <typename T>
|
||||
void Sigmoid(const T *input1, T *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void Gelu(const T *input1, T *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_ACTIVATION_H_
|
|
@ -0,0 +1,49 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cast.cuh"
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
|
||||
|
||||
// Generic cast
|
||||
template <typename S, typename T>
|
||||
__device__ __forceinline__ void CastBase(const S *input_addr, T *output_addr) {
|
||||
*output_addr = static_cast<T>((*input_addr));
|
||||
}
|
||||
|
||||
template <typename S, typename T>
|
||||
__global__ void CastKernel(const int input_size, const S *input_addr, T *output_addr) {
|
||||
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < input_size; pos += blockDim.x * gridDim.x) {
|
||||
CastBase(input_addr + pos, output_addr + pos);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, typename T>
|
||||
void Cast(const int input_size, const S *input_addr, T *output_addr, cudaStream_t stream) {
|
||||
CastKernel<<<GET_BLOCKS(input_size), GET_THREADS, 0, stream>>>(input_size, input_addr, output_addr);
|
||||
}
|
||||
|
||||
template void Cast(const int input_size, const int8_t *input_addr, int8_t *output_addr, cudaStream_t stream);
|
||||
template void Cast(const int input_size, const int8_t *input_addr, int32_t *output_addr, cudaStream_t stream);
|
||||
template void Cast(const int input_size, const int8_t *input_addr, float *output_addr, cudaStream_t stream);
|
||||
|
||||
template void Cast(const int input_size, const int32_t *input_addr, int8_t *output_addr, cudaStream_t stream);
|
||||
template void Cast(const int input_size, const int32_t *input_addr, int32_t *output_addr, cudaStream_t stream);
|
||||
template void Cast(const int input_size, const int32_t *input_addr, float *output_addr, cudaStream_t stream);
|
||||
template void Cast(const int input_size, const int32_t *input_addr, bool *output_addr, cudaStream_t stream);
|
||||
|
||||
template void Cast(const int input_size, const float *input_addr, int8_t *output_addr, cudaStream_t stream);
|
||||
template void Cast(const int input_size, const float *input_addr, int32_t *output_addr, cudaStream_t stream);
|
||||
template void Cast(const int input_size, const float *input_addr, float *output_addr, cudaStream_t stream);
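A small sketch of the (int32_t -> float) instantiation above in use, e.g. for converting index tensors; the device pointers and stream are placeholders:

// Sketch: cast an int32 device buffer to float on the given stream (placeholders throughout).
#include <cstdint>
#include <cuda_runtime.h>
#include "src/runtime/delegate/tensorrt/cuda_impl/cast.cuh"

void CastIndicesToFloat(const int32_t *device_in, float *device_out, int element_cnt, cudaStream_t stream) {
  Cast(element_cnt, device_in, device_out, stream);
}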
|
|
@ -0,0 +1,23 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_CAST_H_
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_CAST_H_
|
||||
|
||||
template <typename S, typename T>
|
||||
void Cast(const int input_size, const S *input_addr, T *output_addr, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_CAST_H_
|
|
@ -0,0 +1,70 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cublas_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
void Cublas2DTranspose(const float *in_addr, float *out_addr, const int *params, cublasHandle_t cublas_handle) {
|
||||
const int m = params[0];
|
||||
const int n = params[1];
|
||||
const float alpha = 1.0f;
|
||||
const float beta = 0.0f;
|
||||
CUBLAS_CHECK_VOID(
|
||||
cublasSgeam(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, m, n, &alpha, in_addr, n, &beta, out_addr, m, out_addr, m));
|
||||
}
|
||||
|
||||
void CublasMM1Batch(const void *a_addr, const void *b_addr, void *c_addr, const int *params,
|
||||
const cublasOperation_t *operations, const cudaDataType *data_types, cublasHandle_t cublas_handle) {
|
||||
const int m = params[0];
|
||||
const int n = params[1];
|
||||
const int k = params[2];
|
||||
cublasOperation_t trans_a = operations[0];
|
||||
cublasOperation_t trans_b = operations[1];
|
||||
const int lda = (trans_a == CUBLAS_OP_N) ? k : m;
|
||||
const int ldb = (trans_b == CUBLAS_OP_N) ? n : k;
|
||||
const int ldc = n;
|
||||
cudaDataType type_a = data_types[0];
|
||||
cudaDataType type_b = data_types[1];
|
||||
cudaDataType type_c = data_types[2];
|
||||
cudaDataType compute_type = data_types[3];
|
||||
const float alpha = 1.0f;
|
||||
const float beta = 0.0f;
|
||||
CUBLAS_CHECK_VOID(cublasGemmEx(cublas_handle, trans_b, trans_a, n, m, k, &alpha, b_addr, type_b, ldb, a_addr, type_a,
|
||||
lda, &beta, c_addr, type_c, ldc, compute_type, CUBLAS_GEMM_DEFAULT_TENSOR_OP));
|
||||
}
|
||||
void CublasMMBatched(void **a_addrs, void **b_addrs, void **c_addrs, const int *params,
|
||||
const cublasOperation_t *operations, const cudaDataType *data_types,
|
||||
cublasHandle_t cublas_handle) {
|
||||
cublasOperation_t trans_a = operations[0];
|
||||
cublasOperation_t trans_b = operations[1];
|
||||
const int m = params[0];
|
||||
const int n = params[1];
|
||||
const int k = params[2];
|
||||
const int batch = params[3];
|
||||
const int lda = (trans_a == CUBLAS_OP_N) ? k : m;
|
||||
const int ldb = (trans_b == CUBLAS_OP_N) ? n : k;
|
||||
const int ldc = n;
|
||||
cudaDataType type_a = data_types[0];
|
||||
cudaDataType type_b = data_types[1];
|
||||
cudaDataType type_c = data_types[2];
|
||||
cudaDataType compute_type = data_types[3];
|
||||
const float alpha = 1.0f;
|
||||
const float beta = 0.0f;
|
||||
CUBLAS_CHECK_VOID(cublasGemmBatchedEx(cublas_handle, trans_b, trans_a, n, m, k, &alpha, b_addrs, type_b, ldb, a_addrs,
|
||||
type_a, lda, &beta, c_addrs, type_c, ldc, batch, compute_type,
|
||||
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
|
||||
}
|
||||
} // namespace mindspore::lite
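A note on the operand order above: cuBLAS assumes column-major storage, so these helpers pass B before A and swap m and n; in column-major terms that computes B^T * A^T = (A * B)^T, whose memory layout is exactly the row-major m x n product the callers expect. A hedged usage sketch for a single float GEMM (the handle and buffers are placeholders):

// Sketch: one row-major float matmul through CublasMM1Batch (all names are placeholders).
#include <cublas_v2.h>
#include "src/runtime/delegate/tensorrt/cuda_impl/cublas_utils.h"

void MatMulRowMajor(const float *a, const float *b, float *c, int m, int n, int k, cublasHandle_t handle) {
  const int params[] = {m, n, k};
  const cublasOperation_t operations[] = {CUBLAS_OP_N, CUBLAS_OP_N};                   // trans_a, trans_b
  const cudaDataType data_types[] = {CUDA_R_32F, CUDA_R_32F, CUDA_R_32F, CUDA_R_32F};  // a, b, c, compute
  mindspore::lite::CublasMM1Batch(a, b, c, params, operations, data_types, handle);
}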
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUBLAS_UTILS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUBLAS_UTILS_H_
|
||||
|
||||
#include <cublas_v2.h>
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
|
||||
#include "src/common/log_util.h"
|
||||
|
||||
// cublas API error checking
|
||||
#define CUBLAS_CHECK_VOID(err) \
|
||||
do { \
|
||||
cublasStatus_t cublas_err = (err); \
|
||||
if (cublas_err != CUBLAS_STATUS_SUCCESS) { \
|
||||
MS_LOG(ERROR) << "cublas error " << cublas_err; \
|
||||
return; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CUBLAS_CHECK(err) \
|
||||
do { \
|
||||
cublasStatus_t cublas_err = (err); \
|
||||
if (cublas_err != CUBLAS_STATUS_SUCCESS) { \
|
||||
MS_LOG(ERROR) << "cublas error " << cublas_err; \
|
||||
return -1; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
namespace mindspore::lite {
|
||||
// a: m * n
|
||||
// params order: m, n
|
||||
void Cublas2DTranspose(const float *in_addr, float *out_addr, const int *params, cublasHandle_t cublas_handle);
|
||||
|
||||
// a: m * k, b: k * n, c: m * n
|
||||
// params order: m, n, k
|
||||
// operations order: trans_a, trans_b
|
||||
// data_types: type_a, type_b, type_c, compute type
|
||||
void CublasMM1Batch(const void *a_addr, const void *b_addr, void *c_addr, const int *params,
|
||||
const cublasOperation_t *operations, const cudaDataType *data_types, cublasHandle_t cublas_handle);
|
||||
|
||||
// a: batch * m * k, b: batch * k * n, c: batch * m * n
|
||||
// params order: m, n, k, batch
|
||||
// operations order: trans_a, trans_b
|
||||
// data_types: type_a, type_b, type_c, compute type
|
||||
void CublasMMBatched(void **a_addrs, void **b_addrs, void **c_addrs, const int *params,
|
||||
const cublasOperation_t *operations, const cudaDataType *data_types, cublasHandle_t cublas_handle);
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUBLAS_UTILS_H_
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
|
||||
#include <cmath>
|
||||
#include "src/common/log_util.h"
|
||||
|
||||
CudaHelper &CudaHelper::GetInstance() {
|
||||
static CudaHelper instance;
|
||||
return instance;
|
||||
}
|
||||
int CudaHelper::GetThreadNum() const { return threads_per_block_; }
|
||||
int CudaHelper::GetThreadNum(const int block_size) const {
|
||||
return std::min(threads_per_block_, ((block_size - 1) / 32 + 1) * 32);
|
||||
}
|
||||
int CudaHelper::GetBlocksNum(const int total_threads) const {
|
||||
return std::min(((total_threads - 1) / threads_per_block_) + 1, max_blocks_);
|
||||
}
|
||||
int CudaHelper::GetBlocksNum(const int total_threads, const int block_size) const {
|
||||
int valid_block_size = std::min(block_size, threads_per_block_);
|
||||
if (valid_block_size == 0) {
|
||||
MS_LOG(ERROR) << "invalid input of block_size: " << block_size;
|
||||
return 0;
|
||||
}
|
||||
return std::min(((total_threads - 1) / valid_block_size) + 1, max_blocks_);
|
||||
}
|
||||
|
||||
CudaHelper::CudaHelper() {
|
||||
int device_id = 0;
|
||||
(void)cudaGetDevice(&device_id);
|
||||
cudaDeviceProp prop;
|
||||
(void)cudaGetDeviceProperties(&prop, device_id);
|
||||
threads_per_block_ = prop.maxThreadsPerBlock;
|
||||
max_blocks_ = prop.multiProcessorCount;
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDA_HELPER_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDA_HELPER_H_
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <algorithm>
|
||||
|
||||
class CudaHelper {
|
||||
public:
|
||||
int GetThreadNum() const;
|
||||
int GetThreadNum(const int block_size) const;
|
||||
int GetBlocksNum(const int total_threads) const;
|
||||
int GetBlocksNum(const int total_threads, const int block_size) const;
|
||||
static CudaHelper &GetInstance();
|
||||
|
||||
private:
|
||||
CudaHelper();
|
||||
~CudaHelper() = default;
|
||||
CudaHelper(const CudaHelper &) = delete;
|
||||
CudaHelper &operator=(const CudaHelper &) = delete;
|
||||
|
||||
int max_blocks_;
|
||||
int threads_per_block_;
|
||||
};
|
||||
|
||||
#define GET_BLOCKS(total_threads) CudaHelper::GetInstance().GetBlocksNum(total_threads)
|
||||
#define GET_BLOCKS_CAL(total_threads, block_size) CudaHelper::GetInstance().GetBlocksNum(total_threads, block_size)
|
||||
|
||||
#define GET_THREADS CudaHelper::GetInstance().GetThreadNum()
|
||||
#define GET_THREADS_CAL(block_size) CudaHelper::GetInstance().GetThreadNum(block_size)
|
||||
|
||||
#define CUDA_CHECK(ret) \
|
||||
do { \
|
||||
cudaError_t cuda_ret = (ret); \
|
||||
if ((cuda_ret) != cudaSuccess) { \
|
||||
return -1; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CUDA_CHECK_VOID(ret) \
|
||||
do { \
|
||||
cudaError_t cuda_ret = (ret); \
|
||||
if ((cuda_ret) != cudaSuccess) { \
|
||||
return; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDA_HELPER_H_
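Every kernel in this cuda_impl directory follows the same launch recipe: a 1-D grid-stride loop sized with the GET_BLOCKS/GET_THREADS macros above. A minimal sketch of the pattern; the kernel and its arguments are illustrative and not part of the change:

// Sketch: the grid-stride launch pattern used by the kernels in this directory.
#include <cuda_runtime.h>
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"

__global__ void ScaleKernel(const float *input, float *output, float factor, int element_cnt) {
  // Each thread strides over the flattened tensor, so any grid size covers all elements.
  for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
    output[pos] = input[pos] * factor;
  }
}

void Scale(const float *input, float *output, float factor, int element_cnt, cudaStream_t stream) {
  ScaleKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input, output, factor, element_cnt);
}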
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cudnn_utils.h"
|
||||
#include <unordered_map>
|
||||
|
||||
namespace mindspore::lite {
|
||||
cudnnDataType_t ConvertCudnnDataType(nvinfer1::DataType trt_datatype) {
|
||||
std::unordered_map<nvinfer1::DataType, cudnnDataType_t> data_types = {{nvinfer1::DataType::kFLOAT, CUDNN_DATA_FLOAT},
|
||||
{nvinfer1::DataType::kHALF, CUDNN_DATA_HALF},
|
||||
{nvinfer1::DataType::kINT32, CUDNN_DATA_INT32},
|
||||
{nvinfer1::DataType::kINT8, CUDNN_DATA_INT8}};
|
||||
if (data_types.find(trt_datatype) != data_types.end()) {
|
||||
return data_types[trt_datatype];
|
||||
} else {
|
||||
MS_LOG(ERROR) << "invalid datatype for cudnn: " << static_cast<int>(trt_datatype);
|
||||
}
|
||||
return CUDNN_DATA_FLOAT;
|
||||
}
|
||||
|
||||
int CudnnActivation(cudnnHandle_t handle, cudnnActivationDescriptor_t activation_desc,
|
||||
const cudnnTensorDescriptor_t x_dsc, const void *x, const cudnnTensorDescriptor_t y_dsc, void *y) {
|
||||
float alpha = 1.0f;
|
||||
float beta = 0.0f;
|
||||
CUDNN_CHECK(cudnnActivationForward(handle, activation_desc, &alpha, x_dsc, x, &beta, y_dsc, y));
|
||||
return 0;
|
||||
}
|
||||
} // namespace mindspore::lite
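A hedged sketch of how CudnnActivation is meant to be driven; the descriptor setup below uses standard cuDNN calls with placeholder shapes and is not taken from this change:

// Sketch: run a ReLU through CudnnActivation (handle, shape and buffers are placeholders).
#include <cudnn.h>
#include "src/runtime/delegate/tensorrt/cuda_impl/cudnn_utils.h"

int ReluWithCudnn(cudnnHandle_t handle, const float *x, float *y, int n, int c, int h, int w) {
  cudnnTensorDescriptor_t x_desc;
  cudnnActivationDescriptor_t act_desc;
  CUDNN_CHECK(cudnnCreateTensorDescriptor(&x_desc));
  CUDNN_CHECK(cudnnSetTensor4dDescriptor(x_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, c, h, w));
  CUDNN_CHECK(cudnnCreateActivationDescriptor(&act_desc));
  CUDNN_CHECK(cudnnSetActivationDescriptor(act_desc, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.0));
  // Input and output share the same shape, so one descriptor serves both.
  int ret = mindspore::lite::CudnnActivation(handle, act_desc, x_desc, x, x_desc, y);
  CUDNN_CHECK(cudnnDestroyActivationDescriptor(act_desc));
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(x_desc));
  return ret;
}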
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDNN_UTILS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDNN_UTILS_H_
|
||||
|
||||
#include <cudnn.h>
|
||||
#include <NvInfer.h>
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
|
||||
#include "src/common/log_util.h"
|
||||
|
||||
#define CUDNN_CHECK_VOID(err) \
|
||||
do { \
|
||||
cudnnStatus_t cudnn_err = (err); \
|
||||
if (cudnn_err != CUDNN_STATUS_SUCCESS) { \
|
||||
MS_LOG(ERROR) << "cudnn error " << cudnnGetErrorString(cudnn_err); \
|
||||
return; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CUDNN_CHECK(err) \
|
||||
do { \
|
||||
cudnnStatus_t cudnn_err = (err); \
|
||||
if (cudnn_err != CUDNN_STATUS_SUCCESS) { \
|
||||
MS_LOG(ERROR) << "cudnn error " << cudnnGetErrorString(cudnn_err); \
|
||||
return -1; \
|
||||
} \
|
||||
} while (0)
|
||||
namespace mindspore::lite {
|
||||
cudnnDataType_t ConvertCudnnDataType(nvinfer1::DataType trt_datatype);
|
||||
|
||||
int CudnnActivation(cudnnHandle_t handle, cudnnActivationDescriptor_t activation_desc,
|
||||
const cudnnTensorDescriptor_t x_dsc, const void *x, const cudnnTensorDescriptor_t y_dsc, void *y);
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDNN_UTILS_H_
|
|
@ -0,0 +1,35 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/equal.cuh"
|
||||
#include <stdio.h>
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
|
||||
|
||||
template <typename T>
|
||||
__global__ void EqualKernel(const T *input1, const T *input2, T *output, int element_cnt) {
|
||||
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
|
||||
output[pos] = (input1[pos] - input2[pos] < 1e-6 && input1[pos] - input2[pos] > -1e-6);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Equal(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
|
||||
EqualKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
|
||||
return;
|
||||
}
|
||||
|
||||
template void Equal(const float *input1, const float *input2, float *output, int element_cnt, cudaStream_t stream);
|
||||
template void Equal(const int *input1, const int *input2, int *output, int element_cnt, cudaStream_t stream);
|
|
@ -0,0 +1,23 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_EQUAL_H_
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_EQUAL_H_
|
||||
|
||||
template <typename T>
|
||||
void Equal(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_EQUAL_H_
|
|
@ -0,0 +1,64 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/hash.cuh"
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
|
||||
|
||||
template <typename T>
|
||||
__global__ void HashSwapOut(const T *hash_table, T *swap_out_value, const int *swap_out_index, const int index_size,
|
||||
const int hash_dim) {
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < index_size; i += blockDim.x * gridDim.x) {
|
||||
int hash_index = swap_out_index[i];
|
||||
for (int j = 0; j < hash_dim; j++) {
|
||||
swap_out_value[i * hash_dim + j] = hash_table[hash_index * hash_dim + j];
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void HashSwapIn(T *hash_table, const T *swap_in_value, const int *swap_in_index, const int index_size,
|
||||
const int hash_dim) {
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < index_size; i += blockDim.x * gridDim.x) {
|
||||
int hash_index = swap_in_index[i];
|
||||
for (int j = 0; j < hash_dim; j++) {
|
||||
hash_table[hash_index * hash_dim + j] = swap_in_value[i * hash_dim + j];
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void DoHashSwapOut(const T *hash_table, T *swap_out_value, const int *swap_out_index, const int index_size,
|
||||
const int hash_dim, cudaStream_t cuda_stream) {
|
||||
HashSwapOut<<<GET_BLOCKS(index_size), GET_THREADS, 0, cuda_stream>>>(hash_table, swap_out_value, swap_out_index,
|
||||
index_size, hash_dim);
|
||||
return;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void DoHashSwapIn(T *hash_table, const T *swap_in_value, const int *swap_in_index, const int index_size,
|
||||
const int hash_dim, cudaStream_t cuda_stream) {
|
||||
HashSwapIn<<<GET_BLOCKS(index_size), GET_THREADS, 0, cuda_stream>>>(hash_table, swap_in_value, swap_in_index,
|
||||
index_size, hash_dim);
|
||||
return;
|
||||
}
|
||||
|
||||
template void DoHashSwapOut<float>(const float *hash_table, float *swap_out_value, const int *swap_out_index,
|
||||
const int index_size, const int hash_dim, cudaStream_t cuda_stream);
|
||||
|
||||
template void DoHashSwapIn<float>(float *hash_table, const float *swap_in_value, const int *swap_in_index,
|
||||
const int index_size, const int hash_dim, cudaStream_t cuda_stream);
|
|
@ -0,0 +1,27 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_HASH_H_
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_HASH_H_
|
||||
|
||||
template <typename T>
|
||||
void DoHashSwapOut(const T *hash_table, T *swap_out_value, const int *swap_out_index, const int index_size,
|
||||
const int hash_dim, cudaStream_t cuda_stream);
|
||||
|
||||
template <typename T>
|
||||
void DoHashSwapIn(T *hash_table, const T *swap_in_value, const int *swap_in_index, const int index_size,
|
||||
const int hash_dim, cudaStream_t cuda_stream);
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_HASH_H_
|
|
@ -0,0 +1,63 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/logical.cuh"
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
|
||||
|
||||
template <typename T>
|
||||
__global__ void LogicalNotKernel(const T *input1, T *output, int element_cnt) {
|
||||
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
|
||||
output[pos] = static_cast<T>(input1[pos] == 0);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void LogicalAndKernel(const T *input_addr1, const T *input_addr2, T *output, int size) {
|
||||
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) {
|
||||
output[pos] = input_addr1[pos] * input_addr2[pos];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void LogicalOrKernel(const T *input_addr1, const T *input_addr2, T *output, int size) {
|
||||
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) {
|
||||
T sum = input_addr1[pos] + input_addr2[pos];
|
||||
output[pos] = static_cast<T>(sum > 0);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void LogicalNot(const T *input1, T *output, int element_cnt, cudaStream_t stream) {
|
||||
LogicalNotKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, output, element_cnt);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void LogicalAnd(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
|
||||
LogicalAndKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void LogicalOr(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
|
||||
LogicalOrKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
|
||||
}
|
||||
|
||||
template void LogicalNot(const int32_t *input1, int32_t *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
template void LogicalAnd(const int32_t *input1, const int32_t *input2, int32_t *output, int element_cnt,
|
||||
cudaStream_t stream);
|
||||
|
||||
template void LogicalOr(const int32_t *input1, const int32_t *input2, int32_t *output, int element_cnt,
|
||||
cudaStream_t stream);
|
|
@ -0,0 +1,29 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_LOGICAL_H_
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_LOGICAL_H_
|
||||
|
||||
template <typename T>
|
||||
void LogicalAnd(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void LogicalOr(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void LogicalNot(const T *input1, T *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_LOGICAL_H_
|
|
@ -0,0 +1,98 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/normalize.cuh"
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/utils.cuh"
|
||||
|
||||
template <typename T>
|
||||
__global__ void NormalizeKernel(const T *input, const T *gamma, const T *beta, T *output, size_t n, float epsilon,
|
||||
int dim_before_axis) {
|
||||
const int tid = threadIdx.x;
|
||||
const int bid = blockIdx.x;
|
||||
const int block_loop = (dim_before_axis - 1) / gridDim.x + 1;
|
||||
const int element_cnt = dim_before_axis * n;
|
||||
|
||||
__shared__ float s_mean[2048];
|
||||
__shared__ float s_variance[2048];
|
||||
float sum = 0.0f;
|
||||
float variance = 0.0f;
|
||||
|
||||
for (int block = 0; block < block_loop; block++) {
|
||||
float local_sum = 0.0f;
|
||||
int mean_index = bid + block * gridDim.x;
|
||||
int num_index = bid * n + block * gridDim.x * blockDim.x;
|
||||
for (int i = tid; i < n; i += blockDim.x) {
|
||||
if (num_index + i >= element_cnt) {
|
||||
break;
|
||||
}
|
||||
local_sum += static_cast<float>(input[num_index + i]);
|
||||
}
|
||||
sum = blockReduceSum(local_sum);
|
||||
if (tid == 0) {
|
||||
s_mean[mean_index] = sum / n;
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
for (int block = 0; block < block_loop; block++) {
|
||||
float local_var_sum = 0.0f;
|
||||
int var_index = bid + block * gridDim.x;
|
||||
int num_index = bid * n + block * gridDim.x * blockDim.x;
|
||||
for (int i = tid; i < n; i += blockDim.x) {
|
||||
if (num_index + i >= element_cnt) {
|
||||
break;
|
||||
}
|
||||
float diff = static_cast<float>(input[num_index + i]) - s_mean[var_index];
|
||||
local_var_sum += diff * diff;
|
||||
}
|
||||
variance = blockReduceSum(local_var_sum);
|
||||
if (tid == 0) {
|
||||
s_variance[var_index] = rsqrtf(variance / n + epsilon);
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
for (int block = 0; block < block_loop; block++) {
|
||||
int var_index = bid + block * gridDim.x;
|
||||
int num_index = bid * n + block * gridDim.x * blockDim.x;
|
||||
for (int i = tid; i < n; i += blockDim.x) {
|
||||
if (num_index + i >= element_cnt) {
|
||||
break;
|
||||
}
|
||||
float beta_val = (beta == nullptr) ? 0.0f : static_cast<float>(beta[i]);
|
||||
output[num_index + i] =
|
||||
static_cast<T>(((static_cast<float>(input[num_index + i]) - s_mean[var_index]) * s_variance[var_index]) *
|
||||
static_cast<float>(gamma[i]) +
|
||||
beta_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Normalize(const T *input, const T *gamma, const T *beta, T *output, size_t dim_at_axis, float epsilon,
|
||||
int element_cnt, cudaStream_t stream) {
|
||||
int thread_num = GET_THREADS_CAL(dim_at_axis);
|
||||
int block_num = GET_BLOCKS_CAL(element_cnt, thread_num);
|
||||
int dim_before_axis = element_cnt / dim_at_axis;
|
||||
NormalizeKernel<<<block_num, thread_num, 0, stream>>>(input, gamma, beta, output, dim_at_axis, epsilon,
|
||||
dim_before_axis);
|
||||
return;
|
||||
}
|
||||
|
||||
template void Normalize(const float *input, const float *gamma, const float *beta, float *output, size_t dim_at_axis,
|
||||
float epsilon, int element_cnt, cudaStream_t stream);
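For reference, the kernel above is a layer normalization over the last axis: each group of dim_at_axis values is centered on its mean, scaled by rsqrt(variance + epsilon), then scaled by gamma and shifted by beta (zero when beta is null). A host-side sketch of the same arithmetic, assuming gamma and beta of length dim_at_axis, that can be used to spot-check kernel output:

// Sketch: CPU reference for the normalization computed by NormalizeKernel (verification only).
#include <cmath>
#include <vector>

std::vector<float> NormalizeReference(const std::vector<float> &input, const std::vector<float> &gamma,
                                      const std::vector<float> &beta, size_t dim_at_axis, float epsilon) {
  std::vector<float> output(input.size());
  for (size_t base = 0; base < input.size(); base += dim_at_axis) {
    float mean = 0.0f;
    for (size_t i = 0; i < dim_at_axis; i++) {
      mean += input[base + i];
    }
    mean /= dim_at_axis;
    float variance = 0.0f;
    for (size_t i = 0; i < dim_at_axis; i++) {
      float diff = input[base + i] - mean;
      variance += diff * diff;
    }
    float inv_std = 1.0f / std::sqrt(variance / dim_at_axis + epsilon);
    for (size_t i = 0; i < dim_at_axis; i++) {
      output[base + i] = (input[base + i] - mean) * inv_std * gamma[i] + beta[i];
    }
  }
  return output;
}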
|
|
@ -0,0 +1,24 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_NORMALIZE_H_
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_NORMALIZE_H_
|
||||
|
||||
template <typename T>
|
||||
void Normalize(const T *input, const T *gamma, const T *beta, T *output, size_t dim_at_axis, float epsilon,
|
||||
int element_cnt, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CUDA_IMPL_NORMALIZE_H_
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <curand_kernel.h>
|
||||
|
||||
#define FINAL_MASK 0xffffffff
|
||||
|
||||
template <typename T>
|
||||
__device__ T warpedReduceSum(T val) {
|
||||
#pragma unroll
|
||||
for (int mask = 16; mask > 0; mask >>= 1) {
|
||||
val += __shfl_xor_sync(FINAL_MASK, val, mask, 32);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ T blockReduceSum(T val) {
|
||||
static __shared__ T shared[32];
|
||||
int warped = threadIdx.x & 0x1f;
|
||||
val = warpedReduceSum<T>(val);
|
||||
if (warped == 0) shared[threadIdx.x >> 5] = val;
|
||||
__syncthreads();
|
||||
val = (threadIdx.x < (blockDim.x / 32.f)) ? shared[warped] : static_cast<T>(0.0);
|
||||
val = warpedReduceSum<T>(val);
|
||||
return val;
|
||||
}
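These helpers implement a two-level reduction: warpedReduceSum folds the 32 lanes of a warp with a butterfly of __shfl_xor_sync calls, and blockReduceSum parks one partial sum per warp in shared memory before the first warp folds those partials. NormalizeKernel relies on this to turn a whole thread block into a single mean or variance. A tiny illustrative kernel, assuming this header is the utils.cuh included by normalize.cu:

// Sketch: reduce each block's slice of a buffer to one partial sum using blockReduceSum.
#include "src/runtime/delegate/tensorrt/cuda_impl/utils.cuh"

__global__ void BlockSumKernel(const float *input, float *block_sums, int element_cnt) {
  float local = 0.0f;
  // Grid-stride accumulation per thread, then one block-wide reduction.
  for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
    local += input[pos];
  }
  float total = blockReduceSum(local);
  if (threadIdx.x == 0) {
    block_sums[blockIdx.x] = total;
  }
}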
|
|
@ -0,0 +1,23 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/distribution/distribution_base.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int GetGPUGroupSize() { return 1; }
|
||||
|
||||
int GetRankID() { return 0; }
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,31 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_BASE_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_BASE_H_
|
||||
|
||||
#include <string>
|
||||
#include "src/common/log_adapter.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr char NCCL_WORLD_GROUP[] = "nccl_world_group";
|
||||
|
||||
int GetGPUGroupSize();
|
||||
|
||||
int GetRankID();
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_BASE_H_
|
|
@ -0,0 +1,28 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/distribution/distribution_base.h"
|
||||
#include <unistd.h>
|
||||
#include <thread>
|
||||
#include <string>
|
||||
#include "plugin/device/gpu/hal/device/distribution/collective_wrapper.h"
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int GetGPUGroupSize() { return GetGroupSize(NCCL_WORLD_GROUP); }
|
||||
|
||||
int GetRankID() { return GetRankIDByGroup(NCCL_WORLD_GROUP); }
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,38 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/distribution/distribution_collective.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
DistributionCollective::DistributionCollective() {}
|
||||
|
||||
DistributionCollective &DistributionCollective::instance() {
|
||||
static DistributionCollective instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
int DistributionCollective::ReduceScatterWrapper(const void *input_addr, void *output_addr, size_t count,
|
||||
nvinfer1::DataType data_type, schema::ReduceMode reduce_type,
|
||||
cudaStream_t stream, const std::string &group) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int DistributionCollective::AllGatherWrapper(const void *input_addr, void *output_addr, size_t count,
|
||||
nvinfer1::DataType data_type, cudaStream_t stream,
|
||||
const std::string &group_name) {
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,45 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_COLLECTIVE_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_COLLECTIVE_H_
|
||||
|
||||
#include <string>
|
||||
#include "NvInfer.h"
|
||||
#include "schema/ops_types_generated.h"
|
||||
#include "src/runtime/delegate/tensorrt/distribution/distribution_base.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class DistributionCollective {
|
||||
public:
|
||||
DistributionCollective(DistributionCollective const &) = delete;
|
||||
|
||||
DistributionCollective &operator=(const DistributionCollective &) = delete;
|
||||
|
||||
static DistributionCollective &instance();
|
||||
|
||||
int ReduceScatterWrapper(const void *input_addr, void *output_addr, size_t count, nvinfer1::DataType data_type,
|
||||
schema::ReduceMode reduce_type, cudaStream_t stream, const std::string &group);
|
||||
|
||||
int AllGatherWrapper(const void *input_addr, void *output_addr, size_t count, nvinfer1::DataType data_type,
|
||||
cudaStream_t stream, const std::string &group_name);
|
||||
|
||||
private:
|
||||
DistributionCollective();
|
||||
|
||||
~DistributionCollective() = default;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_COLLECTIVE_H_
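A hedged sketch of calling the singleton from kernel code (the buffers, element count and stream are placeholders): the stub variant of the .cc above returns RET_OK immediately, while the NCCL variant below performs the collective and synchronizes the stream.

// Sketch: all-gather a device buffer across the NCCL world group (all names are placeholders).
#include <cuda_runtime.h>
#include "src/runtime/delegate/tensorrt/distribution/distribution_collective.h"

int AllGatherFloat(const void *device_in, void *device_out, size_t count, cudaStream_t stream) {
  auto &collective = mindspore::lite::DistributionCollective::instance();
  return collective.AllGatherWrapper(device_in, device_out, count, nvinfer1::DataType::kFLOAT, stream,
                                     mindspore::lite::NCCL_WORLD_GROUP);
}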
|
|
@ -0,0 +1,72 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/distribution/distribution_collective.h"
#include <unistd.h>
#include <thread>
#include <string>
#include "plugin/device/gpu/hal/device/distribution/collective_wrapper.h"
#include "src/runtime/delegate/tensorrt/distribution/distribution_utils.h"
#include "src/runtime/delegate/tensorrt/distribution/distribution_base.h"

namespace mindspore::lite {
DistributionCollective::DistributionCollective() {
  InitMPI();
  InitNCCLComm();
}

DistributionCollective &DistributionCollective::instance() {
  static DistributionCollective instance;
  return instance;
}

int DistributionCollective::ReduceScatterWrapper(const void *input_addr, void *output_addr, size_t count,
                                                 nvinfer1::DataType data_type, schema::ReduceMode reduce_type,
                                                 cudaStream_t stream, const std::string &group) {
  int rank_id = GetRankID();
  MS_LOG(DEBUG) << "ReduceScatter on rank: " << rank_id;
  ncclResult_t ret = ReduceScatter(input_addr, output_addr, count, ConvertNCCLDataType(data_type),
                                   ConvertNCCLReduceMode(reduce_type), stream, group);
  if (ret != ncclSuccess) {
    MS_LOG(ERROR) << "ReduceScatter failed: " << static_cast<int>(ret);
    return RET_ERROR;
  }
  auto cuda_ret = cudaStreamSynchronize(stream);
  if (cuda_ret != cudaSuccess) {
    MS_LOG(ERROR) << "cudaStreamSynchronize failed: " << static_cast<int>(cuda_ret);
    return RET_ERROR;
  }
  return RET_OK;
}

int DistributionCollective::AllGatherWrapper(const void *input_addr, void *output_addr, size_t count,
                                             nvinfer1::DataType data_type, cudaStream_t stream,
                                             const std::string &group_name) {
  int rank_id = GetRankID();
  MS_LOG(DEBUG) << "AllGather on rank: " << rank_id;
  ncclResult_t ret = AllGather(input_addr, output_addr, count, ConvertNCCLDataType(data_type), stream, group_name);
  if (ret != ncclSuccess) {
    MS_LOG(ERROR) << "AllGather failed: " << static_cast<int>(ret);
    return RET_ERROR;
  }
  auto cuda_ret = cudaStreamSynchronize(stream);
  if (cuda_ret != cudaSuccess) {
    MS_LOG(ERROR) << "cudaStreamSynchronize failed: " << static_cast<int>(cuda_ret);
    return RET_ERROR;
  }
  return RET_OK;
}
}  // namespace mindspore::lite
@ -0,0 +1,58 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/distribution/distribution_utils.h"
#include <unordered_map>
#include "src/common/log_adapter.h"

namespace mindspore::lite {
ncclDataType_t ConvertNCCLDataType(nvinfer1::DataType type_id) {
  std::unordered_map<nvinfer1::DataType, ncclDataType_t> data_type_map = {
    {nvinfer1::DataType::kINT8, ncclInt8},
    {nvinfer1::DataType::kINT32, ncclInt32},
    {nvinfer1::DataType::kFLOAT, ncclFloat32},
    {nvinfer1::DataType::kHALF, ncclHalf},
  };
  auto iter = data_type_map.find(type_id);
  ncclDataType_t data_type;
  if (iter != data_type_map.end()) {
    data_type = iter->second;
  } else {
    data_type = ncclFloat32;
    MS_LOG(WARNING) << "invalid data_type for NCCL, need check: " << static_cast<int>(type_id);
  }
  return data_type;
}

ncclRedOp_t ConvertNCCLReduceMode(schema::ReduceMode mode) {
  std::unordered_map<schema::ReduceMode, ncclRedOp_t> reduce_ops_ = {
    // higher NCCL versions also support mean: {schema::ReduceMode::ReduceMode_ReduceMean, ncclAvg},
    {schema::ReduceMode::ReduceMode_ReduceMax, ncclMax},
    {schema::ReduceMode::ReduceMode_ReduceMin, ncclMin},
    {schema::ReduceMode::ReduceMode_ReduceProd, ncclProd},
    {schema::ReduceMode::ReduceMode_ReduceSum, ncclSum},
  };
  auto iter = reduce_ops_.find(mode);
  ncclRedOp_t nccl_mode;
  if (iter != reduce_ops_.end()) {
    nccl_mode = iter->second;
  } else {
    nccl_mode = ncclSum;
    MS_LOG(WARNING) << "invalid reduce for NCCL, need check: " << static_cast<int>(mode);
  }
  return nccl_mode;
}
}  // namespace mindspore::lite
@ -0,0 +1,32 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_UTILS_H_

#include <nccl.h>
#include "include/errorcode.h"
#include "NvInfer.h"
#include "schema/ops_types_generated.h"

using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;

namespace mindspore::lite {
ncclDataType_t ConvertNCCLDataType(nvinfer1::DataType type_id);

ncclRedOp_t ConvertNCCLReduceMode(schema::ReduceMode mode);
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_UTILS_H_
@ -0,0 +1,116 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <vector>
#include <functional>
#include <unordered_map>
#include <algorithm>
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "NvInferRuntimeCommon.h"
#include "src/runtime/delegate/tensorrt/op/activation_opt_plugin.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/activation.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/swish_impl.cuh"

namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(ActivationOptPluginCreater);
template class TensorRTPluginCreater<ActivationOptPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;

int ActivationOptPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
                                 const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
                                 void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
  return RunCudaActivation(inputDesc, inputs, outputs, stream);
}

bool ActivationOptPlugin::needResize(const int *current_dims, const int *last_dims) {
  for (int i = 0; i < infer_dims_cnt_; i++) {
    if (current_dims[i] != last_dims[i]) {
      return true;
    }
  }
  return false;
}

int ActivationOptPlugin::RunCuDNNActivation(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
                                            void *const *outputs, cudaStream_t stream) {
  if (needResize(infer_dims_, inputDesc[0].dims.d)) {
    if (input_desc_ != nullptr) {
      CUDNN_CHECK(cudnnDestroyTensorDescriptor(input_desc_));
      input_desc_ = nullptr;
    }
    CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc_));
    for (int i = 0; i < inputDesc[0].dims.nbDims; i++) {
      infer_dims_[i] = inputDesc[0].dims.d[i];
    }
    CUDNN_CHECK(cudnnSetTensorNdDescriptor(input_desc_, ConvertCudnnDataType(inputDesc[0].type), infer_dims_cnt_,
                                           infer_dims_, infer_stride_));
  }
  CHECK_NULL_RETURN(cudnn_handle_);
  CHECK_NULL_RETURN(activation_desc_);
  CHECK_NULL_RETURN(input_desc_);
  CUDNN_CHECK(cudnnSetStream(cudnn_handle_, stream));
  auto ret = CudnnActivation(cudnn_handle_, activation_desc_, input_desc_, inputs[0], input_desc_, outputs[0]);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "cudnn activation func call failed " << layer_name_;
    return ret;
  }
  return RET_OK;
}

int ActivationOptPlugin::RunCudaActivation(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
                                           void *const *outputs, cudaStream_t stream) {
  switch (activation_type_) {
    case (schema::ActivationType::ActivationType_SIGMOID): {
      Sigmoid(static_cast<const float *>(inputs[0]), static_cast<float *>(outputs[0]), GetDimsVolume(inputDesc[0].dims),
              stream);
      break;
    }
    case (schema::ActivationType::ActivationType_GELU): {
      Gelu(static_cast<const float *>(inputs[0]), static_cast<float *>(outputs[0]), GetDimsVolume(inputDesc[0].dims),
           stream);
      break;
    }
    case (schema::ActivationType::ActivationType_SWISH): {
      CalSwish(GetDimsVolume(inputDesc[0].dims), static_cast<const float *>(inputs[0]),
               static_cast<float *>(outputs[0]), stream, device_id_);
      break;
    }
    default: {
      MS_LOG(ERROR) << "invalid activation type: " << static_cast<int>(activation_type_);
      return RET_ERROR;
    }
  }
  return RET_OK;
}

nvinfer1::IPluginV2DynamicExt *ActivationOptPlugin::clone() const noexcept {
  auto *plugin = new ActivationOptPlugin(*this);
  plugin->setPluginNamespace(name_space_.c_str());
  return plugin;
}

size_t ActivationOptPlugin::getSerializationSize() const noexcept { return sizeof(schema::ActivationType); }

void ActivationOptPlugin::serialize(void *buffer) const noexcept {
  SerializeValue(&buffer, &activation_type_, sizeof(schema::ActivationType));
}
}  // namespace mindspore::lite
@ -0,0 +1,72 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_OPT_PLUGIN_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_OPT_PLUGIN_H_

#include <string>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/cudnn_utils.h"

namespace mindspore::lite {
constexpr char *ACTIVATION_OPT_PLUGIN_NAME{"ActivationOptPlugin"};
class ActivationOptPlugin : public TensorRTPlugin {
 public:
  ActivationOptPlugin(const std::string name, schema::ActivationType activation_type, uint32_t device_id)
      : TensorRTPlugin(name, std::string(ACTIVATION_OPT_PLUGIN_NAME), device_id), activation_type_(activation_type) {}

  ActivationOptPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
      : TensorRTPlugin(std::string(name), std::string(ACTIVATION_OPT_PLUGIN_NAME)) {
    const nvinfer1::PluginField *fields = fc->fields;
    activation_type_ = static_cast<const schema::ActivationType *>(fields[0].data)[0];
  }

  ActivationOptPlugin(const char *name, const void *serialData, size_t serialLength)
      : TensorRTPlugin(std::string(name), std::string(ACTIVATION_OPT_PLUGIN_NAME)) {
    DeserializeValue(&serialData, &serialLength, &activation_type_, sizeof(schema::ActivationType));
  }

  ActivationOptPlugin() = delete;

  nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
  size_t getSerializationSize() const noexcept override;
  void serialize(void *buffer) const noexcept override;

 private:
  bool needResize(const int *current_dims, const int *last_dims);
  int RunCudaActivation(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
                        cudaStream_t stream);
  int RunCuDNNActivation(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
                         cudaStream_t stream);
  const std::string layer_name_;
  std::string name_space_;
  schema::ActivationType activation_type_;
  cudnnHandle_t cudnn_handle_{nullptr};
  cudnnActivationDescriptor_t activation_desc_{nullptr};
  cudnnTensorDescriptor_t input_desc_{nullptr};
  int infer_dims_[5]{1, 1, 1, 1, 1};
  int infer_stride_[5]{1, 1, 1, 1, 1};
  int infer_dims_cnt_{0};
};
class ActivationOptPluginCreater : public TensorRTPluginCreater<ActivationOptPlugin> {
 public:
  ActivationOptPluginCreater() : TensorRTPluginCreater(std::string(ACTIVATION_OPT_PLUGIN_NAME)) {}
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_OPT_PLUGIN_H_
@ -0,0 +1,153 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
#include <cfloat>
#include <memory>
#include <unordered_set>
#include "src/runtime/delegate/tensorrt/op/cast_tensorrt.h"
#include "src/runtime/delegate/tensorrt/op/activation_opt_plugin.h"

namespace mindspore::lite {
namespace {
bool HasCustomActivationPlugin(schema::ActivationType type) {
  std::unordered_set<schema::ActivationType> plugin_activation = {schema::ActivationType::ActivationType_SIGMOID,
                                                                  schema::ActivationType::ActivationType_GELU,
                                                                  schema::ActivationType::ActivationType_SWISH};
  return plugin_activation.find(type) != plugin_activation.end();
}
}  // namespace

int ActivationTensorRT::IsSupport(const schema::Primitive *primitive,
                                  const std::vector<mindspore::MSTensor> &in_tensors,
                                  const std::vector<mindspore::MSTensor> &out_tensors) {
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
    return RET_ERROR;
  }
  auto activation_op = this->op_primitive_->value_as_Activation();
  if (activation_op == nullptr) {
    MS_LOG(ERROR) << "op convert failed";
    return RET_ERROR;
  }
  auto activation_params_opt = TryConvertActivationType(activation_op->activation_type());
  bool has_custom_plugin = HasCustomActivationPlugin(activation_op->activation_type());
  if (!activation_params_opt && !has_custom_plugin) {
    MS_LOG(ERROR) << "Unsupported op action type for TensorRT: " << activation_op->activation_type();
    return RET_ERROR;
  }
  return RET_OK;
}
int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
  if (ctx->network() == nullptr) {
    MS_LOG(ERROR) << "network is invalid";
    return RET_ERROR;
  }
  auto activation_op = this->op_primitive_->value_as_Activation();
  if (activation_op == nullptr) {
    MS_LOG(ERROR) << "op convert failed";
    return RET_ERROR;
  }
  float alpha = activation_op->alpha();
  nvinfer1::ITensor *activation_input = tensorrt_in_tensors_[0].trt_tensor_;
  if (tensorrt_in_tensors_[0].trt_tensor_->getType() == nvinfer1::DataType::kINT32) {
    activation_input =
      TRTTensorCast(ctx, tensorrt_in_tensors_[0].trt_tensor_, nvinfer1::DataType::kFLOAT, op_name_ + "_cast_in");
  }

  auto activation_layer =
    ActivationTensorRT::AddActivation(ctx, activation_op->activation_type(), alpha,
                                      std::isfinite(activation_op->min_val()) ? activation_op->min_val() : FLT_MIN,
                                      std::isfinite(activation_op->max_val()) ? activation_op->max_val() : FLT_MAX,
                                      activation_input, device_id_, quant_type_);
  if (activation_layer == nullptr) {
    MS_LOG(ERROR) << "add activation op failed for TensorRT.";
    return RET_ERROR;
  }

  activation_layer->setName(op_name_.c_str());
  // cast back to the origin type
  nvinfer1::ITensor *out_tensor = activation_layer->getOutput(0);
  if (out_tensor->getType() != ConvertDataType(out_tensors_[0].DataType())) {
    out_tensor = TRTTensorCast(ctx, activation_layer->getOutput(0), ConvertDataType(out_tensors_[0].DataType()),
                               op_name_ + "_cast_out");
  }
  out_tensor->setName((op_name_ + "_output").c_str());
  this->AddInnerOutTensors(
    ITensorHelper{out_tensor, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
  this->layer_ = activation_layer;
  return RET_OK;
}
nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type,
                                                    float alpha, float min_value, float max_value,
                                                    nvinfer1::ITensor *trt_in_tensor, uint32_t device_id,
                                                    schema::QuantType quant_type) {
  bool has_custom_plugin = HasCustomActivationPlugin(activation_type);
  // sigmoid precision is wrong for trt
  if (quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
    std::string layer_name = std::string(trt_in_tensor->getName()) + "_activation";
    auto plugin = std::make_shared<ActivationOptPlugin>(layer_name.c_str(), activation_type, device_id);
    MS_LOG(INFO) << "using opt plugin for " << layer_name;
    if (plugin == nullptr) {
      MS_LOG(ERROR) << "create ActivationOptPlugin failed for " << layer_name;
      return nullptr;
    }
    nvinfer1::ITensor *inputTensors[] = {trt_in_tensor};
    nvinfer1::IPluginV2Layer *activation_opt_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
    activation_opt_layer->setName(layer_name.c_str());
    return activation_opt_layer;
  }

  // Only part of the activation codes convert correctly; codes that are not found fall back to default relu.
  // Needs double check.
  auto action_param_opt = TryConvertActivationType(activation_type);
  if (!action_param_opt) {
    MS_LOG(ERROR) << "Unsupported op action type for TensorRT: " << activation_type;
    return nullptr;
  }
  auto action_param = action_param_opt.value();
  nvinfer1::IActivationLayer *activation_layer =
    ctx->network()->addActivation(*trt_in_tensor, action_param.activation_type);
  if (activation_layer == nullptr) {
    MS_LOG(ERROR) << "add activation op failed for TensorRT.";
    return nullptr;
  }

  if (activation_type == schema::ActivationType_HARD_TANH) {
    activation_layer->setAlpha(min_value);
    activation_layer->setBeta(max_value);
    return activation_layer;
  }

  if (action_param.has_alpha) {
    activation_layer->setAlpha(alpha);
  }

  if (action_param.has_beta) {
    activation_layer->setBeta(action_param.beta);
  }

  return activation_layer;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Activation, ActivationTensorRT)
}  // namespace mindspore::lite
@ -0,0 +1,43 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"

namespace mindspore::lite {
class ActivationTensorRT : public TensorRTOp {
 public:
  ActivationTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                     const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
                     const schema::QuantType &quant_type)
      : TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}

  ~ActivationTensorRT() override = default;

  int AddInnerOp(TensorRTContext *ctx) override;

  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors) override;

  static nvinfer1::ILayer *AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type, float alpha,
                                         float min_value, float max_value, nvinfer1::ITensor *trt_in_tensor,
                                         uint32_t device_id = 0,
                                         schema::QuantType quant_type = schema::QuantType_QUANT_NONE);
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_
@ -0,0 +1,113 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/op/allgather_tensorrt.h"
#include <numeric>
#include "NvInferRuntimeCommon.h"

namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(AllGatherPluginCreater);
template class TensorRTPluginCreater<AllGatherPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;

int AllGatherTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                                 const std::vector<mindspore::MSTensor> &out_tensors) {
#ifndef LITE_CUDA_DISTRIBUTION
  MS_LOG(ERROR)
    << "Unsupported package for gpu distribution feature, please recompile with MS_ENABLE_CUDA_DISTRIBUTION set to on.";
  return RET_ERROR;
#else
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != 1) {
    MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
    return RET_ERROR;
  }
  dynamic_shape_params_.support_hw_dynamic_ = false;
  return RET_OK;
#endif
}

int AllGatherTensorRT::AddInnerOp(TensorRTContext *ctx) {
  nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_};
  auto allgather_op = op_primitive_->value_as_AllGather();
  if (allgather_op == nullptr) {
    MS_LOG(ERROR) << "convert failed for " << op_name_;
    return RET_ERROR;
  }
  int rank = GetGPUGroupSize();
  auto plugin = std::make_shared<AllGatherPlugin>(op_name_, rank, device_id_);
  MS_LOG(INFO) << op_name_ << " group size: " << rank << ", rank id: " << GetRankID();
  nvinfer1::IPluginV2Layer *allgather_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
  if (allgather_layer == nullptr) {
    MS_LOG(ERROR) << "create AllGather layer failed for: " << op_name_;
    return RET_ERROR;
  }
  nvinfer1::ITensor *allgather_out = allgather_layer->getOutput(0);
  allgather_layer->setName(op_name_.c_str());
  allgather_out->setName((op_name_ + "_output").c_str());
  this->AddInnerOutTensors(
    ITensorHelper{allgather_out, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
  this->layer_ = allgather_layer;
  return RET_OK;
}

// AllGatherPlugin
int AllGatherPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
                             const void *const *inputs, void *const *outputs, void *workspace,
                             cudaStream_t stream) noexcept {
  MS_LOG(INFO) << "all gather run at rank id: " << GetRankID() << " stream: " << stream;
  nvinfer1::Dims input_dims = inputDesc[0].dims;
  int send_element_cnt = std::accumulate(input_dims.d, input_dims.d + input_dims.nbDims, 1, std::multiplies<int64_t>());
  const void *input = inputs[0];
  void *output = outputs[0];
  auto ret = DistributionCollective::instance().AllGatherWrapper(input, output, send_element_cnt, inputDesc->type,
                                                                 stream, NCCL_WORLD_GROUP);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "AllGather nccl run failed for " << layer_name_;
    return ret;
  }
  return RET_OK;
}

nvinfer1::IPluginV2DynamicExt *AllGatherPlugin::clone() const noexcept {
  auto *plugin = new AllGatherPlugin(*this);
  plugin->setPluginNamespace(name_space_.c_str());
  return plugin;
}

nvinfer1::DimsExprs AllGatherPlugin::getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs,
                                                         int nbInputs, nvinfer1::IExprBuilder &exprBuilder) noexcept {
  nvinfer1::DimsExprs out_dims{};
  out_dims.nbDims = inputs->nbDims;
  auto rank_dim = exprBuilder.constant(rank_);
  out_dims.d[0] = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs->d[0], *rank_dim);
  for (int i = 1; i < inputs->nbDims; i++) {
    out_dims.d[i] = inputs->d[i];
  }
  return out_dims;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_AllGather, AllGatherTensorRT)
}  // namespace mindspore::lite
@ -0,0 +1,75 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ALLGATHER_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ALLGATHER_TENSORRT_H_
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "src/runtime/delegate/tensorrt/distribution/distribution_collective.h"

namespace mindspore::lite {
constexpr char *ALLGATHER_PLUGIN_NAME{"AllGatherPlugin"};
class AllGatherTensorRT : public TensorRTOp {
 public:
  AllGatherTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                    const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
                    const schema::QuantType &quant_type)
      : TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}

  ~AllGatherTensorRT() override = default;

  int AddInnerOp(TensorRTContext *ctx) override;

  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors) override;
};

class AllGatherPlugin : public TensorRTPlugin {
 public:
  AllGatherPlugin(const std::string name, int rank, uint32_t device_id)
      : TensorRTPlugin(name, std::string(ALLGATHER_PLUGIN_NAME), device_id), rank_(rank) {}

  AllGatherPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
      : TensorRTPlugin(std::string(name), std::string(ALLGATHER_PLUGIN_NAME)) {
    const nvinfer1::PluginField *fields = fc->fields;
    rank_ = static_cast<const int *>(fields[0].data)[0];
  }

  AllGatherPlugin(const char *name, const void *serialData, size_t serialLength)
      : TensorRTPlugin(std::string(name), std::string(ALLGATHER_PLUGIN_NAME)) {
    DeserializeValue(&serialData, &serialLength, &rank_, sizeof(int));
  }

  AllGatherPlugin() = delete;

  nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
  nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
                                          nvinfer1::IExprBuilder &exprBuilder) noexcept override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;

 private:
  int rank_{0};
};
class AllGatherPluginCreater : public TensorRTPluginCreater<AllGatherPlugin> {
 public:
  AllGatherPluginCreater() : TensorRTPluginCreater(std::string(ALLGATHER_PLUGIN_NAME)) {}
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ALLGATHER_TENSORRT_H_
@ -0,0 +1,83 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/op/cast_plugin.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/cast.cuh"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <functional>

namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(CastPluginCreater);
template class TensorRTPluginCreater<CastPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;

int CastPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
                        const void *const *inputs, void *const *outputs, void *workspace,
                        cudaStream_t stream) noexcept {
  nvinfer1::Dims input_dims = inputDesc[0].dims;
  int element_cnt = std::accumulate(input_dims.d, input_dims.d + input_dims.nbDims, 1, std::multiplies<int64_t>());

  if (inputDesc->type == outputDesc->type) {
    int element_size = (outputDesc->type == nvinfer1::DataType::kFLOAT)
                         ? sizeof(float)
                         : ((outputDesc->type == nvinfer1::DataType::kINT32) ? sizeof(int) : 0);
    auto cuda_ret = cudaMemcpy(outputs[0], inputs[0], element_cnt * element_size, cudaMemcpyDeviceToDevice);
    if (cuda_ret != cudaSuccess) {
      MS_LOG(ERROR) << "copy mem failed for " << layer_name_;
      return RET_ERROR;
    }
    return RET_OK;
  }
  if (inputDesc->type == nvinfer1::DataType::kINT32 && dest_datatype_ == nvinfer1::DataType::kFLOAT) {
    auto input = static_cast<const int *>(inputs[0]);
    auto output = static_cast<float *>(outputs[0]);
    Cast(element_cnt, input, output, stream);
  } else if (inputDesc->type == nvinfer1::DataType::kFLOAT && dest_datatype_ == nvinfer1::DataType::kINT32) {
    auto input = static_cast<const float *>(inputs[0]);
    auto output = static_cast<int *>(outputs[0]);
    Cast(element_cnt, input, output, stream);
  } else {
    MS_LOG(ERROR) << "unsupported data type cast " << layer_name_;
  }
  return RET_OK;
}

nvinfer1::IPluginV2DynamicExt *CastPlugin::clone() const noexcept {
  auto *plugin = new CastPlugin(*this);
  plugin->setPluginNamespace(name_space_.c_str());
  return plugin;
}

nvinfer1::DataType CastPlugin::getOutputDataType(int index, const nvinfer1::DataType *inputTypes, int nbInputs) const
  noexcept {
  return dest_datatype_;
}

size_t CastPlugin::getSerializationSize() const noexcept {
  // origin_datatype_ and dest_datatype_
  return sizeof(nvinfer1::DataType) * 2;
}

void CastPlugin::serialize(void *buffer) const noexcept {
  SerializeValue(&buffer, &origin_datatype_, sizeof(nvinfer1::DataType));
  SerializeValue(&buffer, &dest_datatype_, sizeof(nvinfer1::DataType));
}
}  // namespace mindspore::lite
@ -0,0 +1,67 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_PLUGIN_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_PLUGIN_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"

namespace mindspore::lite {
constexpr char *CAST_PLUGIN_NAME{"CastPluginCreater"};
class CastPlugin : public TensorRTPlugin {
 public:
  CastPlugin(const std::string name, nvinfer1::DataType origin_datatype, nvinfer1::DataType dest_datatype,
             uint32_t device_id = 0)
      : TensorRTPlugin(name, std::string(CAST_PLUGIN_NAME), device_id),
        origin_datatype_(origin_datatype),
        dest_datatype_(dest_datatype) {}

  CastPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
      : TensorRTPlugin(std::string(name), std::string(CAST_PLUGIN_NAME)) {
    const nvinfer1::PluginField *fields = fc->fields;
    origin_datatype_ = static_cast<const nvinfer1::DataType *>(fields[0].data)[0];
    dest_datatype_ = static_cast<const nvinfer1::DataType *>(fields[1].data)[0];
  }

  CastPlugin(const char *name, const void *serialData, size_t serialLength)
      : TensorRTPlugin(std::string(name), std::string(CAST_PLUGIN_NAME)) {
    DeserializeValue(&serialData, &serialLength, &origin_datatype_, sizeof(nvinfer1::DataType));
    DeserializeValue(&serialData, &serialLength, &dest_datatype_, sizeof(nvinfer1::DataType));
  }

  CastPlugin() = delete;

  nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;

  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;

  nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes, int nbInputs) const
    noexcept override;

  size_t getSerializationSize() const noexcept override;
  void serialize(void *buffer) const noexcept override;

 private:
  nvinfer1::DataType origin_datatype_;
  nvinfer1::DataType dest_datatype_;
};
class CastPluginCreater : public TensorRTPluginCreater<CastPlugin> {
 public:
  CastPluginCreater() : TensorRTPluginCreater(std::string(CAST_PLUGIN_NAME)) {}
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_PLUGIN_H_
@ -0,0 +1,79 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/op/cast_tensorrt.h"
#include "src/runtime/delegate/tensorrt/op/cast_plugin.h"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <functional>

namespace mindspore::lite {
int CastTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                            const std::vector<mindspore::MSTensor> &out_tensors) {
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != INPUT_SIZE2) {
    MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
    return RET_ERROR;
  }
  return RET_OK;
}

int CastTensorRT::AddInnerOp(TensorRTContext *ctx) {
  // cast to type tensor
  auto type_tensor = in_tensors_[1];
  if (type_tensor.Data() == nullptr) {
    MS_LOG(ERROR) << "unknown cast type of " << op_name_;
    return RET_ERROR;
  }
  auto type_data = static_cast<const int *>(type_tensor.Data().get());
  DataType data_type = static_cast<DataType>(type_data[0]);
  MS_LOG(DEBUG) << op_name_ << " cast to data type(43 float): " << type_data[0];
  nvinfer1::DataType dest_datatype = ConvertDataType(data_type);
  auto trt_tensor = tensorrt_in_tensors_[0].trt_tensor_;

#if TRT_VERSION_GE(7, 2)
  dest_datatype = (dest_datatype == nvinfer1::DataType::kBOOL ? nvinfer1::DataType::kINT32 : dest_datatype);
  auto cast_layer = ctx->network()->addIdentity(*trt_tensor);
#else
  auto plugin = std::make_shared<CastPlugin>(op_name_, trt_tensor->getType(), dest_datatype);
  nvinfer1::ITensor *inputTensors[] = {trt_tensor};
  nvinfer1::IPluginV2Layer *cast_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
#endif
  if (cast_layer == nullptr) {
    MS_LOG(ERROR) << "create cast layer failed for: " << op_name_;
    return RET_ERROR;
  }
#if TRT_VERSION_GE(7, 2)
  cast_layer->setOutputType(0, dest_datatype);
#endif
  cast_layer->setName(op_name_.c_str());
  nvinfer1::ITensor *cast_out = cast_layer->getOutput(0);
  cast_out->setName((op_name_ + "_output").c_str());
  this->AddInnerOutTensors(
    ITensorHelper{cast_out, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
  this->layer_ = cast_layer;
  return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Cast, CastTensorRT)
}  // namespace mindspore::lite
@ -0,0 +1,43 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/cast.cuh"

namespace mindspore::lite {
class CastTensorRT : public TensorRTOp {
 public:
  CastTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
               const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
               const schema::QuantType &quant_type)
      : TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}

  ~CastTensorRT() override = default;

  int AddInnerOp(TensorRTContext *ctx) override;

  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors) override;

 private:
  // CastTensorRT
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_TENSORRT_H_
@ -0,0 +1,158 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/op/concate_tensorrt.h"
#include <experimental/optional>
#include <algorithm>

namespace mindspore::lite {
int ConcateTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                               const std::vector<mindspore::MSTensor> &out_tensors) {
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (type_ != schema::PrimitiveType_Stack && type_ != schema::PrimitiveType_Concat) {
    MS_LOG(ERROR) << "Unsupported op :" << op_name_ << " , type: " << type_;
    return RET_ERROR;
  }
  if (in_tensors.size() == 0 || in_tensors.size() < INPUT_SIZE2 && type_ != schema::PrimitiveType_Stack) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
    return RET_ERROR;
  }

  int input_nbDims = in_tensors_[0].Shape().size();
  if (axis_ == -1) {
    axis_ = input_nbDims - 1;
  }
  if (axis_ < 0 || axis_ > input_nbDims || axis_ == input_nbDims && type_ != schema::PrimitiveType_Stack) {
    MS_LOG(ERROR) << "concate_op valid axis : " << axis_ << " , input dims : " << input_nbDims;
    return RET_ERROR;
  }
  return RET_OK;
}
int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
  if (ctx == nullptr || ctx->network() == nullptr) {
    MS_LOG(ERROR) << "context or network is invalid";
    return RET_ERROR;
  }

  if (tensorrt_in_tensors_.size() != in_tensors_.size()) {
    MS_LOG(ERROR) << "concate_op in tensor is invalid, trt tensor has " << tensorrt_in_tensors_.size()
                  << ", but origin ms tensor has " << in_tensors_.size();
    return RET_ERROR;
  }

  nvinfer1::ITensor *trt_input_tensors[tensorrt_in_tensors_.size()];
  int ret = PreProcessInputs(ctx, trt_input_tensors);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PreProcessInputs failed for " << op_name_;
    return ret;
  }

  if (!same_format_) {
    if (trt_input_tensors[0]->getDimensions().nbDims == DIMENSION_4D && out_format_ == Format::NCHW) {
      // when all inputs are NCHW, change the axis accordingly
      axis_ = ConvertAxisFromNHWC2NCHW(axis_);
      MS_LOG(DEBUG) << "concate axis change to " << axis_ << " when using NCHW format.";
    } else {
      MS_LOG(WARNING) << "input tensor format needs check, convert concat axis failed for " << op_name_;
    }
  }

  if (type_ == schema::PrimitiveType_Stack) {
    for (size_t i = 0; i != tensorrt_in_tensors_.size(); ++i) {
      auto shuffle_layer = ctx->network()->addShuffle(*trt_input_tensors[i]);
      if (shuffle_layer == nullptr) {
        MS_LOG(ERROR) << "addShuffle failed for TensorRT.";
        return RET_ERROR;
      }
      auto shuffer_dims_opt = UnsqueezeDims(trt_input_tensors[i]->getDimensions(), axis_, 1);
      if (!shuffer_dims_opt) {
        MS_LOG(ERROR) << "UnsqueezeDims failed.";
        return RET_ERROR;
      }
      shuffle_layer->setReshapeDimensions(shuffer_dims_opt.value());
      trt_input_tensors[i] = shuffle_layer->getOutput(0);
    }
  }
  nvinfer1::IConcatenationLayer *concate_layer =
    ctx->network()->addConcatenation(trt_input_tensors, static_cast<int>(tensorrt_in_tensors_.size()));
  if (concate_layer == nullptr) {
    MS_LOG(ERROR) << "addConcatenation failed for TensorRT.";
    return RET_ERROR;
  }

  if (axis_ != RET_INVALID_OP_ATTR) {
    concate_layer->setAxis(axis_);
  }
  concate_layer->setName(op_name_.c_str());
  auto concat_output = concate_layer->getOutput(0);
  concat_output->setName((op_name_ + "_output").c_str());
  this->AddInnerOutTensors(ITensorHelper{concat_output, out_format_, same_format_});
  this->layer_ = concate_layer;
  return RET_OK;
}

int ConcateTensorRT::PreProcessInputs(TensorRTContext *ctx, nvinfer1::ITensor *trt_input_tensors[]) {
  int input_nbDims = tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims;
  out_format_ = tensorrt_in_tensors_[0].format_;
  same_format_ = tensorrt_in_tensors_[0].same_format_;

  for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
    if (tensorrt_in_tensors_[i].trt_tensor_->getDimensions().nbDims != input_nbDims) {
      MS_LOG(ERROR) << "dims of inputs is invalid for " << op_name_;
      return RET_ERROR;
    }
    // keep the origin format if all input formats are the same
    if (input_nbDims == DIMENSION_4D && tensorrt_in_tensors_[i].format_ != out_format_) {
      out_format_ = Format::NHWC;
    }
  }

  // make sure all inputs are in the same format
  if (input_nbDims == DIMENSION_4D) {
    for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
      if (tensorrt_in_tensors_[i].format_ == out_format_) {
        trt_input_tensors[i] = tensorrt_in_tensors_[i].trt_tensor_;
        MS_LOG(DEBUG) << "concate input " << GetTensorFormat(tensorrt_in_tensors_[i]);
      } else {
        nvinfer1::IShuffleLayer *transpose_layer = NCHW2NHWC(ctx, *tensorrt_in_tensors_[i].trt_tensor_);
        if (transpose_layer == nullptr) {
          MS_LOG(ERROR) << "op action convert failed";
          return RET_ERROR;
        }
        trt_input_tensors[i] = transpose_layer->getOutput(0);
        this->transpose_layer_ = transpose_layer;
        same_format_ = true;
        MS_LOG(DEBUG) << "concate input " << GetTensorFormat(trt_input_tensors[i], Format::NHWC, true);
      }
    }
  } else {
    for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
      trt_input_tensors[i] = tensorrt_in_tensors_[i].trt_tensor_;
      MS_LOG(DEBUG) << "concate input " << GetTensorFormat(tensorrt_in_tensors_[i]);
    }
  }
  return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Concat, ConcateTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Stack, ConcateTensorRT)
}  // namespace mindspore::lite
@ -0,0 +1,50 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONCATE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONCATE_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"

namespace mindspore::lite {
class ConcateTensorRT : public TensorRTOp {
 public:
  ConcateTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                  const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
                  const schema::QuantType &quant_type)
      : TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {
    type_ = primitive->value_type();
    axis_ = (type_ == schema::PrimitiveType_Concat ? primitive->value_as_Concat()->axis()
                                                   : primitive->value_as_Stack()->axis());
  }

  ~ConcateTensorRT() override = default;

  int AddInnerOp(TensorRTContext *ctx) override;

  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors) override;

 private:
  int PreProcessInputs(TensorRTContext *ctx, nvinfer1::ITensor *trt_input_tensors[]);

  Format out_format_{Format::NHWC};
  bool same_format_{true};
  schema::PrimitiveType type_;
  int axis_;
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONCATE_TENSORRT_H_
@ -0,0 +1,187 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/op/convolution_tensorrt.h"
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"

namespace mindspore::lite {
constexpr int BIAS_INDEX = 2;

int ConvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
                                   const std::vector<mindspore::MSTensor> &in_tensors,
                                   const std::vector<mindspore::MSTensor> &out_tensors) {
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
    return RET_ERROR;
  }
  if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
    MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
    return RET_ERROR;
  }
  return RET_OK;
}

int ConvolutionTensorRT::AddInnerOp(TensorRTContext *ctx) {
  if (ctx == nullptr || ctx->network() == nullptr) {
    MS_LOG(ERROR) << "context or network is invalid";
    return RET_ERROR;
  }
  const schema::Conv2DFusion *conv_op = this->op_primitive_->value_as_Conv2DFusion();
  if (conv_op == nullptr) {
    MS_LOG(ERROR) << "op action convert failed";
    return RET_ERROR;
  }

  nvinfer1::ITensor *conv_input = tensorrt_in_tensors_[0].trt_tensor_;
  if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
      tensorrt_in_tensors_[0].format_ == Format::NHWC) {
    // transpose: NHWC->NCHW
    nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
    if (transpose_layer_in == nullptr) {
      MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
      return RET_ERROR;
    }
    transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
    this->transpose_layer_ = transpose_layer_in;
    conv_input = transpose_layer_in->getOutput(0);
  }

  // transpose weight
  const mindspore::MSTensor &weight_tensor = in_tensors_[1];
  nvinfer1::Weights kernelWeights = lite::TransposeWeight4D(weight_tensor, &pack_weight_);

  // conv
  int nbOutputMaps = weight_tensor.Shape()[0];
  if (nbOutputMaps <= 0) {
    MS_LOG(ERROR) << "out_channel is invalid";
    return RET_ERROR;
  }

  auto kernel_size = conv_op->kernel_size();
  if (kernel_size == nullptr) {
    MS_LOG(ERROR) << "kernel_size is null";
    return RET_ERROR;
  }
  nvinfer1::Dims kernelSize = lite::ConvertCudaDims(std::vector<int64_t>(kernel_size->begin(), kernel_size->end()));
  if (kernelSize.nbDims == -1) {
    MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
    return RET_ERROR;
  }
  // bias
  nvinfer1::Weights biasWeights{};
  if (in_tensors_.size() >= INPUT_SIZE3) {
    biasWeights = lite::ConvertWeight(in_tensors_[BIAS_INDEX]);
  } else {
    biasWeights.type = ConvertDataType(weight_tensor.DataType());
    biasWeights.count = 0;
    biasWeights.values = nullptr;
  }

  nvinfer1::IConvolutionLayer *conv_layer =
    ctx->network()->addConvolutionNd(*conv_input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);

  if (conv_layer == nullptr) {
    MS_LOG(ERROR) << "ConvolutionLayer failed";
    return RET_ERROR;
  }
  conv_layer->setName((op_name_ + "_conv").c_str());
  this->layer_ = conv_layer;

  // add params
  SetAttributes(conv_op, conv_layer);

  // add activation
  nvinfer1::ILayer *activation_layer = nullptr;
  if (conv_op->activation_type() == schema::ActivationType::ActivationType_NO_ACTIVATION) {
    activation_layer = conv_layer;
  } else {
    activation_layer =
      ActivationTensorRT::AddActivation(ctx, conv_op->activation_type(), 0, 0, 0, conv_layer->getOutput(0), device_id_);
    if (activation_layer == nullptr) {
      MS_LOG(ERROR) << "addActivation for conv failed";
      return RET_ERROR;
    }
    activation_layer->setName((op_name_ + "_activation").c_str());
  }
  activation_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
  this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW, false});
  return RET_OK;
}

void ConvolutionTensorRT::SetAttributes(const schema::Conv2DFusion *conv_op, nvinfer1::IConvolutionLayer *conv_layer) {
  auto stride = conv_op->stride();
  if (stride != nullptr) {
    auto stride_val = std::vector<int64_t>(stride->begin(), stride->end());
    auto dims = ConvertCudaDims(stride_val);
    if (dims.nbDims == -1) {
      MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
      return;
    }
    conv_layer->setStrideNd(dims);
  }

  auto dilation = conv_op->dilation();
  if (dilation != nullptr) {
    auto dilation_val = std::vector<int64_t>(dilation->begin(), dilation->end());
    auto dims = ConvertCudaDims(dilation_val);
    if (dims.nbDims == -1) {
      MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
      return;
    }
    conv_layer->setDilationNd(dims);
  }
  int nbGroups = conv_op->group();
  if (nbGroups > 0) {
    conv_layer->setNbGroups(nbGroups);
  }

  schema::PadMode pad_mode = conv_op->pad_mode();
  if (pad_mode == schema::PadMode::PadMode_SAME) {
    conv_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
  } else {
    auto padding = conv_op->pad_list();
    if (padding != nullptr && padding->size() == DIMENSION_4D) {
      auto padding_val = std::vector<int64_t>(padding->begin(), padding->end());
      if (padding_val[0] != padding_val[1] || padding_val[DIMENSION_2D] != padding_val[DIMENSION_3D]) {
        MS_LOG(WARNING) << op_name_ << " has different up and down padding value";
      }
      nvinfer1::Dims2 dims(padding_val[0], padding_val[DIMENSION_2D]);
      conv_layer->setPaddingNd(dims);
    } else if (padding == nullptr || padding->size() == 0) {
      nvinfer1::Dims2 dims;
      conv_layer->setPaddingNd(dims);
    } else {
      MS_LOG(WARNING) << "pad list is invalid for " << op_name_;
    }
  }
}

ConvolutionTensorRT::~ConvolutionTensorRT() {
  if (pack_weight_ != nullptr) {
    free(pack_weight_);
    pack_weight_ = nullptr;
  }
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Conv2DFusion, ConvolutionTensorRT)
}  // namespace mindspore::lite
@ -0,0 +1,43 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONVOLUTION_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONVOLUTION_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"

namespace mindspore::lite {
class ConvolutionTensorRT : public TensorRTOp {
 public:
  ConvolutionTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                      const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
                      const schema::QuantType &quant_type)
      : TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}

  ~ConvolutionTensorRT() override;

  int AddInnerOp(TensorRTContext *ctx) override;

  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors) override;

 private:
  void SetAttributes(const schema::Conv2DFusion *ms_op, nvinfer1::IConvolutionLayer *current_layer_);

  void *pack_weight_{nullptr};
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONVOLUTION_TENSORRT_H_
@ -0,0 +1,199 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/op/deconvolution_tensorrt.h"
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
#include "nnacl/pack.h"

namespace mindspore::lite {
int DeconvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
                                     const std::vector<mindspore::MSTensor> &in_tensors,
                                     const std::vector<mindspore::MSTensor> &out_tensors) {
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
    return RET_ERROR;
  }
  if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
    MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
    return RET_ERROR;
  }
  return RET_OK;
}
int DeconvolutionTensorRT::AddInnerOp(TensorRTContext *ctx) {
  if (ctx == nullptr || ctx->network() == nullptr) {
    MS_LOG(ERROR) << "context or network is invalid";
    return RET_ERROR;
  }
  const schema::Conv2dTransposeFusion *deconv_op = this->op_primitive_->value_as_Conv2dTransposeFusion();
  if (deconv_op == nullptr) {
    MS_LOG(ERROR) << "op action convert failed";
    return RET_ERROR;
  }
  nvinfer1::ITensor *deconv_input = tensorrt_in_tensors_[0].trt_tensor_;
  if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
      tensorrt_in_tensors_[0].format_ == Format::NHWC) {
    // transpose: NHWC->NCHW
    nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
    if (transpose_layer_in == nullptr) {
      MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
      return RET_ERROR;
    }
    transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
    this->transpose_layer_ = transpose_layer_in;
    deconv_input = transpose_layer_in->getOutput(0);
  }

  // transpose weight
  const mindspore::MSTensor &weight_tensor = in_tensors_[1];
  nvinfer1::Weights kernelWeights = lite::TransposeWeight4D(weight_tensor, &pack_weight_);

  // deconv basic params
  int nbOutputMaps = weight_tensor.Shape()[0];
  if (nbOutputMaps <= 0) {
    MS_LOG(ERROR) << "out_channel is invalid";
    return RET_ERROR;
  }

  auto kernel_size = deconv_op->kernel_size();
  if (kernel_size == nullptr) {
    MS_LOG(ERROR) << "kernel_size is null";
    return RET_ERROR;
  }
  nvinfer1::Dims kernelSize = lite::ConvertCudaDims(std::vector<int64_t>(kernel_size->begin(), kernel_size->end()));
  if (kernelSize.nbDims == -1) {
    MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
    return RET_ERROR;
  }
  // bias
  nvinfer1::Weights biasWeights{};
  if (in_tensors_.size() >= INPUT_SIZE3) {
    biasWeights = lite::ConvertWeight(in_tensors_[INPUT_SIZE3 - 1]);
  } else {
    biasWeights.type = ConvertDataType(weight_tensor.DataType());
    biasWeights.count = 0;
    biasWeights.values = nullptr;
  }

  nvinfer1::IDeconvolutionLayer *deconv_layer =
    ctx->network()->addDeconvolutionNd(*deconv_input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);

  if (deconv_layer == nullptr) {
    MS_LOG(ERROR) << "DeconvolutionLayer failed";
    return RET_ERROR;
  }
  deconv_layer->setName((op_name_ + "_deconv").c_str());
  this->layer_ = deconv_layer;
  // set extra params
  SetAttributes(deconv_op, deconv_layer);

  // add activation
  nvinfer1::ILayer *activation_layer = nullptr;
  if (deconv_op->activation_type() == schema::ActivationType::ActivationType_NO_ACTIVATION) {
    activation_layer = deconv_layer;
  } else {
    activation_layer = ActivationTensorRT::AddActivation(ctx, deconv_op->activation_type(), 0, 0, 0,
                                                         deconv_layer->getOutput(0), device_id_);
    if (activation_layer == nullptr) {
      MS_LOG(ERROR) << "addActivation for conv failed";
      return RET_ERROR;
    }
    activation_layer->setName((op_name_ + "_activation").c_str());
  }
  activation_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
  this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW, false});
  return RET_OK;
}

void DeconvolutionTensorRT::SetAttributes(const schema::Conv2dTransposeFusion *ms_op,
                                          nvinfer1::IDeconvolutionLayer *decon_layer) {
  // kernel_size
  auto kernel_size = ms_op->kernel_size();
  if (kernel_size != nullptr) {
    auto kernel_size_val = std::vector<int64_t>(kernel_size->begin(), kernel_size->end());
    nvinfer1::Dims kernel_size_dims = lite::ConvertCudaDims(kernel_size_val);
    if (kernel_size_dims.nbDims == -1) {
      MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
      return;
    }
    decon_layer->setKernelSizeNd(kernel_size_dims);
  }

  // nbOutputMaps
  int32_t nbOutputMaps = static_cast<int32_t>(ms_op->out_channel());
  decon_layer->setNbOutputMaps(nbOutputMaps);

  // stride
  auto stride = ms_op->stride();
  if (stride != nullptr) {
    auto stride_val = std::vector<int64_t>(stride->begin(), stride->end());
    nvinfer1::Dims stride_dims = lite::ConvertCudaDims(stride_val);
    if (stride_dims.nbDims == -1) {
      MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
      return;
    }
    decon_layer->setStrideNd(stride_dims);
  }

  // nbGroups
  int32_t nbGroups = static_cast<int32_t>(ms_op->group());
  decon_layer->setNbGroups(nbGroups);

  // padding
  schema::PadMode pad_mode = ms_op->pad_mode();
  if (pad_mode == schema::PadMode::PadMode_SAME) {
    decon_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
  } else {
    auto padding = ms_op->pad_list();
    auto out_pad = ms_op->output_paddings();
    if (padding == nullptr || out_pad == nullptr) {
      MS_LOG(WARNING) << "no pad value of " << op_name_;
      return;
    }
    auto padding_val = std::vector<int64_t>(padding->begin(), padding->end());
    auto out_pad_val = std::vector<int64_t>(out_pad->begin(), out_pad->end());  // h, w
    if (out_pad_val.size() != DIMENSION_2D || padding_val.size() != DIMENSION_4D) {
      MS_LOG(ERROR) << "invalid size of pad " << op_name_;
      return;
    }
    nvinfer1::Dims dims_pre{};
    dims_pre.nbDims = DIMENSION_2D;
    dims_pre.d[0] = padding_val[0];  // up
    dims_pre.d[1] = padding_val[2];  // left
    decon_layer->setPrePadding(dims_pre);
    nvinfer1::Dims dims_post{};
    dims_post.nbDims = DIMENSION_2D;
    dims_post.d[0] = padding_val[1] - out_pad_val[0];  // down
    dims_post.d[1] = padding_val[3] - out_pad_val[1];  // right
    decon_layer->setPostPadding(dims_post);
  }
}

DeconvolutionTensorRT::~DeconvolutionTensorRT() {
  if (pack_weight_ != nullptr) {
    free(pack_weight_);
    pack_weight_ = nullptr;
  }
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Conv2dTransposeFusion, DeconvolutionTensorRT)
}  // namespace mindspore::lite
@ -0,0 +1,43 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_DECONVOLUTION_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_DECONVOLUTION_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"

namespace mindspore::lite {
class DeconvolutionTensorRT : public TensorRTOp {
 public:
  DeconvolutionTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                        const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
                        const schema::QuantType &quant_type)
      : TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}

  ~DeconvolutionTensorRT() override;

  int AddInnerOp(TensorRTContext *ctx) override;

  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors) override;

 private:
  void SetAttributes(const schema::Conv2dTransposeFusion *ms_op, nvinfer1::IDeconvolutionLayer *decon_layer);

  void *pack_weight_{nullptr};
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_DECONVOLUTION_TENSORRT_H_
@ -0,0 +1,312 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <unordered_map>
#include <unordered_set>
#include "src/runtime/delegate/tensorrt/op/elementwise_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"

namespace mindspore::lite {
namespace {
std::unordered_map<schema::PrimitiveType, nvinfer1::ElementWiseOperation> NOT_BOOL_PRIM2NV_ELEM_OP = {
#if TRT_VERSION_GE(7, 2)
  {schema::PrimitiveType_Less, nvinfer1::ElementWiseOperation::kLESS},
  {schema::PrimitiveType_Greater, nvinfer1::ElementWiseOperation::kGREATER},
#endif
  {schema::PrimitiveType_AddFusion, nvinfer1::ElementWiseOperation::kSUM},
  {schema::PrimitiveType_PowFusion, nvinfer1::ElementWiseOperation::kPOW},
  {schema::PrimitiveType_DivFusion, nvinfer1::ElementWiseOperation::kDIV},
  {schema::PrimitiveType_RealDiv, nvinfer1::ElementWiseOperation::kDIV},
  {schema::PrimitiveType_FloorDiv, nvinfer1::ElementWiseOperation::kFLOOR_DIV},
  {schema::PrimitiveType_SubFusion, nvinfer1::ElementWiseOperation::kSUB},
  {schema::PrimitiveType_MulFusion, nvinfer1::ElementWiseOperation::kPROD},
  {schema::PrimitiveType_Minimum, nvinfer1::ElementWiseOperation::kMIN},
  {schema::PrimitiveType_Maximum, nvinfer1::ElementWiseOperation::kMAX},
  {schema::PrimitiveType_BiasAdd, nvinfer1::ElementWiseOperation::kSUM},
#if TRT_VERSION_GE(7, 2)
  {schema::PrimitiveType_Equal, nvinfer1::ElementWiseOperation::kEQUAL},
#endif
};
}  // namespace

int ElementWiseTensorRT::IsSupport(const schema::Primitive *primitive,
                                   const std::vector<mindspore::MSTensor> &in_tensors,
                                   const std::vector<mindspore::MSTensor> &out_tensors) {
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != INPUT_SIZE2) {
    MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
    return RET_ERROR;
  }

  // if constant tensor is scalar, it needs to know another input tensor's shape to broadcast
  if ((in_tensors[0].Shape().size() > 0 && in_tensors[0].Shape()[0] == -1 && in_tensors[1].Shape().size() == 0) ||
      (in_tensors[1].Shape().size() > 0 && in_tensors[1].Shape()[0] == -1 && in_tensors[0].Shape().size() == 0)) {
    MS_LOG(ERROR) << "invalid all input tensor shape unknown for: " << op_name_;
    return RET_ERROR;
  }

  bool is_not_bool_arith = NOT_BOOL_PRIM2NV_ELEM_OP.find(type_) != NOT_BOOL_PRIM2NV_ELEM_OP.end();
  if (is_not_bool_arith) {
    if (std::any_of(in_tensors.begin(), in_tensors.end(),
                    [](const mindspore::MSTensor &tensor) { return tensor.DataType() == DataType::kNumberTypeBool; })) {
      MS_LOG(ERROR) << "invalid input type for : " << op_name_;
      return RET_ERROR;
    }
    element_wise_op_ = NOT_BOOL_PRIM2NV_ELEM_OP[type_];
  }
  if (!is_not_bool_arith) {
    // PrimitiveType_Eltwise
    auto eltwise_op = op_primitive_->value_as_Eltwise();
    if (eltwise_op == nullptr) {
      MS_LOG(ERROR) << "convert to Eltwise failed: " << op_name_;
      return RET_ERROR;
    }
    schema::EltwiseMode eltwiseMode = eltwise_op->mode();
    std::map<schema::EltwiseMode, nvinfer1::ElementWiseOperation> eltwise_modes = {
      {schema::EltwiseMode::EltwiseMode_SUM, nvinfer1::ElementWiseOperation::kSUM},
      {schema::EltwiseMode::EltwiseMode_PROD, nvinfer1::ElementWiseOperation::kPROD},
      {schema::EltwiseMode::EltwiseMode_MAXIMUM, nvinfer1::ElementWiseOperation::kMAX},
    };
    auto iter_mode = eltwise_modes.find(eltwiseMode);
    if (iter_mode != eltwise_modes.end()) {
      element_wise_op_ = iter_mode->second;
    } else {
      MS_LOG(ERROR) << "unsupported type for ElementWise op" << op_name_;
      return RET_ERROR;
    }
  }
  return RET_OK;
}

int ElementWiseTensorRT::AddInnerOp(TensorRTContext *ctx) {
  if (ctx == nullptr || ctx->network() == nullptr) {
    MS_LOG(ERROR) << "network or input tensor size is invalid";
    return RET_ERROR;
  }
  ITensorHelper x_input;
  ITensorHelper y_input;
  int ret = PreprocessInputTensors(ctx, &x_input, &y_input);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PreprocessInputTensors failed.";
    return RET_ERROR;
  }
  nvinfer1::IElementWiseLayer *cal_layer =
    ctx->network()->addElementWise(*x_input.trt_tensor_, *y_input.trt_tensor_, element_wise_op_);

  if (cal_layer == nullptr) {
    MS_LOG(ERROR) << "addElementWise failed for TensorRT.";
    return RET_ERROR;
  }
  cal_layer->setName(op_name_.c_str());
  this->layer_ = cal_layer;

  nvinfer1::ITensor *op_out_tensor = cal_layer->getOutput(0);
  if (op_out_tensor == nullptr) {
    MS_LOG(ERROR) << "addElementWise out tensor is nullptr.";
    return RET_ERROR;
  }
  // add activation
  nvinfer1::ITensor *activation_out_tensor = AddActivation(ctx, op_out_tensor);
  op_out_tensor = (activation_out_tensor == nullptr) ? op_out_tensor : activation_out_tensor;

  // scale and shift
  if (type_ == schema::PrimitiveType_PowFusion) {
    auto pow_op = op_primitive_->value_as_PowFusion();
    if (pow_op == nullptr) {
      MS_LOG(ERROR) << "PowFusion convert failed.";
      return RET_ERROR;
    }
    float scale = pow_op->scale();
    float shift = pow_op->shift();
    if (abs(scale - 1) >= 1.0e-05 || abs(shift - 0) >= 1.0e-05) {
      MS_LOG(WARNING) << "deal with scale and shift for pow op";
    }
  }
#if TRT_VERSION_GE(7, 2)
  std::unordered_set<schema::PrimitiveType> bool_producer_ops = {
    schema::PrimitiveType_Equal, schema::PrimitiveType_Greater, schema::PrimitiveType_Less};
  if (bool_producer_ops.find(type_) != bool_producer_ops.end()) {
    auto cast_layer = ctx->network()->addIdentity(*op_out_tensor);
    if (cast_layer == nullptr) {
      MS_LOG(ERROR) << "create cast layer failed for: " << op_name_;
      return RET_ERROR;
    }
    cast_layer->setOutputType(0, nvinfer1::DataType::kINT32);
    op_out_tensor = cast_layer->getOutput(0);
    MS_LOG(INFO) << "bool result cast to int32" << op_name_;
  }
#endif
  op_out_tensor->setName((op_name_ + "_output").c_str());
  this->AddInnerOutTensors(ITensorHelper{op_out_tensor, x_input.format_, x_input.same_format_});
  MS_LOG(DEBUG) << "output " << GetTensorFormat(tensorrt_out_tensors_[0]);
  return RET_OK;
}

int ElementWiseTensorRT::PreprocessInputTensors(TensorRTContext *ctx, ITensorHelper *x_input, ITensorHelper *y_input) {
  int input_x_index = SameTensor(tensorrt_in_tensors_[0].trt_tensor_, &in_tensors_[0]) ? 0 : 1;
  if (in_tensors_[0].Shape() == in_tensors_[1].Shape() && in_tensors_[0].IsConst()) {
    input_x_index = 1;
  }

  if (this->tensorrt_in_tensors_.size() != INPUT_SIZE2) {
    int ret = AddConstTensor(ctx);
    if (ret != RET_OK) {
      return ret;
    }
  }
  *x_input = tensorrt_in_tensors_[input_x_index];
  *y_input = tensorrt_in_tensors_[1 - input_x_index];
  MS_LOG(DEBUG) << "before transpose " << GetTensorFormat(*x_input);
  MS_LOG(DEBUG) << "before transpose " << GetTensorFormat(*y_input);

  if (x_input->trt_tensor_->getDimensions().nbDims == DIMENSION_4D && x_input->format_ != y_input->format_) {
    // when inputs format are different, change to NHWC
    auto need_trans = x_input->format_ == Format::NCHW ? x_input : y_input;
    nvinfer1::IShuffleLayer *transpose_layer = NCHW2NHWC(ctx, *need_trans->trt_tensor_);
    if (transpose_layer == nullptr) {
      MS_LOG(ERROR) << "op action convert failed";
      return RET_ERROR;
    }
    transpose_layer->setName((op_name_ + "_input_transpose2NHWC").c_str());
    need_trans->trt_tensor_ = transpose_layer->getOutput(0);
    need_trans->format_ = Format::NHWC;
    need_trans->same_format_ = true;
  }
  MS_LOG(DEBUG) << "after transpose " << GetTensorFormat(*x_input);
  MS_LOG(DEBUG) << "after transpose " << GetTensorFormat(*y_input);
  if (GetDimsVolume(x_input->trt_tensor_->getDimensions()) == GetDimsVolume(y_input->trt_tensor_->getDimensions()) &&
      x_input->trt_tensor_->getDimensions().nbDims != y_input->trt_tensor_->getDimensions().nbDims) {
    bool x_large = x_input->trt_tensor_->getDimensions().nbDims > y_input->trt_tensor_->getDimensions().nbDims;
    auto input_tensor = x_large ? y_input : x_input;
    auto output_dim = x_large ? x_input->trt_tensor_->getDimensions() : y_input->trt_tensor_->getDimensions();
    auto reshape_layer = ctx->network()->addShuffle(*input_tensor->trt_tensor_);
    if (reshape_layer == nullptr) {
      MS_LOG(ERROR) << "add reshape failed for " << op_name_;
      return RET_ERROR;
    }
    reshape_layer->setReshapeDimensions(output_dim);
    input_tensor->trt_tensor_ = reshape_layer->getOutput(0);
  }
  return RET_OK;
}

nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(TensorRTContext *ctx, nvinfer1::ITensor *in_tensor) {
  schema::ActivationType activation = schema::ActivationType::ActivationType_NO_ACTIVATION;
  switch (type_) {
    case schema::PrimitiveType_AddFusion: {
      auto sum_op = op_primitive_->value_as_AddFusion();
      if (sum_op == nullptr) {
        MS_LOG(ERROR) << "AddFusion convert failed.";
        return nullptr;
      }
      activation = sum_op->activation_type();
      break;
    }
    case schema::PrimitiveType_DivFusion: {
      auto div_op = op_primitive_->value_as_DivFusion();
      if (div_op == nullptr) {
        MS_LOG(ERROR) << "DivFusion convert failed.";
        return nullptr;
      }
      activation = div_op->activation_type();
      break;
    }
    case schema::PrimitiveType_SubFusion: {
      auto sub_op = op_primitive_->value_as_SubFusion();
      if (sub_op == nullptr) {
        MS_LOG(ERROR) << "SubFusion convert failed.";
        return nullptr;
      }
      activation = sub_op->activation_type();
      break;
    }
    case schema::PrimitiveType_MulFusion: {
      auto mul_op = op_primitive_->value_as_MulFusion();
      if (mul_op == nullptr) {
        MS_LOG(ERROR) << "MulFusion convert failed.";
        return nullptr;
      }
      activation = mul_op->activation_type();
      break;
    }
    default:
      MS_LOG(DEBUG) << "no activation need for: " << op_name_;
  }
  nvinfer1::ITensor *activation_out_tensor = nullptr;
  if (activation != schema::ActivationType::ActivationType_NO_ACTIVATION) {
    auto activation_layer = ActivationTensorRT::AddActivation(ctx, activation, 0, 0, 0, in_tensor, device_id_);
    if (activation_layer == nullptr) {
      MS_LOG(ERROR) << "addActivation for element wise failed";
      return nullptr;
    }
    activation_layer->setName((op_name_ + "_activation").c_str());
    activation_out_tensor = activation_layer->getOutput(0);
  }
  return activation_out_tensor;
}
int ElementWiseTensorRT::AddConstTensor(TensorRTContext *ctx) {
  int const_tensor_index = (in_tensors_[0].Data() != nullptr && in_tensors_[0].IsConst()) ? 0 : 1;
  nvinfer1::ITensor *constant_input = ConvertConstantTensorWithDims(
    ctx, in_tensors_[const_tensor_index], in_tensors_[1 - const_tensor_index].Shape(), op_name_);
  CHECK_NULL_RETURN(constant_input);
  AddInnerInTensors(ITensorHelper{constant_input, tensorrt_in_tensors_[0].format_, true});
  return RET_OK;
}
bool ElementWiseTensorRT::SameTensor(nvinfer1::ITensor *trt_tensor, mindspore::MSTensor *ms_tensor) {
  if (SameDims(trt_tensor->getDimensions(), ms_tensor->Shape())) {
    return true;
  }
  if (ms_tensor->Shape().size() == DIMENSION_4D) {
    // nhwc nchw
    auto nchw_shape = NHWC2NCHW(ms_tensor->Shape());
    if (SameDims(trt_tensor->getDimensions(), nchw_shape)) {
      return true;
    }
  }
  auto str_name = strstr(trt_tensor->getName(), ms_tensor->Name().c_str());
  if (str_name != nullptr) {
    return true;
  }
  str_name = strstr(ms_tensor->Name().c_str(), trt_tensor->getName());
  if (str_name != nullptr) {
    return true;
  }
  return false;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_SubFusion, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_DivFusion, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_RealDiv, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_PowFusion, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_AddFusion, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_MulFusion, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Eltwise, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Minimum, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Maximum, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_BiasAdd, ElementWiseTensorRT)
#if TRT_VERSION_GE(7, 2)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Equal, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Less, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Greater, ElementWiseTensorRT)
#endif
}  // namespace mindspore::lite
@ -0,0 +1,50 @@
/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ELEMENTWISE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ELEMENTWISE_TENSORRT_H_
#include <string>
#include <vector>
#include <map>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"

namespace mindspore::lite {
class ElementWiseTensorRT : public TensorRTOp {
 public:
  ElementWiseTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                      const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
                      const schema::QuantType &quant_type)
      : TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}

  ~ElementWiseTensorRT() override = default;

  int AddInnerOp(TensorRTContext *ctx) override;

  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors) override;

 private:
  nvinfer1::ITensor *AddActivation(TensorRTContext *ctx, nvinfer1::ITensor *in_tensor);

  int AddConstTensor(TensorRTContext *ctx);

  bool SameTensor(nvinfer1::ITensor *trt_tensor, mindspore::MSTensor *ms_tensor);

  int PreprocessInputTensors(TensorRTContext *ctx, ITensorHelper *x_input, ITensorHelper *y_input);

  nvinfer1::ElementWiseOperation element_wise_op_;
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ELEMENTWISE_TENSORRT_H_
@ -0,0 +1,96 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/op/equal_tensorrt.h"
#include <numeric>
#include <memory>
#include <functional>
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "NvInferRuntimeCommon.h"

namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(EqualPluginCreater);
template class TensorRTPluginCreater<EqualPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;

int EqualTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                             const std::vector<mindspore::MSTensor> &out_tensors) {
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != INPUT_SIZE2) {
    MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
    return RET_ERROR;
  }
  return RET_OK;
}

int EqualTensorRT::AddInnerOp(TensorRTContext *ctx) {
  nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_, tensorrt_in_tensors_[1].trt_tensor_};
  auto plugin = std::make_shared<EqualPlugin>(op_name_, device_id_);
  nvinfer1::IPluginV2Layer *equal_layer = ctx->network()->addPluginV2(inputTensors, INPUT_SIZE2, *plugin);
  if (equal_layer == nullptr) {
    MS_LOG(ERROR) << "create equal layer failed for: " << op_name_;
    return RET_ERROR;
  }
  layer_ = equal_layer;
  nvinfer1::ITensor *equal_out = equal_layer->getOutput(0);
  equal_layer->setName(op_name_.c_str());
  equal_out->setName((op_name_ + "_output").c_str());
  this->AddInnerOutTensors(
    ITensorHelper{equal_out, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
  return RET_OK;
}

int EqualPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
                         const void *const *inputs, void *const *outputs, void *workspace,
                         cudaStream_t stream) noexcept {
  nvinfer1::Dims input_dims = inputDesc[0].dims;
  int element_cnt = std::accumulate(input_dims.d, input_dims.d + input_dims.nbDims, 1, std::multiplies<int64_t>());

  if (inputDesc->type == nvinfer1::DataType::kINT32) {
    const int *input1 = static_cast<const int *>(inputs[0]);
    const int *input2 = static_cast<const int *>(inputs[1]);
    int *output = static_cast<int *>(outputs[0]);
    Equal(input1, input2, output, element_cnt, stream);
  } else if (inputDesc->type == nvinfer1::DataType::kFLOAT) {
    const float *input1 = static_cast<const float *>(inputs[0]);
    const float *input2 = static_cast<const float *>(inputs[1]);
    float *output = static_cast<float *>(outputs[0]);
    Equal(input1, input2, output, element_cnt, stream);
  } else {
    MS_LOG(ERROR) << "unsupported equal data type";
  }
  return RET_OK;
}

nvinfer1::IPluginV2DynamicExt *EqualPlugin::clone() const noexcept {
  auto *plugin = new EqualPlugin(*this);
  plugin->setPluginNamespace(name_space_.c_str());
  return plugin;
}
#if TRT_VERSION_LS(7, 2)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Equal, EqualTensorRT)
#endif
}  // namespace mindspore::lite
@ -0,0 +1,63 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_EQUAL_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_EQUAL_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/equal.cuh"

namespace mindspore::lite {
constexpr char *EQUAL_PLUGIN_NAME{"EqualPlugin"};
class EqualTensorRT : public TensorRTOp {
 public:
  EqualTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
                const schema::QuantType &quant_type)
      : TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}

  ~EqualTensorRT() override = default;

  int AddInnerOp(TensorRTContext *ctx) override;

  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors) override;
};

class EqualPlugin : public TensorRTPlugin {
 public:
  EqualPlugin(const std::string name, uint32_t device_id)
      : TensorRTPlugin(name, std::string(EQUAL_PLUGIN_NAME), device_id) {}

  EqualPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
      : TensorRTPlugin(std::string(name), std::string(EQUAL_PLUGIN_NAME)) {}

  EqualPlugin(const char *name, const void *serialData, size_t serialLength)
      : TensorRTPlugin(std::string(name), std::string(EQUAL_PLUGIN_NAME)) {}

  EqualPlugin() = delete;

  nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
              const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
};
class EqualPluginCreater : public TensorRTPluginCreater<EqualPlugin> {
 public:
  EqualPluginCreater() : TensorRTPluginCreater(std::string(EQUAL_PLUGIN_NAME)) {}
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_EQUAL_TENSORRT_H_
@ -0,0 +1,106 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/op/fullyconnected_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
namespace mindspore::lite {
constexpr int BIAS_INDEX = 2;
int FullyConnectedTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
                                      const std::vector<mindspore::MSTensor> &in_tensors,
                                      const std::vector<mindspore::MSTensor> &out_tensors) {
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
    return RET_ERROR;
  }
  return RET_OK;
}

int FullyConnectedTensorRT::AddInnerOp(TensorRTContext *ctx) {
  auto primitive = op_primitive_->value_as_FullConnection();
  CHECK_NULL_RETURN(primitive);
  activation_ = primitive->activation_type();
  int axis = primitive->axis();
  if (axis < 0 || axis >= out_tensors_[0].Shape().size()) {
    MS_LOG(ERROR) << "axis: " << axis << " is invalid for " << op_name_;
    return RET_ERROR;
  }
  ITensorHelper fc_input;
  auto ret = PreprocessInputs(ctx, &fc_input);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PreprocessInputs failed for " << op_name_;
    return ret;
  }
  auto kernel_weight = ConvertWeight(in_tensors_[1].Data().get() == nullptr ? in_tensors_[0] : in_tensors_[1]);
  nvinfer1::Weights bias_weight{};
  if (primitive->has_bias()) {
    bias_weight = ConvertWeight(in_tensors_[BIAS_INDEX]);
  }
  nvinfer1::IFullyConnectedLayer *fc_layer = ctx->network()->addFullyConnected(
    *(fc_input.trt_tensor_), out_tensors_[0].Shape()[axis], kernel_weight, bias_weight);
  if (fc_layer == nullptr) {
    MS_LOG(ERROR) << "addFullyConnected failed for " << op_name_;
    return RET_ERROR;
  }
  this->layer_ = fc_layer;
  fc_layer->setName(op_name_.c_str());
  nvinfer1::ITensor *out_tensor = fc_layer->getOutput(0);

  if (out_tensor->getDimensions().nbDims != out_tensors_[0].Shape().size()) {
    std::vector<int64_t> squeeze_dim(out_tensors_[0].Shape());
    squeeze_dim[0] = out_tensor->getDimensions().d[0] == -1 ? -1 : squeeze_dim[0];
    out_tensor = Reshape(ctx, out_tensor, squeeze_dim);
  }
  // add activation
  if (activation_ != schema::ActivationType::ActivationType_NO_ACTIVATION) {
    nvinfer1::ILayer *activation_layer =
      ActivationTensorRT::AddActivation(ctx, activation_, 0, 0, 0, out_tensor, device_id_);
    if (activation_layer == nullptr) {
      MS_LOG(ERROR) << "addActivation for matmul failed";
      return RET_ERROR;
    }
    activation_layer->setName((op_name_ + "_activation").c_str());
    out_tensor = activation_layer->getOutput(0);
  }

  out_tensor->setName((op_name_ + "_output").c_str());
  MS_LOG(DEBUG) << "output " << GetTensorFormat(out_tensor);
  this->AddInnerOutTensors(ITensorHelper{out_tensor, fc_input.format_});
  return RET_OK;
}

int FullyConnectedTensorRT::PreprocessInputs(TensorRTContext *ctx, ITensorHelper *fc_input) {
  auto ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[0], fc_input);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PreprocessInputs2SameDim failed for " << op_name_;
    return ret;
  }
  auto origin_dims = fc_input->trt_tensor_->getDimensions();
  if (origin_dims.nbDims != DIMENSION_4D) {
    std::vector<int64_t> expand_dim(origin_dims.d, origin_dims.d + origin_dims.nbDims);
    for (int i = 0; i < DIMENSION_4D - origin_dims.nbDims; i++) {
      expand_dim.push_back(1);
    }
    fc_input->trt_tensor_ = Reshape(ctx, fc_input->trt_tensor_, expand_dim);
  }
  return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_FullConnection, FullyConnectedTensorRT)
}  // namespace mindspore::lite
@ -0,0 +1,45 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_FULLYCONNECTED_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_FULLYCONNECTED_TENSORRT_H_

#include <string>
#include <vector>
#include <map>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"

namespace mindspore::lite {
class FullyConnectedTensorRT : public TensorRTOp {
 public:
  FullyConnectedTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                         const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
                         const schema::QuantType &quant_type)
      : TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}

  ~FullyConnectedTensorRT() override = default;

  int AddInnerOp(TensorRTContext *ctx) override;

  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors) override;

 private:
  int PreprocessInputs(TensorRTContext *ctx, ITensorHelper *fc_input);

  schema::ActivationType activation_{schema::ActivationType::ActivationType_NO_ACTIVATION};
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_FULLYCONNECTED_TENSORRT_H_
@ -0,0 +1,139 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/op/gather_d_tensorrt.h"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <functional>
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"

namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(GatherDPluginCreater);
template class TensorRTPluginCreater<GatherDPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;

int GatherDTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                               const std::vector<mindspore::MSTensor> &out_tensors) {
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported gatherd input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != INPUT_SIZE3) {
    MS_LOG(ERROR) << "invalid gatherd input tensor size: " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "invalid gatherd output tensor size: " << out_tensors.size();
    return RET_ERROR;
  }
  return RET_OK;
}

int GatherDTensorRT::AddInnerOp(TensorRTContext *ctx) {
  nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_, tensorrt_in_tensors_[2].trt_tensor_};
  auto dim_tensor = static_cast<const int *>(in_tensors_[1].Data().get());
  if (dim_tensor == nullptr) {
    MS_LOG(ERROR) << op_name_ << " gatherd dim_tensor is null!";
    return RET_ERROR;
  }
  size_t dim = static_cast<size_t>(dim_tensor[0]);

  auto plugin = std::make_shared<GatherDPlugin>(op_name_, dim, device_id_);
  nvinfer1::IPluginV2Layer *gatherd_layer = ctx->network()->addPluginV2(inputTensors, INPUT_SIZE2, *plugin);
  if (gatherd_layer == nullptr) {
    MS_LOG(ERROR) << "create gatherd failed for: " << op_name_;
    return RET_ERROR;
  }
  nvinfer1::ITensor *gatherd_out = gatherd_layer->getOutput(0);
  gatherd_layer->setName(op_name_.c_str());
  gatherd_out->setName((op_name_ + "_output").c_str());
  this->AddInnerOutTensors(
    ITensorHelper{gatherd_out, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
  this->layer_ = gatherd_layer;
  return RET_OK;
}

int GatherDPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
                           const void *const *inputs, void *const *outputs, void *workspace,
                           cudaStream_t stream) noexcept {
  nvinfer1::Dims input_dims = inputDesc[0].dims;
  int dims = input_dims.nbDims;
  if (axis_ < 0) {
    axis_ += dims;
  }

  if (inputDesc->type == nvinfer1::DataType::kINT32) {
    auto input = static_cast<const int *>(inputs[0]);
    auto index = static_cast<const int *>(inputs[1]);
    auto output = static_cast<int *>(outputs[0]);
    Reshape(inputDesc, outputDesc);
    Gather<int, int>(input, index, output, dim_before_axis_, dim_at_axis_input_, dim_at_axis_output_, dim_after_axis_,
                     stream, device_id_);
  } else if (inputDesc->type == nvinfer1::DataType::kFLOAT) {
    auto input = static_cast<const float *>(inputs[0]);
    auto index = static_cast<const int *>(inputs[1]);
    auto output = static_cast<float *>(outputs[0]);
    Reshape(inputDesc, outputDesc);
    Gather<float, int>(input, index, output, dim_before_axis_, dim_at_axis_input_, dim_at_axis_output_, dim_after_axis_,
                       stream, device_id_);
  } else {
    MS_LOG(ERROR) << "unsupported data type gatherd" << layer_name_;
  }
  return RET_OK;
}

nvinfer1::IPluginV2DynamicExt *GatherDPlugin::clone() const noexcept {
  auto *plugin = new GatherDPlugin(*this);
  plugin->setPluginNamespace(name_space_.c_str());
  return plugin;
}

nvinfer1::DimsExprs GatherDPlugin::getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
                                                       nvinfer1::IExprBuilder &exprBuilder) noexcept {
  nvinfer1::DimsExprs out_dims{};
  out_dims.nbDims = inputs[1].nbDims;
  for (int i = 0; i < inputs[1].nbDims; i++) {
    out_dims.d[i] = inputs[1].d[i];
  }
  return out_dims;
}

void GatherDPlugin::Reshape(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc) {
  nvinfer1::Dims input_dims = inputDesc[0].dims;
  nvinfer1::Dims output_dims = outputDesc[0].dims;
  size_t dim_before_axis = 1;
  for (size_t i = 0; i < IntToSize(axis_); i++) {
    dim_before_axis *= output_dims.d[i];
  }
  size_t dim_at_axis_input = input_dims.d[IntToSize(axis_)];
  size_t dim_at_axis_output = output_dims.d[IntToSize(axis_)];
  size_t dim_after_axis = 1;
  for (size_t i = IntToSize(axis_) + 1; i < output_dims.nbDims; i++) {
    dim_after_axis *= output_dims.d[i];
  }

  dim_before_axis_ = dim_before_axis;
  dim_at_axis_input_ = dim_at_axis_input;
  dim_at_axis_output_ = dim_at_axis_output;
  dim_after_axis_ = dim_after_axis;
  return;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_GatherD, GatherDTensorRT)
}  // namespace mindspore::lite
@ -0,0 +1,80 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_D_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_D_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/gather.cuh"
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr const char *GATHER_D_PLUGIN_NAME{"GatherDPluginCreater"};
|
||||
class GatherDTensorRT : public TensorRTOp {
|
||||
public:
|
||||
GatherDTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~GatherDTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
};
|
||||
|
||||
class GatherDPlugin : public TensorRTPlugin {
|
||||
public:
|
||||
GatherDPlugin(const std::string name, size_t dim, uint32_t device_id)
|
||||
: TensorRTPlugin(name, std::string(GATHER_D_PLUGIN_NAME), device_id), axis_(dim) {}
|
||||
|
||||
GatherDPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
|
||||
: TensorRTPlugin(std::string(name), std::string(GATHER_D_PLUGIN_NAME)) {
|
||||
const nvinfer1::PluginField *fields = fc->fields;
|
||||
axis_ = static_cast<const int *>(fields[0].data)[0];
|
||||
}
|
||||
|
||||
GatherDPlugin(const char *name, const void *serialData, size_t serialLength)
|
||||
: TensorRTPlugin(std::string(name), std::string(GATHER_D_PLUGIN_NAME)) {
|
||||
DeserializeValue(&serialData, &serialLength, &axis_, sizeof(int));
|
||||
}
|
||||
|
||||
GatherDPlugin() = delete;
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
|
||||
nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
|
||||
nvinfer1::IExprBuilder &exprBuilder) noexcept override;
|
||||
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
|
||||
|
||||
private:
|
||||
int axis_;
|
||||
size_t dim_before_axis_;
|
||||
size_t dim_at_axis_input_;
|
||||
size_t dim_at_axis_output_;
|
||||
size_t dim_after_axis_;
|
||||
void Reshape(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc);
|
||||
};
|
||||
|
||||
class GatherDPluginCreater : public TensorRTPluginCreater<GatherDPlugin> {
|
||||
public:
|
||||
GatherDPluginCreater() : TensorRTPluginCreater(std::string(GATHER_D_PLUGIN_NAME)) {}
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_D_TENSORRT_H_
|
|
@ -0,0 +1,108 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/op/gather_tensorrt.h"
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr int AXIS_INDEX = 2;
|
||||
|
||||
int GatherTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != INPUT_SIZE3) {
|
||||
MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors[1].DataType() != DataType::kNumberTypeInt32) {
|
||||
MS_LOG(ERROR) << "Gather indices only support Int32";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors[AXIS_INDEX].ElementNum() == 1) {
|
||||
MS_ASSERT(in_tensors[AXIS_INDEX].Data().get());
|
||||
axis_ = static_cast<const int *>(in_tensors[AXIS_INDEX].Data().get())[0];
|
||||
} else {
|
||||
MS_LOG(ERROR) << "TensorRT axis is attribute.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int GatherTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
if (ctx == nullptr || ctx->network() == nullptr) {
|
||||
MS_LOG(ERROR) << "context or network is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
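// Only one input has been converted to a TensorRT tensor so far; add the remaining constant
// (weight) input to the network so both gather operands are available.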
if (tensorrt_in_tensors_.size() < INPUT_SIZE2 && in_tensors_.size() >= INPUT_SIZE2) {
|
||||
int const_ms_tensor_index = in_tensors_[0].IsConst() ? 0 : 1;
|
||||
auto const_input = ConvertConstantTensor(ctx, in_tensors_[const_ms_tensor_index], op_name_);
|
||||
if (const_input == nullptr) {
|
||||
MS_LOG(ERROR) << "add const input tensor failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
tensorrt_in_tensors_.push_back(ITensorHelper{const_input});
|
||||
}
|
||||
|
||||
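// The indices tensor is the INT32 one; the data input may sit in either slot, so detect it by type.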
int indices_tensor_index = tensorrt_in_tensors_[0].trt_tensor_->getType() == nvinfer1::DataType::kINT32 ? 0 : 1;
|
||||
ITensorHelper gather_input;
|
||||
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[1 - indices_tensor_index], &gather_input);
|
||||
if (ret != RET_OK || gather_input.trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "PreprocessInputs2SameDim gather failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
ITensorHelper indices_tensor;
|
||||
ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[indices_tensor_index], &indices_tensor);
|
||||
if (ret != RET_OK || indices_tensor.trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "PreprocessInputs2SameDim indices failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
nvinfer1::IGatherLayer *gather_layer =
|
||||
ctx->network()->addGather(*gather_input.trt_tensor_, *indices_tensor.trt_tensor_, axis_);
|
||||
if (gather_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addGather failed for TensorRT.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
this->layer_ = gather_layer;
|
||||
gather_layer->setName(op_name_.c_str());
|
||||
nvinfer1::ITensor *op_output = gather_layer->getOutput(0);
|
||||
// keep shape: a scalar (rank-0) indices tensor should drop the gather axis from the output, so squeeze it out
|
||||
if (in_tensors_[1].Shape().empty()) {
|
||||
auto squeeze = ctx->network()->addShuffle(*op_output);
|
||||
if (squeeze == nullptr) {
|
||||
MS_LOG(ERROR) << "add output squeeze failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
squeeze->setName((op_name_ + "_squeeze_out").c_str());
|
||||
auto old_shape = ConvertMSShape(op_output->getDimensions());
|
||||
old_shape.erase(old_shape.begin() + axis_);
|
||||
squeeze->setReshapeDimensions(ConvertCudaDims(old_shape));
|
||||
op_output = squeeze->getOutput(0);
|
||||
}
|
||||
op_output->setName((op_name_ + "_output").c_str());
|
||||
this->AddInnerOutTensors(ITensorHelper{op_output, gather_input.format_, gather_input.same_format_});
|
||||
return RET_OK;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Gather, GatherTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,42 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class GatherTensorRT : public TensorRTOp {
|
||||
public:
|
||||
GatherTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~GatherTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
|
||||
private:
|
||||
int axis_{0};
|
||||
mindspore::MSTensor indices_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_TENSORRT_H_
|
|
@ -0,0 +1,119 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <numeric>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/logical_not_tensorrt.h"
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/logical.cuh"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int LogicalNotTensorRT::IsSupport(const schema::Primitive *primitive,
|
||||
const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int LogicalNotTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
if (ctx == nullptr || ctx->network() == nullptr || this->tensorrt_in_tensors_.size() != 1) {
|
||||
MS_LOG(ERROR) << "network or input tensor is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
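// The logical-not CUDA kernel works on int32 data, so cast any non-int32 input through an identity layer first.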
if (tensorrt_in_tensors_[0].trt_tensor_->getType() != nvinfer1::DataType::kINT32) {
|
||||
auto cast_layer = ctx->network()->addIdentity(*tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (cast_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "create cast layer failed for: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
cast_layer->setOutputType(0, nvinfer1::DataType::kINT32);
|
||||
tensorrt_in_tensors_[0].trt_tensor_ = cast_layer->getOutput(0);
|
||||
}
|
||||
auto plugin = std::make_shared<LogicalNotPlugin>(op_name_, op_primitive_->value_type());
|
||||
if (plugin == nullptr) {
|
||||
MS_LOG(ERROR) << "create ActivationOptPlugin failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_};
|
||||
nvinfer1::IPluginV2Layer *logical_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
|
||||
this->layer_ = logical_layer;
|
||||
nvinfer1::ITensor *op_out_tensor = logical_layer->getOutput(0);
|
||||
if (op_out_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "addElementWise out tensor is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
op_out_tensor->setName((op_name_ + "_output").c_str());
|
||||
this->AddInnerOutTensors(
|
||||
ITensorHelper{op_out_tensor, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(LogicalNotPluginCreater);
|
||||
template class TensorRTPluginCreater<LogicalNotPlugin>;
|
||||
template <class T>
|
||||
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
|
||||
template <class T>
|
||||
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
|
||||
|
||||
int LogicalNotPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace,
|
||||
cudaStream_t stream) noexcept {
|
||||
return RunCudaLogical(inputDesc, inputs, outputs, stream);
|
||||
}
|
||||
|
||||
int LogicalNotPlugin::RunCudaLogical(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
|
||||
void *const *outputs, cudaStream_t stream) {
|
||||
switch (primitive_type_) {
|
||||
case (schema::PrimitiveType_LogicalNot): {
|
||||
LogicalNot(static_cast<const int *>(inputs[0]), static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims),
|
||||
stream);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
MS_LOG(ERROR) << "invalid logical type: " << static_cast<int>(primitive_type_);
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *LogicalNotPlugin::clone() const noexcept {
|
||||
auto *plugin = new LogicalNotPlugin(*this);
|
||||
plugin->setPluginNamespace(name_space_.c_str());
|
||||
return plugin;
|
||||
}
|
||||
|
||||
size_t LogicalNotPlugin::getSerializationSize() const noexcept { return sizeof(schema::PrimitiveType); }
|
||||
|
||||
void LogicalNotPlugin::serialize(void *buffer) const noexcept {
|
||||
SerializeValue(&buffer, &primitive_type_, sizeof(schema::PrimitiveType));
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_LogicalNot, LogicalNotTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,78 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_NOT_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_NOT_TENSORRT_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class LogicalNotTensorRT : public TensorRTOp {
|
||||
public:
|
||||
LogicalNotTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~LogicalNotTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
};
|
||||
|
||||
constexpr const char *LOGICAL_NOT_PLUGIN_NAME{"LogicalNotPlugin"};
|
||||
class LogicalNotPlugin : public TensorRTPlugin {
|
||||
public:
|
||||
LogicalNotPlugin(const std::string name, schema::PrimitiveType primitive_type)
|
||||
: TensorRTPlugin(name, std::string(LOGICAL_NOT_PLUGIN_NAME)), primitive_type_(primitive_type) {}
|
||||
|
||||
LogicalNotPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
|
||||
: TensorRTPlugin(std::string(name), std::string(LOGICAL_NOT_PLUGIN_NAME)) {
|
||||
const nvinfer1::PluginField *fields = fc->fields;
|
||||
primitive_type_ = static_cast<const schema::PrimitiveType *>(fields[0].data)[0];
|
||||
}
|
||||
|
||||
LogicalNotPlugin(const char *name, const void *serialData, size_t serialLength)
|
||||
: TensorRTPlugin(std::string(name), std::string(LOGICAL_NOT_PLUGIN_NAME)) {
|
||||
DeserializeValue(&serialData, &serialLength, &primitive_type_, sizeof(schema::PrimitiveType));
|
||||
}
|
||||
|
||||
LogicalNotPlugin() = delete;
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
|
||||
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
|
||||
size_t getSerializationSize() const noexcept override;
|
||||
void serialize(void *buffer) const noexcept override;
|
||||
|
||||
private:
|
||||
int RunCudaLogical(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
|
||||
cudaStream_t stream);
|
||||
const std::string layer_name_;
|
||||
std::string name_space_;
|
||||
schema::PrimitiveType primitive_type_;
|
||||
};
|
||||
class LogicalNotPluginCreater : public TensorRTPluginCreater<LogicalNotPlugin> {
|
||||
public:
|
||||
LogicalNotPluginCreater() : TensorRTPluginCreater(std::string(LOGICAL_NOT_PLUGIN_NAME)) {}
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_NOT_TENSORRT_H_
|
|
@ -0,0 +1,129 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <numeric>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/logical_tensorrt.h"
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/logical.cuh"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int LogicalTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int LogicalTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
if (ctx == nullptr || ctx->network() == nullptr) {
|
||||
MS_LOG(ERROR) << "network or input tensor is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
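// The logical and/or CUDA kernels expect int32 operands, so cast every non-int32 input to int32.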
for (size_t i = 0; i != tensorrt_in_tensors_.size(); ++i) {
|
||||
if (tensorrt_in_tensors_[i].trt_tensor_->getType() != nvinfer1::DataType::kINT32) {
|
||||
auto cast_layer = ctx->network()->addIdentity(*tensorrt_in_tensors_[i].trt_tensor_);
|
||||
if (cast_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "create cast layer failed for: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
cast_layer->setOutputType(0, nvinfer1::DataType::kINT32);
|
||||
tensorrt_in_tensors_[i].trt_tensor_ = cast_layer->getOutput(0);
|
||||
}
|
||||
}
|
||||
auto plugin = std::make_shared<LogicalPlugin>(op_name_, op_primitive_->value_type());
|
||||
if (plugin == nullptr) {
|
||||
MS_LOG(ERROR) << "create ActivationOptPlugin failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_, tensorrt_in_tensors_[1].trt_tensor_};
|
||||
nvinfer1::IPluginV2Layer *logical_layer = ctx->network()->addPluginV2(inputTensors, 2, *plugin);
|
||||
this->layer_ = logical_layer;
|
||||
nvinfer1::ITensor *op_out_tensor = logical_layer->getOutput(0);
|
||||
if (op_out_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "addElementWise out tensor is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
op_out_tensor->setName((op_name_ + "_output").c_str());
|
||||
this->AddInnerOutTensors(
|
||||
ITensorHelper{op_out_tensor, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(LogicalPluginCreater);
|
||||
template class TensorRTPluginCreater<LogicalPlugin>;
|
||||
template <class T>
|
||||
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
|
||||
template <class T>
|
||||
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
|
||||
|
||||
int LogicalPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace,
|
||||
cudaStream_t stream) noexcept {
|
||||
return RunCudaLogical(inputDesc, inputs, outputs, stream);
|
||||
}
|
||||
|
||||
int LogicalPlugin::RunCudaLogical(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
|
||||
void *const *outputs, cudaStream_t stream) {
|
||||
switch (primitive_type_) {
|
||||
case (schema::PrimitiveType_LogicalAnd): {
|
||||
LogicalAnd(static_cast<const int *>(inputs[0]), static_cast<const int *>(inputs[1]),
|
||||
static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
|
||||
break;
|
||||
}
|
||||
case (schema::PrimitiveType_LogicalOr): {
|
||||
LogicalOr(static_cast<const int *>(inputs[0]), static_cast<const int *>(inputs[1]),
|
||||
static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
MS_LOG(ERROR) << "invalid logical type: " << static_cast<int>(primitive_type_);
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *LogicalPlugin::clone() const noexcept {
|
||||
auto *plugin = new LogicalPlugin(*this);
|
||||
plugin->setPluginNamespace(name_space_.c_str());
|
||||
return plugin;
|
||||
}
|
||||
|
||||
size_t LogicalPlugin::getSerializationSize() const noexcept { return sizeof(schema::PrimitiveType); }
|
||||
|
||||
void LogicalPlugin::serialize(void *buffer) const noexcept {
|
||||
SerializeValue(&buffer, &primitive_type_, sizeof(schema::PrimitiveType));
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_LogicalOr, LogicalTensorRT)
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_LogicalAnd, LogicalTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,78 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_PLUGIN_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_PLUGIN_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class LogicalTensorRT : public TensorRTOp {
|
||||
public:
|
||||
LogicalTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~LogicalTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
};
|
||||
|
||||
constexpr const char *LOGICAL_PLUGIN_NAME{"LogicalPlugin"};
|
||||
class LogicalPlugin : public TensorRTPlugin {
|
||||
public:
|
||||
LogicalPlugin(const std::string name, schema::PrimitiveType primitive_type)
|
||||
: TensorRTPlugin(name, std::string(LOGICAL_PLUGIN_NAME)), primitive_type_(primitive_type) {}
|
||||
|
||||
LogicalPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
|
||||
: TensorRTPlugin(std::string(name), std::string(LOGICAL_PLUGIN_NAME)) {
|
||||
const nvinfer1::PluginField *fields = fc->fields;
|
||||
primitive_type_ = static_cast<const schema::PrimitiveType *>(fields[0].data)[0];
|
||||
}
|
||||
|
||||
LogicalPlugin(const char *name, const void *serialData, size_t serialLength)
|
||||
: TensorRTPlugin(std::string(name), std::string(LOGICAL_PLUGIN_NAME)) {
|
||||
DeserializeValue(&serialData, &serialLength, &primitive_type_, sizeof(schema::PrimitiveType));
|
||||
}
|
||||
|
||||
LogicalPlugin() = delete;
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
|
||||
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
|
||||
size_t getSerializationSize() const noexcept override;
|
||||
void serialize(void *buffer) const noexcept override;
|
||||
|
||||
private:
|
||||
int RunCudaLogical(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
|
||||
cudaStream_t stream);
|
||||
const std::string layer_name_;
|
||||
std::string name_space_;
|
||||
schema::PrimitiveType primitive_type_;
|
||||
};
|
||||
class LogicalPluginCreater : public TensorRTPluginCreater<LogicalPlugin> {
|
||||
public:
|
||||
LogicalPluginCreater() : TensorRTPluginCreater(std::string(LOGICAL_PLUGIN_NAME)) {}
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_PLUGIN_H_
|
|
@ -0,0 +1,493 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/op/lstm_tensorrt.h"
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_runtime.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int LSTMTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
#if TRT_VERSION_GE(7, 0)
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() < INPUT_TENSOR_SIZE) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != OUTPUT_TENSOR_SIZE) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
|
||||
hidden_init_name_ = hidden_in_init.Name() + "_hidden_init";
|
||||
mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
|
||||
cell_init_name_ = cell_in_init.Name() + "_cell_init";
|
||||
|
||||
dynamic_shape_params_.support_dynamic_ = false;
|
||||
dynamic_shape_params_.support_hw_dynamic_ = false;
|
||||
return RET_OK;
|
||||
#else
|
||||
MS_LOG(WARNING) << "low TensorRT version don't support LSTM op, please upgrade TensorRT version to 7 or higher";
|
||||
return RET_ERROR;
|
||||
#endif
|
||||
}
|
||||
|
||||
int LSTMTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
if (ctx == nullptr || ctx->network() == nullptr) {
|
||||
MS_LOG(ERROR) << "context or network is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
int input_data_dims_cnt = tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims;
|
||||
if (input_data_dims_cnt != DIMENSION_3D) {
|
||||
MS_LOG(ERROR) << "invalid input data shape dims for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
network_ = ctx->network();
|
||||
int ret = PreProcess();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "PreProcess for " << op_name_;
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = AddLSTMLayers();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "AddLSTMLayers for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (op_data_out_ == nullptr) {
|
||||
MS_LOG(ERROR) << "layers final output tensor is invalid for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
op_data_out_->setName((op_name_ + "_output").c_str());
|
||||
MS_LOG(DEBUG) << "lstm op_data_out_ " << GetTensorFormat(op_data_out_);
|
||||
MS_LOG(DEBUG) << "lstm op_hidden_out_ " << GetTensorFormat(op_hidden_out_);
|
||||
MS_LOG(DEBUG) << "lstm op_cell_out_ " << GetTensorFormat(op_cell_out_);
|
||||
this->AddInnerOutTensors(ITensorHelper{op_data_out_});
|
||||
this->AddInnerOutTensors(ITensorHelper{op_hidden_out_});
|
||||
this->AddInnerOutTensors(ITensorHelper{op_cell_out_});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int LSTMTensorRT::PreProcess() {
|
||||
auto ms_input_shape = in_tensors_[0].Shape();
|
||||
params_.sequence_size_ = ms_input_shape[0];
|
||||
params_.batch_size_ = ms_input_shape[1];
|
||||
params_.input_data_size_ = ms_input_shape[INPUT_SIZE_INDEX];
|
||||
if (params_.batch_size_ != 1) {
|
||||
MS_LOG(WARNING) << op_name_ << " lstm has batchsize " << params_.batch_size_ << ", needs further verify";
|
||||
}
|
||||
// ms: 0 sequence size, 1 batch size, 2 input size -> tensorrt: 0 batch size, 1 sequence size, 2 input size
|
||||
auto transpose_in_layer = network_->addShuffle(*tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_in_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "create transpose_in_layer failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::Permutation transpose_perm{{1, 0, INPUT_SIZE_INDEX}};
|
||||
transpose_in_layer->setFirstTranspose(transpose_perm);
|
||||
transpose_in_layer->setName((op_name_ + "transpose_in").c_str());
|
||||
input_data_ = transpose_in_layer->getOutput(0);
|
||||
MS_LOG(DEBUG) << "lstm input " << GetTensorFormat(input_data_);
|
||||
|
||||
auto lstm_op = op_primitive_->value_as_LSTM();
|
||||
params_.layer_count_ = lstm_op->num_layers() == 0 ? 1 : lstm_op->num_layers();
|
||||
params_.hidden_size_ = lstm_op->hidden_size();
|
||||
params_.directional_cnt_ = lstm_op->bidirectional() ? BIDIRECTIONAL : 1;
|
||||
params_.data_type_ = ConvertDataType(in_tensors_[1].DataType());
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int LSTMTensorRT::AddLSTMLayers() {
|
||||
mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
|
||||
mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
|
||||
|
||||
nvinfer1::ITensor *data_out{nullptr};
|
||||
nvinfer1::ITensor *hidden_init = network_->addInput(
|
||||
hidden_init_name_.c_str(), nvinfer1::DataType::kFLOAT,
|
||||
nvinfer1::Dims3(params_.layer_count_ * params_.directional_cnt_, params_.batch_size_, params_.hidden_size_));
|
||||
if (hidden_init == nullptr) {
|
||||
MS_LOG(ERROR) << "add hidden_init input tensor failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
op_binding_tensor_.push_back(BindingHelper{hidden_init_name_, hidden_in_init.MutableData(),
|
||||
nvinfer1::DataType::kFLOAT, hidden_in_init.DataSize()});
|
||||
nvinfer1::ITensor *cell_init = network_->addInput(
|
||||
cell_init_name_.c_str(), nvinfer1::DataType::kFLOAT,
|
||||
nvinfer1::Dims3(params_.layer_count_ * params_.directional_cnt_, params_.batch_size_, params_.hidden_size_));
|
||||
if (cell_init == nullptr) {
|
||||
MS_LOG(ERROR) << "add cell_init input tensor failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
op_binding_tensor_.push_back(
|
||||
BindingHelper{cell_init_name_, cell_in_init.MutableData(), nvinfer1::DataType::kFLOAT, cell_in_init.DataSize()});
|
||||
|
||||
sequence_size_input_ =
|
||||
network_->addInput((op_name_ + "_seq_input").c_str(), nvinfer1::DataType::kINT32, nvinfer1::Dims{});
|
||||
if (sequence_size_input_ == nullptr) {
|
||||
MS_LOG(ERROR) << "add sequence_size_input_ input tensor failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
op_binding_tensor_.push_back(
|
||||
BindingHelper{(op_name_ + "_seq_input"), ¶ms_.sequence_size_, nvinfer1::DataType::kINT32, sizeof(int)});
|
||||
|
||||
nvinfer1::ITensor *max_sequence_size =
|
||||
network_->addConstant(nvinfer1::Dims{}, nvinfer1::Weights{nvinfer1::DataType::kINT32, ¶ms_.sequence_size_, 1})
|
||||
->getOutput(0);
|
||||
if (max_sequence_size == nullptr) {
|
||||
MS_LOG(ERROR) << "add max_sequence_size constant tensor failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
LstmState next_state{input_data_, nullptr, nullptr}; // init states
|
||||
std::vector<nvinfer1::ITensor *> hidden_outputs;
|
||||
std::vector<nvinfer1::ITensor *> cell_outputs;
|
||||
int input_weight_offset = 0;
|
||||
int state_weight_offset = 0;
|
||||
int bias_offset = 0;
|
||||
|
||||
if (params_.layer_count_ != 1) {
|
||||
MS_LOG(WARNING) << op_name_ << " needs verify for layer cnt: " << params_.layer_count_;
|
||||
}
|
||||
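// Build one LSTM cell per stacked layer; each layer consumes the previous layer's output together with
// its own slice of the packed weight, recurrent-weight and bias tensors.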
for (int i = 0; i < params_.layer_count_; i++) {
|
||||
LstmState layer_input_states[BIDIRECTIONAL];
|
||||
LstmWeights layer_weights[BIDIRECTIONAL];
|
||||
layer_weights[0].max_seq_size_ = max_sequence_size;
|
||||
int ret = ParseLSTMCellInputs(i, hidden_init, cell_init, layer_input_states, &input_weight_offset,
|
||||
&state_weight_offset, &bias_offset, layer_weights, next_state);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ParseLSTMCellInputs failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
data_out = AddLSTMCell(layer_input_states, layer_weights, &next_state);
|
||||
hidden_outputs.push_back(next_state.hidden_);
|
||||
cell_outputs.push_back(next_state.cell_);
|
||||
if (data_out == nullptr || next_state.hidden_ == nullptr || next_state.cell_ == nullptr) {
|
||||
MS_LOG(ERROR) << "AddLSTMCell failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
op_hidden_out_ = ConcateAll(hidden_outputs);
|
||||
if (op_hidden_out_ == nullptr) {
|
||||
MS_LOG(ERROR) << "concat hidden output failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
op_hidden_out_->setName(out_tensors_[OUTPUT_HIDDEN_INDEX].Name().c_str());
|
||||
op_cell_out_ = ConcateAll(cell_outputs);
|
||||
if (op_cell_out_ == nullptr) {
|
||||
MS_LOG(ERROR) << "concat cell output failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
op_cell_out_->setName(out_tensors_[OUTPUT_CELL_INDEX].Name().c_str());
|
||||
op_data_out_ = data_out;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int LSTMTensorRT::ParseLSTMCellInputs(int layer_index, nvinfer1::ITensor *hidden_init, nvinfer1::ITensor *cell_init,
|
||||
LstmState *layer_input_states, int *input_weight_offset, int *state_weight_offset,
|
||||
int *bias_offset, LstmWeights *layer_weights, const LstmState &next_state) {
|
||||
nvinfer1::Dims2 dim_input_weight(LSTM_GATE_NUM * params_.hidden_size_, params_.input_data_size_);
|
||||
nvinfer1::Dims2 dim_state_weight(LSTM_GATE_NUM * params_.hidden_size_, params_.hidden_size_);
|
||||
nvinfer1::Dims dim_bias{1, {LSTM_GATE_NUM * params_.hidden_size_}};
|
||||
|
||||
mindspore::MSTensor &input_weight = in_tensors_[INPUT_WEIGHT];
|
||||
mindspore::MSTensor &state_weight = in_tensors_[STATE_WEIGHT];
|
||||
mindspore::MSTensor &bias = in_tensors_[BIAS];
|
||||
|
||||
nvinfer1::Dims dimW = layer_index == 0 ? dim_input_weight : dim_state_weight;
|
||||
|
||||
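// Slice per-direction weights and biases out of the flat MindSpore weight layout; the offsets advance
// as each slice is consumed.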
for (int direction_index = 0; direction_index < params_.directional_cnt_; direction_index++) {
|
||||
nvinfer1::ITensor *index =
|
||||
network_
|
||||
->addConstant(nvinfer1::Dims{},
|
||||
nvinfer1::Weights{nvinfer1::DataType::kINT32,
|
||||
&INDICES[layer_index * params_.directional_cnt_ + direction_index], 1})
|
||||
->getOutput(0);
|
||||
MS_ASSERT(index);
|
||||
layer_input_states[direction_index].data_ = next_state.data_;
|
||||
layer_input_states[direction_index].hidden_ = network_->addGather(*hidden_init, *index, 0)->getOutput(0);
|
||||
layer_input_states[direction_index].cell_ = network_->addGather(*cell_init, *index, 0)->getOutput(0);
|
||||
MS_ASSERT(layer_input_states[direction_index].hidden_);
|
||||
MS_ASSERT(layer_input_states[direction_index].cell_);
|
||||
|
||||
// weight order: input, output, forget, cell
|
||||
if (params_.data_type_ != nvinfer1::DataType::kFLOAT) {
|
||||
MS_LOG(WARNING) << "more data type need to be done";
|
||||
return RET_ERROR;
|
||||
}
|
||||
const float *input_weight_ptr = static_cast<const float *>(input_weight.Data().get());
|
||||
const float *state_weight_ptr = static_cast<const float *>(state_weight.Data().get());
|
||||
const float *bias_ptr = static_cast<const float *>(bias.Data().get());
|
||||
nvinfer1::Weights slice_input_weight{params_.data_type_, input_weight_ptr + *input_weight_offset,
|
||||
GetDimsVolume(dimW)};
|
||||
(*input_weight_offset) += slice_input_weight.count;
|
||||
nvinfer1::Weights slice_state_weight{params_.data_type_, state_weight_ptr + *state_weight_offset,
|
||||
GetDimsVolume(dim_state_weight)};
|
||||
(*state_weight_offset) += slice_state_weight.count;
|
||||
layer_weights[direction_index].input_weights_ = network_->addConstant(dimW, slice_input_weight)->getOutput(0);
|
||||
layer_weights[direction_index].state_weights_ =
|
||||
network_->addConstant(dim_state_weight, slice_state_weight)->getOutput(0);
|
||||
MS_ASSERT(layer_weights[direction_index].input_weights_);
|
||||
MS_ASSERT(layer_weights[direction_index].state_weights_);
|
||||
|
||||
// bias
|
||||
nvinfer1::Weights slice_input_bias{params_.data_type_, bias_ptr + *bias_offset, GetDimsVolume(dim_bias)};
|
||||
(*bias_offset) += slice_input_bias.count;
|
||||
nvinfer1::Weights slice_state_bias{params_.data_type_, bias_ptr + *bias_offset, GetDimsVolume(dim_bias)};
|
||||
(*bias_offset) += slice_state_bias.count;
|
||||
layer_weights[direction_index].input_bias_ = network_->addConstant(dim_bias, slice_input_bias)->getOutput(0);
|
||||
layer_weights[direction_index].state_bias_ = network_->addConstant(dim_bias, slice_state_bias)->getOutput(0);
|
||||
MS_ASSERT(layer_weights[direction_index].input_bias_);
|
||||
MS_ASSERT(layer_weights[direction_index].state_bias_);
|
||||
}
|
||||
if (params_.directional_cnt_ == BIDIRECTIONAL) {
|
||||
layer_weights[1].max_seq_size_ = layer_weights[0].max_seq_size_;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *LSTMTensorRT::Reshape(nvinfer1::ITensor *tensor, nvinfer1::Dims dims) {
|
||||
nvinfer1::IShuffleLayer *shuffle = network_->addShuffle(*tensor);
|
||||
shuffle->setReshapeDimensions(dims);
|
||||
return shuffle->getOutput(0);
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *LSTMTensorRT::ConcateAll(std::vector<nvinfer1::ITensor *> all_tensor, int axis) {
|
||||
if (all_tensor.size() == 1) {
|
||||
return all_tensor[0];
|
||||
}
|
||||
nvinfer1::IConcatenationLayer *concat = network_->addConcatenation(all_tensor.data(), all_tensor.size());
|
||||
if (concat == nullptr) {
|
||||
MS_LOG(ERROR) << "addConcatenation failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
if (axis >= all_tensor[0]->getDimensions().nbDims) {
|
||||
MS_LOG(ERROR) << op_name_ << " concat axis is " << axis << ", larger than tensor dims "
|
||||
<< all_tensor[0]->getDimensions().nbDims;
|
||||
return nullptr;
|
||||
}
|
||||
concat->setAxis(axis);
|
||||
return concat->getOutput(0);
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *LSTMTensorRT::AddLSTMCell(const LstmState *layer_input_states, const LstmWeights *layer_weights,
|
||||
LstmState *next_state) {
|
||||
nvinfer1::ITensor *backward_output = nullptr;
|
||||
nvinfer1::ITensor *backward_hidden_out = nullptr;
|
||||
nvinfer1::ITensor *backward_cell_out = nullptr;
|
||||
nvinfer1::ITensor *forward_hidden_out = nullptr;
|
||||
nvinfer1::ITensor *forward_cell_out = nullptr;
|
||||
|
||||
nvinfer1::ITensor *forward_output =
|
||||
AddLSTMCalculation(layer_input_states[0], layer_weights[0], &forward_hidden_out, &forward_cell_out);
|
||||
if (params_.directional_cnt_ == BIDIRECTIONAL) {
|
||||
backward_output =
|
||||
AddLSTMCalculation(layer_input_states[1], layer_weights[1], &backward_hidden_out, &backward_cell_out, true);
|
||||
}
|
||||
|
||||
// concate forward and backward
|
||||
nvinfer1::ITensor *output_tensor = forward_output;
|
||||
nvinfer1::ITensor *cell_out = forward_cell_out;
|
||||
nvinfer1::ITensor *hidden_out = forward_hidden_out;
|
||||
if (backward_output != nullptr && backward_hidden_out != nullptr && backward_cell_out != nullptr) {
|
||||
nvinfer1::ITensor *output_concat_input[BIDIRECTIONAL] = {forward_output, backward_output};
|
||||
auto ouput_out_layer = network_->addConcatenation(output_concat_input, BIDIRECTIONAL);
|
||||
this->layer_ = ouput_out_layer;
|
||||
if (ouput_out_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "create one loop output concat failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
ouput_out_layer->setAxis(1); // ms: 0 sequence size, 1 layer * direction, 2 batchsize, 3 hidden
|
||||
output_tensor = ouput_out_layer->getOutput(0);
|
||||
|
||||
nvinfer1::ITensor *hidden_concat_input[BIDIRECTIONAL] = {forward_hidden_out, backward_hidden_out};
|
||||
auto hidden_out_layer = network_->addConcatenation(hidden_concat_input, BIDIRECTIONAL);
|
||||
hidden_out_layer->setAxis(0);
|
||||
hidden_out = hidden_out_layer->getOutput(0);
|
||||
|
||||
nvinfer1::ITensor *cell_concat_input[BIDIRECTIONAL] = {forward_cell_out, backward_cell_out};
|
||||
auto cell_out_layer = network_->addConcatenation(cell_concat_input, BIDIRECTIONAL);
|
||||
cell_out_layer->setAxis(0);
|
||||
cell_out = cell_out_layer->getOutput(0);
|
||||
}
|
||||
if (hidden_out == nullptr || cell_out == nullptr) {
|
||||
MS_LOG(ERROR) << "get one loop hidden_out and cell_out failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
*next_state = LstmState{output_tensor, hidden_out, cell_out};
|
||||
return output_tensor;
|
||||
}
|
||||
nvinfer1::ITensor *LSTMTensorRT::AddLSTMCalculation(const LstmState &input_state, const LstmWeights &lstm_weights,
|
||||
nvinfer1::ITensor **hidden_out, nvinfer1::ITensor **cell_out,
|
||||
bool is_backward) {
|
||||
std::vector<nvinfer1::ITensor *> all_batch_outputs;
|
||||
std::vector<nvinfer1::ITensor *> all_batch_hidden;
|
||||
std::vector<nvinfer1::ITensor *> all_batch_cell;
|
||||
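// The loop-based LSTM below handles one batch element at a time; the per-batch outputs and states are
// concatenated back together afterwards.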
for (int batch_index = 0; batch_index < params_.batch_size_; batch_index++) {
|
||||
LstmState one_batch_input_state;
|
||||
nvinfer1::ITensor *batch_index_tensor =
|
||||
network_->addConstant(nvinfer1::Dims{}, nvinfer1::Weights{nvinfer1::DataType::kINT32, &INDICES[batch_index], 1})
|
||||
->getOutput(0);
|
||||
one_batch_input_state.data_ = network_->addGather(*input_state.data_, *batch_index_tensor, 0)->getOutput(0);
|
||||
one_batch_input_state.hidden_ = network_->addGather(*input_state.hidden_, *batch_index_tensor, 0)->getOutput(0);
|
||||
one_batch_input_state.cell_ = network_->addGather(*input_state.cell_, *batch_index_tensor, 0)->getOutput(0);
|
||||
nvinfer1::ITensor *one_batch_hidden = nullptr;
|
||||
nvinfer1::ITensor *one_batch_cell = nullptr;
|
||||
nvinfer1::ITensor *one_batch_output =
|
||||
AddLSTMOneLoop(one_batch_input_state, lstm_weights, &one_batch_hidden, &one_batch_cell, is_backward);
|
||||
if (one_batch_output == nullptr || one_batch_cell == nullptr || one_batch_hidden == nullptr) {
|
||||
MS_LOG(ERROR) << "AddLSTMOneLoop failed for " << op_name_ << " at batch index " << batch_index;
|
||||
return nullptr;
|
||||
}
|
||||
all_batch_outputs.push_back(one_batch_output);
|
||||
all_batch_hidden.push_back(one_batch_hidden);
|
||||
all_batch_cell.push_back(one_batch_cell);
|
||||
}
|
||||
*hidden_out = ConcateAll(all_batch_hidden, 1);
|
||||
*cell_out = ConcateAll(all_batch_cell, 1);
|
||||
return ConcateAll(all_batch_outputs, BATCH_SIZE_INDEX);
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *LSTMTensorRT::AddLSTMOneLoop(const LstmState &input_state, const LstmWeights &lstm_weights,
|
||||
nvinfer1::ITensor **hidden_out, nvinfer1::ITensor **cell_out,
|
||||
bool is_backward) {
|
||||
#if TRT_VERSION_GE(7, 0)
|
||||
nvinfer1::ILoop *sequence_loop = network_->addLoop();
|
||||
if (sequence_loop == nullptr) {
|
||||
MS_LOG(ERROR) << "add sequence_loop layer failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
std::string loop_name = op_name_ + "_loop" + (is_backward ? "_backward" : "_forward");
|
||||
sequence_loop->setName(loop_name.c_str());
|
||||
sequence_loop->addTripLimit(*sequence_size_input_, nvinfer1::TripLimit::kCOUNT);
|
||||
nvinfer1::ITensor *input = sequence_loop->addIterator(*input_state.data_, 0, is_backward)->getOutput(0);
|
||||
|
||||
nvinfer1::ILayer *hidden_mid = sequence_loop->addRecurrence(*input_state.hidden_);
|
||||
if (hidden_mid == nullptr) {
|
||||
MS_LOG(ERROR) << "add hidden layer failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
nvinfer1::ILayer *cell_mid = sequence_loop->addRecurrence(*input_state.cell_);
|
||||
if (cell_mid == nullptr) {
|
||||
MS_LOG(ERROR) << "add cell layer failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *input_matmul =
|
||||
network_
|
||||
->addMatrixMultiply(*input, nvinfer1::MatrixOperation::kVECTOR, *lstm_weights.input_weights_,
|
||||
nvinfer1::MatrixOperation::kTRANSPOSE)
|
||||
->getOutput(0);
|
||||
|
||||
nvinfer1::ITensor *hidden_matmul =
|
||||
network_
|
||||
->addMatrixMultiply(*hidden_mid->getOutput(0), nvinfer1::MatrixOperation::kVECTOR, *lstm_weights.state_weights_,
|
||||
nvinfer1::MatrixOperation::kTRANSPOSE)
|
||||
->getOutput(0);
|
||||
|
||||
nvinfer1::ITensor *weights_add =
|
||||
network_->addElementWise(*input_matmul, *hidden_matmul, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
|
||||
|
||||
nvinfer1::ITensor *bias =
|
||||
network_->addElementWise(*lstm_weights.input_bias_, *lstm_weights.state_bias_, nvinfer1::ElementWiseOperation::kSUM)
|
||||
->getOutput(0);
|
||||
|
||||
nvinfer1::ITensor *gates_calculate =
|
||||
network_->addElementWise(*weights_add, *bias, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
|
||||
|
||||
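// Standard LSTM cell update built from elementwise layers below:
//   i = sigmoid(...), o = sigmoid(...), f = sigmoid(...), c~ = tanh(...)
//   C_t = f * C_{t-1} + i * c~,   H_t = o * tanh(C_t)
// isolateGate slices one gate (hidden_size elements) out of the fused 4 * hidden_size gate vector.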
const auto isolateGate = [&](nvinfer1::ITensor &gates, int gateIndex) -> nvinfer1::ITensor * {
|
||||
nvinfer1::ISliceLayer *slice =
|
||||
network_->addSlice(gates, nvinfer1::Dims{1, {gateIndex * params_.hidden_size_}},
|
||||
nvinfer1::Dims{1, {params_.hidden_size_}}, nvinfer1::Dims{1, {1}});
|
||||
return Reshape(slice->getOutput(0), nvinfer1::Dims{1, {params_.hidden_size_}});
|
||||
};
|
||||
// weight order: input, output, forget, cell
|
||||
nvinfer1::ITensor *i =
|
||||
network_->addActivation(*isolateGate(*gates_calculate, 0), nvinfer1::ActivationType::kSIGMOID)->getOutput(0);
|
||||
|
||||
nvinfer1::ITensor *o =
|
||||
network_->addActivation(*isolateGate(*gates_calculate, 1), nvinfer1::ActivationType::kSIGMOID)->getOutput(0);
|
||||
|
||||
nvinfer1::ITensor *f =
|
||||
network_->addActivation(*isolateGate(*gates_calculate, FORGET_GATE), nvinfer1::ActivationType::kSIGMOID)
|
||||
->getOutput(0);
|
||||
|
||||
nvinfer1::ITensor *c =
|
||||
network_->addActivation(*isolateGate(*gates_calculate, CELL_GATE), nvinfer1::ActivationType::kTANH)->getOutput(0);
|
||||
|
||||
nvinfer1::ITensor *C =
|
||||
network_
|
||||
->addElementWise(
|
||||
*network_->addElementWise(*f, *cell_mid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD)->getOutput(0),
|
||||
*network_->addElementWise(*i, *c, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0),
|
||||
nvinfer1::ElementWiseOperation::kSUM)
|
||||
->getOutput(0);
|
||||
nvinfer1::ITensor *H =
|
||||
network_
|
||||
->addElementWise(*o, *network_->addActivation(*C, nvinfer1::ActivationType::kTANH)->getOutput(0),
|
||||
nvinfer1::ElementWiseOperation::kPROD)
|
||||
->getOutput(0);
|
||||
|
||||
// Recurrent backedge input for hidden and cell.
|
||||
cell_mid->setInput(1, *C);
|
||||
hidden_mid->setInput(1, *H);
|
||||
// outputs
|
||||
nvinfer1::LoopOutput output_mode = is_backward ? nvinfer1::LoopOutput::kREVERSE : nvinfer1::LoopOutput::kCONCATENATE;
|
||||
nvinfer1::ILoopOutputLayer *output_layer = sequence_loop->addLoopOutput(*H, output_mode);
|
||||
output_layer->setInput(1, *lstm_weights.max_seq_size_);
|
||||
*hidden_out =
|
||||
Reshape(sequence_loop->addLoopOutput(*hidden_mid->getOutput(0), nvinfer1::LoopOutput::kLAST_VALUE)->getOutput(0),
|
||||
nvinfer1::Dims3(1, 1, params_.hidden_size_));
|
||||
*cell_out =
|
||||
Reshape(sequence_loop->addLoopOutput(*cell_mid->getOutput(0), nvinfer1::LoopOutput::kLAST_VALUE)->getOutput(0),
|
||||
nvinfer1::Dims3(1, 1, params_.hidden_size_));
|
||||
return Reshape(output_layer->getOutput(0), nvinfer1::Dims4(params_.sequence_size_, 1, 1, params_.hidden_size_));
|
||||
#else
|
||||
MS_LOG(ERROR) << "low TensorRT version don't support LSTM op, please upgrade TensorRT version to 7 or higher";
|
||||
return nullptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
int LSTMTensorRT::Prepare(void **network_tensor_bindings, nvinfer1::ICudaEngine *engine) {
|
||||
if (op_binding_tensor_.size() == 0) {
|
||||
MS_LOG(DEBUG) << "unsing serialized engine, add input tensor for " << op_name_;
|
||||
mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
|
||||
mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
|
||||
|
||||
op_binding_tensor_.push_back(BindingHelper{hidden_init_name_, hidden_in_init.MutableData(),
|
||||
nvinfer1::DataType::kFLOAT, hidden_in_init.DataSize()});
|
||||
op_binding_tensor_.push_back(
|
||||
BindingHelper{cell_init_name_, cell_in_init.MutableData(), nvinfer1::DataType::kFLOAT, cell_in_init.DataSize()});
|
||||
params_.sequence_size_ = in_tensors_[0].Shape()[0];
|
||||
op_binding_tensor_.push_back(
|
||||
BindingHelper{(op_name_ + "_seq_input"), ¶ms_.sequence_size_, nvinfer1::DataType::kINT32, sizeof(int)});
|
||||
}
|
||||
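// Bind the hidden/cell init states and the sequence length as extra engine inputs: allocate device
// memory for each, record it in the binding table and copy the host data across.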
for (auto tensor : op_binding_tensor_) {
|
||||
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor.name_, tensor.size_, tensor.data_type_);
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc for inputs tensor device memory failed " << tensor.name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
int index = engine->getBindingIndex(tensor.name_.c_str());
if (index < 0) {
  MS_LOG(ERROR) << "binding is not in tensorrt engine: " << tensor.name_;
  return RET_ERROR;
}
|
||||
network_tensor_bindings[index] = device_ptr;
|
||||
runtime_->GetAllocator()->SyncMemInHostAndDevice(tensor.data_, tensor.name_, tensor.size_, true);
|
||||
runtime_->GetAllocator()->MarkMemValid(tensor.name_, true);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_LSTM, LSTMTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,115 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LSTM_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LSTM_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr int INPUT_TENSOR_SIZE = 6;
|
||||
constexpr int OUTPUT_TENSOR_SIZE = 3;
|
||||
constexpr int INPUT_WEIGHT = 1;
|
||||
constexpr int STATE_WEIGHT = 2;
|
||||
constexpr int BIAS = 3;
|
||||
constexpr int HIDDEN_IN_TENSOR_INIT = 4;
|
||||
constexpr int CELL_IN_TENSOR_INIT = 5;
|
||||
constexpr int LSTM_GATE_NUM = 4;
|
||||
constexpr int BIDIRECTIONAL = 2;
|
||||
constexpr int OUTPUT_HIDDEN_INDEX = 1;
|
||||
constexpr int OUTPUT_CELL_INDEX = 2;
|
||||
constexpr int INPUT_SIZE_INDEX = 2;
|
||||
constexpr int FORGET_GATE = 2;
|
||||
constexpr int CELL_GATE = 3;
|
||||
constexpr int BATCH_SIZE_INDEX = 2;
|
||||
static const std::array<int, 4> INDICES{0, 1, 2, 3};
|
||||
|
||||
struct LSTMParams {
|
||||
int sequence_size_;
|
||||
int input_data_size_;
|
||||
int batch_size_;
|
||||
int layer_count_;
|
||||
int hidden_size_;
|
||||
nvinfer1::DataType data_type_;
|
||||
int directional_cnt_;
|
||||
};
|
||||
|
||||
struct LstmState {
|
||||
nvinfer1::ITensor *data_{nullptr};
|
||||
nvinfer1::ITensor *hidden_{nullptr};
|
||||
nvinfer1::ITensor *cell_{nullptr};
|
||||
};
|
||||
|
||||
struct LstmWeights {
|
||||
nvinfer1::ITensor *input_weights_{nullptr};
|
||||
nvinfer1::ITensor *state_weights_{nullptr};
|
||||
nvinfer1::ITensor *input_bias_{nullptr};
|
||||
nvinfer1::ITensor *state_bias_{nullptr};
|
||||
nvinfer1::ITensor *max_seq_size_{nullptr};
|
||||
};
|
||||
|
||||
class LSTMTensorRT : public TensorRTOp {
|
||||
public:
|
||||
LSTMTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~LSTMTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
|
||||
int Prepare(void **network_tensor_bindings, nvinfer1::ICudaEngine *engine) override;
|
||||
|
||||
private:
|
||||
int PreProcess();
|
||||
|
||||
int AddLSTMLayers();
|
||||
|
||||
nvinfer1::ITensor *AddLSTMCell(const LstmState *layer_input_states, const LstmWeights *layer_weights,
|
||||
LstmState *next_state);
|
||||
|
||||
nvinfer1::ITensor *Reshape(nvinfer1::ITensor *tensor, nvinfer1::Dims dims);
|
||||
|
||||
nvinfer1::ITensor *ConcateAll(std::vector<nvinfer1::ITensor *> all_tensors, int axis = 0);
|
||||
|
||||
nvinfer1::ITensor *AddLSTMCalculation(const LstmState &input_state, const LstmWeights &lstm_weights,
|
||||
nvinfer1::ITensor **hidden_out, nvinfer1::ITensor **cell_out,
|
||||
bool is_backward = false);
|
||||
nvinfer1::ITensor *AddLSTMOneLoop(const LstmState &input_state, const LstmWeights &lstm_weights,
|
||||
nvinfer1::ITensor **hidden_out, nvinfer1::ITensor **cell_out,
|
||||
bool is_backward = false);
|
||||
|
||||
int ParseLSTMCellInputs(int layer_index, nvinfer1::ITensor *hidden_init, nvinfer1::ITensor *cell_init,
|
||||
LstmState *input_state, int *input_weight_offset, int *state_weight_offset, int *bias_offset,
|
||||
LstmWeights *lstm_weights, const LstmState &next_state);
|
||||
|
||||
nvinfer1::INetworkDefinition *network_{nullptr};
|
||||
nvinfer1::ITensor *input_data_{nullptr};
|
||||
nvinfer1::ITensor *sequence_size_input_{nullptr};
|
||||
nvinfer1::ITensor *op_data_out_{nullptr};
|
||||
nvinfer1::ITensor *op_hidden_out_{nullptr};
|
||||
nvinfer1::ITensor *op_cell_out_{nullptr};
|
||||
LSTMParams params_;
|
||||
std::string hidden_init_name_;
|
||||
std::string cell_init_name_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LSTM_TENSORRT_H_
|
|
@@ -0,0 +1,202 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/op/matmul_opt_plugin.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <numeric>
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
REGISTER_TENSORRT_PLUGIN(MatmulOptPluginCreater);
|
||||
template class TensorRTPluginCreater<MatmulOptPlugin>;
|
||||
template <class T>
|
||||
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
|
||||
template <class T>
|
||||
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
|
||||
|
||||
// MatmulOptPlugin
|
||||
int MatmulOptPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace,
|
||||
cudaStream_t stream) noexcept {
|
||||
CHECK_NULL_RETURN(cublas_handle_);
|
||||
CUBLAS_CHECK(cublasSetStream(cublas_handle_, stream));
|
||||
const nvinfer1::PluginTensorDesc desc_a = inputDesc[0];
|
||||
const nvinfer1::PluginTensorDesc desc_b = inputDesc[1];
|
||||
const nvinfer1::PluginTensorDesc desc_c = outputDesc[0];
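// dispatch on input rank: 2D inputs run a single GEMM, 3D inputs run a batched GEMM over the leading (batch) dimension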
|
||||
|
||||
if (desc_a.dims.nbDims == DIMENSION_2D) {
|
||||
// a: m * k, b: k * n, c: m * n
|
||||
int m = desc_c.dims.d[0];
|
||||
int n = desc_c.dims.d[1];
|
||||
int k = b_trans_ ? desc_b.dims.d[1] : desc_b.dims.d[0];
|
||||
const int mm_params[]{m, n, k};
|
||||
CublasMM1Batch(inputs[0], inputs[1], outputs[0], mm_params, operations_, data_types_, cublas_handle_);
|
||||
} else if (desc_a.dims.nbDims == DIMENSION_3D) {
|
||||
return RunBatchedMatmul(inputDesc, outputDesc, inputs, outputs, workspace, stream);
|
||||
} else {
|
||||
MS_LOG(ERROR) << layer_name_ << " input dims needs check a: " << desc_a.dims.nbDims;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int MatmulOptPlugin::RunBatchedMatmul(const nvinfer1::PluginTensorDesc *inputDesc,
|
||||
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
|
||||
void *const *outputs, void *workspace, cudaStream_t stream) {
|
||||
const nvinfer1::PluginTensorDesc desc_b = inputDesc[1];
|
||||
const nvinfer1::PluginTensorDesc desc_c = outputDesc[0];
|
||||
int batch = desc_c.dims.d[0];
|
||||
int m = desc_c.dims.d[1];
|
||||
int n = desc_c.dims.d[DIMENSION_2D];
|
||||
int k = b_trans_ ? desc_b.dims.d[DIMENSION_2D] : desc_b.dims.d[1];
|
||||
const int mm_params[]{m, n, k, batch};
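// collect per-batch pointers into the input/output device buffers, then copy the pointer arrays to device for the batched cublas call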
|
||||
for (int i = 0; i < batch; i++) {
|
||||
a_addrs_[i] = static_cast<const char *>(inputs[0]) + i * m * k * sizeof(float);
|
||||
b_addrs_[i] = static_cast<const char *>(inputs[1]) + i * k * n * sizeof(float);
|
||||
c_addrs_[i] = static_cast<char *>(outputs[0]) + i * m * n * sizeof(float);
|
||||
}
|
||||
int data_size = batch * sizeof(void *);
|
||||
int max_batchsize = a_addrs_.size();
|
||||
if (a_device_addrs_ == nullptr) {
|
||||
CUDA_CHECK(cudaMalloc(&a_device_addrs_, sizeof(void *) * max_batchsize));
|
||||
}
|
||||
if (b_device_addrs_ == nullptr) {
|
||||
CUDA_CHECK(cudaMalloc(&b_device_addrs_, sizeof(void *) * max_batchsize));
|
||||
}
|
||||
if (c_device_addrs_ == nullptr) {
|
||||
CUDA_CHECK(cudaMalloc(&c_device_addrs_, sizeof(void *) * max_batchsize));
|
||||
}
|
||||
CUDA_CHECK(cudaMemcpy(a_device_addrs_, a_addrs_.data(), data_size, cudaMemcpyHostToDevice));
|
||||
CUDA_CHECK(cudaMemcpy(b_device_addrs_, b_addrs_.data(), data_size, cudaMemcpyHostToDevice));
|
||||
CUDA_CHECK(cudaMemcpy(c_device_addrs_, c_addrs_.data(), data_size, cudaMemcpyHostToDevice));
|
||||
|
||||
CublasMMBatched(a_device_addrs_, b_device_addrs_, c_device_addrs_, mm_params, operations_, data_types_,
|
||||
cublas_handle_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *MatmulOptPlugin::clone() const noexcept {
|
||||
auto *plugin = new MatmulOptPlugin(*this);
|
||||
plugin->setPluginNamespace(name_space_.c_str());
|
||||
return plugin;
|
||||
}
|
||||
|
||||
nvinfer1::DimsExprs MatmulOptPlugin::getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs,
|
||||
int nbInputs, nvinfer1::IExprBuilder &exprBuilder) noexcept {
|
||||
nvinfer1::DimsExprs out_dims{};
|
||||
if (nbInputs != INPUT_SIZE2 && nbInputs != INPUT_SIZE3) {
|
||||
MS_LOG(ERROR) << "invalid input size " << nbInputs << " of " << layer_name_;
|
||||
return out_dims;
|
||||
}
|
||||
out_dims.nbDims = inputs[0].nbDims;
|
||||
if (out_dims.nbDims == DIMENSION_2D) {
|
||||
out_dims.d[0] = a_trans_ ? inputs[0].d[1] : inputs[0].d[0];
|
||||
out_dims.d[1] = b_trans_ ? inputs[1].d[0] : inputs[1].d[1];
|
||||
return out_dims;
|
||||
} else if (out_dims.nbDims == DIMENSION_3D) {
|
||||
out_dims.d[0] = inputs[0].d[0];
|
||||
out_dims.d[1] = a_trans_ ? inputs[0].d[DIMENSION_2D] : inputs[0].d[1];
|
||||
out_dims.d[DIMENSION_2D] = b_trans_ ? inputs[1].d[1] : inputs[1].d[DIMENSION_2D];
|
||||
return out_dims;
|
||||
}
|
||||
MS_LOG(ERROR) << "invalid input dims " << out_dims.nbDims << " of " << layer_name_;
|
||||
return out_dims;
|
||||
}
|
||||
|
||||
void MatmulOptPlugin::configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs,
|
||||
const nvinfer1::DynamicPluginTensorDesc *out, int nbOutputs) noexcept {
|
||||
operations_[0] = a_trans_ ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
operations_[1] = b_trans_ ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
data_types_[0] = ConvertDataType(in[0].desc.type); // input a
|
||||
data_types_[1] = ConvertDataType(in[1].desc.type); // input b
|
||||
data_types_[THIRD_INPUT] = ConvertDataType(out[0].desc.type); // output c
|
||||
data_types_[FOURTH_INPUT] =
|
||||
(in[0].desc.type == nvinfer1::DataType::kHALF || in[1].desc.type == nvinfer1::DataType::kHALF)
|
||||
? CUDA_R_16F
|
||||
: CUDA_R_32F; // compute type
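// for 3D (batched) inputs, size the host pointer arrays to the maximum batch and pre-allocate their device copies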
|
||||
if (in[0].max.nbDims == DIMENSION_3D) {
|
||||
int max_batchsize = in[0].max.d[0];
|
||||
a_addrs_.resize(max_batchsize);
|
||||
b_addrs_.resize(max_batchsize);
|
||||
c_addrs_.resize(max_batchsize);
|
||||
if (a_device_addrs_ == nullptr) {
|
||||
CUDA_CHECK_VOID(cudaMalloc(&a_device_addrs_, sizeof(void *) * max_batchsize));
|
||||
}
|
||||
if (b_device_addrs_ == nullptr) {
|
||||
CUDA_CHECK_VOID(cudaMalloc(&b_device_addrs_, sizeof(void *) * max_batchsize));
|
||||
}
|
||||
if (c_device_addrs_ == nullptr) {
|
||||
CUDA_CHECK_VOID(cudaMalloc(&c_device_addrs_, sizeof(void *) * max_batchsize));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int MatmulOptPlugin::initialize() noexcept {
|
||||
if (cublas_handle_ == nullptr) {
|
||||
CUBLAS_CHECK(cublasCreate(&cublas_handle_));
|
||||
}
|
||||
for (int i = 0; i < DIMENSION_4D; i++) {
|
||||
if (data_types_[i] != CUDA_R_32F) {
|
||||
MS_LOG(ERROR) << layer_name_ << " only support fp32";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
  return RET_OK;
}
|
||||
|
||||
void MatmulOptPlugin::terminate() noexcept {
|
||||
if (cublas_handle_ != nullptr) {
|
||||
auto cublas_ret = cublasDestroy(cublas_handle_);
|
||||
if (cublas_ret != CUBLAS_STATUS_SUCCESS) {
|
||||
MS_LOG(ERROR) << "cublasDestroy failed: " << cublas_ret;
|
||||
} else {
|
||||
cublas_handle_ = nullptr;
|
||||
}
|
||||
}
|
||||
cudaError_t err;
|
||||
if (a_device_addrs_ != nullptr) {
|
||||
err = cudaFree(a_device_addrs_);
|
||||
if (err != cudaSuccess) {
|
||||
MS_LOG(ERROR) << layer_name_ << " free cuda device mem failed " << err;
|
||||
}
|
||||
a_device_addrs_ = nullptr;
|
||||
}
|
||||
if (b_device_addrs_ != nullptr) {
|
||||
err = cudaFree(b_device_addrs_);
|
||||
if (err != cudaSuccess) {
|
||||
MS_LOG(ERROR) << layer_name_ << " free cuda device mem failed " << err;
|
||||
}
|
||||
b_device_addrs_ = nullptr;
|
||||
}
|
||||
if (c_device_addrs_ != nullptr) {
|
||||
err = cudaFree(c_device_addrs_);
|
||||
if (err != cudaSuccess) {
|
||||
MS_LOG(ERROR) << layer_name_ << " free cuda device mem failed " << err;
|
||||
}
|
||||
c_device_addrs_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
size_t MatmulOptPlugin::getSerializationSize() const noexcept { return 2 * sizeof(bool); }
|
||||
|
||||
void MatmulOptPlugin::serialize(void *buffer) const noexcept {
|
||||
SerializeValue(&buffer, &a_trans_, sizeof(bool));
|
||||
SerializeValue(&buffer, &b_trans_, sizeof(bool));
|
||||
}
|
||||
} // namespace mindspore::lite
|
|
@@ -0,0 +1,80 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_OPT_PLUGIN_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_OPT_PLUGIN_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cublas_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr const char *MATMUL_OPT_PLUGIN_NAME{"MatmulOptPlugin"};
|
||||
class MatmulOptPlugin : public TensorRTPlugin {
|
||||
public:
|
||||
MatmulOptPlugin(const std::string name, bool a_trans, bool b_trans, uint32_t device_id)
|
||||
: TensorRTPlugin(name, std::string(MATMUL_OPT_PLUGIN_NAME), device_id), a_trans_(a_trans), b_trans_(b_trans) {}
|
||||
|
||||
MatmulOptPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
|
||||
: TensorRTPlugin(std::string(name), std::string(MATMUL_OPT_PLUGIN_NAME)) {
|
||||
const nvinfer1::PluginField *fields = fc->fields;
|
||||
a_trans_ = static_cast<const bool *>(fields[0].data)[0];
|
||||
b_trans_ = static_cast<const bool *>(fields[1].data)[0];
|
||||
}
|
||||
|
||||
MatmulOptPlugin(const char *name, const void *serialData, size_t serialLength)
|
||||
: TensorRTPlugin(std::string(name), std::string(MATMUL_OPT_PLUGIN_NAME)) {
|
||||
DeserializeValue(&serialData, &serialLength, &a_trans_, sizeof(bool));
|
||||
DeserializeValue(&serialData, &serialLength, &b_trans_, sizeof(bool));
|
||||
}
|
||||
|
||||
MatmulOptPlugin() = delete;
|
||||
|
||||
// IPluginV2DynamicExt Methods
|
||||
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
|
||||
nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
|
||||
nvinfer1::IExprBuilder &exprBuilder) noexcept override;
|
||||
void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs,
|
||||
const nvinfer1::DynamicPluginTensorDesc *out, int nbOutputs) noexcept override;
|
||||
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
|
||||
int initialize() noexcept override;
|
||||
void terminate() noexcept override;
|
||||
size_t getSerializationSize() const noexcept override;
|
||||
void serialize(void *buffer) const noexcept override;
|
||||
|
||||
private:
|
||||
int RunBatchedMatmul(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream);
|
||||
|
||||
bool a_trans_{false};
|
||||
bool b_trans_{false};
|
||||
cublasHandle_t cublas_handle_{nullptr};
|
||||
cublasOperation_t operations_[2]{CUBLAS_OP_N, CUBLAS_OP_N};
|
||||
cudaDataType data_types_[4]{CUDA_R_32F, CUDA_R_32F, CUDA_R_32F, CUDA_R_32F};
|
||||
std::vector<const void *> a_addrs_;
|
||||
std::vector<const void *> b_addrs_;
|
||||
std::vector<void *> c_addrs_;
|
||||
void **a_device_addrs_{nullptr};
|
||||
void **b_device_addrs_{nullptr};
|
||||
void **c_device_addrs_{nullptr};
|
||||
};
|
||||
class MatmulOptPluginCreater : public TensorRTPluginCreater<MatmulOptPlugin> {
|
||||
public:
|
||||
MatmulOptPluginCreater() : TensorRTPluginCreater(std::string(MATMUL_OPT_PLUGIN_NAME)) {}
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_OPT_PLUGIN_H_
|
|
@@ -0,0 +1,310 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/op/matmul_tensorrt.h"
|
||||
#include <memory>
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/matmul_opt_plugin.h"
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_runtime.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
MatMulTensorRT::~MatMulTensorRT() {
|
||||
if (weight_ptr_ != nullptr) {
|
||||
free(weight_ptr_);
|
||||
weight_ptr_ = nullptr;
|
||||
}
|
||||
}
|
||||
int MatMulTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
|
||||
const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int MatMulTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
if (type_ == schema::PrimitiveType_MatMulFusion) {
|
||||
auto primitive = this->GetPrimitive()->value_as_MatMulFusion();
|
||||
if (primitive == nullptr) {
|
||||
MS_LOG(ERROR) << "convert to primitive matmul failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_a_ = primitive->transpose_a();
|
||||
transpose_b_ = primitive->transpose_b();
|
||||
activation_ = primitive->activation_type();
|
||||
}
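// pick a lowering path: custom cublas plugin, TensorRT fully connected layer, or the native matrix multiply layer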
|
||||
nvinfer1::ITensor *out_tensor = nullptr;
|
||||
if (RunOptPlugin()) {
|
||||
out_tensor = AddAsOptPlugin(ctx);
|
||||
} else if (RunFullConnect()) {
|
||||
MS_LOG(DEBUG) << "use fully connected instead of matmul for " << op_name_;
|
||||
out_tensor = AddAsFullConnect(ctx);
|
||||
} else {
|
||||
MS_LOG(DEBUG) << "use origin tensorrt matmul for " << op_name_;
|
||||
out_tensor = AddAsMatmul(ctx);
|
||||
}
|
||||
if (out_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "add matmul failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
// add activation
|
||||
if (activation_ != schema::ActivationType::ActivationType_NO_ACTIVATION) {
|
||||
nvinfer1::ILayer *activation_layer =
|
||||
ActivationTensorRT::AddActivation(ctx, activation_, 0, 0, 0, out_tensor, device_id_);
|
||||
if (activation_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addActivation for matmul failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
activation_layer->setName((op_name_ + "_activation").c_str());
|
||||
out_tensor = activation_layer->getOutput(0);
|
||||
}
|
||||
|
||||
out_tensor->setName((op_name_ + "_output").c_str());
|
||||
MS_LOG(DEBUG) << "output " << GetTensorFormat(out_tensor, out_format_, true);
|
||||
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format_});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int MatMulTensorRT::PreprocessMatMulInputs(TensorRTContext *ctx, ITensorHelper *matmul_a, ITensorHelper *matmul_b) {
|
||||
if (tensorrt_in_tensors_.size() == INPUT_SIZE2) {
|
||||
int a_index =
|
||||
GetDimsVolume(tensorrt_in_tensors_[0].trt_tensor_->getDimensions()) == GetDimsVolume(in_tensors_[0].Shape()) ? 0
|
||||
: 1;
|
||||
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[a_index], matmul_a);
|
||||
ret += PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[1 - a_index], matmul_b);
|
||||
if (ret != RET_OK || matmul_a->trt_tensor_ == nullptr || matmul_b->trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "PreprocessInputs2SameDim of matmul inputs failed for " << op_name_;
|
||||
return ret;
|
||||
}
|
||||
out_format_ = matmul_a->format_;
|
||||
if (matmul_a->format_ != matmul_b->format_) {
|
||||
MS_LOG(WARNING) << "matmul input tensor has different format " << op_name_;
|
||||
out_format_ = Format::NHWC;
|
||||
}
|
||||
} else if (tensorrt_in_tensors_.size() == 1) {
|
||||
auto weight = ProcessWeightTensor(ctx);
|
||||
if (weight == nullptr) {
|
||||
MS_LOG(ERROR) << "create constant weight tensor failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
int weight_index = in_tensors_[1].Data() != nullptr ? 1 : 0;
|
||||
ITensorHelper *weight_helper = (weight_index == 1) ? matmul_b : matmul_a;
|
||||
ITensorHelper *var_helper = (weight_index == 1) ? matmul_a : matmul_b;
|
||||
weight_helper->trt_tensor_ = weight;
|
||||
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[1 - weight_index], var_helper);
|
||||
if (ret != RET_OK || var_helper->trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "PreprocessInputs2SameDim of matmul input var_helper failed for " << op_name_;
|
||||
return ret;
|
||||
}
|
||||
out_format_ = var_helper->format_;
|
||||
} else {
|
||||
MS_LOG(ERROR) << op_name_ << " tensorrt in tensor size is invalid " << tensorrt_in_tensors_.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *MatMulTensorRT::ProcessWeightTensor(TensorRTContext *ctx) {
|
||||
nvinfer1::ITensor *weight = nullptr;
|
||||
int weight_index = in_tensors_[1].Data() != nullptr ? 1 : 0;
|
||||
if (in_tensors_[weight_index].Shape().size() <
|
||||
static_cast<size_t>(tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims)) {
|
||||
std::vector<int64_t> expect_shape(in_tensors_[1 - weight_index].Shape().size(), 1);
|
||||
auto origin_shape = in_tensors_[weight_index].Shape();
|
||||
for (int i = 0; i < origin_shape.size(); i++) {
|
||||
expect_shape[expect_shape.size() - 1 - i] = origin_shape[origin_shape.size() - 1 - i];
|
||||
}
|
||||
weight = ConvertTensorWithExpandDims(ctx, in_tensors_[weight_index], expect_shape, op_name_);
|
||||
} else if (in_tensors_[weight_index].Shape().size() ==
|
||||
static_cast<size_t>(tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims)) {
|
||||
weight = ConvertConstantTensor(ctx, in_tensors_[weight_index], op_name_);
|
||||
} else {
|
||||
MS_LOG(ERROR) << "input tensor shape is invalid for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
return weight;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *MatMulTensorRT::AddAsMatmul(TensorRTContext *ctx) {
|
||||
ITensorHelper matmul_a;
|
||||
ITensorHelper matmul_b;
|
||||
|
||||
int ret = PreprocessMatMulInputs(ctx, &matmul_a, &matmul_b);
|
||||
if (ret != RET_OK || matmul_a.trt_tensor_ == nullptr || matmul_b.trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "PreprocessMatMulInputs matmul failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
MS_LOG(DEBUG) << "matmul input a " << GetTensorFormat(matmul_a);
|
||||
MS_LOG(DEBUG) << "matmul input b " << GetTensorFormat(matmul_b);
|
||||
|
||||
auto matmul_layer = ctx->network()->addMatrixMultiply(
|
||||
*matmul_a.trt_tensor_, transpose_a_ ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE,
|
||||
*matmul_b.trt_tensor_, transpose_b_ ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE);
|
||||
if (matmul_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addMatrixMultiply failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
this->layer_ = matmul_layer;
|
||||
matmul_layer->setName(op_name_.c_str());
|
||||
return AddBias(ctx, matmul_layer->getOutput(0));
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *MatMulTensorRT::AddAsFullConnect(TensorRTContext *ctx) {
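// fully connected path: expand the input to 4D when needed and transpose the 2D weight when transpose_b is not already set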
|
||||
nvinfer1::Weights weight;
|
||||
nvinfer1::Weights bias = ConvertWeight(in_tensors_[kBiasIndex]);
|
||||
nvinfer1::ITensor *input_a = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
out_format_ = tensorrt_in_tensors_[0].format_;
|
||||
if (input_a->getDimensions().nbDims != DIMENSION_4D) {
|
||||
nvinfer1::Dims in_dims(input_a->getDimensions());
|
||||
in_dims.nbDims = DIMENSION_4D;
|
||||
for (int i = input_a->getDimensions().nbDims; i < DIMENSION_4D; i++) {
|
||||
in_dims.d[i] = 1;
|
||||
}
|
||||
input_a = Reshape(ctx, input_a, in_dims);
|
||||
if (input_a == nullptr) {
|
||||
MS_LOG(ERROR) << "reshape input failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
MS_LOG(DEBUG) << "full connect expand input a to " << GetTensorFormat(input_a);
|
||||
} else {
|
||||
ITensorHelper tmp_input;
|
||||
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[0], &tmp_input);
|
||||
if (ret != RET_OK || tmp_input.trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "rPreprocessInputs2SameDim failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
input_a = tmp_input.trt_tensor_;
|
||||
out_format_ = tmp_input.format_;
|
||||
MS_LOG(DEBUG) << "full connect preprocess input a to " << GetTensorFormat(tmp_input);
|
||||
}
|
||||
if (!transpose_b_) {
|
||||
// transpose weight
|
||||
weight = TransposeWeight2D(in_tensors_[1], &weight_ptr_);
|
||||
if (weight.values == nullptr || weight_ptr_ == nullptr) {
|
||||
MS_LOG(ERROR) << "TransposeWeight2D input weight failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
weight = ConvertWeight(in_tensors_[1]);
|
||||
}
|
||||
|
||||
int output_cnt = in_tensors_[kBiasIndex].Shape()[0];
|
||||
|
||||
auto fc_layer = ctx->network()->addFullyConnected(*input_a, output_cnt, weight, bias);
|
||||
if (fc_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add fully connected layer failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
this->layer_ = fc_layer;
|
||||
fc_layer->setName((op_name_ + "_fullyconnected").c_str());
|
||||
nvinfer1::ITensor *out_tensor = fc_layer->getOutput(0);
|
||||
if (out_tensor->getDimensions().nbDims != out_tensors_[0].Shape().size()) {
|
||||
std::vector<int64_t> out_dims(out_tensors_[0].Shape());
|
||||
out_dims[0] = out_tensor->getDimensions().d[0];
|
||||
out_tensor = Reshape(ctx, out_tensor, out_dims);
|
||||
}
|
||||
return out_tensor;
|
||||
}
|
||||
nvinfer1::ITensor *MatMulTensorRT::AddAsOptPlugin(TensorRTContext *ctx) {
|
||||
nvinfer1::ITensor *weight_tensor = nullptr;
|
||||
if (tensorrt_in_tensors_.size() >= INPUT_SIZE2) {
|
||||
weight_tensor = tensorrt_in_tensors_[1].trt_tensor_;
|
||||
} else {
|
||||
weight_tensor = ConvertConstantTensor(ctx, in_tensors_[1], op_name_);
|
||||
}
|
||||
|
||||
auto plugin = std::make_shared<MatmulOptPlugin>(op_name_, transpose_a_, transpose_b_, device_id_);
|
||||
if (plugin == nullptr) {
|
||||
MS_LOG(ERROR) << "create MatmulOptPlugin failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_, weight_tensor};
|
||||
nvinfer1::IPluginV2Layer *matmul_layer = ctx->network()->addPluginV2(inputTensors, INPUT_SIZE2, *plugin);
|
||||
if (matmul_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add matmul opt plugin layer failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
layer_ = matmul_layer;
|
||||
return AddBias(ctx, matmul_layer->getOutput(0));
|
||||
}
|
||||
nvinfer1::ITensor *MatMulTensorRT::AddBias(TensorRTContext *ctx, nvinfer1::ITensor *input_tensor) {
|
||||
nvinfer1::ITensor *out_tensor = input_tensor;
|
||||
if (in_tensors_.size() == kBiasIndex + 1) {
|
||||
nvinfer1::ITensor *bias = nullptr;
|
||||
if (in_tensors_[kBiasIndex].Shape().size() < static_cast<size_t>(out_tensor->getDimensions().nbDims)) {
|
||||
std::vector<int64_t> expect_dims(out_tensors_[0].Shape());
|
||||
expect_dims[0] = out_tensor->getDimensions().d[0];
|
||||
bias = ConvertTensorWithExpandDims(ctx, in_tensors_[kBiasIndex], expect_dims, op_name_);
|
||||
} else if (in_tensors_[kBiasIndex].Shape().size() == static_cast<size_t>(out_tensor->getDimensions().nbDims)) {
|
||||
bias = ConvertConstantTensor(ctx, in_tensors_[kBiasIndex], op_name_);
|
||||
} else {
|
||||
MS_LOG(ERROR) << "input tensor shape is invalid for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
if (bias == nullptr) {
|
||||
MS_LOG(ERROR) << "create constant bias tensor failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
auto bias_layer = ctx->network()->addElementWise(*out_tensor, *bias, nvinfer1::ElementWiseOperation::kSUM);
|
||||
if (bias_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add bias add layer failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
auto bias_layer_name = op_name_ + "_bias";
|
||||
bias_layer->setName(bias_layer_name.c_str());
|
||||
out_tensor = bias_layer->getOutput(0);
|
||||
}
|
||||
return out_tensor;
|
||||
}
|
||||
|
||||
bool MatMulTensorRT::RunOptPlugin() {
|
||||
if (quant_type_ == schema::QuantType_QUANT_NONE &&
|
||||
runtime_->GetRuntimePrecisionMode() == RuntimePrecisionMode::RuntimePrecisionMode_FP32) {
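// the opt plugin is only used for non-quantized FP32 matmul: 2D inputs with dynamic batch, or 3D batched inputs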
|
||||
if (in_tensors_[0].Shape().size() == DIMENSION_2D && in_tensors_[1].Shape().size() == DIMENSION_2D &&
|
||||
in_tensors_[0].Shape()[0] > 1 && tensorrt_in_tensors_[0].trt_tensor_->getDimensions().d[0] == -1) {
|
||||
MS_LOG(INFO) << op_name_ << " uses optimize matmul plugin for 2D dynamic batchsize";
|
||||
return true;
|
||||
} else if (in_tensors_[0].Shape().size() == DIMENSION_3D && in_tensors_[1].Shape().size() == DIMENSION_3D) {
|
||||
// batched matmul using opt
|
||||
MS_LOG(INFO) << op_name_ << " uses optimize matmul plugin for 3D batchsized";
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
bool MatMulTensorRT::RunFullConnect() {
|
||||
if (in_tensors_.size() == INPUT_SIZE3 && in_tensors_[1].Data() != nullptr &&
|
||||
in_tensors_[kBiasIndex].Data() != nullptr && !transpose_a_ && in_tensors_[1].Shape().size() == DIMENSION_2D &&
|
||||
(in_tensors_[0].Shape().size() == DIMENSION_2D || in_tensors_[0].Shape().size() == DIMENSION_4D)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_MatMulFusion, MatMulTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_TENSORRT_H_
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class MatMulTensorRT : public TensorRTOp {
|
||||
public:
|
||||
MatMulTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~MatMulTensorRT() override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
private:
|
||||
int PreprocessMatMulInputs(TensorRTContext *ctx, ITensorHelper *matmul_a, ITensorHelper *matmul_b);
|
||||
|
||||
nvinfer1::ITensor *ProcessWeightTensor(TensorRTContext *ctx);
|
||||
|
||||
nvinfer1::ITensor *AddAsMatmul(TensorRTContext *ctx);
|
||||
|
||||
nvinfer1::ITensor *AddAsFullConnect(TensorRTContext *ctx);
|
||||
|
||||
nvinfer1::ITensor *AddAsOptPlugin(TensorRTContext *ctx);
|
||||
|
||||
nvinfer1::ITensor *AddBias(TensorRTContext *ctx, nvinfer1::ITensor *input_tensor);
|
||||
|
||||
bool RunOptPlugin();
|
||||
bool RunFullConnect();
|
||||
|
||||
bool transpose_a_{false};
|
||||
bool transpose_b_{false};
|
||||
Format out_format_{Format::NHWC};
|
||||
schema::ActivationType activation_{schema::ActivationType::ActivationType_NO_ACTIVATION};
|
||||
void *weight_ptr_{nullptr};
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_TENSORRT_H_
|
|
@@ -0,0 +1,59 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/op/normalize_opt_plugin.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <numeric>
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
#include "src/runtime/delegate/tensorrt/cuda_impl/normalize.cuh"
|
||||
|
||||
namespace mindspore::lite {
|
||||
REGISTER_TENSORRT_PLUGIN(NormalizeOptPluginCreater);
|
||||
template class TensorRTPluginCreater<NormalizeOptPlugin>;
|
||||
template <class T>
|
||||
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
|
||||
template <class T>
|
||||
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
|
||||
|
||||
int NormalizeOptPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
|
||||
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
|
||||
void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
|
||||
auto input = static_cast<const float *>(inputs[0]);
|
||||
auto gamma = static_cast<const float *>(inputs[1]);
|
||||
auto beta = static_cast<const float *>(inputs[2]);
|
||||
auto output = static_cast<float *>(outputs[0]);
|
||||
auto input_dims = inputDesc[0].dims;
|
||||
size_t dim_at_axis = input_dims.d[axis_];
|
||||
int element_cnt = std::accumulate(input_dims.d, input_dims.d + input_dims.nbDims, 1, std::multiplies<int64_t>());
|
||||
Normalize(input, gamma, beta, output, dim_at_axis, epsilion_, element_cnt, stream);
|
||||
  return RET_OK;
}
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *NormalizeOptPlugin::clone() const noexcept {
|
||||
auto *plugin = new NormalizeOptPlugin(*this);
|
||||
plugin->setPluginNamespace(name_space_.c_str());
|
||||
return plugin;
|
||||
}
|
||||
|
||||
size_t NormalizeOptPlugin::getSerializationSize() const noexcept { return sizeof(size_t) + sizeof(float); }
|
||||
|
||||
void NormalizeOptPlugin::serialize(void *buffer) const noexcept {
|
||||
SerializeValue(&buffer, &axis_, sizeof(size_t));
|
||||
SerializeValue(&buffer, &epsilion_, sizeof(float));
|
||||
}
|
||||
} // namespace mindspore::lite
|
|
@@ -0,0 +1,61 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_OPT_PLUGIN_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_OPT_PLUGIN_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr const char *NORMALIZE_OPT_PLUGIN_NAME{"NormalizeOptPlugin"};
|
||||
class NormalizeOptPlugin : public TensorRTPlugin {
|
||||
public:
|
||||
NormalizeOptPlugin(const std::string name, size_t axis, float epsilion, uint32_t device_id)
|
||||
: TensorRTPlugin(name, std::string(NORMALIZE_OPT_PLUGIN_NAME), device_id), axis_(axis), epsilion_(epsilion) {}
|
||||
|
||||
NormalizeOptPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
|
||||
: TensorRTPlugin(std::string(name), std::string(NORMALIZE_OPT_PLUGIN_NAME)) {
|
||||
const nvinfer1::PluginField *fields = fc->fields;
|
||||
axis_ = static_cast<const size_t *>(fields[0].data)[0];
|
||||
epsilion_ = static_cast<const float *>(fields[1].data)[0];
|
||||
}
|
||||
|
||||
NormalizeOptPlugin(const char *name, const void *serialData, size_t serialLength)
|
||||
: TensorRTPlugin(std::string(name), std::string(NORMALIZE_OPT_PLUGIN_NAME)) {
|
||||
DeserializeValue(&serialData, &serialLength, &axis_, sizeof(size_t));
|
||||
DeserializeValue(&serialData, &serialLength, &epsilion_, sizeof(float));
|
||||
}
|
||||
|
||||
NormalizeOptPlugin() = delete;
|
||||
|
||||
// IPluginV2DynamicExt Methods
|
||||
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
|
||||
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
|
||||
size_t getSerializationSize() const noexcept override;
|
||||
void serialize(void *buffer) const noexcept override;
|
||||
|
||||
private:
|
||||
size_t axis_{0};
|
||||
float epsilion_{0.0f};
|
||||
};
|
||||
class NormalizeOptPluginCreater : public TensorRTPluginCreater<NormalizeOptPlugin> {
|
||||
public:
|
||||
NormalizeOptPluginCreater() : TensorRTPluginCreater(std::string(NORMALIZE_OPT_PLUGIN_NAME)) {}
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_OPT_PLUGIN_H_
|
|
@@ -0,0 +1,178 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/op/normalize_tensorrt.h"
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include "src/runtime/delegate/tensorrt/op/normalize_opt_plugin.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int NormalizeTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != INPUT_SIZE3 && in_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != INPUT_SIZE3 && out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto norm_op = primitive->value_as_LayerNormFusion();
|
||||
CHECK_NULL_RETURN(norm_op);
|
||||
int begin_norm_axis = norm_op->begin_norm_axis();
|
||||
begin_norm_axis = begin_norm_axis >= 0 ? begin_norm_axis : in_tensors[0].Shape().size() + begin_norm_axis;
|
||||
int begin_params_axis = norm_op->begin_params_axis();
|
||||
begin_params_axis = begin_params_axis >= 0 ? begin_params_axis : in_tensors[0].Shape().size() + begin_params_axis;
|
||||
if (begin_params_axis != begin_norm_axis || begin_params_axis != in_tensors[0].Shape().size() - 1) {
|
||||
MS_LOG(ERROR) << "only support normalize on last one dim, being_norm_axis is " << being_norm_axis << " for "
|
||||
<< op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
axis_ = begin_params_axis;
|
||||
epsilon_ = norm_op->epsilon();
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int NormalizeTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
CHECK_NULL_RETURN(ctx->network());
|
||||
int ret = PreprocessInputs(ctx);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "preprocess input failed for " << op_name_;
|
||||
return ret;
|
||||
}
|
||||
return RunOptPlugin() ? RunAsOptPlugin(ctx) : RunAsTrtOps(ctx);
|
||||
}
|
||||
|
||||
int NormalizeTensorRT::PreprocessInputs(TensorRTContext *ctx) {
|
||||
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[0], &norm_input_);
|
||||
if (ret != RET_OK || norm_input_.trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "PreprocessInputs2SameDim norm_input failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors_.size() == BETA_INDEX + 1) {
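// convert gamma and beta to constant tensors expanded to the input shape for the later scale and shift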
|
||||
gamma_ = ConvertTensorWithExpandDims(ctx, in_tensors_[1], in_tensors_[0].Shape(), op_name_ + in_tensors_[1].Name());
|
||||
CHECK_NULL_RETURN(gamma_);
|
||||
beta_ = ConvertTensorWithExpandDims(ctx, in_tensors_[BETA_INDEX], in_tensors_[0].Shape(),
|
||||
op_name_ + in_tensors_[BETA_INDEX].Name());
|
||||
CHECK_NULL_RETURN(beta_);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int NormalizeTensorRT::RunAsOptPlugin(TensorRTContext *ctx) {
|
||||
auto plugin = std::make_shared<NormalizeOptPlugin>(op_name_, axis_, epsilon_, device_id_);
|
||||
if (plugin == nullptr) {
|
||||
MS_LOG(ERROR) << "create NormalizeOptPlugin failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::ITensor *inputTensors[] = {norm_input_.trt_tensor_, gamma_, beta_};
|
||||
nvinfer1::IPluginV2Layer *norm_layer = ctx->network()->addPluginV2(inputTensors, INPUT_SIZE3, *plugin);
|
||||
if (norm_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add norm opt plugin layer failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
layer_ = norm_layer;
|
||||
layer_->setName(op_name_.c_str());
|
||||
AddInnerOutTensors(ITensorHelper{norm_layer->getOutput(0), norm_input_.format_, norm_input_.same_format_});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int NormalizeTensorRT::RunAsTrtOps(TensorRTContext *ctx) {
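// compose layer norm from TensorRT primitives: y = gamma * (x - mean) / sqrt(var + epsilon) + beta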
|
||||
size_t axis = 1u << axis_;
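// TensorRT reduce layers take the reduction axes as a bitmask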
|
||||
// first output, add later
|
||||
AddInnerOutTensors(ITensorHelper{nullptr, norm_input_.format_, norm_input_.same_format_});
|
||||
|
||||
// mean
|
||||
auto mean =
|
||||
ctx->network()->addReduce(*(norm_input_.trt_tensor_), nvinfer1::ReduceOperation::kAVG, axis, true)->getOutput(0);
|
||||
CHECK_NULL_RETURN(mean);
|
||||
if (out_tensors_.size() == INPUT_SIZE3) {
|
||||
AddInnerOutTensors(ITensorHelper{mean, norm_input_.format_, norm_input_.same_format_});
|
||||
}
|
||||
// x - mean
|
||||
auto sub_mean = ctx->network()
|
||||
->addElementWise(*(norm_input_.trt_tensor_), *mean, nvinfer1::ElementWiseOperation::kSUB)
|
||||
->getOutput(0);
|
||||
CHECK_NULL_RETURN(sub_mean);
|
||||
// (x - mean)^2
|
||||
auto const_two =
|
||||
ConvertScalarToITensor(ctx, in_tensors_[0].Shape().size(), &two_, DataType::kNumberTypeFloat32, op_name_ + "_two");
|
||||
CHECK_NULL_RETURN(const_two);
|
||||
auto pow = ctx->network()->addElementWise(*sub_mean, *const_two, nvinfer1::ElementWiseOperation::kPOW)->getOutput(0);
|
||||
CHECK_NULL_RETURN(pow);
|
||||
// mean of (x - mean)^2
|
||||
auto var = ctx->network()->addReduce(*pow, nvinfer1::ReduceOperation::kAVG, axis, true)->getOutput(0);
|
||||
CHECK_NULL_RETURN(var);
|
||||
if (out_tensors_.size() == INPUT_SIZE3) {
|
||||
AddInnerOutTensors(ITensorHelper{var, norm_input_.format_, norm_input_.same_format_});
|
||||
}
|
||||
|
||||
// var + min epsilon
|
||||
auto const_epsilon = ConvertScalarToITensor(ctx, in_tensors_[0].Shape().size(), &epsilon_,
|
||||
DataType::kNumberTypeFloat32, op_name_ + "_epsilion");
|
||||
CHECK_NULL_RETURN(const_epsilon);
|
||||
auto var_epsilon =
|
||||
ctx->network()->addElementWise(*var, *const_epsilon, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
|
||||
CHECK_NULL_RETURN(var_epsilon);
|
||||
|
||||
// standard deviation
|
||||
auto std_dev = ctx->network()->addUnary(*var_epsilon, nvinfer1::UnaryOperation::kSQRT)->getOutput(0);
|
||||
CHECK_NULL_RETURN(std_dev);
|
||||
|
||||
// sub_mean / std_dev
|
||||
auto norm_layer = ctx->network()->addElementWise(*sub_mean, *std_dev, nvinfer1::ElementWiseOperation::kDIV);
|
||||
CHECK_NULL_RETURN(norm_layer);
|
||||
this->layer_ = norm_layer;
|
||||
auto norm = norm_layer->getOutput(0);
|
||||
CHECK_NULL_RETURN(norm);
|
||||
|
||||
// scale with gamma and beta
|
||||
if (gamma_ != nullptr && beta_ != nullptr) {
|
||||
auto gamma_out =
|
||||
ctx->network()->addElementWise(*norm, *gamma_, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0);
|
||||
CHECK_NULL_RETURN(gamma_out);
|
||||
auto beta_out =
|
||||
ctx->network()->addElementWise(*gamma_out, *beta_, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
|
||||
CHECK_NULL_RETURN(beta_out);
|
||||
tensorrt_out_tensors_[0].trt_tensor_ = beta_out;
|
||||
} else {
|
||||
tensorrt_out_tensors_[0].trt_tensor_ = norm;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
bool NormalizeTensorRT::RunOptPlugin() {
|
||||
if (out_tensors_.size() == 1 && in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
|
||||
in_tensors_[0].Shape()[axis_] < GET_THREADS) {
|
||||
// insufficient shared memory
|
||||
int dim_sum = std::accumulate(in_tensors_[0].Shape().begin(), in_tensors_[0].Shape().begin() + axis_, 1,
|
||||
std::multiplies<int>());
|
||||
const int kSharedMemoryThreshold = 2048;
|
||||
if (dim_sum > kSharedMemoryThreshold) {
|
||||
return false;
|
||||
}
|
||||
MS_LOG(INFO) << op_name_ << " use opt plugin";
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_LayerNormFusion, NormalizeTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@@ -0,0 +1,56 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr int BETA_INDEX = 2;
|
||||
|
||||
class NormalizeTensorRT : public TensorRTOp {
|
||||
public:
|
||||
NormalizeTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~NormalizeTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
|
||||
private:
|
||||
int PreprocessInputs(TensorRTContext *ctx);
|
||||
|
||||
int RunAsOptPlugin(TensorRTContext *ctx);
|
||||
|
||||
int RunAsTrtOps(TensorRTContext *ctx);
|
||||
|
||||
bool RunOptPlugin();
|
||||
|
||||
ITensorHelper norm_input_;
|
||||
nvinfer1::ITensor *gamma_{nullptr};
|
||||
nvinfer1::ITensor *beta_{nullptr};
|
||||
size_t axis_{0};
|
||||
const float two_{2.0f};
|
||||
float epsilon_{0.0f};
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_TENSORRT_H_
|
|
@@ -0,0 +1,140 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <numeric>
|
||||
#include <functional>
|
||||
#include "src/runtime/delegate/tensorrt/op/pad_tensorrt.h"
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int PadTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
|
||||
const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors_[1].Data() == nullptr) {
|
||||
MS_LOG(ERROR) << "invalid pad tensor for: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto pad_primitive = this->GetPrimitive()->value_as_PadFusion();
|
||||
if (pad_primitive == nullptr) {
|
||||
MS_LOG(ERROR) << "convert PadFusion failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
schema::PaddingMode padding_mode = pad_primitive->padding_mode();
|
||||
if (padding_mode != schema::PaddingMode::PaddingMode_CONSTANT) {
|
||||
MS_LOG(ERROR) << "Unsupported padding mode: " << schema::PaddingMode(padding_mode) << ", for op: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
|
||||
return RET_ERROR;
|
||||
}
|
||||
constant_value_ = pad_primitive->constant_value();
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int PadTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
mindspore::MSTensor &pad_tensor = in_tensors_[1];
|
||||
int element_cnt = std::accumulate(pad_tensor.Shape().begin(), pad_tensor.Shape().end(), 1, std::multiplies<int>());
|
||||
if (element_cnt != tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims * INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "pad tensor cnt is invalid. cnt: " << element_cnt
|
||||
<< ", input tensor dims cnt: " << tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *pad_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
MS_LOG(DEBUG) << "before transpose "
|
||||
<< GetTensorFormat(pad_input, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_);
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
|
||||
// transpose: NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
this->transpose_layer_ = transpose_layer_in;
|
||||
pad_input = transpose_layer_in->getOutput(0);
|
||||
MS_LOG(DEBUG) << "after transpose " << GetTensorFormat(pad_input, Format::NCHW, false);
|
||||
}
|
||||
|
||||
// trt 6 only support 2D padding
|
||||
const int *padding_data = reinterpret_cast<const int *>(in_tensors_[1].Data().get());
|
||||
MS_ASSERT(padding_data);
|
||||
nvinfer1::IPaddingLayer *padding_layer = nullptr;
|
||||
if (element_cnt == index_NHWC_ * INPUT_SIZE2) {
|
||||
// only support pad at HW index
|
||||
int h_pre;
|
||||
int h_post;
|
||||
int w_pre;
|
||||
int w_post;
|
||||
if (SameDims(pad_input->getDimensions(), in_tensors_[0].Shape())) {
|
||||
// NCHW: 0: N_pre, 1: N_post, 2: C_pre, 3: C_post, 4: H_pre, 5: H_post, 6: W_pre, 7: W_post
|
||||
if (*padding_data != 0 || *(padding_data + 1) != 0 || *(padding_data + 2) != 0 || *(padding_data + 3) != 0) {
|
||||
MS_LOG(WARNING) << "tensorrt padding only support pad at HW index, unsupported padding value of: " << op_name_;
|
||||
}
|
||||
h_pre = 4;
|
||||
h_post = 5;
|
||||
w_pre = 6;
|
||||
w_post = 7;
|
||||
} else {
|
||||
// NHWC: 0: N_pre, 1: N_post, 2: H_pre, 3: H_post, 4: W_pre, 5: W_post, 6: C_pre, 7: C_post
|
||||
if (*padding_data != 0 || *(padding_data + 1) != 0 || *(padding_data + 6) != 0 || *(padding_data + 7) != 0) {
|
||||
MS_LOG(WARNING) << "tensorrt padding only support pad at HW index, unsupported padding value of: " << op_name_;
|
||||
}
|
||||
h_pre = 2;
|
||||
h_post = 3;
|
||||
w_pre = 4;
|
||||
w_post = 5;
|
||||
}
|
||||
nvinfer1::DimsHW prePadding{*(padding_data + h_pre), *(padding_data + w_pre)};
|
||||
nvinfer1::DimsHW postPadding{*(padding_data + h_post), *(padding_data + w_post)};
|
||||
MS_LOG(DEBUG) << op_name_ << " prePadding: " << prePadding.d[0] << ", " << prePadding.d[1]
|
||||
<< "; postPadding: " << postPadding.d[0] << ", " << postPadding.d[1];
|
||||
|
||||
padding_layer = ctx->network()->addPadding(*pad_input, prePadding, postPadding);
|
||||
} else {
|
||||
MS_LOG(ERROR) << "need check for pad_tensor dims: " << op_name_
|
||||
<< ", pad_tensor ElementNum: " << pad_tensor.ElementNum();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (padding_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add padding layer failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
this->layer_ = padding_layer;
|
||||
padding_layer->setName(op_name_.c_str());
|
||||
padding_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
|
||||
bool same_format = SameDims(padding_layer->getOutput(0)->getDimensions(), out_tensors_[0].Shape()) &&
|
||||
SameDims(tensorrt_in_tensors_[0].trt_tensor_->getDimensions(), in_tensors_[0].Shape());
|
||||
this->AddInnerOutTensors(ITensorHelper{padding_layer->getOutput(0), Format::NCHW, same_format});
|
||||
MS_LOG(DEBUG) << "after transpose " << GetTensorFormat(tensorrt_out_tensors_[0]);
|
||||
return RET_OK;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_PadFusion, PadTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@@ -0,0 +1,42 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_PAD_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_PAD_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class PadTensorRT : public TensorRTOp {
|
||||
public:
|
||||
PadTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~PadTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
|
||||
private:
|
||||
const int index_NHWC_ = 4;
|
||||
float constant_value_ = 0.0f;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_PAD_TENSORRT_H_
|
|
@ -0,0 +1,220 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/op/pool_tensorrt.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int PoolTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
|
||||
const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int PoolTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
if (tensorrt_in_tensors_.size() != 1) {
|
||||
MS_LOG(ERROR) << "invalid input tensor size: " << tensorrt_in_tensors_.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
MS_LOG(DEBUG) << "before transpose " << GetTensorFormat(tensorrt_in_tensors_[0]);
|
||||
int ret = ParseParams();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ParseParams failed for : " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *pool_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
|
||||
// transpose: NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
this->transpose_layer_ = transpose_layer_in;
|
||||
pool_input = transpose_layer_in->getOutput(0);
|
||||
}
|
||||
|
||||
// pooling layer
|
||||
nvinfer1::Dims windowSize = lite::ConvertCudaDims(kernel_size_);
|
||||
if (windowSize.nbDims == -1) {
|
||||
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::IPoolingLayer *pooling_layer = ctx->network()->addPoolingNd(*pool_input, pooling_type_, windowSize);
|
||||
if (pooling_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addPoolingNd failed for TensorRT.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
AddParams(pooling_layer);
|
||||
pooling_layer->setName(op_name_.c_str());
|
||||
this->layer_ = pooling_layer;
|
||||
|
||||
// add activation
|
||||
nvinfer1::ILayer *activation_layer = nullptr;
|
||||
if (activation_type_ == schema::ActivationType::ActivationType_NO_ACTIVATION) {
|
||||
activation_layer = pooling_layer;
|
||||
} else {
|
||||
activation_layer =
|
||||
ActivationTensorRT::AddActivation(ctx, activation_type_, 0, 0, 0, pooling_layer->getOutput(0), device_id_);
|
||||
if (activation_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addActivation for pool failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
activation_layer->setName((op_name_ + "_activation").c_str());
|
||||
}
|
||||
nvinfer1::ITensor *out_trt_tensor = activation_layer->getOutput(0);
|
||||
out_trt_tensor->setName((op_name_ + "_output").c_str());
|
||||
this->AddInnerOutTensors(ITensorHelper{out_trt_tensor, Format::NCHW, false});
|
||||
MS_LOG(DEBUG) << "output " << GetTensorFormat(tensorrt_out_tensors_[0]);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int PoolTensorRT::ParseParams() {
|
||||
int in_h = in_tensors_[0].Shape()[kNHWC_H];
|
||||
int in_w = in_tensors_[0].Shape()[kNHWC_W];
|
||||
int out_h = out_tensors_[0].Shape()[kNHWC_H];
|
||||
int out_w = out_tensors_[0].Shape()[kNHWC_W];
|
||||
int kernel_h;
|
||||
int kernel_w;
|
||||
switch (type_) {
|
||||
case (schema::PrimitiveType_AvgPoolFusion): {
|
||||
const schema::AvgPoolFusion *pool_primitive = this->GetPrimitive()->value_as_AvgPoolFusion();
|
||||
if (pool_primitive == nullptr) {
|
||||
MS_LOG(ERROR) << "convert PoolFusion failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
pooling_type_ = nvinfer1::PoolingType::kAVERAGE;
|
||||
|
||||
auto stride = pool_primitive->strides();
|
||||
if (stride == nullptr) {
|
||||
MS_LOG(ERROR) << "get stride failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
stride_ = std::vector<int64_t>(stride->begin(), stride->end());
|
||||
kernel_h = in_h - (out_h - 1) * stride_[0];
|
||||
kernel_w = in_w - (out_w - 1) * stride_[1];
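// derivation note (not in the original): this inverts the pooling output-size formula
// out = (in - kernel) / stride + 1, so kernel = in - (out - 1) * stride;
// e.g. in_h = 224, out_h = 112, stride = 2 gives kernel_h = 224 - 111 * 2 = 2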
|
||||
auto kernel_size = pool_primitive->kernel_size();
|
||||
if (kernel_size == nullptr) {
|
||||
kernel_size_.push_back(kernel_h);
|
||||
kernel_size_.push_back(kernel_w);
|
||||
MS_LOG(WARNING) << op_name_ << "don't has kernel size, calculate kernel size on ms tensor, kernel_h is "
|
||||
<< kernel_h << ", kernel_w is " << kernel_w;
|
||||
} else {
|
||||
kernel_size_ = std::vector<int64_t>(kernel_size->begin(), kernel_size->end());
|
||||
}
|
||||
auto padding = pool_primitive->pad();
|
||||
if (padding != nullptr && padding->size() != DIMENSION_4D) {
|
||||
MS_LOG(ERROR) << op_name_ << "has invalid pad dims: " << padding->size();
|
||||
return RET_ERROR;
|
||||
} else if (padding == nullptr || padding->size() == 0) {
|
||||
padding_ = std::vector<int64_t>(DIMENSION_4D, 0);
|
||||
} else {
|
||||
padding_ = std::vector<int64_t>(padding->begin(), padding->end());
|
||||
}
|
||||
|
||||
pad_mode_ = pool_primitive->pad_mode();
|
||||
activation_type_ = pool_primitive->activation_type();
|
||||
break;
|
||||
}
|
||||
case (schema::PrimitiveType_MaxPoolFusion): {
|
||||
const schema::MaxPoolFusion *pool_primitive = this->GetPrimitive()->value_as_MaxPoolFusion();
|
||||
if (pool_primitive == nullptr) {
|
||||
MS_LOG(ERROR) << "convert PoolFusion failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
pooling_type_ = nvinfer1::PoolingType::kMAX;
|
||||
|
||||
auto kernel_size = pool_primitive->kernel_size();
|
||||
if (kernel_size == nullptr) {
|
||||
MS_LOG(ERROR) << "get kernel size failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
kernel_size_ = std::vector<int64_t>(kernel_size->begin(), kernel_size->end());
|
||||
|
||||
auto stride = pool_primitive->strides();
|
||||
if (stride == nullptr) {
|
||||
MS_LOG(ERROR) << "get stride failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
stride_ = std::vector<int64_t>(stride->begin(), stride->end());
|
||||
kernel_h = in_h - (out_h - 1) * stride_[0];
|
||||
kernel_w = in_w - (out_w - 1) * stride_[1];
|
||||
auto padding = pool_primitive->pad();
|
||||
if (padding == nullptr) {
|
||||
MS_LOG(INFO) << "get padding is null, set to default 0: " << op_name_;
|
||||
padding_ = {0, 0, 0, 0};
|
||||
} else {
|
||||
padding_ = std::vector<int64_t>(padding->begin(), padding->end());
|
||||
}
|
||||
|
||||
pad_mode_ = pool_primitive->pad_mode();
|
||||
activation_type_ = pool_primitive->activation_type();
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
MS_LOG(ERROR) << "unsupported primitive type of " << type_ << " for node: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
// some models have a kernel size larger than the input H/W, correct it
|
||||
if (kernel_size_[0] > in_h || kernel_size_[1] > in_w) {
|
||||
MS_LOG(WARNING) << op_name_ << " kernel size is larger than input size";
|
||||
kernel_size_[0] = kernel_size_[0] > kernel_h ? kernel_h : kernel_size_[0];
|
||||
kernel_size_[1] = kernel_size_[1] > kernel_w ? kernel_w : kernel_size_[1];
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
void PoolTensorRT::AddParams(nvinfer1::IPoolingLayer *pooling_layer) {
|
||||
nvinfer1::Dims stride_dims = ConvertCudaDims(stride_);
|
||||
if (stride_dims.nbDims == -1) {
|
||||
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
|
||||
return;
|
||||
}
|
||||
pooling_layer->setStrideNd(stride_dims);
|
||||
if (pad_mode_ == schema::PadMode::PadMode_SAME) {
|
||||
pooling_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
|
||||
} else {
|
||||
nvinfer1::Dims dims{};
|
||||
dims.nbDims = DIMENSION_2D;
|
||||
dims.d[0] = padding_[0];
|
||||
dims.d[1] = padding_[DIMENSION_2D];
|
||||
pooling_layer->setPaddingNd(dims);
|
||||
}
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_AvgPoolFusion, PoolTensorRT)
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_MaxPoolFusion, PoolTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,55 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_POOL_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_POOL_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class PoolTensorRT : public TensorRTOp {
|
||||
public:
|
||||
PoolTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~PoolTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
|
||||
private:
|
||||
int ParseParams();
|
||||
|
||||
void AddParams(nvinfer1::IPoolingLayer *pooling_layer);
|
||||
|
||||
std::vector<int64_t> kernel_size_;
|
||||
|
||||
std::vector<int64_t> stride_;
|
||||
|
||||
std::vector<int64_t> padding_;
|
||||
|
||||
nvinfer1::PoolingType pooling_type_;
|
||||
|
||||
schema::PadMode pad_mode_;
|
||||
|
||||
schema::ActivationType activation_type_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_POOL_TENSORRT_H_
|
|
@ -0,0 +1,79 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <numeric>
|
||||
#include "src/runtime/delegate/tensorrt/op/prelu_tensorrt.h"
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int PReluTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
|
||||
const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size() << " : " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size() << " : " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int PReluTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
ITensorHelper prelu_input;
|
||||
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[0], &prelu_input);
|
||||
if (ret != RET_OK || prelu_input.trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "PreprocessInputs2SameDim input tensor failed for " << op_name_;
|
||||
return ret;
|
||||
}
|
||||
int input_nbdims = prelu_input.trt_tensor_->getDimensions().nbDims;
|
||||
int slope_nbdims = in_tensors_[1].Shape().size();
|
||||
auto slope = tensorrt_in_tensors_[1].trt_tensor_;
|
||||
if (input_nbdims != slope_nbdims) {
|
||||
slope = ConvertTensorWithExpandDims(ctx, in_tensors_[1], in_tensors_[0].Shape(), op_name_ + "_slope");
|
||||
tensorrt_in_tensors_[1].trt_tensor_ = slope;
|
||||
}
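// descriptive note (not in the original): when the slope rank differs from the input rank, the
// slope is expanded via ConvertTensorWithExpandDims so addParametricReLU can broadcast it,
// e.g. a per-channel slope of shape (C) would presumably become (1, 1, 1, C) for a 4D input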
|
||||
if (slope == nullptr) {
|
||||
MS_LOG(ERROR) << "add const input tensor failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
ITensorHelper slope_helper;
|
||||
ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[1], &slope_helper);
|
||||
if (ret != RET_OK || slope_helper.trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "PreprocessInputs2SameDim slope tensor failed for " << op_name_;
|
||||
return ret;
|
||||
}
|
||||
|
||||
auto *prelu_layer = ctx->network()->addParametricReLU(*prelu_input.trt_tensor_, *slope_helper.trt_tensor_);
|
||||
if (prelu_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addParameticReLU failed for TensorRT : " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *out_tensor = prelu_layer->getOutput(0);
|
||||
out_tensor->setName((op_name_ + "_0").c_str());
|
||||
this->AddInnerOutTensors(ITensorHelper{out_tensor, prelu_input.format_, prelu_input.same_format_});
|
||||
this->layer_ = prelu_layer;
|
||||
return RET_OK;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_PReLUFusion, PReluTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,39 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_PRELU_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_PRELU_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class PReluTensorRT : public TensorRTOp {
|
||||
public:
|
||||
PReluTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~PReluTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_PRELU_TENSORRT_H_
|
|
@ -0,0 +1,139 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <valarray>
|
||||
#include "src/runtime/delegate/tensorrt/op/reduce_tensorrt.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int ReduceTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ReduceTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
if (ctx == nullptr || ctx->network() == nullptr) {
|
||||
MS_LOG(ERROR) << "context or network is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto reduce_op = op_primitive_->value_as_ReduceFusion();
|
||||
if (reduce_op == nullptr) {
|
||||
MS_LOG(ERROR) << "convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
bool keep_dims = reduce_op->keep_dims();
|
||||
out_format_ = tensorrt_in_tensors_[0].format_;
|
||||
nvinfer1::ITensor *reduce_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
MS_LOG(DEBUG) << "origin input " << GetTensorFormat(tensorrt_in_tensors_[0]);
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
!SameDims(tensorrt_in_tensors_[0].trt_tensor_->getDimensions(), in_tensors_[0].Shape())) {
|
||||
if (tensorrt_in_tensors_[0].format_ == Format::NCHW) {
|
||||
// NCHW->NHWC
|
||||
nvinfer1::IShuffleLayer *transpose_layer = NCHW2NHWC(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "create transpose layer failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer->setName((op_name_ + "_transpose_in").c_str());
|
||||
reduce_input = transpose_layer->getOutput(0);
|
||||
out_format_ = Format::NHWC;
|
||||
this->transpose_layer_ = transpose_layer;
|
||||
} else if (tensorrt_in_tensors_[0].format_ == Format::NHWC) {
|
||||
// NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "create transpose layer failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer->setName((op_name_ + "_transpose_in").c_str());
|
||||
reduce_input = transpose_layer->getOutput(0);
|
||||
out_format_ = Format::NCHW;
|
||||
this->transpose_layer_ = transpose_layer;
|
||||
} else {
|
||||
MS_LOG(WARNING) << "input tensor format needs check: " << op_name_;
|
||||
}
|
||||
}
|
||||
MS_LOG(DEBUG) << "after transpose input " << GetTensorFormat(reduce_input, out_format_, true);
|
||||
if (reduce_op->mode() == schema::ReduceMode::ReduceMode_ReduceL2) {
|
||||
// x^2
|
||||
auto *pow2_layer =
|
||||
ctx->network()->addElementWise(*reduce_input, *reduce_input, nvinfer1::ElementWiseOperation::kPROD);
|
||||
CHECK_NULL_RETURN(pow2_layer);
|
||||
pow2_layer->setName((op_name_ + "_pow2").c_str());
|
||||
|
||||
reduce_input = pow2_layer->getOutput(0);
|
||||
CHECK_NULL_RETURN(reduce_input);
|
||||
}
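// descriptive note (not in the original): ReduceL2 is decomposed as sqrt(ReduceSum(x * x));
// the element-wise square above feeds the reduce layer, and a kSQRT unary layer is appended
// to the reduce output below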
|
||||
|
||||
uint32_t reduceAxis = GetAxis();
|
||||
auto reduce_operation_opt = TryConvertTRTReduceMode(reduce_op->mode());
|
||||
if (!reduce_operation_opt) {
|
||||
MS_LOG(WARNING) << "invalid reduce for TensorRT, need check: " << static_cast<int>(reduce_op->mode());
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::IReduceLayer *layer =
|
||||
ctx->network()->addReduce(*reduce_input, reduce_operation_opt.value(), reduceAxis, keep_dims);
|
||||
CHECK_NULL_RETURN(layer);
|
||||
layer->setName(op_name_.c_str());
|
||||
this->layer_ = layer;
|
||||
|
||||
nvinfer1::ITensor *out_tensor = layer->getOutput(0);
|
||||
CHECK_NULL_RETURN(out_tensor);
|
||||
|
||||
if (reduce_op->mode() == schema::ReduceMode::ReduceMode_ReduceL2) {
|
||||
auto sqrt_layer = ctx->network()->addUnary(*out_tensor, nvinfer1::UnaryOperation::kSQRT);
|
||||
CHECK_NULL_RETURN(sqrt_layer);
|
||||
sqrt_layer->setName((op_name_ + "_sqrt").c_str());
|
||||
out_tensor = sqrt_layer->getOutput(0);
|
||||
}
|
||||
out_tensor->setName((op_name_ + "_output").c_str());
|
||||
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format_, true});
|
||||
MS_LOG(DEBUG) << "output " << GetTensorFormat(tensorrt_out_tensors_[0]);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
uint32_t ReduceTensorRT::GetAxis() {
|
||||
// axis
|
||||
uint32_t reduceAxis = 0;
|
||||
mindspore::MSTensor axis_tensor = this->in_tensors_[1];
|
||||
if (axis_tensor.Data() == nullptr) {
|
||||
MS_LOG(ERROR) << "invalid axis_tensor";
|
||||
return reduceAxis;
|
||||
}
|
||||
if (axis_tensor.DataType() != DataType::kNumberTypeInt32) {
|
||||
MS_LOG(WARNING) << "not int data type";
|
||||
}
|
||||
int *axis_data = reinterpret_cast<int *>(axis_tensor.MutableData());
|
||||
CHECK_NULL_RETURN(axis_data);
|
||||
for (int i = 0; i < axis_tensor.ElementNum(); i++) {
|
||||
int format_axis_data = (*axis_data == -1) ? in_tensors_[0].Shape().size() - 1 : *axis_data;
|
||||
MS_LOG(DEBUG) << op_name_ << " reduceAxis at index : " << *axis_data;
|
||||
reduceAxis |= 1u << format_axis_data;
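// note (not in the original): TensorRT takes the reduce axes as a bitmask,
// e.g. reducing axes {1, 2} of a 4D tensor yields reduceAxis = 0b0110 = 6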
|
||||
axis_data++;
|
||||
}
|
||||
return reduceAxis;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_ReduceFusion, ReduceTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,44 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCE_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCE_TENSORRT_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class ReduceTensorRT : public TensorRTOp {
|
||||
public:
|
||||
ReduceTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~ReduceTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
|
||||
private:
|
||||
uint32_t GetAxis();
|
||||
Format out_format_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCE_TENSORRT_H_
|
|
@ -0,0 +1,126 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/delegate/tensorrt/op/reducescatter_tensorrt.h"
|
||||
#include <numeric>
|
||||
#include <thread>
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
REGISTER_TENSORRT_PLUGIN(ReduceScatterPluginCreater);
|
||||
template class TensorRTPluginCreater<ReduceScatterPlugin>;
|
||||
template <class T>
|
||||
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
|
||||
template <class T>
|
||||
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
|
||||
|
||||
int ReduceScatterTensorRT::IsSupport(const schema::Primitive *primitive,
|
||||
const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
#ifndef LITE_CUDA_DISTRIBUTION
|
||||
MS_LOG(ERROR)
|
||||
<< "Unsupported package for gpu distribution feature, please recompile with MS_ENABLE_CUDA_DISTRIBUTION set to on.";
|
||||
return RET_ERROR;
|
||||
#else
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
dynamic_shape_params_.support_hw_dynamic_ = false;
|
||||
return RET_OK;
|
||||
#endif
|
||||
}
|
||||
|
||||
int ReduceScatterTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_};
|
||||
auto reduce_op = op_primitive_->value_as_ReduceScatter();
|
||||
if (reduce_op == nullptr) {
|
||||
MS_LOG(ERROR) << "convert failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto reduce_mode = reduce_op->mode();
|
||||
auto rank = GetGPUGroupSize();
|
||||
auto plugin = std::make_shared<ReduceScatterPlugin>(op_name_, reduce_mode, rank, device_id_);
|
||||
MS_LOG(INFO) << op_name_ << " group size: " << rank << ", rank id: " << GetRankID();
|
||||
nvinfer1::IPluginV2Layer *reduce_scatter_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
|
||||
if (reduce_scatter_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "create ReduceScatter layer failed for: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::ITensor *reduce_scatter_out = reduce_scatter_layer->getOutput(0);
|
||||
reduce_scatter_layer->setName(op_name_.c_str());
|
||||
reduce_scatter_out->setName((op_name_ + "_output").c_str());
|
||||
this->layer_ = reduce_scatter_layer;
|
||||
this->AddInnerOutTensors(
|
||||
ITensorHelper{reduce_scatter_out, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// ReduceScatterPlugin
|
||||
int ReduceScatterPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
|
||||
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
|
||||
void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
|
||||
MS_LOG(INFO) << "ReduceScatter run at rank id: " << GetRankID() << " stream: " << stream;
|
||||
nvinfer1::Dims output_dims = outputDesc[0].dims;
|
||||
int receive_element_cnt =
|
||||
std::accumulate(output_dims.d, output_dims.d + output_dims.nbDims, 1, std::multiplies<int64_t>());
|
||||
const void *input = inputs[0];
|
||||
void *output = outputs[0];
|
||||
auto data_type = inputDesc->type;
|
||||
auto ret = DistributionCollective::instance().ReduceScatterWrapper(input, output, receive_element_cnt, data_type,
|
||||
red_mode_, stream, NCCL_WORLD_GROUP);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ReduceScatter nccl run failed for " << layer_name_;
|
||||
return ret;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *ReduceScatterPlugin::clone() const noexcept {
|
||||
auto *plugin = new ReduceScatterPlugin(*this);
|
||||
plugin->setPluginNamespace(name_space_.c_str());
|
||||
return plugin;
|
||||
}
|
||||
|
||||
nvinfer1::DimsExprs ReduceScatterPlugin::getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs,
|
||||
int nbInputs,
|
||||
nvinfer1::IExprBuilder &exprBuilder) noexcept {
|
||||
nvinfer1::DimsExprs out_dims{};
|
||||
out_dims.nbDims = inputs->nbDims;
|
||||
auto rank_dim = exprBuilder.constant(rank_);
|
||||
out_dims.d[0] = exprBuilder.operation(nvinfer1::DimensionOperation::kCEIL_DIV, *inputs->d[0], *rank_dim);
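// note (not in the original): ReduceScatter splits the reduced result across ranks along dim 0,
// so the first output dim is ceil(input_dim0 / rank_), e.g. input (8, 128) with rank_ = 4
// gives output (2, 128)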
|
||||
for (int i = 1; i < inputs->nbDims; i++) {
|
||||
out_dims.d[i] = inputs->d[i];
|
||||
}
|
||||
return out_dims;
|
||||
}
|
||||
|
||||
size_t ReduceScatterPlugin::getSerializationSize() const noexcept { return sizeof(schema::ReduceMode); }
|
||||
|
||||
void ReduceScatterPlugin::serialize(void *buffer) const noexcept {
|
||||
SerializeValue(&buffer, &red_mode_, sizeof(schema::ReduceMode));
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_ReduceScatter, ReduceScatterTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,83 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCESCATTER_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCESCATTER_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "src/runtime/delegate/tensorrt/distribution/distribution_collective.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr char *REDUCESCATTER_PLUGIN_NAME{"ReduceScatterPlugin"};
|
||||
class ReduceScatterTensorRT : public TensorRTOp {
|
||||
public:
|
||||
ReduceScatterTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~ReduceScatterTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
};
|
||||
|
||||
class ReduceScatterPlugin : public TensorRTPlugin {
|
||||
public:
|
||||
ReduceScatterPlugin(const std::string name, schema::ReduceMode red_mode, int rank, uint32_t device_id)
|
||||
: TensorRTPlugin(name, std::string(REDUCESCATTER_PLUGIN_NAME), device_id), red_mode_(red_mode), rank_(rank) {}
|
||||
|
||||
ReduceScatterPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
|
||||
: TensorRTPlugin(std::string(name), std::string(REDUCESCATTER_PLUGIN_NAME)) {
|
||||
const nvinfer1::PluginField *fields = fc->fields;
|
||||
red_mode_ = static_cast<const schema::ReduceMode *>(fields[0].data)[0];
|
||||
rank_ = static_cast<const int *>(fields[1].data)[0];
|
||||
}
|
||||
|
||||
ReduceScatterPlugin(const char *name, const void *serialData, size_t serialLength)
|
||||
: TensorRTPlugin(std::string(name), std::string(REDUCESCATTER_PLUGIN_NAME)) {
|
||||
DeserializeValue(&serialData, &serialLength, &red_mode_, sizeof(schema::ReduceMode));
|
||||
DeserializeValue(&serialData, &serialLength, &rank_, sizeof(int));
|
||||
}
|
||||
|
||||
ReduceScatterPlugin() = delete;
|
||||
|
||||
// IPluginV2DynamicExt Methods
|
||||
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
|
||||
nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
|
||||
nvinfer1::IExprBuilder &exprBuilder) noexcept override;
|
||||
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
|
||||
|
||||
size_t getSerializationSize() const noexcept override;
|
||||
void serialize(void *buffer) const noexcept override;
|
||||
|
||||
private:
|
||||
int rank_{0};
|
||||
schema::ReduceMode red_mode_;
|
||||
};
|
||||
class ReduceScatterPluginCreater : public TensorRTPluginCreater<ReduceScatterPlugin> {
|
||||
public:
|
||||
ReduceScatterPluginCreater() : TensorRTPluginCreater(std::string(REDUCESCATTER_PLUGIN_NAME)) {}
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCESCATTER_TENSORRT_H_
|
|
@ -0,0 +1,230 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include "src/runtime/delegate/tensorrt/op/resize_tensorrt.h"
|
||||
#include "nnacl/nnacl_common.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int ResizeTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != 1 && in_tensors.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
}
|
||||
resize_op_ = op_primitive_->value_as_Resize();
|
||||
if (resize_op_ == nullptr) {
|
||||
MS_LOG(ERROR) << "convert failed " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (resize_op_->method() == schema::ResizeMethod_LINEAR) {
|
||||
MS_LOG(WARNING) << "TensorRT linear resize has precision issue, using cpu instead for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
dynamic_shape_params_.support_hw_dynamic_ =
|
||||
(resize_op_->new_height() > 0 && resize_op_->new_width() > 0) ? false : true;
|
||||
// an op with constant new height/width does not support dynamic HW resize
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ResizeTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
if (ctx == nullptr || ctx->network() == nullptr) {
|
||||
MS_LOG(ERROR) << "context or network is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *resize_in_tensor = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
MS_LOG(DEBUG) << "origin input " << GetTensorFormat(tensorrt_in_tensors_[0]);
|
||||
|
||||
if (resize_in_tensor->getDimensions().nbDims == DIMENSION_4D && tensorrt_in_tensors_[0].format_ == Format::NHWC) {
|
||||
// NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "create transpose layer failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer->setName((op_name_ + "_transpose_in").c_str());
|
||||
resize_in_tensor = transpose_layer->getOutput(0);
|
||||
this->transpose_layer_ = transpose_layer;
|
||||
}
|
||||
MS_LOG(DEBUG) << "after transpose input " << GetTensorFormat(resize_in_tensor, Format::NCHW, false);
|
||||
|
||||
nvinfer1::IResizeLayer *resize_layer = ctx->network()->addResize(*resize_in_tensor);
|
||||
if (resize_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "create resize layer failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
int ret = SetOutputDims(resize_in_tensor, resize_layer);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "SetOutputDims failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
ret = SetParams(resize_layer);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "SetParams failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
resize_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
|
||||
this->AddInnerOutTensors(ITensorHelper{resize_layer->getOutput(0), Format::NCHW, false});
|
||||
MS_LOG(DEBUG) << "output " << GetTensorFormat(tensorrt_out_tensors_[0]);
|
||||
this->layer_ = resize_layer;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ResizeTensorRT::SetOutputDims(nvinfer1::ITensor *resize_in_tensor, nvinfer1::IResizeLayer *resize_layer) {
|
||||
nvinfer1::Dims in_dims = resize_in_tensor->getDimensions();
|
||||
if (in_tensors_.size() == 1 && !dynamic_shape_params_.support_dynamic_ && in_dims.nbDims == DIMENSION_4D) {
|
||||
nvinfer1::Dims4 new_dims(in_dims.d[0], in_dims.d[1], resize_op_->new_height(), resize_op_->new_width()); // nchw
|
||||
resize_layer->setOutputDimensions(new_dims); // static shape
|
||||
} else if (in_tensors_.size() == 1 && !dynamic_shape_params_.support_hw_dynamic_ &&
|
||||
dynamic_shape_params_.support_dynamic_ && in_dims.nbDims == DIMENSION_4D) {
|
||||
// hw is static, but has dynamic batch size
|
||||
float scales[DIMENSION_4D]{1, 1, 1, 1};
|
||||
scales[kNCHW_H] = static_cast<float>(resize_op_->new_height()) / static_cast<float>(in_dims.d[kNCHW_H]);
|
||||
scales[kNCHW_W] = static_cast<float>(resize_op_->new_width()) / static_cast<float>(in_dims.d[kNCHW_W]);
|
||||
resize_layer->setScales(scales, DIMENSION_4D);
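// note (not in the original): scales are per NCHW dim {N, C, H, W},
// e.g. resizing H x W from 56 x 56 to 112 x 112 gives scales {1, 1, 2, 2}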
|
||||
} else {
|
||||
auto shape_value_tensor = in_tensors_[1];
|
||||
if (shape_value_tensor.Data() == nullptr && tensorrt_in_tensors_.size() >= INPUT_SIZE2) {
|
||||
// dynamic output shape
|
||||
resize_layer->setInput(1, *tensorrt_in_tensors_[1].trt_tensor_);
|
||||
} else {
|
||||
std::vector<float> out_shape;
|
||||
ParseValueFromShapeTensor(shape_value_tensor, &out_shape);
|
||||
if (SameDims(out_shape, out_tensors_[0].Shape())) {
|
||||
// static dims
|
||||
if (out_shape.size() == DIMENSION_4D) {
|
||||
// convert nhwc to nchw
|
||||
auto channel = out_shape[out_shape.size() - 1];
|
||||
out_shape.insert(out_shape.begin() + 1, channel);
|
||||
out_shape.erase(out_shape.begin() + out_shape.size() - 1);
|
||||
}
|
||||
resize_layer->setOutputDimensions(ConvertCudaDims(out_shape));
|
||||
} else if (IsScaleOutputDim(in_tensors_[0].Shape(), out_tensors_[0].Shape(), out_shape)) {
|
||||
// scale dims
|
||||
float scales[DIMENSION_4D]{1, 1, 1, 1};
|
||||
scales[kNCHW_H] =
|
||||
static_cast<float>(out_tensors_[0].Shape()[kNHWC_H]) / static_cast<float>(in_tensors_[0].Shape()[kNHWC_H]);
|
||||
scales[kNCHW_W] =
|
||||
static_cast<float>(out_tensors_[0].Shape()[kNHWC_W]) / static_cast<float>(in_tensors_[0].Shape()[kNHWC_W]);
|
||||
resize_layer->setScales(scales, DIMENSION_4D);
|
||||
} else if (out_tensors_[0].Shape().size() == DIMENSION_4D) {
|
||||
MS_LOG(DEBUG) << op_name_ << " output shape tensor value is const, but set to scales for dynamic input shape.";
|
||||
float scales[out_tensors_[0].Shape().size()];
|
||||
for (size_t i = 0; i < out_tensors_[0].Shape().size(); i++) {
|
||||
scales[i] = static_cast<float>(out_tensors_[0].Shape()[i]) / static_cast<float>(in_tensors_[0].Shape()[i]);
|
||||
}
|
||||
// change to nchw
|
||||
scales[kNCHW_W] = scales[kNHWC_W];
|
||||
scales[kNCHW_H] = scales[kNHWC_H];
|
||||
scales[kNCHW_C] = 1;
|
||||
MS_LOG(DEBUG) << op_name_ << "scale at H " << kNCHW_H << ": " << scales[kNCHW_H] << ", W " << kNCHW_W << ": "
|
||||
<< scales[kNCHW_W];
|
||||
resize_layer->setScales(scales, out_tensors_[0].Shape().size());
|
||||
} else {
|
||||
MS_LOG(ERROR) << "resize dims needs check for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
void ResizeTensorRT::ParseValueFromShapeTensor(const mindspore::MSTensor &shape_value_tensor,
|
||||
std::vector<float> *out_shape) {
|
||||
switch (shape_value_tensor.DataType()) {
|
||||
case DataType::kNumberTypeFloat32: {
|
||||
const float *shape_data_fp32 = static_cast<const float *>(shape_value_tensor.Data().get());
|
||||
for (int i = 0; i < shape_value_tensor.ElementNum(); i++) {
|
||||
out_shape->push_back(*(shape_data_fp32 + i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::kNumberTypeFloat16: {
|
||||
const uint16_t *shape_data_fp16 = static_cast<const uint16_t *>(shape_value_tensor.Data().get());
|
||||
for (int i = 0; i < shape_value_tensor.ElementNum(); i++) {
|
||||
out_shape->push_back(ShortToFloat32(*(shape_data_fp16 + i)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataType::kNumberTypeInt32: {
|
||||
const int *shape_data_int32 = static_cast<const int *>(shape_value_tensor.Data().get());
|
||||
for (int i = 0; i < shape_value_tensor.ElementNum(); i++) {
|
||||
out_shape->push_back(*(shape_data_int32 + i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
MS_LOG(WARNING) << op_name_
|
||||
<< " more datatype need to check: " << static_cast<int>(shape_value_tensor.DataType());
|
||||
break;
|
||||
}
|
||||
if (out_shape->size() == DIMENSION_2D &&
|
||||
tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D) {
|
||||
// out_shape: origin_n, out_shape[0], out_shape[1], origin_c
|
||||
out_shape->insert(out_shape->begin(),
|
||||
tensorrt_in_tensors_[0].trt_tensor_->getDimensions().d[0]); // batch size is dynamic
|
||||
out_shape->push_back(in_tensors_[0].Shape()[kNHWC_C]); // channel is const
|
||||
}
|
||||
}
|
||||
|
||||
bool ResizeTensorRT::IsScaleOutputDim(const std::vector<int64_t> &in_shape, const std::vector<int64_t> &out_shape,
|
||||
const std::vector<float> &shape_tensor_val) {
|
||||
if (out_shape.size() != DIMENSION_4D) {
|
||||
MS_LOG(WARNING) << "dims count needs check for " << op_name_;
|
||||
return false;
|
||||
}
|
||||
if (in_shape.size() != out_shape.size() || shape_tensor_val.size() != in_shape.size()) {
|
||||
MS_LOG(WARNING) << "tensor shape is not same for " << op_name_;
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < in_shape.size(); i++) {
|
||||
if (std::abs(in_shape[i] * shape_tensor_val[i] - out_shape[i]) > 1e-6) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int ResizeTensorRT::SetParams(nvinfer1::IResizeLayer *resize_layer) {
|
||||
auto method = resize_op_->method();
|
||||
std::map<schema::ResizeMethod, nvinfer1::ResizeMode> method_map = {
|
||||
{schema::ResizeMethod_LINEAR, nvinfer1::ResizeMode::kLINEAR},
|
||||
{schema::ResizeMethod_NEAREST, nvinfer1::ResizeMode::kNEAREST}};
|
||||
if (method_map.find(method) == method_map.end()) {
|
||||
MS_LOG(ERROR) << op_name_ << " unsupported resize mode " << EnumNameResizeMethod(method);
|
||||
return RET_ERROR;
|
||||
}
|
||||
resize_layer->setResizeMode(method_map.at(method));
|
||||
|
||||
// not supported in TensorRT 6; TensorRT 8 supports setCoordinateTransformation()
|
||||
auto coordinate_transform_mode = resize_op_->coordinate_transform_mode();
|
||||
if (coordinate_transform_mode != schema::CoordinateTransformMode_ASYMMETRIC) {
|
||||
MS_LOG(WARNING) << op_name_ << " has coordinate_transform_mode may not supported: "
|
||||
<< EnumNameCoordinateTransformMode(coordinate_transform_mode);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Resize, ResizeTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,52 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_RESIZE_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_RESIZE_TENSORRT_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class ResizeTensorRT : public TensorRTOp {
|
||||
public:
|
||||
ResizeTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
|
||||
const schema::QuantType &quant_type)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
|
||||
|
||||
~ResizeTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
|
||||
private:
|
||||
int SetOutputDims(nvinfer1::ITensor *resize_in_tensor, nvinfer1::IResizeLayer *resize_layer);
|
||||
|
||||
void ParseValueFromShapeTensor(const mindspore::MSTensor &shape_value_tensor, std::vector<float> *out_shape);
|
||||
|
||||
bool IsScaleOutputDim(const std::vector<int64_t> &in_shape, const std::vector<int64_t> &out_shape,
|
||||
const std::vector<float> &shape_tensor_val);
|
||||
|
||||
int SetParams(nvinfer1::IResizeLayer *resize_layer);
|
||||
|
||||
const schema::Resize *resize_op_{nullptr};
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_RESIZE_TENSORRT_H_
|
|
@ -0,0 +1,227 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <numeric>
|
||||
#include <functional>
|
||||
#include "src/runtime/delegate/tensorrt/op/scale_tensorrt.h"
|
||||
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
|
||||
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr int SCALE_INDEX = 1;
|
||||
constexpr int SHIFT_INDEX = 2;
|
||||
constexpr int POWER_INDEX = 3;
|
||||
|
||||
int ScaleTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3 && in_tensors.size() != INPUT_SIZE4) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is: " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is: " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ScaleTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
CHECK_NULL_RETURN(ctx);
|
||||
auto scale_op = op_primitive_->value_as_ScaleFusion();
|
||||
CHECK_NULL_RETURN(scale_op);
|
||||
|
||||
schema::ActivationType activation_type = scale_op->activation_type();
|
||||
// mode of scale
|
||||
axis_ = scale_op->axis();
|
||||
axis_ = axis_ < 0 ? static_cast<int64_t>(in_tensors_[0].Shape().size() + axis_) : axis_;
|
||||
out_format_ = tensorrt_in_tensors_[0].format_;
|
||||
out_same_format_ = tensorrt_in_tensors_[0].same_format_;
|
||||
mode_ = GetScaleMode(axis_);
|
||||
MS_LOG(DEBUG) << "before transpose " << GetTensorFormat(tensorrt_in_tensors_[0]);
|
||||
|
||||
nvinfer1::ITensor *scale_in_tensor = PreProcessInputTensor(ctx);
|
||||
if (scale_in_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "PreProcessInputTensor failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
MS_LOG(DEBUG) << "after transpose " << GetTensorFormat(scale_in_tensor, out_format_, out_same_format_);
|
||||
|
||||
nvinfer1::ITensor *op_out_tensor{nullptr};
|
||||
if (scale_in_tensor->getDimensions().nbDims == DIMENSION_4D) {
|
||||
op_out_tensor = RunAs4DimsScale(ctx, scale_in_tensor);
|
||||
} else {
|
||||
op_out_tensor = RunAsMutiDimsScale(ctx, scale_in_tensor);
|
||||
}
|
||||
CHECK_NULL_RETURN(op_out_tensor);
|
||||
|
||||
// add activation
|
||||
if (activation_type != schema::ActivationType::ActivationType_NO_ACTIVATION) {
|
||||
auto activation_layer = ActivationTensorRT::AddActivation(ctx, activation_type, 0, 0, 0, op_out_tensor, device_id_);
|
||||
CHECK_NULL_RETURN(activation_layer);
|
||||
activation_layer->setName((op_name_ + "_activation").c_str());
|
||||
op_out_tensor = activation_layer->getOutput(0);
|
||||
}
|
||||
|
||||
op_out_tensor->setName((op_name_ + "_output").c_str());
|
||||
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, out_format_, out_same_format_});
|
||||
MS_LOG(DEBUG) << "output " << GetTensorFormat(tensorrt_out_tensors_[0]);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *ScaleTensorRT::PreProcessInputTensor(TensorRTContext *ctx) {
|
||||
nvinfer1::ITensor *scale_in_tensor = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
mode_ == nvinfer1::ScaleMode::kCHANNEL) {
|
||||
// for per-channel scale the input format should be NCHW; otherwise it should match the NHWC scale tensor
|
||||
// transpose: NHWC->NCHW
|
||||
if ((tensorrt_in_tensors_[0].format_ == Format::NHWC && axis_ == kNHWC_C) ||
|
||||
(tensorrt_in_tensors_[0].same_format_ == true && axis_ == kNHWC_C)) {
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return nullptr;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
scale_in_tensor = transpose_layer_in->getOutput(0);
|
||||
out_format_ = Format::NCHW;
|
||||
out_same_format_ = !out_same_format_;
|
||||
} else if (out_format_ != Format::NCHW && axis_ != kNCHW_C) {
|
||||
MS_LOG(WARNING) << op_name_ << " out format (NHWC:1, NCHW:0) infer as " << out_format_ << ", and axis is "
|
||||
<< axis_;
|
||||
}
|
||||
} else if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NCHW && mode_ == nvinfer1::ScaleMode::kELEMENTWISE) {
|
||||
// transpose: NCHW->NHWC
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return nullptr;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
|
||||
scale_in_tensor = transpose_layer_in->getOutput(0);
|
||||
out_format_ = Format::NHWC;
|
||||
out_same_format_ = true;
|
||||
}
|
||||
return scale_in_tensor;
|
||||
}
|
||||
|
||||
nvinfer1::ScaleMode ScaleTensorRT::GetScaleMode(int64_t axis) {
|
||||
nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kUNIFORM;
|
||||
auto input_data_shape = in_tensors_[0].Shape();
|
||||
auto input_weight_shape = in_tensors_[1].Shape();
|
||||
int total = std::accumulate(input_data_shape.begin(), input_data_shape.end(), 1, std::multiplies<int>());
|
||||
if (input_weight_shape.size() == 0 || (input_weight_shape.size() == 1 && input_weight_shape[0] == 1)) {
|
||||
mode = nvinfer1::ScaleMode::kUNIFORM;
|
||||
} else if ((axis < static_cast<int64_t>(input_data_shape.size()) && input_weight_shape.size() == 1 &&
|
||||
input_data_shape[axis] == input_weight_shape[0]) ||
|
||||
(input_data_shape.size() == DIMENSION_4D && axis == DIMENSION_3D)) {
|
||||
mode = nvinfer1::ScaleMode::kCHANNEL;
|
||||
} else if (input_weight_shape.size() == 1 && input_weight_shape[0] == total) {
|
||||
mode = nvinfer1::ScaleMode::kELEMENTWISE;
|
||||
} else {
|
||||
MS_LOG(ERROR) << "ScaleMode create failed: " << op_name_;
|
||||
return mode;
|
||||
}
|
||||
MS_LOG(DEBUG) << op_name_ << " ScaleMode(UNIFORM 0, CHANNEL 1, ELEMENTWISE 2): " << static_cast<int>(mode);
|
||||
return mode;
|
||||
}
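Editor's note: the mode-selection rule above can be read in isolation. The following is a paraphrased sketch, not part of the diff; it assumes plain std::vector<int64_t> shapes, uses a stand-in enum instead of nvinfer1::ScaleMode, and omits the special case in the real code where a 4-D input with axis == 3 is also treated as per-channel.

// Editor's sketch (hypothetical names): how the weight shape selects a scale mode.
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

enum class SketchScaleMode { kUNIFORM, kCHANNEL, kELEMENTWISE, kINVALID };

SketchScaleMode SelectScaleMode(const std::vector<int64_t> &data_shape,
                                const std::vector<int64_t> &weight_shape, int64_t axis) {
  const int64_t element_count =
    std::accumulate(data_shape.begin(), data_shape.end(), int64_t{1}, std::multiplies<int64_t>());
  if (weight_shape.empty() || (weight_shape.size() == 1 && weight_shape[0] == 1)) {
    return SketchScaleMode::kUNIFORM;  // one factor for the whole tensor
  }
  if (axis >= 0 && axis < static_cast<int64_t>(data_shape.size()) && weight_shape.size() == 1 &&
      weight_shape[0] == data_shape[axis]) {
    return SketchScaleMode::kCHANNEL;  // one factor per element of the scaled axis
  }
  if (weight_shape.size() == 1 && weight_shape[0] == element_count) {
    return SketchScaleMode::kELEMENTWISE;  // one factor per input element
  }
  return SketchScaleMode::kINVALID;  // the real code logs an error in this case
}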
nvinfer1::ITensor *ScaleTensorRT::RunAs4DimsScale(TensorRTContext *ctx, nvinfer1::ITensor *scale_in_tensor) {
  bool nd = false;
  // (input * scale + shift) ^ power
  nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, 0};
  nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, 0};
  nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, 0};
  if (in_tensors_.size() > SCALE_INDEX) {
    scale.values = in_tensors_[SCALE_INDEX].MutableData();
    MS_ASSERT(scale.values);
    scale.count = in_tensors_[SCALE_INDEX].ElementNum();
    scale.type = ConvertDataType(in_tensors_[SCALE_INDEX].DataType());
    shift.type = scale.type;
    power.type = scale.type;
    nd = in_tensors_[1].Shape().size() == 1 ? false : true;
  }
  if (in_tensors_.size() > SHIFT_INDEX) {
    shift.values = in_tensors_[SHIFT_INDEX].MutableData();
    MS_ASSERT(shift.values);
    shift.count = in_tensors_[SHIFT_INDEX].ElementNum();
  }
  if (in_tensors_.size() > POWER_INDEX) {
    power.values = in_tensors_[POWER_INDEX].MutableData();
    MS_ASSERT(power.values);
    power.count = in_tensors_[POWER_INDEX].ElementNum();
  }
  nvinfer1::IScaleLayer *cal_layer = nullptr;

  if (nd) {
    MS_LOG(WARNING) << "multi dims ScaleMode enter";
    cal_layer = ctx->network()->addScaleNd(*scale_in_tensor, mode_, shift, scale, power, axis_);
  } else {
    cal_layer = ctx->network()->addScale(*scale_in_tensor, mode_, shift, scale, power);
  }

  if (cal_layer == nullptr) {
    MS_LOG(ERROR) << "addScaleNd failed for: " << op_name_;
    return nullptr;
  }
  cal_layer->setName(op_name_.c_str());
  this->layer_ = cal_layer;
  return cal_layer->getOutput(0);
}

nvinfer1::ITensor *ScaleTensorRT::RunAsMutiDimsScale(TensorRTContext *ctx, nvinfer1::ITensor *scale_in_tensor) {
  auto scale_tensor = ConvertConstantTensorWithDims(ctx, in_tensors_[1], in_tensors_[0].Shape(), op_name_);
  if (scale_tensor == nullptr) {
    MS_LOG(ERROR) << "ConvertConstantTensorWithDims failed for " << op_name_;
    return nullptr;
  }
  auto mul_layer =
    ctx->network()->addElementWise(*scale_in_tensor, *scale_tensor, nvinfer1::ElementWiseOperation::kPROD);
  if (mul_layer == nullptr) {
    MS_LOG(ERROR) << "add mul failed for " << op_name_;
    return nullptr;
  }
  mul_layer->setName((op_name_ + "_scale").c_str());
  layer_ = mul_layer;
  nvinfer1::ITensor *out_tensor = mul_layer->getOutput(0);
  // add shift
  if (in_tensors_.size() >= INPUT_SIZE3) {
    auto shift_tensor = ConvertConstantTensorWithDims(ctx, in_tensors_[SHIFT_INDEX], in_tensors_[0].Shape(), op_name_);
    if (shift_tensor == nullptr) {
      MS_LOG(ERROR) << "ConvertConstantTensorWithDims failed for " << op_name_;
      return nullptr;
    }
    auto shift_layer = ctx->network()->addElementWise(*out_tensor, *shift_tensor, nvinfer1::ElementWiseOperation::kSUM);
    if (shift_layer == nullptr) {
      MS_LOG(ERROR) << "add bias failed for " << op_name_;
      return nullptr;
    }
    shift_layer->setName((op_name_ + "_shift").c_str());
    out_tensor = shift_layer->getOutput(0);
  }
  if (in_tensors_.size() == INPUT_SIZE4) {
    MS_LOG(WARNING) << op_name_ << " has power";
    return nullptr;
  }
  return out_tensor;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_ScaleFusion, ScaleTensorRT)
} // namespace mindspore::lite
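Editor's note: the per-mode math that addScale/addScaleNd apply is worth keeping in mind when reading RunAs4DimsScale above. Every element goes through (x * scale + shift) ^ power, with the three coefficients broadcast according to the selected scale mode. A minimal scalar sketch of that formula (illustration only, not library code):

#include <cmath>

// (input * scale + shift) ^ power, applied element-wise; the ScaleMode
// (uniform / channel / element-wise) only controls how scale, shift, and
// power are broadcast over the input tensor.
float ApplyScale(float x, float scale, float shift, float power) {
  return std::pow(x * scale + shift, power);
}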
@ -0,0 +1,57 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_SCALE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_SCALE_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"

using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
namespace mindspore::lite {
class ScaleTensorRT : public TensorRTOp {
 public:
  ScaleTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
                const schema::QuantType &quant_type)
      : TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}

  ~ScaleTensorRT() override = default;

  int AddInnerOp(TensorRTContext *ctx) override;

  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors) override;

 private:
  nvinfer1::ScaleMode GetScaleMode(int64_t axis);

  nvinfer1::ITensor *PreProcessInputTensor(TensorRTContext *ctx);

  nvinfer1::ITensor *RunAs4DimsScale(TensorRTContext *ctx, nvinfer1::ITensor *scale_in_tensor);

  nvinfer1::ITensor *RunAsMutiDimsScale(TensorRTContext *ctx, nvinfer1::ITensor *scale_in_tensor);

  Format out_format_;

  bool out_same_format_{false};

  nvinfer1::ScaleMode mode_;

  int64_t axis_{0};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_SCALE_TENSORRT_H_
@ -0,0 +1,99 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <numeric>
#include "src/runtime/delegate/tensorrt/op/scatternd_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"

namespace mindspore::lite {
int ScatterNdTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
                                 const std::vector<mindspore::MSTensor> &in_tensors,
                                 const std::vector<mindspore::MSTensor> &out_tensors) {
#if TRT_VERSION_GE(8, 2)
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != INPUT_SIZE3) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size() << " : " << op_name_;
    return RET_ERROR;
  }

  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size() << " : " << op_name_;
    return RET_ERROR;
  }
  return RET_OK;
#else
  MS_LOG(WARNING) << "this TensorRT version doesn't support the Scatter op, please upgrade TensorRT to 8.2 or higher";
  return RET_ERROR;
#endif
}

int ScatterNdTensorRT::AddInnerOp(TensorRTContext *ctx) {
#if TRT_VERSION_GE(8, 2)
  ITensorHelper scatter_input;
  int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[0], &scatter_input);
  if (ret != RET_OK || scatter_input.trt_tensor_ == nullptr) {
    MS_LOG(ERROR) << "PreprocessInputs2SameDim input tensor failed for " << op_name_;
    return ret;
  }
  if (tensorrt_in_tensors_.size() < INPUT_SIZE3) {
    auto indices = ConvertConstantTensor(ctx, in_tensors_[1], op_name_ + "_indice");
    if (indices == nullptr) {
      MS_LOG(ERROR) << "add const input tensor failed for " << op_name_;
      return RET_ERROR;
    }
    tensorrt_in_tensors_.push_back(ITensorHelper{indices});
    auto updates = ConvertConstantTensor(ctx, in_tensors_[INPUT_SIZE2], op_name_ + "_update");
    if (updates == nullptr) {
      MS_LOG(ERROR) << "add const input tensor failed for " << op_name_;
      return RET_ERROR;
    }
    tensorrt_in_tensors_.push_back(ITensorHelper{updates});
  }
  ITensorHelper indices_helper;
  ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[1], &indices_helper);
  if (ret != RET_OK || indices_helper.trt_tensor_ == nullptr) {
    MS_LOG(ERROR) << "PreprocessInputs2SameDim indices tensor failed for " << op_name_;
    return ret;
  }
  ITensorHelper updates_helper;
  ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[INPUT_SIZE2], &updates_helper);
  if (ret != RET_OK || updates_helper.trt_tensor_ == nullptr) {
    MS_LOG(ERROR) << "PreprocessInputs2SameDim update tensor failed for " << op_name_;
    return ret;
  }

  nvinfer1::IScatterLayer *scatter_layer = ctx->network()->addScatter(
    *scatter_input.trt_tensor_, *indices_helper.trt_tensor_, *updates_helper.trt_tensor_, nvinfer1::ScatterMode::kND);
  if (scatter_layer == nullptr) {
    MS_LOG(ERROR) << "addScatter failed for TensorRT.";
    return RET_ERROR;
  }

  nvinfer1::ITensor *out_tensor = scatter_layer->getOutput(0);
  out_tensor->setName((op_name_ + "_0").c_str());
  this->AddInnerOutTensors(ITensorHelper{out_tensor, scatter_input.format_, scatter_input.same_format_});
  this->layer_ = scatter_layer;
  return RET_OK;
#else
  MS_LOG(WARNING) << "this TensorRT version doesn't support the Scatter op, please upgrade TensorRT to 8.2 or higher";
  return RET_ERROR;
#endif
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_ScatterNdUpdate, ScatterNdTensorRT)
} // namespace mindspore::lite
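Editor's note on the kND scatter mode used above: each row of the indices tensor addresses one element (or slice) of the input, and the corresponding row of updates overwrites it. A minimal sketch of that rule for the scalar-update case, independent of TensorRT (names are illustrative, not library code):

#include <cstddef>
#include <vector>

// Scatter scalar updates into a row-major flattened tensor. Each row of
// `indices` is a full multi-dimensional index into `data`; later rows win
// if two rows address the same element.
std::vector<float> ScatterNdScalar(std::vector<float> data,                      // flattened input, copied
                                   const std::vector<size_t> &dims,              // input shape
                                   const std::vector<std::vector<size_t>> &indices,
                                   const std::vector<float> &updates) {
  for (size_t n = 0; n < indices.size(); ++n) {
    size_t offset = 0;
    for (size_t d = 0; d < dims.size(); ++d) {
      offset = offset * dims[d] + indices[n][d];  // multi-dim index -> flat offset
    }
    data[offset] = updates[n];
  }
  return data;
}

When the index rows are shorter than the input rank, the same rule applies to whole slices instead of single elements, which is how the TensorRT layer generalizes it.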
@ -0,0 +1,39 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_SCATTERND_TENSORRT_H_
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_SCATTERND_TENSORRT_H_
#include <string>
#include <vector>
#include <algorithm>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"

namespace mindspore::lite {
class ScatterNdTensorRT : public TensorRTOp {
 public:
  ScatterNdTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                    const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
                    const schema::QuantType &quant_type)
      : TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}

  ~ScatterNdTensorRT() override = default;

  int AddInnerOp(TensorRTContext *ctx) override;

  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors) override;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_SCATTERND_TENSORRT_H_
@ -0,0 +1,69 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/delegate/tensorrt/op/shape_tensorrt.h"

namespace mindspore::lite {
int ShapeTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                             const std::vector<mindspore::MSTensor> &out_tensors) {
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
    return RET_ERROR;
  }
  dynamic_shape_params_.support_dynamic_ = false;
  dynamic_shape_params_.support_hw_dynamic_ = false;
  return RET_OK;
}
int ShapeTensorRT::AddInnerOp(TensorRTContext *ctx) {
  if (ctx == nullptr || ctx->network() == nullptr) {
    MS_LOG(ERROR) << "context or network is invalid";
    return RET_ERROR;
  }
  nvinfer1::ITensor *shape_input = tensorrt_in_tensors_[0].trt_tensor_;
  if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
      tensorrt_in_tensors_[0].format_ == Format::NCHW) {
    // transpose: NCHW->NHWC
    nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
    if (transpose_layer_in == nullptr) {
      MS_LOG(ERROR) << "transpose: NCHW->NHWC failed for " << op_name_;
      return RET_ERROR;
    }
    transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
    shape_input = transpose_layer_in->getOutput(0);
    this->transpose_layer_ = transpose_layer_in;
  }
  nvinfer1::IShapeLayer *shape_layer = ctx->network()->addShape(*shape_input);

  if (shape_layer == nullptr) {
    MS_LOG(ERROR) << "add shape op failed for TensorRT.";
    return RET_ERROR;
  }
  shape_layer->setName(op_name_.c_str());
  shape_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
  this->AddInnerOutTensors(ITensorHelper{shape_layer->getOutput(0), Format::NHWC, true});
  this->layer_ = shape_layer;
  return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Shape, ShapeTensorRT)
} // namespace mindspore::lite
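Editor's note: the transpose before addShape above is what keeps the reported shape in the NHWC order the rest of the runtime expects, since Shape simply reports the layout of whatever tensor it is given. A one-function sketch of the effect on a 4-D shape (illustration only, not library code):

#include <array>
#include <cstdint>

// Reorder NCHW dimensions into NHWC order, i.e. the permutation {0, 2, 3, 1}.
std::array<int64_t, 4> NchwShapeToNhwc(const std::array<int64_t, 4> &nchw) {
  return {nchw[0], nchw[2], nchw[3], nchw[1]};
}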
Some files were not shown because too many files have changed in this diff.