!37251 [MS][LITE][STABLE]copy tensorRT impl to src/runtime

Merge pull request !37251 from chenjianping/master_dev1
This commit is contained in:
i-robot 2022-07-05 05:46:26 +00:00 committed by Gitee
commit e67997ff67
GPG Key ID: 173E9B9CA92EEF8F
133 changed files with 14041 additions and 2 deletions

View File

@ -498,14 +498,14 @@ if(SUPPORT_TENSORRT)
set(CUDA_LIB_PATH ${CUDA_PATH}/lib64)
include_directories(${TENSORRT_PATH}/include)
include_directories(${CUDA_PATH}/include)
add_subdirectory(extendrt/delegate/tensorrt)
add_subdirectory(runtime/delegate/tensorrt)
endif()
target_link_libraries(mindspore-lite tensorrt_kernel_mid cuda_kernel_mid gpu_distribution_collective)
target_link_libraries(mindspore-lite_static tensorrt_kernel_mid cuda_kernel_mid gpu_distribution_collective)
else()
if(NOT MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
set(TENSORRT_STUB
${CMAKE_CURRENT_SOURCE_DIR}/extendrt/delegate/tensorrt/distribution/distribution_base.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/delegate/tensorrt/distribution/distribution_base.cc
)
add_library(tensorrt_stub OBJECT ${TENSORRT_STUB})
endif()

View File

@ -381,6 +381,10 @@ int TensorRTSubGraph::Prepare() {
return RET_ERROR;
}
int binding_num = this->engine_->getNbBindings();
if (binding_num < 0) {
MS_LOG(ERROR) << "invalid binding_num " << binding_num;
return RET_ERROR;
}
tensor_bindings_ = new (std::nothrow) void *[binding_num];
if (tensor_bindings_ == nullptr) {
MS_LOG(ERROR) << "malloc tensor binding array failed.";

View File

@ -0,0 +1,43 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_ALGORITHM_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_ALGORITHM_H_
#include <vector>
#include "include/api/status.h"
namespace mindspore {
namespace cache {
struct CacheNoe {
CacheNoe(int _index, int _frequency, int _value) : key(_index), frequency(_frequency), value(_value) {}
int key; // host input index
int frequency;
int value; // cache index
};
class CacheAlgorithm {
public:
virtual ~CacheAlgorithm() {}
virtual int Get(int key) = 0;
virtual void Put(int key, int value) = 0;
virtual Status Init(size_t cache_size, int min_host_index, int max_host_index) = 0;
virtual Status CheckCacheHit(const int *batch_ids, const size_t batch_ids_len, int *cache_index,
std::vector<int> *need_swap_indies, std::vector<int> *need_swap_indies_cache_index) = 0;
};
} // namespace cache
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_ALGORITHM_H_

View File

@ -0,0 +1,41 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_MEM_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_MEM_BASE_H_
#include <utility>
#include <memory>
namespace mindspore {
namespace cache {
class CacheMemBase {
public:
CacheMemBase() = default;
virtual ~CacheMemBase() = default;
virtual bool InitDevice(uint32_t device_id, const void *context) = 0;
virtual void *MallocMemory(size_t size) = 0;
virtual void FreeMemory(void *buf) = 0;
virtual bool SynchronizeStream() = 0;
virtual bool CopyHostMemToDevice(void *dst, const void *src, size_t size) = 0;
virtual bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) = 0;
virtual bool HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr,
size_t cache_vocab_size, size_t embedding_size, size_t swap_out_size) = 0;
virtual bool HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr,
size_t cache_vocab_size, size_t embedding_size, size_t swap_in_size) = 0;
};
} // namespace cache
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_CACHE_MEM_BASE_H_

View File

@ -0,0 +1,237 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/parameter_cache/embedding_cache.h"
#include <cuda_runtime.h>
#include <memory>
#include <vector>
#include <cmath>
#include <cstring>
#include <string>
#include "src/common/log_adapter.h"
#include "include/errorcode.h"
#include "src/runtime/delegate/parameter_cache/gpu/gpu_cache_mem.h"
#include "src/runtime/delegate/parameter_cache/lfu_cache.h"
#include "src/runtime/delegate/parameter_cache/factory_mgr_base.h"
namespace {
constexpr size_t kEmbeddingTensorShapeSize = 2;
}
namespace mindspore {
namespace cache {
void LookUpTableTask(size_t indices_lens, size_t first_dim_size, const char *input_addr, const int *indices_addr,
char *output_addr, size_t embedding_len, int min_host_index) {
for (size_t i = 0; i < indices_lens; ++i) {
int index = indices_addr[i] - min_host_index;
if (index >= 0 && index < static_cast<int>(first_dim_size)) {
size_t pos = index * embedding_len;
std::memcpy(output_addr, input_addr + pos, embedding_len);
} else {
memset(output_addr, 0, embedding_len);
}
output_addr += embedding_len;
}
}
EmbeddingCache::~EmbeddingCache() {
if (hash_swap_value_device_addr_ != nullptr) {
device_cache_->FreeMemory(hash_swap_value_device_addr_);
hash_swap_value_device_addr_ = nullptr;
}
if (hash_swap_value_addr_ != nullptr) {
free(hash_swap_value_addr_);
hash_swap_value_addr_ = nullptr;
}
if (hash_swap_index_addr_ != nullptr) {
device_cache_->FreeMemory(hash_swap_index_addr_);
hash_swap_index_addr_ = nullptr;
}
}
Status EmbeddingCache::Init(mindspore::MSTensor host_cache_tensor, mindspore::MSTensor device_tensor) {
MS_ASSERT(device_tensor.Shape().size() == kEmbeddingTensorShapeSize);
MS_ASSERT(host_cache_tensor.Shape().size() == kEmbeddingTensorShapeSize);
MS_ASSERT(device_tensor.DataType() == host_cache_tensor.DataType());
MS_ASSERT(host_cache_tensor.Data() != nullptr);
if (device_tensor.Shape()[1] != host_cache_tensor.Shape()[1]) {
MS_LOG(ERROR) << device_tensor.Name() << " embedding_size is invalid, device size is " << device_tensor.Shape()[1]
<< ", host size is " << host_cache_tensor.Shape()[1];
return kLiteError;
}
if (host_cache_size_ != host_cache_tensor.Shape()[0]) {
MS_LOG(ERROR) << device_tensor.Name() << " host_cache_size is invalid, host_cache_size:"
<< host_cache_tensor.Shape()[0] << ", index begin:" << min_host_index_
<< ", index end:" << max_host_index_ << ", rank_group_size_ num:" << rank_group_size_
<< ", rank id:" << rank_id_ << ", vocab_size_:" << vocab_size_;
return kLiteError;
}
data_type_ = device_tensor.DataType();
switch (data_type_) {
case DataType::kNumberTypeFloat32:
sizeof_data_type_ = sizeof(float);
break;
default:
MS_LOG(ERROR) << device_tensor.Name() << " unsupported data type " << static_cast<int>(data_type_);
return kLiteError;
}
host_addr_ = host_cache_tensor.MutableData();
embedding_size_ = device_tensor.Shape()[1];
device_start_index_ = device_cache_size_ * rank_id_;
// host cache tensor is device tensor
if (device_tensor.Shape()[0] == host_cache_tensor.Shape()[0]) {
device_start_index_ = min_host_index_;
}
return kSuccess;
}
Status EmbeddingCache::MallocCacheMemory() {
auto hash_swap_value_size = embedding_size_ * batch_elements_ * sizeof_data_type_;
hash_swap_value_device_addr_ = device_cache_->MallocMemory(hash_swap_value_size);
if (hash_swap_value_device_addr_ == nullptr) {
MS_LOG(ERROR) << "malloc hash_swap_value_device failed, malloc size " << hash_swap_value_size;
return kLiteMemoryFailed;
}
hash_swap_value_addr_ = malloc(hash_swap_value_size);
if (hash_swap_value_addr_ == nullptr) {
MS_LOG(ERROR) << "malloc hash_swap_value failed, malloc size " << hash_swap_value_size;
return kLiteMemoryFailed;
}
// data type of index
hash_swap_index_addr_ = static_cast<int *>(device_cache_->MallocMemory(batch_elements_ * sizeof(int)));
if (hash_swap_index_addr_ == nullptr) {
MS_LOG(ERROR) << "malloc hash_swap_index failed, malloc size " << batch_elements_ * sizeof(int);
return kLiteMemoryFailed;
}
return kSuccess;
}
Status EmbeddingCache::Init(uint32_t device_id, const void *context, mindspore::MSTensor host_cache_tensor,
mindspore::MSTensor device_tensor) {
auto ret = Init(host_cache_tensor, device_tensor);
if (ret != kSuccess) {
return ret;
}
cache_ = lite::FactoryManagerBase<std::string, cache::CacheAlgorithm>::Instance().GetProduct("lfu");
if (cache_ == nullptr) {
MS_LOG(ERROR) << "malloc LFUCacheAlgorithm failed";
return kLiteMemoryFailed;
}
ret = cache_->Init(device_cache_size_, min_host_index_, max_host_index_);
if (ret != kSuccess) {
MS_LOG(ERROR) << "init cache failed, " << ret.ToString();
return kLiteError;
}
device_cache_ = lite::FactoryManagerBase<std::string, cache::CacheMemBase>::Instance().GetProduct("gpu");
if (device_cache_ == nullptr) {
MS_LOG(ERROR) << "get cache failed";
return kLiteMemoryFailed;
}
if (!device_cache_->InitDevice(device_id, context)) {
MS_LOG(ERROR) << "init device failed";
return kLiteError;
}
ret = MallocCacheMemory();
if (ret != kSuccess) {
return ret;
}
MS_LOG(INFO) << "init succ, rank_group_size_ num:" << rank_group_size_ << ", rank id:" << rank_id_
<< ", vocab_size_:" << vocab_size_ << ", host_cache_size_:" << host_cache_size_
<< ", device_cache_size_:" << device_cache_size_ << ", embedding_size_:" << embedding_size_
<< ", batch_elements_:" << batch_elements_ << ", index begin:" << min_host_index_
<< ", index end:" << max_host_index_;
return kSuccess;
}
Status EmbeddingCache::SetHostCacheAddr(void *addr, size_t size) {
if (sizeof_data_type_ * host_cache_size_ * embedding_size_ != size) {
return kLiteParamInvalid;
}
host_addr_ = addr;
// copy part of host mem to device
auto ret =
device_cache_->CopyHostMemToDevice(device_addr_, addr, sizeof_data_type_ * device_cache_size_ * embedding_size_);
if (!ret) {
MS_LOG(ERROR) << "CopyHostMemToDevice failed, copy size "
<< sizeof_data_type_ * device_cache_size_ * embedding_size_;
return kLiteMemoryFailed;
}
// init cache
auto index_num = device_cache_size_;
for (size_t i = 0; i < index_num; i++) {
cache_->Put(min_host_index_ + i, i);
}
return kSuccess;
}
Status EmbeddingCache::SetDeviceCacheAddr(void *device_mem_addr, size_t size) {
if (sizeof_data_type_ * device_cache_size_ * embedding_size_ != size) {
return kLiteParamInvalid;
}
device_addr_ = device_mem_addr;
SetHostCacheAddr(host_addr_, sizeof_data_type_ * host_cache_size_ * embedding_size_);
return kSuccess;
}
Status EmbeddingCache::CheckCacheHit(const int *batch_ids, const size_t batch_ids_len, int *cache_index) {
std::vector<int> need_swap_indies;
std::vector<int> need_swap_indies_cache_index;
auto ret =
cache_->CheckCacheHit(batch_ids, batch_ids_len, cache_index, &need_swap_indies, &need_swap_indies_cache_index);
if (ret != kSuccess) {
MS_LOG(ERROR) << "CheckCacheHit failed";
return ret;
}
auto swap_indices_size = need_swap_indies.size();
if (swap_indices_size > 0) {
LookUpTableTask(swap_indices_size, host_cache_size_, static_cast<char *>(host_addr_), need_swap_indies.data(),
static_cast<char *>(hash_swap_value_addr_), embedding_size_ * sizeof_data_type_, min_host_index_);
auto device_cache_ret = device_cache_->CopyHostMemToDevice(hash_swap_value_device_addr_, hash_swap_value_addr_,
swap_indices_size * embedding_size_ * sizeof_data_type_);
if (!device_cache_ret) {
MS_LOG(ERROR) << "copy swap value to device failed";
return kLiteMemoryFailed;
}
device_cache_ret = device_cache_->CopyHostMemToDevice(hash_swap_index_addr_, need_swap_indies_cache_index.data(),
swap_indices_size * sizeof(int));
if (!device_cache_ret) {
MS_LOG(ERROR) << "copy swap indies to device failed";
return kLiteMemoryFailed;
}
device_cache_ret = device_cache_->HashSwapIn(device_addr_, hash_swap_value_device_addr_, hash_swap_index_addr_,
device_cache_size_, embedding_size_, swap_indices_size);
if (!device_cache_ret) {
MS_LOG(ERROR) << "HashSwapIn failed";
return kLiteMemoryFailed;
}
}
return kSuccess;
}
} // namespace cache
} // namespace mindspore

View File

@ -0,0 +1,89 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_H_
#include <cmath>
#include <algorithm>
#include <memory>
#include "include/api/status.h"
#include "include/api/data_type.h"
#include "src/common/log_adapter.h"
#include "src/runtime/delegate/parameter_cache/cache_algorithm.h"
#include "src/runtime/delegate/parameter_cache/cache_mem_base.h"
namespace mindspore {
namespace cache {
class EmbeddingCache {
public:
EmbeddingCache(size_t vocab_size, size_t device_cache_size, size_t batch_elements, int rank_id, int rank_group_size)
: vocab_size_(vocab_size),
device_cache_size_(device_cache_size),
batch_elements_(batch_elements),
rank_id_(rank_id),
rank_group_size_(rank_group_size) {
MS_ASSERT(rank_group_size_ != 0);
auto local_shard_size = static_cast<int>(std::ceil(static_cast<float>(vocab_size_) / rank_group_size_));
min_host_index_ = local_shard_size * rank_id_;
max_host_index_ = std::min(min_host_index_ + local_shard_size, static_cast<int>(vocab_size_));
host_cache_size_ = max_host_index_ - min_host_index_;
MS_LOG(INFO) << "rank_group_size_ num:" << rank_group_size_ << ", rank id:" << rank_id_
<< ", vocab_size_:" << vocab_size_ << ", host_cache_size_:" << host_cache_size_
<< ", index begin:" << min_host_index_ << ", index end:" << max_host_index_;
}
~EmbeddingCache();
Status Init(uint32_t device_id, const void *context, mindspore::MSTensor host_cache_tensor,
mindspore::MSTensor device_tensor);
Status SetHostCacheAddr(void *addr, size_t size);
Status SetDeviceCacheAddr(void *host_mem_addr, size_t size);
Status CheckCacheHit(const int *batch_ids, const size_t batch_ids_len, int *hash_index);
size_t GetDeviceStartIndex() { return device_start_index_; }
private:
Status Init(mindspore::MSTensor host_cache_tensor, mindspore::MSTensor device_tensor);
Status MallocCacheMemory();
private:
std::shared_ptr<cache::CacheMemBase> device_cache_{nullptr};
std::shared_ptr<CacheAlgorithm> cache_{nullptr};
size_t vocab_size_{0}; // total size
size_t host_cache_size_{0}; // local host size
size_t device_cache_size_{0}; // local device cache size
size_t device_start_index_{0};
size_t embedding_size_{0};
size_t batch_elements_{0};
DataType data_type_{DataType::kNumberTypeFloat32};
size_t sizeof_data_type_{0};
void *device_addr_{nullptr}; // hash_info.device_address.addr
void *host_addr_{nullptr};
int *hash_swap_index_addr_{nullptr}; // embedding_device_cache_->hash_swap_index_addr_
void *hash_swap_value_addr_{nullptr};
void *hash_swap_value_device_addr_{nullptr};
int rank_id_;
int rank_group_size_;
int min_host_index_{0};
int max_host_index_{0};
};
} // namespace cache
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_H_
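
For reference, a minimal standalone sketch of the shard arithmetic performed in the EmbeddingCache constructor above; the helper name and the sample sizes are illustrative only and are not part of this patch.

#include <algorithm>
#include <cmath>
#include <cstdio>

// Hypothetical helper mirroring the constructor's shard math: each rank owns a
// contiguous [min_host_index, max_host_index) slice of the full vocabulary.
void PrintHostShard(size_t vocab_size, int rank_id, int rank_group_size) {
  auto local_shard_size = static_cast<int>(std::ceil(static_cast<float>(vocab_size) / rank_group_size));
  int min_host_index = local_shard_size * rank_id;
  int max_host_index = std::min(min_host_index + local_shard_size, static_cast<int>(vocab_size));
  std::printf("rank %d owns [%d, %d), host_cache_size=%d\n", rank_id, min_host_index, max_host_index,
              max_host_index - min_host_index);
}

int main() {
  // vocab_size = 10 split across 3 ranks -> ceil(10/3) = 4: [0,4), [4,8), [8,10)
  for (int rank = 0; rank < 3; ++rank) {
    PrintHostShard(10, rank, 3);
  }
  return 0;
}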

View File

@ -0,0 +1,194 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/parameter_cache/embedding_cache_manager.h"
#include <cuda_runtime.h>
#include <cmath>
#include <cstring>
#include "src/common/log_adapter.h"
#include "include/errorcode.h"
namespace {
constexpr size_t kGatherInputsSize = 3;
}
namespace mindspore {
namespace cache {
Status EmbeddingCacheManager::Init(const std::string &cache_model_path, size_t vocab_size, size_t device_cache_size) {
if (cache_model_path.empty() || vocab_size == 0 || device_cache_size >= vocab_size) {
MS_LOG(INFO) << "no cache model, vocab_size " << vocab_size << ", device_cache_size " << device_cache_size;
return kSuccess;
}
host_cache_model_ = std::make_shared<HostCacheModel>();
if (host_cache_model_ == nullptr) {
MS_LOG(ERROR) << "HostCacheModel malloc failed";
return kLiteMemoryFailed;
}
auto ret = host_cache_model_->LoadCache(cache_model_path);
if (ret != kSuccess) {
MS_LOG(ERROR) << "load cache failed";
return ret;
}
vocab_size_ = vocab_size;
device_cache_size_ = device_cache_size;
MS_LOG(INFO) << "cache manager init succ, cache model " << cache_model_path << ", vocab_size " << vocab_size
<< ", device_cache_size " << device_cache_size;
return ret;
}
Status EmbeddingCacheManager::Init(DelegateModel<schema::Primitive> *model, size_t vocab_size,
size_t device_cache_size) {
if (model == nullptr || vocab_size == 0 || device_cache_size >= vocab_size) {
MS_LOG(INFO) << "no cache model, vocab_size " << vocab_size << ", device_cache_size " << device_cache_size;
return kSuccess;
}
host_cache_model_ = std::make_shared<HostCacheModel>();
if (host_cache_model_ == nullptr) {
MS_LOG(ERROR) << "HostCacheModel malloc failed";
return kLiteMemoryFailed;
}
auto ret = host_cache_model_->LoadCache(model);
if (ret != kSuccess) {
MS_LOG(ERROR) << "load cache failed";
return ret;
}
vocab_size_ = vocab_size;
device_cache_size_ = device_cache_size;
MS_LOG(INFO) << "cache manager init succ, vocab_size " << vocab_size << ", device_cache_size " << device_cache_size;
return ret;
}
bool EmbeddingCacheManager::CheckIsCacheKernel(kernel::Kernel *kernel) {
if (host_cache_model_ == nullptr) {
return false;
}
return host_cache_model_->CheckIsCacheKernel(kernel);
}
Status EmbeddingCacheManager::InitCacheKernel(kernel::Kernel *kernel, uint32_t device_id, const void *context) {
if (host_cache_model_ == nullptr) {
MS_LOG(ERROR) << "cache model is nullptr, kernel " << kernel->name() << " init cache failed";
return kLiteError;
}
auto host_cache_tensor = host_cache_model_->GetHostCacheTensor(kernel);
if (host_cache_tensor == nullptr) {
MS_LOG(ERROR) << kernel->name() << ": invalid cache kernel";
return kLiteError;
}
// only support embedding cache
if (kernel->type() != schema::PrimitiveType_Gather) {
MS_LOG(ERROR) << kernel->name() << " is not embedding kernel";
return kLiteError;
}
MS_ASSERT(kernel->inputs().size() == kGatherInputsSize);
auto device_tensor = kernel->inputs()[0];
size_t batch_elements = kernel->inputs()[1].ElementNum();
auto cache =
std::make_shared<EmbeddingCache>(vocab_size_, device_cache_size_, batch_elements, rank_id_, rank_group_size_);
if (cache == nullptr) {
MS_LOG(ERROR) << kernel->name() << ": malloc EmbeddingCache failed";
return kLiteError;
}
auto ret = cache->Init(device_id, context, host_cache_tensor, device_tensor);
if (ret != kSuccess) {
MS_LOG(ERROR) << kernel->name() << ": EmbeddingCache init failed";
return kLiteError;
}
caches_[device_tensor.Name()] = cache;
MS_LOG(INFO) << kernel->name() << " is cache kernel, input tensor " << kernel->inputs()[1].Name() << ", cache tensor "
<< device_tensor.Name();
return kSuccess;
}
bool EmbeddingCacheManager::IsCacheTensor(mindspore::MSTensor tensor) {
if (host_cache_model_ == nullptr) {
return false;
}
auto cache = caches_.find(tensor.Name());
if (cache != caches_.end()) {
return true;
}
return false;
}
std::vector<int64_t> EmbeddingCacheManager::GetCacheShape(mindspore::MSTensor tensor) {
std::vector<int64_t> shape = tensor.Shape();
if (shape.size() > 0 && IsCacheTensor(tensor)) {
shape[0] = device_cache_size_;
}
return shape;
}
size_t EmbeddingCacheManager::GetCacheDataSize(mindspore::MSTensor tensor) {
auto data_size = tensor.DataSize();
auto &shape = tensor.Shape();
if (shape.size() > 0 && IsCacheTensor(tensor) && shape[0] > 0) {
data_size = data_size * device_cache_size_ / shape[0];
}
return data_size;
}
Status EmbeddingCacheManager::SetDeviceCacheAddr(const std::string &tensor_name, void *device_mem_addr, size_t size) {
auto cache_iter = caches_.find(tensor_name);
if (cache_iter == caches_.end() || cache_iter->second == nullptr) {
MS_LOG(ERROR) << "cache not found, " << tensor_name;
return kLiteError;
}
auto cache = cache_iter->second;
return cache->SetDeviceCacheAddr(device_mem_addr, size);
}
// device_addr is model input device addr
int EmbeddingCacheManager::CacheHandle(const std::string &tensor_name, mindspore::MSTensor model_input_tensor,
void *model_input_device_addr) {
auto cache_iter = caches_.find(tensor_name);
if (cache_iter == caches_.end()) {
MS_LOG(ERROR) << "cache not found, " << tensor_name;
return lite::RET_ERROR;
}
auto cache = cache_iter->second;
hash_indices_.resize(model_input_tensor.ElementNum());
auto ret = cache->CheckCacheHit(static_cast<int *>(model_input_tensor.MutableData()), hash_indices_.size(),
hash_indices_.data());
if (ret != kSuccess) {
MS_LOG(ERROR) << "CheckCacheHit failed, " << model_input_tensor.Name();
return lite::RET_ERROR;
}
for (size_t i = 0; i < hash_indices_.size(); i++) {
if (hash_indices_[i] != -1) {
hash_indices_[i] += cache->GetDeviceStartIndex();
}
}
auto cuda_ret = cudaMemcpy(model_input_device_addr, hash_indices_.data(), hash_indices_.size() * sizeof(int),
cudaMemcpyHostToDevice);
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "copy mem failed, " << model_input_tensor.Name();
return lite::RET_ERROR;
}
MS_LOG(INFO) << "cache handle succ, " << model_input_tensor.Name() << "," << tensor_name;
return lite::RET_OK;
}
} // namespace cache
} // namespace mindspore
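
A usage sketch of the manager above from the delegate side, not part of this patch: the function name and the vocab_size/device_cache_size values are assumptions for illustration, and model, device_id, and stream are expected to come from the surrounding TensorRT delegate.

#include "src/runtime/delegate/parameter_cache/embedding_cache_manager.h"

mindspore::Status BuildWithCache(mindspore::DelegateModel<mindspore::schema::Primitive> *model,
                                 uint32_t device_id, const void *stream) {
  auto cache_manager = std::make_shared<mindspore::cache::EmbeddingCacheManager>();
  // vocab_size / device_cache_size values are illustrative only.
  auto ret = cache_manager->Init(model, /*vocab_size=*/100000, /*device_cache_size=*/10000);
  if (ret != mindspore::kSuccess) {
    return ret;
  }
  for (auto iter = model->BeginKernelIterator(); iter != model->EndKernelIterator(); ++iter) {
    auto *kernel = *iter;
    if (cache_manager->CheckIsCacheKernel(kernel)) {
      ret = cache_manager->InitCacheKernel(kernel, device_id, stream);
      if (ret != mindspore::kSuccess) {
        return ret;
      }
    }
  }
  return mindspore::kSuccess;
}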

View File

@ -0,0 +1,60 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_MANAGER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_MANAGER_H_
#include <memory>
#include <map>
#include <string>
#include <vector>
#include "include/api/kernel.h"
#include "include/api/status.h"
#include "include/api/data_type.h"
#include "src/runtime/delegate/parameter_cache/embedding_cache.h"
#include "src/runtime/delegate/parameter_cache/load_host_cache_model.h"
#include "src/runtime/delegate/tensorrt/distribution/distribution_base.h"
namespace mindspore {
namespace cache {
class EmbeddingCacheManager {
public:
EmbeddingCacheManager() {
rank_id_ = lite::GetRankID();
rank_group_size_ = lite::GetGPUGroupSize();
}
Status Init(const std::string &cache_model_path, size_t vocab_size, size_t device_cache_size);
Status Init(DelegateModel<schema::Primitive> *model, size_t vocab_size, size_t device_cache_size);
bool CheckIsCacheKernel(kernel::Kernel *kernel);
Status InitCacheKernel(kernel::Kernel *kernel, uint32_t device_id, const void *context);
bool IsCacheTensor(mindspore::MSTensor tensor);
int CacheHandle(const std::string &tensor_name, mindspore::MSTensor model_input_tensor, void *device_addr);
Status SetDeviceCacheAddr(const std::string &tensor_name, void *device_mem_addr, size_t size);
std::vector<int64_t> GetCacheShape(mindspore::MSTensor tensor);
size_t GetCacheDataSize(mindspore::MSTensor tensor);
private:
std::map<std::string, std::shared_ptr<EmbeddingCache>> caches_;
std::vector<int> hash_indices_;
int rank_id_{0};
int rank_group_size_{1};
std::shared_ptr<HostCacheModel> host_cache_model_;
size_t vocab_size_{0};
size_t device_cache_size_{0};
};
} // namespace cache
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_EMBEDDING_CACHE_MANAGER_H_

View File

@ -0,0 +1,81 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_FACTORY_MGR_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_FACTORY_MGR_BASE_H_
#include <map>
#include <memory>
#include "include/api/status.h"
namespace mindspore {
namespace lite {
template <typename KEY, typename PRODUCT>
class ProcductRegistrar {
public:
virtual std::shared_ptr<PRODUCT> Create() = 0;
protected:
ProcductRegistrar() {}
virtual ~ProcductRegistrar() {}
private:
ProcductRegistrar(const ProcductRegistrar &);
const ProcductRegistrar &operator=(const ProcductRegistrar &);
};
template <typename KEY, typename PRODUCT>
class FactoryManagerBase {
public:
static FactoryManagerBase &Instance() {
static FactoryManagerBase<KEY, PRODUCT> instance;
return instance;
}
void RegProduct(const KEY &key, ProcductRegistrar<KEY, PRODUCT> *registrar) { registrars[key] = registrar; }
std::shared_ptr<PRODUCT> GetProduct(const KEY &key) {
auto registrar_iter = registrars.find(key);
if (registrar_iter != registrars.end()) {
if (registrar_iter->second != nullptr) {
return registrar_iter->second->Create();
}
}
return nullptr;
}
private:
FactoryManagerBase() = default;
~FactoryManagerBase() = default;
FactoryManagerBase(const FactoryManagerBase &);
const FactoryManagerBase &operator=(const FactoryManagerBase &);
private:
std::map<KEY, ProcductRegistrar<KEY, PRODUCT> *> registrars;
};
template <typename KEY, typename PRODUCT, typename PRODUCT_IMPL>
class CommonProcductRegistrar : public ProcductRegistrar<KEY, PRODUCT> {
public:
explicit CommonProcductRegistrar(const KEY &key) {
FactoryManagerBase<KEY, PRODUCT>::Instance().RegProduct(key, this);
}
std::shared_ptr<PRODUCT> Create() { return std::make_shared<PRODUCT_IMPL>(); }
};
#define RET_COMMON_PRODUCT_REGISTRAR(KEY, PRODUCT, PRODUCT_IMPL, key, name) \
static mindspore::lite::CommonProcductRegistrar<KEY, PRODUCT, PRODUCT_IMPL> g_commonProcductRegistrar##name(key);
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_FACTORY_MGR_BASE_H_
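
A minimal sketch of how the registrar macro and factory above work together; the Codec/JpegCodec names are illustrative and not part of this patch. The macro registers a creator under a key at static-initialization time, and GetProduct returns a fresh instance for that key.

#include <iostream>
#include <string>
#include "src/runtime/delegate/parameter_cache/factory_mgr_base.h"

class Codec {
 public:
  virtual ~Codec() = default;
  virtual void Run() = 0;
};

class JpegCodec : public Codec {
 public:
  void Run() override { std::cout << "jpeg" << std::endl; }
};

// Registers JpegCodec under the key "jpeg" at static-initialization time.
RET_COMMON_PRODUCT_REGISTRAR(std::string, Codec, JpegCodec, "jpeg", JpegCodec);

int main() {
  auto codec = mindspore::lite::FactoryManagerBase<std::string, Codec>::Instance().GetProduct("jpeg");
  if (codec != nullptr) {
    codec->Run();
  }
  return 0;
}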

View File

@ -0,0 +1,158 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/parameter_cache/gpu/gpu_cache_mem.h"
#include <cuda_runtime.h>
#include <string>
#include "src/runtime/delegate/tensorrt/cuda_impl/hash.cuh"
#include "plugin/device/gpu/hal/device/cuda_driver.h"
#include "src/common/log_adapter.h"
#include "src/runtime/delegate/parameter_cache/factory_mgr_base.h"
namespace mindspore {
namespace cache {
namespace gpu {
RET_COMMON_PRODUCT_REGISTRAR(std::string, cache::CacheMemBase, cache::gpu::GPUCacheMem, "gpu", GPUCacheMem);
bool GPUCacheMem::InitDevice(uint32_t device_id, const void *context) {
auto cuda_ret = cudaSetDevice(static_cast<int>(device_id));
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "Failed to set device id " << device_id << ", cuda_ret " << cuda_ret << " "
<< cudaGetErrorString(cuda_ret);
return false;
}
if (context != nullptr) {
stream_ = *(reinterpret_cast<const cudaStream_t *>(context));
return true;
}
cuda_ret = cudaStreamCreate(&stream_);
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "Cuda create stream failed, cuda_ret " << cuda_ret << " " << cudaGetErrorString(cuda_ret);
return false;
}
return true;
}
void *GPUCacheMem::MallocMemory(size_t size) {
void *device_ptr = nullptr;
auto cuda_ret = cudaMalloc(&device_ptr, size);
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "Cuda Malloc failed for size:" << size << ", cuda_ret " << cuda_ret << " "
<< cudaGetErrorString(cuda_ret);
return nullptr;
}
MS_LOG(DEBUG) << "cudaMalloc size: " << size;
return device_ptr;
}
void GPUCacheMem::FreeMemory(void *device_addr) {
auto cuda_ret = cudaFree(device_addr);
if (cuda_ret != cudaSuccess && cuda_ret != cudaErrorCudartUnloading) {
MS_LOG(WARNING) << "free cuda memory failed, "
<< ", cuda_ret " << cuda_ret << " " << cudaGetErrorString(cuda_ret);
}
}
bool GPUCacheMem::SynchronizeStream() {
auto cuda_ret = cudaStreamSynchronize(stream_);
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "Cuda sync stream failed, cuda_ret " << cuda_ret << " " << cudaGetErrorString(cuda_ret);
return false;
}
return true;
}
bool GPUCacheMem::CopyHostMemToDevice(void *dst, const void *src, size_t size) {
if (dst == nullptr) {
MS_LOG(ERROR) << "dst is nullptr";
return false;
}
if (src == nullptr) {
MS_LOG(ERROR) << "src is nullptr";
return false;
}
auto cuda_ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, stream_);
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "Cuda memcpy failed, cuda_ret " << cuda_ret << " " << cudaGetErrorString(cuda_ret);
return false;
}
return true;
}
bool GPUCacheMem::CopyDeviceMemToHost(void *dst, const void *src, size_t size) {
if (dst == nullptr) {
MS_LOG(ERROR) << "dst is nullptr";
return false;
}
if (src == nullptr) {
MS_LOG(ERROR) << "src is nullptr";
return false;
}
auto cuda_ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, stream_);
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "Cuda memcpy failed, cuda_ret " << cuda_ret << " " << cudaGetErrorString(cuda_ret);
return false;
}
return true;
}
bool GPUCacheMem::HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr, size_t,
size_t embedding_size, size_t swap_out_size) {
if (hash_table_addr == nullptr) {
MS_LOG(ERROR) << "hash_table_addr is nullptr";
return false;
}
if (swap_out_value_addr == nullptr) {
MS_LOG(ERROR) << "swap_out_value_addr is nullptr";
return false;
}
if (swap_out_index_addr == nullptr) {
MS_LOG(ERROR) << "swap_out_index_addr is nullptr";
return false;
}
DoHashSwapOut(reinterpret_cast<float *>(hash_table_addr), reinterpret_cast<float *>(swap_out_value_addr),
reinterpret_cast<int *>(swap_out_index_addr), swap_out_size, embedding_size, stream_);
return true;
}
bool GPUCacheMem::HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr, size_t,
size_t embedding_size, size_t swap_in_size) {
if (hash_table_addr == nullptr) {
MS_LOG(ERROR) << "hash_table_addr is nullptr";
return false;
}
if (swap_in_value_addr == nullptr) {
MS_LOG(ERROR) << "swap_in_value_addr is nullptr";
return false;
}
if (swap_in_index_addr == nullptr) {
MS_LOG(ERROR) << "swap_in_index_addr is nullptr";
return false;
}
DoHashSwapIn(reinterpret_cast<float *>(hash_table_addr), reinterpret_cast<float *>(swap_in_value_addr),
reinterpret_cast<int *>(swap_in_index_addr), swap_in_size, embedding_size, stream_);
return true;
}
} // namespace gpu
} // namespace cache
} // namespace mindspore
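
An illustrative sketch, not part of this patch, of obtaining the "gpu" backend registered above through the factory and staging a host buffer on the device; UploadToDevice is a hypothetical name.

#include <cstddef>
#include <string>
#include <vector>
#include "src/runtime/delegate/parameter_cache/cache_mem_base.h"
#include "src/runtime/delegate/parameter_cache/factory_mgr_base.h"

bool UploadToDevice(uint32_t device_id, const std::vector<float> &host_data) {
  auto mem = mindspore::lite::FactoryManagerBase<std::string, mindspore::cache::CacheMemBase>::Instance().GetProduct(
    "gpu");
  // Passing nullptr as context makes the backend create its own stream.
  if (mem == nullptr || !mem->InitDevice(device_id, nullptr)) {
    return false;
  }
  size_t size = host_data.size() * sizeof(float);
  void *device_ptr = mem->MallocMemory(size);
  if (device_ptr == nullptr) {
    return false;
  }
  // CopyHostMemToDevice is asynchronous on the backend's stream, so the copy
  // must be synchronized before the host buffer is reused or freed.
  bool ok = mem->CopyHostMemToDevice(device_ptr, host_data.data(), size) && mem->SynchronizeStream();
  mem->FreeMemory(device_ptr);
  return ok;
}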

View File

@ -0,0 +1,48 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_GPU_GPU_CACHE_MEM_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_GPU_GPU_CACHE_MEM_H_
#include <cuda_runtime_api.h>
#include <memory>
#include "src/runtime/delegate/parameter_cache/cache_mem_base.h"
namespace mindspore {
namespace cache {
namespace gpu {
class GPUCacheMem : public cache::CacheMemBase {
public:
GPUCacheMem() = default;
~GPUCacheMem() override = default;
bool InitDevice(uint32_t device_id, const void *context) override;
void *MallocMemory(size_t size) override;
void FreeMemory(void *buf) override;
bool SynchronizeStream() override;
bool CopyHostMemToDevice(void *dst, const void *src, size_t size) override;
bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) override;
bool HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr, size_t cache_vocab_size,
size_t embedding_size, size_t swap_out_size) override;
bool HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr, size_t cache_vocab_size,
size_t embedding_size, size_t swap_in_size) override;
private:
cudaStream_t stream_;
};
} // namespace gpu
} // namespace cache
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_GPU_GPU_CACHE_MEM_H_

View File

@ -0,0 +1,243 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vector>
#include <string>
#include "src/common/log_adapter.h"
#include "src/runtime/delegate/parameter_cache/lfu_cache.h"
#include "src/runtime/delegate/parameter_cache/factory_mgr_base.h"
namespace mindspore {
namespace cache {
RET_COMMON_PRODUCT_REGISTRAR(std::string, cache::CacheAlgorithm, cache::LFUCacheAlgorithm, "lfu", LFUCacheAlgorithm);
LFUCacheAlgorithm::~LFUCacheAlgorithm() {
for (auto iter : key_table_) {
delete *(iter.second);
}
key_table_.clear();
frequency_table_.clear();
}
Status LFUCacheAlgorithm::Init(size_t cache_size, int min_host_index, int max_host_index) {
if (cache_size <= 0 || min_host_index < 0 || max_host_index <= 0) {
return kLiteParamInvalid;
}
cache_size_ = cache_size;
min_host_index_ = min_host_index;
max_host_index_ = max_host_index;
return kSuccess;
}
CacheNoe *LFUCacheAlgorithm::GetNode(int key) {
auto key_table_iter = key_table_.find(key);
if (key_table_iter == key_table_.end()) {
return nullptr;
}
auto node_iter = key_table_iter->second;
auto node = *node_iter;
auto node_list_iter = frequency_table_.find(node->frequency);
if (node_list_iter == frequency_table_.end()) {
return nullptr;
}
auto &node_list = node_list_iter->second;
node_list.erase(node_iter);
if (node_list.empty()) {
frequency_table_.erase(node_list_iter);
}
node->frequency += 1;
frequency_table_[node->frequency].emplace_front(node);
key_table_[key] = frequency_table_[node->frequency].begin();
return node;
}
int LFUCacheAlgorithm::Get(int key) {
auto node = GetNode(key);
if (node != nullptr) {
return node->value;
}
return -1;
}
void LFUCacheAlgorithm::Put(int key, int value) {
auto node = GetNode(key);
if (node != nullptr) {
node->value = value;
return;
}
if (cache_size_ == 0) {
return;
}
CacheNoe *add_node = nullptr;
if (key_table_.size() == cache_size_) {
add_node = frequency_table_.begin()->second.back();
key_table_.erase(add_node->key);
frequency_table_.begin()->second.pop_back();
if (frequency_table_.begin()->second.size() == 0) {
frequency_table_.erase(frequency_table_.begin()->first);
}
add_node->value = value;
add_node->key = key;
add_node->frequency = 1;
} else {
add_node = new CacheNoe(key, 1, value);
if (add_node == nullptr) {
return;
}
}
frequency_table_[1].emplace_front(add_node);
key_table_[key] = frequency_table_[1].begin();
}
void LFUCacheAlgorithm::GetHitNodesAndSwapIndex(const int *batch_ids, const size_t batch_ids_len, int *cache_index,
std::unordered_map<int, CacheNoe *> *hit_index_nodes,
std::unordered_map<int, std::vector<int>> *need_swap_map) {
// split the batch into hit ids and ids that need to be swapped in
for (size_t i = 0; i < batch_ids_len; i++) {
auto key = batch_ids[i];
if (key < min_host_index_ || key >= max_host_index_) {
cache_index[i] = -1;
// out of range
continue;
}
auto hit_iter = hit_index_nodes->find(key);
if (hit_iter != hit_index_nodes->end()) {
auto node = hit_iter->second;
node->frequency += 1;
cache_index[i] = node->value;
continue;
}
auto swap_iter = need_swap_map->find(key);
if (swap_iter != need_swap_map->end()) {
swap_iter->second.push_back(i);
continue;
}
auto node_iter_iter = key_table_.find(key);
if (node_iter_iter == key_table_.end()) {
(*need_swap_map)[key].push_back(i);
continue;
}
auto node_iter = node_iter_iter->second;
auto node = *node_iter;
auto node_list_iter = frequency_table_.find(node->frequency);
if (node_list_iter == frequency_table_.end()) {
continue;
}
auto &node_list = node_list_iter->second;
node_list.erase(node_iter);
if (node_list.empty()) {
frequency_table_.erase(node_list_iter);
}
// hit
node->frequency += 1;
cache_index[i] = node->value;
(*hit_index_nodes)[key] = node;
}
return;
}
std::list<CacheNoe *> LFUCacheAlgorithm::GetSwapNodes(const std::unordered_map<int, std::vector<int>> &need_swap_map) {
std::list<CacheNoe *> need_swap_nodes;
auto swap_size = need_swap_map.size();
while (swap_size > 0 && !frequency_table_.empty()) {
auto node_list_iter = frequency_table_.begin();
if (node_list_iter->second.size() > swap_size) {
auto iter = node_list_iter->second.begin();
std::advance(iter, swap_size);
need_swap_nodes.splice(need_swap_nodes.end(), node_list_iter->second, node_list_iter->second.begin(), iter);
swap_size = 0;
} else {
swap_size -= node_list_iter->second.size();
need_swap_nodes.splice(need_swap_nodes.end(), node_list_iter->second);
frequency_table_.erase(node_list_iter);
}
}
return need_swap_nodes;
}
Status LFUCacheAlgorithm::CheckCacheHit(const int *batch_ids, const size_t batch_ids_len, int *cache_index,
std::vector<int> *need_swap_indies,
std::vector<int> *need_swap_indies_cache_index) {
if (batch_ids == nullptr) {
MS_LOG(ERROR) << "batch_ids is nullptr";
return kLiteNullptr;
}
if (cache_index == nullptr) {
MS_LOG(ERROR) << "cache_index is nullptr";
return kLiteNullptr;
}
std::unordered_map<int, std::vector<int>> need_swap_map;
std::unordered_map<int, CacheNoe *> hit_index_nodes;
GetHitNodesAndSwapIndex(batch_ids, batch_ids_len, cache_index, &hit_index_nodes, &need_swap_map);
// pick need_swap_map.size() least frequently used nodes to evict
std::list<CacheNoe *> need_swap_nodes = GetSwapNodes(need_swap_map);
// reuse the evicted nodes for the new keys
{
if (need_swap_map.size() != need_swap_nodes.size()) {
MS_LOG(ERROR) << " need_swap_map.size() " << need_swap_map.size() << " != need_swap_nodes.size() "
<< need_swap_nodes.size();
return kLiteError;
}
need_swap_indies_cache_index->reserve(need_swap_map.size());
auto need_swap_map_iter = need_swap_map.begin();
for (auto iter = need_swap_nodes.begin();
iter != need_swap_nodes.end() && need_swap_map_iter != need_swap_map.end(); iter++, need_swap_map_iter++) {
auto node = *iter;
key_table_.erase(node->key);
node->key = need_swap_map_iter->first;
node->frequency = 1;
for (auto index : need_swap_map_iter->second) {
cache_index[index] = node->value;
}
need_swap_indies->push_back(need_swap_map_iter->first);
need_swap_indies_cache_index->push_back(node->value);
MS_LOG(INFO) << "device index " << node->value << ", for host index " << need_swap_map_iter->first;
key_table_[(*iter)->key] = iter;
}
auto node_list_iter = frequency_table_.begin();
if (node_list_iter != frequency_table_.end() && !node_list_iter->second.empty()) {
auto iter = node_list_iter->second.begin();
if ((*iter)->frequency == 1) {
node_list_iter->second.splice(node_list_iter->second.begin(), need_swap_nodes);
} else {
frequency_table_[1] = need_swap_nodes;
}
} else {
frequency_table_[1] = need_swap_nodes;
}
}
for (auto node_iter : hit_index_nodes) {
auto node = node_iter.second;
frequency_table_[node->frequency].emplace_front(node);
key_table_[node->key] = frequency_table_[node->frequency].begin();
}
return kSuccess;
}
} // namespace cache
} // namespace mindspore
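
An illustrative driver for the algorithm above, not part of this patch. The cache must be pre-populated with Put, as EmbeddingCache::SetHostCacheAddr does, before CheckCacheHit is called; ids outside [min_host_index, max_host_index) are reported as -1.

#include <vector>
#include "src/runtime/delegate/parameter_cache/lfu_cache.h"

void LfuExample() {
  mindspore::cache::LFUCacheAlgorithm lfu;
  // 4 device slots caching host ids drawn from [0, 100).
  if (lfu.Init(4, 0, 100) != mindspore::kSuccess) {
    return;
  }
  // Pre-populate the device slots, mirroring EmbeddingCache::SetHostCacheAddr.
  for (int slot = 0; slot < 4; ++slot) {
    lfu.Put(slot, slot);
  }
  std::vector<int> batch_ids = {1, 7, 1, 200};
  std::vector<int> cache_index(batch_ids.size(), -1);
  std::vector<int> need_swap_indies;
  std::vector<int> need_swap_indies_cache_index;
  auto ret = lfu.CheckCacheHit(batch_ids.data(), batch_ids.size(), cache_index.data(), &need_swap_indies,
                               &need_swap_indies_cache_index);
  // On success: id 1 hits, id 7 misses (it appears in need_swap_indies together
  // with the device slot it evicts), and id 200 is out of range so its
  // cache_index entry stays -1.
  (void)ret;
}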

View File

@ -0,0 +1,55 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LFU_CACHE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LFU_CACHE_H_
#include <map>
#include <unordered_map>
#include <list>
#include <vector>
#include "include/api/status.h"
#include "src/runtime/delegate/parameter_cache/cache_algorithm.h"
namespace mindspore {
namespace cache {
class LFUCacheAlgorithm : public CacheAlgorithm {
public:
LFUCacheAlgorithm() {}
~LFUCacheAlgorithm() override;
int Get(int key) override;
void Put(int key, int value) override;
Status Init(size_t cache_size, int min_host_index, int max_host_index) override;
Status CheckCacheHit(const int *batch_ids, const size_t batch_ids_len, int *cache_index,
std::vector<int> *need_swap_indies, std::vector<int> *need_swap_indies_cache_index) override;
private:
CacheNoe *GetNode(int key);
void GetHitNodesAndSwapIndex(const int *batch_ids, const size_t batch_ids_len, int *cache_index,
std::unordered_map<int, CacheNoe *> *hit_index_nodes,
std::unordered_map<int, std::vector<int>> *need_swap_map);
std::list<CacheNoe *> GetSwapNodes(const std::unordered_map<int, std::vector<int>> &need_swap_map);
std::unordered_map<int, std::list<CacheNoe *>::iterator> key_table_;
std::map<int, std::list<CacheNoe *>> frequency_table_;
size_t cache_size_{0};
int min_host_index_{0};
int max_host_index_{1};
};
} // namespace cache
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LFU_CACHE_H_

View File

@ -0,0 +1,148 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cmath>
#include <cstring>
#include <string>
#include <vector>
#include "src/runtime/delegate/parameter_cache/load_host_cache_model.h"
#include "src/common/log_adapter.h"
#include "src/common/common.h"
#include "include/errorcode.h"
#include "src/common/file_utils.h"
namespace {
constexpr size_t kGatherInputsSize = 3;
}
namespace mindspore {
namespace cache {
HostCacheModel::~HostCacheModel() {
if (cache_model_ != nullptr) {
delete cache_model_;
cache_model_ = nullptr;
}
}
MSTensor *SchemaTensorToMSTensor(lite::SchemaTensorWrapper *schema_tensor_wrapper,
mindspore::schema::Tensor *schema_tensor) {
std::vector<int64_t> shape;
for (size_t j = 0; j < schema_tensor->dims()->size(); j++) {
shape.push_back(schema_tensor->dims()->data()[j]);
}
std::string tensor_name;
if (schema_tensor->name() != nullptr) {
tensor_name = schema_tensor->name()->str();
}
return MSTensor::CreateRefTensor(tensor_name, (DataType)schema_tensor->dataType(), shape,
schema_tensor_wrapper->data(), schema_tensor_wrapper->length());
}
Status HostCacheModel::LoadCache(const std::string &model_path) {
cache_model_ = lite::LiteImportFromPath(model_path.c_str());
if (cache_model_ == nullptr) {
MS_LOG(ERROR) << "Import model failed";
return kLiteGraphFileError;
}
auto allTensors = cache_model_->graph_.all_tensors_;
for (auto node : cache_model_->graph_.all_nodes_) {
// only support embedding cache
if (node == nullptr || node->node_type_ != schema::PrimitiveType_Gather) {
continue;
}
auto input_index = node->input_indices_[0];
if (input_index >= allTensors.size()) {
MS_LOG(ERROR) << "invalid kernel input, input_index " << input_index << ",allTensors.size() "
<< allTensors.size();
return kLiteOutOfTensorRange;
}
auto schema_tensor_wrapper = cache_model_->GetSchemaTensor(input_index);
if (schema_tensor_wrapper == nullptr) {
MS_LOG(ERROR) << "invalid kernel input, input_index " << input_index;
return kLiteOutOfTensorRange;
}
auto schema_tensor = allTensors[input_index];
if (schema_tensor != nullptr && schema_tensor_wrapper->data() != nullptr) {
auto tensor = SchemaTensorToMSTensor(schema_tensor_wrapper, schema_tensor);
if (tensor == nullptr) {
return kLiteMemoryFailed;
}
cache_tensor_[tensor->Name()] = *tensor;
MS_LOG(INFO) << tensor->Name() << " is cache tensor, and the node is [" << node->name_ << "]";
delete tensor;
}
}
return kSuccess;
}
size_t GetVocabSize(kernel::Kernel *kernel) {
size_t vocab_size = 0;
auto cache_config = kernel->GetConfig(lite::kMSCache);
auto vocab_size_iter = cache_config.find(lite::kMSCacheVocabSize);
if (vocab_size_iter == cache_config.end()) {
return vocab_size;
}
auto vocab_size_opt = lite::GenericParseValue<size_t>(vocab_size_iter->second);
if (!vocab_size_opt.IsNone()) {
vocab_size = vocab_size_opt.Get();
}
return vocab_size;
}
Status HostCacheModel::LoadCache(DelegateModel<schema::Primitive> *model) {
KernelIter from, end;
for (KernelIter iter = model->BeginKernelIterator(); iter != model->EndKernelIterator(); iter++) {
kernel::Kernel *kernel = *iter;
// only support embedding cache
if (kernel->type() != schema::PrimitiveType_Gather) {
continue;
}
MS_ASSERT(kernel->inputs().size() == kGatherInputsSize);
auto tensor = kernel->inputs()[0];
if (tensor.Data() == nullptr) {
continue;
}
size_t vocab_size = GetVocabSize(kernel);
if (vocab_size == 0) {
continue;
}
cache_tensor_[tensor.Name()] = tensor;
}
return mindspore::kSuccess;
}
bool HostCacheModel::CheckIsCacheKernel(kernel::Kernel *kernel) {
if (GetHostCacheTensor(kernel) == nullptr) {
return false;
}
return true;
}
MSTensor HostCacheModel::GetHostCacheTensor(kernel::Kernel *kernel) {
if (kernel != nullptr && kernel->inputs().size() > 0) {
auto iter = cache_tensor_.find(kernel->inputs()[0].Name());
if (iter != cache_tensor_.end()) {
return iter->second;
}
}
return MSTensor(nullptr);
}
} // namespace cache
} // namespace mindspore

View File

@ -0,0 +1,48 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LOAD_HOST_CACHE_MODEL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LOAD_HOST_CACHE_MODEL_H_
#include <map>
#include <string>
#include "include/api/status.h"
#include "include/api/data_type.h"
#include "include/api/types.h"
#include "include/api/kernel.h"
#include "include/api/delegate.h"
#include "src/runtime/lite_model.h"
namespace mindspore {
namespace cache {
class HostCacheModel {
public:
HostCacheModel() = default;
~HostCacheModel();
Status LoadCache(const std::string &model_path);
Status LoadCache(DelegateModel<schema::Primitive> *model);
bool CheckIsCacheKernel(kernel::Kernel *kernel);
MSTensor GetHostCacheTensor(kernel::Kernel *kernel);
private:
std::map<std::string, MSTensor> cache_tensor_;
mindspore::lite::LiteModel *cache_model_{nullptr};
char *model_buf_{nullptr};
size_t model_size_;
};
} // namespace cache
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_PARAMETER_CACHE_LOAD_HOST_CACHE_MODEL_H_

View File

@ -0,0 +1,95 @@
include_directories(${TENSORRT_PATH}/include)
include_directories(${CUDA_PATH}/include)
include_directories(${CUDA_PATH})
include_directories(${CCSRC_DIR}/plugin/device/cpu/kernel)
include_directories(${CCSRC_DIR}/plugin/device/gpu/kernel/cuda_impl/cuda_ops)
if(DEFINED ENV{MS_ENABLE_CUDA_DISTRIBUTION})
set(MS_ENABLE_CUDA_DISTRIBUTION $ENV{MS_ENABLE_CUDA_DISTRIBUTION})
else()
set(MS_ENABLE_CUDA_DISTRIBUTION "off")
endif()
set(NCCL_MPI_SRC_STUB
${CMAKE_CURRENT_SOURCE_DIR}/distribution/distribution_collective.cc
${CMAKE_CURRENT_SOURCE_DIR}/distribution/distribution_base.cc
)
# nccl mpi
if(MS_ENABLE_CUDA_DISTRIBUTION STREQUAL "on")
message("enable cuda gpu distribution collective")
file(GLOB NCCL_MPI_SRC LIST_DIRECTORIES false
${CMAKE_CURRENT_SOURCE_DIR}/distribution/*.cc
${CCSRC_DIR}/plugin/device/gpu/hal/device/distribution/collective_wrapper.cc
${CCSRC_DIR}/plugin/device/gpu/hal/device/distribution/mpi_wrapper.cc
${CCSRC_DIR}/plugin/device/gpu/hal/device/distribution/nccl_wrapper.cc
)
list(REMOVE_ITEM NCCL_MPI_SRC ${NCCL_MPI_SRC_STUB})
add_compile_definitions(LITE_CUDA_DISTRIBUTION)
include(${TOP_DIR}/cmake/external_libs/ompi.cmake)
include(${TOP_DIR}/cmake/external_libs/nccl.cmake)
add_library(gpu_distribution_collective OBJECT ${NCCL_MPI_SRC})
add_library(mindspore::nccl ALIAS nccl::nccl)
add_library(mindspore::ompi ALIAS ompi::mpi)
target_link_libraries(gpu_distribution_collective PRIVATE mindspore::ompi mindspore::nccl)
else()
add_library(gpu_distribution_collective OBJECT ${NCCL_MPI_SRC_STUB})
endif()
add_dependencies(gpu_distribution_collective fbs_src)
file(GLOB TENSORRT_RUNTIME_SRC LIST_DIRECTORIES false
${CMAKE_CURRENT_SOURCE_DIR}/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/op/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/cuda_impl/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime/delegate/delegate_utils.cc
${CCSRC_DIR}/plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.cc
)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache)
set(TENSORRT_RUNTIME_SRC
${TENSORRT_RUNTIME_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/embedding_cache_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/load_host_cache_model.cc
${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/lfu_cache.cc
${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/embedding_cache.cc
${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/gpu/gpu_cache_mem.cc
)
link_libraries(${CUDA_LIB_PATH}/libcudnn.so)
link_libraries(${CUDA_LIB_PATH}/libnvrtc.so)
link_libraries(${CUDA_LIB_PATH}/libcublasLt.so)
add_library(libcudart SHARED IMPORTED)
set_target_properties(libcudart PROPERTIES IMPORTED_LOCATION ${CUDA_LIB_PATH}/libcudart.so)
add_library(libnvinfer SHARED IMPORTED)
set_target_properties(libnvinfer PROPERTIES IMPORTED_LOCATION ${TENSORRT_LIB_PATH}/libnvinfer.so)
add_library(libcublas SHARED IMPORTED)
set_target_properties(libcublas PROPERTIES IMPORTED_LOCATION ${CUDA_LIB_PATH}/libcublas.so)
add_library(tensorrt_kernel_mid OBJECT ${TENSORRT_RUNTIME_SRC})
add_dependencies(tensorrt_kernel_mid fbs_src)
target_link_libraries(
tensorrt_kernel_mid
libcudart
libcublas
libnvinfer
)
# cuda
find_package(CUDA)
file(GLOB_RECURSE CUDA_KERNEL_SRC
${CMAKE_CURRENT_SOURCE_DIR}/cuda_impl/*.cu
${CCSRC_DIR}/plugin/device/gpu/kernel/cuda_impl/cuda_ops/gather.cu
${CCSRC_DIR}/plugin/device/gpu/kernel/cuda_impl/cuda_ops/swish_impl.cu
)
set_source_files_properties(${CUDA_KERNEL_SRC} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fPIC")
SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-std=c++14;)
cuda_add_library(cuda_kernel_mid STATIC ${CUDA_KERNEL_SRC})

View File

@ -0,0 +1,56 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/cuda_impl/activation.cuh"
#include <stdio.h>
#include <math.h>
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
template <typename T>
__global__ void SigmoidKernel(const T *input1, T *output, int element_cnt) {
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
output[pos] = static_cast<T>(1) / (static_cast<T>(1) + exp(-input1[pos]));
}
}
template <typename T>
__global__ void GeluKernel(const T *input_addr, T *output_addr, int size) {
// formula:
// gelu(x) = 0.5 * x * (1.0 + tanh(y))
// tanh(y) = 2 / (1 + exp(-2y)) - 1
// y = sqrt(2/pi) * (x + 0.044715 * x^3)
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) {
float x = input_addr[pos];
float tanh_res = tanh(0.7978845608f * (x + 0.044715f * x * x * x));
output_addr[pos] = 0.5f * x * (1.0f + tanh_res);
}
}
template <typename T>
void Sigmoid(const T *input1, T *output, int element_cnt, cudaStream_t stream) {
SigmoidKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, output, element_cnt);
return;
}
template <typename T>
void Gelu(const T *input1, T *output, int element_cnt, cudaStream_t stream) {
GeluKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, output, element_cnt);
return;
}
template void Sigmoid(const float *input1, float *output, int element_cnt, cudaStream_t stream);
template void Gelu(const float *input1, float *output, int element_cnt, cudaStream_t stream);
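
An illustrative host-side sketch, not part of this patch, of driving the float Sigmoid wrapper above; RunSigmoid is a hypothetical name and error checking is omitted for brevity.

#include <cuda_runtime.h>
#include <vector>
#include "src/runtime/delegate/tensorrt/cuda_impl/activation.cuh"

void RunSigmoid(const std::vector<float> &host_in, std::vector<float> *host_out, cudaStream_t stream) {
  const int element_cnt = static_cast<int>(host_in.size());
  const size_t bytes = element_cnt * sizeof(float);
  float *device_in = nullptr;
  float *device_out = nullptr;
  cudaMalloc(&device_in, bytes);
  cudaMalloc(&device_out, bytes);
  cudaMemcpyAsync(device_in, host_in.data(), bytes, cudaMemcpyHostToDevice, stream);
  Sigmoid(device_in, device_out, element_cnt, stream);  // enqueues SigmoidKernel on `stream`
  host_out->resize(element_cnt);
  cudaMemcpyAsync(host_out->data(), device_out, bytes, cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);
  cudaFree(device_in);
  cudaFree(device_out);
}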

View File

@ -0,0 +1,26 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_ACTIVATION_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_ACTIVATION_H_
#include <cuda_runtime.h>
template <typename T>
void Sigmoid(const T *input1, T *output, int element_cnt, cudaStream_t stream);
template <typename T>
void Gelu(const T *input1, T *output, int element_cnt, cudaStream_t stream);
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_ACTIVATION_H_

View File

@ -0,0 +1,49 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/cuda_impl/cast.cuh"
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
// Generic cast
template <typename S, typename T>
__device__ __forceinline__ void CastBase(const S *input_addr, T *output_addr) {
*output_addr = static_cast<T>((*input_addr));
}
template <typename S, typename T>
__global__ void CastKernel(const int input_size, const S *input_addr, T *output_addr) {
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < input_size; pos += blockDim.x * gridDim.x) {
CastBase(input_addr + pos, output_addr + pos);
}
}
template <typename S, typename T>
void Cast(const int input_size, const S *input_addr, T *output_addr, cudaStream_t stream) {
CastKernel<<<GET_BLOCKS(input_size), GET_THREADS, 0, stream>>>(input_size, input_addr, output_addr);
}
template void Cast(const int input_size, const int8_t *input_addr, int8_t *output_addr, cudaStream_t stream);
template void Cast(const int input_size, const int8_t *input_addr, int32_t *output_addr, cudaStream_t stream);
template void Cast(const int input_size, const int8_t *input_addr, float *output_addr, cudaStream_t stream);
template void Cast(const int input_size, const int32_t *input_addr, int8_t *output_addr, cudaStream_t stream);
template void Cast(const int input_size, const int32_t *input_addr, int32_t *output_addr, cudaStream_t stream);
template void Cast(const int input_size, const int32_t *input_addr, float *output_addr, cudaStream_t stream);
template void Cast(const int input_size, const int32_t *input_addr, bool *output_addr, cudaStream_t stream);
template void Cast(const int input_size, const float *input_addr, int8_t *output_addr, cudaStream_t stream);
template void Cast(const int input_size, const float *input_addr, int32_t *output_addr, cudaStream_t stream);
template void Cast(const int input_size, const float *input_addr, float *output_addr, cudaStream_t stream);

View File

@ -0,0 +1,23 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CAST_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CAST_H_
#include <cuda_runtime.h>
template <typename S, typename T>
void Cast(const int input_size, const S *input_addr, T *output_addr, cudaStream_t stream);
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CAST_H_

View File

@ -0,0 +1,70 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/cuda_impl/cublas_utils.h"
namespace mindspore::lite {
void Cublas2DTranspose(const float *in_addr, float *out_addr, const int *params, cublasHandle_t cublas_handle) {
const int m = params[0];
const int n = params[1];
const float alpha = 1.0f;
const float beta = 0.0f;
CUBLAS_CHECK_VOID(
cublasSgeam(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, m, n, &alpha, in_addr, n, &beta, out_addr, m, out_addr, m));
}
void CublasMM1Batch(const void *a_addr, const void *b_addr, void *c_addr, const int *params,
const cublasOperation_t *operations, const cudaDataType *data_types, cublasHandle_t cublas_handle) {
const int m = params[0];
const int n = params[1];
const int k = params[2];
cublasOperation_t trans_a = operations[0];
cublasOperation_t trans_b = operations[1];
const int lda = (trans_a == CUBLAS_OP_N) ? k : m;
const int ldb = (trans_b == CUBLAS_OP_N) ? n : k;
const int ldc = n;
cudaDataType type_a = data_types[0];
cudaDataType type_b = data_types[1];
cudaDataType type_c = data_types[2];
cudaDataType compute_type = data_types[3];
const float alpha = 1.0f;
const float beta = 0.0f;
CUBLAS_CHECK_VOID(cublasGemmEx(cublas_handle, trans_b, trans_a, n, m, k, &alpha, b_addr, type_b, ldb, a_addr, type_a,
lda, &beta, c_addr, type_c, ldc, compute_type, CUBLAS_GEMM_DEFAULT_TENSOR_OP));
}
void CublasMMBatched(void **a_addrs, void **b_addrs, void **c_addrs, const int *params,
const cublasOperation_t *operations, const cudaDataType *data_types,
cublasHandle_t cublas_handle) {
cublasOperation_t trans_a = operations[0];
cublasOperation_t trans_b = operations[1];
const int m = params[0];
const int n = params[1];
const int k = params[2];
const int batch = params[3];
const int lda = (trans_a == CUBLAS_OP_N) ? k : m;
const int ldb = (trans_b == CUBLAS_OP_N) ? n : k;
const int ldc = n;
cudaDataType type_a = data_types[0];
cudaDataType type_b = data_types[1];
cudaDataType type_c = data_types[2];
cudaDataType compute_type = data_types[3];
const float alpha = 1.0f;
const float beta = 0.0f;
CUBLAS_CHECK_VOID(cublasGemmBatchedEx(cublas_handle, trans_b, trans_a, n, m, k, &alpha, b_addrs, type_b, ldb, a_addrs,
type_a, lda, &beta, c_addrs, type_c, ldc, batch, compute_type,
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
}
} // namespace mindspore::lite

View File

@ -0,0 +1,62 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUBLAS_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUBLAS_UTILS_H_
#include <cublas_v2.h>
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
#include "src/common/log_util.h"
// cublas API error checking
#define CUBLAS_CHECK_VOID(err) \
do { \
cublasStatus_t cublas_err = (err); \
if (cublas_err != CUBLAS_STATUS_SUCCESS) { \
MS_LOG(ERROR) << "cublas error " << cublas_err; \
return; \
} \
} while (0)
#define CUBLAS_CHECK(err) \
do { \
cublasStatus_t cublas_err = (err); \
if (cublas_err != CUBLAS_STATUS_SUCCESS) { \
MS_LOG(ERROR) << "cublas error " << cublas_err; \
return -1; \
} \
} while (0)
namespace mindspore::lite {
// a: m * n
// params order: m, n
void Cublas2DTranspose(const float *in_addr, float *out_addr, const int *params, cublasHandle_t cublas_handle);
// a: m * k, b: k * n, c: m * n
// params order: m, n, k
// operations order: trans_a, trans_b
// data_types: type_a, type_b, type_c, compute type
void CublasMM1Batch(const void *a_addr, const void *b_addr, void *c_addr, const int *params,
const cublasOperation_t *operations, const cudaDataType *data_types, cublasHandle_t cublas_handle);
// a: batch * m * k, b: batch * k * n, c: batch * m * n
// params order: m, n, k, batch
// operations order: trans_a, trans_b
// data_types: type_a, type_b, type_c, compute type
void CublasMMBatched(void **a_addrs, void **b_addrs, void **c_addrs, const int *params,
const cublasOperation_t *operations, const cudaDataType *data_types, cublasHandle_t cublas_handle);
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUBLAS_UTILS_H_
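To make the parameter-order conventions documented above concrete, here is a minimal, hypothetical usage sketch (not part of this commit; the shapes, variable names, and lack of error checking are assumptions for illustration) that multiplies a row-major 2x4 matrix by a 4x3 matrix in FP32 through CublasMM1Batch:

#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <vector>
#include "src/runtime/delegate/tensorrt/cuda_impl/cublas_utils.h"

int main() {
  const int m = 2, n = 3, k = 4;
  std::vector<float> a(m * k, 1.0f), b(k * n, 1.0f), c(m * n, 0.0f);
  float *a_dev = nullptr, *b_dev = nullptr, *c_dev = nullptr;
  cudaMalloc(&a_dev, a.size() * sizeof(float));
  cudaMalloc(&b_dev, b.size() * sizeof(float));
  cudaMalloc(&c_dev, c.size() * sizeof(float));
  cudaMemcpy(a_dev, a.data(), a.size() * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(b_dev, b.data(), b.size() * sizeof(float), cudaMemcpyHostToDevice);

  cublasHandle_t handle;
  cublasCreate(&handle);
  const int params[] = {m, n, k};                                     // params order: m, n, k
  const cublasOperation_t operations[] = {CUBLAS_OP_N, CUBLAS_OP_N};  // trans_a, trans_b
  const cudaDataType data_types[] = {CUDA_R_32F, CUDA_R_32F, CUDA_R_32F, CUDA_R_32F};  // a, b, c, compute
  mindspore::lite::CublasMM1Batch(a_dev, b_dev, c_dev, params, operations, data_types, handle);

  cudaMemcpy(c.data(), c_dev, c.size() * sizeof(float), cudaMemcpyDeviceToHost);
  // With all-ones inputs every element of c should come back as k (4.0f).
  cublasDestroy(handle);
  cudaFree(a_dev);
  cudaFree(b_dev);
  cudaFree(c_dev);
  return 0;
}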

View File

@ -0,0 +1,48 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
#include <cmath>
#include "src/common/log_util.h"
CudaHelper &CudaHelper::GetInstance() {
static CudaHelper instance;
return instance;
}
int CudaHelper::GetThreadNum() const { return threads_per_block_; }
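// Round the requested block size up to a whole number of warps (a multiple of 32) and cap it at the device limit.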
int CudaHelper::GetThreadNum(const int block_size) const {
return std::min(threads_per_block_, ((block_size - 1) / 32 + 1) * 32);
}
int CudaHelper::GetBlocksNum(const int total_threads) const {
return std::min(((total_threads - 1) / threads_per_block_) + 1, max_blocks_);
}
int CudaHelper::GetBlocksNum(const int total_threads, const int block_size) const {
int valid_block_size = std::min(block_size, threads_per_block_);
if (valid_block_size == 0) {
MS_LOG(ERROR) << "invalid input of block_size: " << block_size;
return 0;
}
return std::min(((total_threads - 1) / valid_block_size) + 1, max_blocks_);
}
CudaHelper::CudaHelper() {
int device_id = 0;
(void)cudaGetDevice(&device_id);
cudaDeviceProp prop;
(void)cudaGetDeviceProperties(&prop, device_id);
threads_per_block_ = prop.maxThreadsPerBlock;
max_blocks_ = prop.multiProcessorCount;
}

View File

@ -0,0 +1,63 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDA_HELPER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDA_HELPER_H_
#include <cuda_runtime.h>
#include <algorithm>
class CudaHelper {
public:
int GetThreadNum() const;
int GetThreadNum(const int block_size) const;
int GetBlocksNum(const int total_threads) const;
int GetBlocksNum(const int total_threads, const int block_size) const;
static CudaHelper &GetInstance();
private:
CudaHelper();
~CudaHelper() = default;
CudaHelper(const CudaHelper &) = delete;
CudaHelper &operator=(const CudaHelper &) = delete;
int max_blocks_;
int threads_per_block_;
};
#define GET_BLOCKS(total_threads) CudaHelper::GetInstance().GetBlocksNum(total_threads)
#define GET_BLOCKS_CAL(total_threads, block_size) CudaHelper::GetInstance().GetBlocksNum(total_threads, block_size)
#define GET_THREADS CudaHelper::GetInstance().GetThreadNum()
#define GET_THREADS_CAL(block_size) CudaHelper::GetInstance().GetThreadNum(block_size)
#define CUDA_CHECK(ret) \
do { \
cudaError_t cuda_ret = (ret); \
if ((cuda_ret) != cudaSuccess) { \
return -1; \
} \
} while (0)
#define CUDA_CHECK_VOID(ret) \
do { \
cudaError_t cuda_ret = (ret); \
if ((cuda_ret) != cudaSuccess) { \
return; \
} \
} while (0)
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDA_HELPER_H_

View File

@ -0,0 +1,41 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/cuda_impl/cudnn_utils.h"
#include <unordered_map>
namespace mindspore::lite {
cudnnDataType_t ConvertCudnnDataType(nvinfer1::DataType trt_datatype) {
std::unordered_map<nvinfer1::DataType, cudnnDataType_t> data_types = {{nvinfer1::DataType::kFLOAT, CUDNN_DATA_FLOAT},
{nvinfer1::DataType::kHALF, CUDNN_DATA_HALF},
{nvinfer1::DataType::kINT32, CUDNN_DATA_INT32},
{nvinfer1::DataType::kINT8, CUDNN_DATA_INT8}};
if (data_types.find(trt_datatype) != data_types.end()) {
return data_types[trt_datatype];
} else {
MS_LOG(ERROR) << "invalid datatype for cudnn: " << static_cast<int>(trt_datatype);
}
return CUDNN_DATA_FLOAT;
}
int CudnnActivation(cudnnHandle_t handle, cudnnActivationDescriptor_t activation_desc,
const cudnnTensorDescriptor_t x_dsc, const void *x, const cudnnTensorDescriptor_t y_dsc, void *y) {
float alpha = 1.0f;
float beta = 0.0f;
CUDNN_CHECK(cudnnActivationForward(handle, activation_desc, &alpha, x_dsc, x, &beta, y_dsc, y));
return 0;
}
} // namespace mindspore::lite

View File

@ -0,0 +1,48 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDNN_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDNN_UTILS_H_
#include <cudnn.h>
#include <NvInfer.h>
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
#include "src/common/log_util.h"
#define CUDNN_CHECK_VOID(err) \
do { \
cudnnStatus_t cudnn_err = (err); \
if (cudnn_err != CUDNN_STATUS_SUCCESS) { \
MS_LOG(ERROR) << "cudnn error " << cudnnGetErrorString(cudnn_err); \
return; \
} \
} while (0)
#define CUDNN_CHECK(err) \
do { \
cudnnStatus_t cudnn_err = (err); \
if (cudnn_err != CUDNN_STATUS_SUCCESS) { \
MS_LOG(ERROR) << "cudnn error " << cudnnGetErrorString(cudnn_err); \
return -1; \
} \
} while (0)
namespace mindspore::lite {
cudnnDataType_t ConvertCudnnDataType(nvinfer1::DataType trt_datatype);
int CudnnActivation(cudnnHandle_t handle, cudnnActivationDescriptor_t activation_desc,
const cudnnTensorDescriptor_t x_dsc, const void *x, const cudnnTensorDescriptor_t y_dsc, void *y);
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_CUDNN_UTILS_H_
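A rough illustration (a hypothetical sketch, not code from this commit; RunReluExample and its parameters are invented for the example) of how CudnnActivation and the CUDNN_CHECK macro are intended to be driven, here applying a ReLU to an NCHW float tensor already resident on the device:

#include <cuda_runtime.h>
#include <cudnn.h>
#include "src/runtime/delegate/tensorrt/cuda_impl/cudnn_utils.h"

int RunReluExample(const float *x_dev, float *y_dev, int n, int c, int h, int w, cudaStream_t stream) {
  cudnnHandle_t handle;
  CUDNN_CHECK(cudnnCreate(&handle));
  CUDNN_CHECK(cudnnSetStream(handle, stream));

  // One descriptor is reused for input and output, mirroring ActivationOptPlugin.
  cudnnTensorDescriptor_t desc;
  CUDNN_CHECK(cudnnCreateTensorDescriptor(&desc));
  CUDNN_CHECK(cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, c, h, w));

  // Describe the activation: ReLU, no NaN propagation, coefficient unused for this mode.
  cudnnActivationDescriptor_t act_desc;
  CUDNN_CHECK(cudnnCreateActivationDescriptor(&act_desc));
  CUDNN_CHECK(cudnnSetActivationDescriptor(act_desc, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.0));

  int ret = mindspore::lite::CudnnActivation(handle, act_desc, desc, x_dev, desc, y_dev);

  CUDNN_CHECK(cudnnDestroyActivationDescriptor(act_desc));
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(desc));
  CUDNN_CHECK(cudnnDestroy(handle));
  return ret;
}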

View File

@ -0,0 +1,35 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/cuda_impl/equal.cuh"
#include <stdio.h>
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
template <typename T>
__global__ void EqualKernel(const T *input1, const T *input2, T *output, int element_cnt) {
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
output[pos] = (input1[pos] - input2[pos] < 1e-6 && input1[pos] - input2[pos] > -1e-6);
}
}
template <typename T>
void Equal(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
EqualKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
return;
}
template void Equal(const float *input1, const float *input2, float *output, int element_cnt, cudaStream_t stream);
template void Equal(const int *input1, const int *input2, int *output, int element_cnt, cudaStream_t stream);

View File

@ -0,0 +1,23 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_EQUAL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_EQUAL_H_
#include <cuda_runtime.h>
template <typename T>
void Equal(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_EQUAL_H_

View File

@ -0,0 +1,64 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/cuda_impl/hash.cuh"
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
template <typename T>
__global__ void HashSwapOut(const T *hash_table, T *swap_out_value, const int *swap_out_index, const int index_size,
const int hash_dim) {
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < index_size; i += blockDim.x * gridDim.x) {
int hash_index = swap_out_index[i];
for (int j = 0; j < hash_dim; j++) {
swap_out_value[i * hash_dim + j] = hash_table[hash_index * hash_dim + j];
}
}
return;
}
template <typename T>
__global__ void HashSwapIn(T *hash_table, const T *swap_in_value, const int *swap_in_index, const int index_size,
const int hash_dim) {
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < index_size; i += blockDim.x * gridDim.x) {
int hash_index = swap_in_index[i];
for (int j = 0; j < hash_dim; j++) {
hash_table[hash_index * hash_dim + j] = swap_in_value[i * hash_dim + j];
}
}
return;
}
template <typename T>
void DoHashSwapOut(const T *hash_table, T *swap_out_value, const int *swap_out_index, const int index_size,
const int hash_dim, cudaStream_t cuda_stream) {
HashSwapOut<<<GET_BLOCKS(index_size), GET_THREADS, 0, cuda_stream>>>(hash_table, swap_out_value, swap_out_index,
index_size, hash_dim);
return;
}
template <typename T>
void DoHashSwapIn(T *hash_table, const T *swap_in_value, const int *swap_in_index, const int index_size,
const int hash_dim, cudaStream_t cuda_stream) {
HashSwapIn<<<GET_BLOCKS(index_size), GET_THREADS, 0, cuda_stream>>>(hash_table, swap_in_value, swap_in_index,
index_size, hash_dim);
return;
}
template void DoHashSwapOut<float>(const float *hash_table, float *swap_out_value, const int *swap_out_index,
const int index_size, const int hash_dim, cudaStream_t cuda_stream);
template void DoHashSwapIn<float>(float *hash_table, const float *swap_in_value, const int *swap_in_index,
const int index_size, const int hash_dim, cudaStream_t cuda_stream);

View File

@ -0,0 +1,27 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_HASH_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_HASH_H_
#include <cuda_runtime.h>
template <typename T>
void DoHashSwapOut(const T *hash_table, T *swap_out_value, const int *swap_out_index, const int index_size,
const int hash_dim, cudaStream_t cuda_stream);
template <typename T>
void DoHashSwapIn(T *hash_table, const T *swap_in_value, const int *swap_in_index, const int index_size,
const int hash_dim, cudaStream_t cuda_stream);
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_HASH_H_

View File

@ -0,0 +1,63 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/cuda_impl/logical.cuh"
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
template <typename T>
__global__ void LogicalNotKernel(const T *input1, T *output, int element_cnt) {
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
output[pos] = static_cast<T>(input1[pos] == 0);
}
}
template <typename T>
__global__ void LogicalAndKernel(const T *input_addr1, const T *input_addr2, T *output, int size) {
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) {
output[pos] = input_addr1[pos] * input_addr2[pos];
}
}
template <typename T>
__global__ void LogicalOrKernel(const T *input_addr1, const T *input_addr2, T *output, int size) {
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) {
T sum = input_addr1[pos] + input_addr2[pos];
output[pos] = static_cast<T>(sum > 0);
}
}
template <typename T>
void LogicalNot(const T *input1, T *output, int element_cnt, cudaStream_t stream) {
LogicalNotKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, output, element_cnt);
}
template <typename T>
void LogicalAnd(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
LogicalAndKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
}
template <typename T>
void LogicalOr(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
LogicalOrKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
}
template void LogicalNot(const int32_t *input1, int32_t *output, int element_cnt, cudaStream_t stream);
template void LogicalAnd(const int32_t *input1, const int32_t *input2, int32_t *output, int element_cnt,
cudaStream_t stream);
template void LogicalOr(const int32_t *input1, const int32_t *input2, int32_t *output, int element_cnt,
cudaStream_t stream);

View File

@ -0,0 +1,29 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_LOGICAL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_LOGICAL_H_
#include <cuda_runtime.h>
template <typename T>
void LogicalAnd(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
template <typename T>
void LogicalOr(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
template <typename T>
void LogicalNot(const T *input1, T *output, int element_cnt, cudaStream_t stream);
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_LOGICAL_H_

View File

@ -0,0 +1,98 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/cuda_impl/normalize.cuh"
#include <stdio.h>
#include <math.h>
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/utils.cuh"
template <typename T>
__global__ void NormalizeKernel(const T *input, const T *gamma, const T *beta, T *output, size_t n, float epsilon,
int dim_before_axis) {
const int tid = threadIdx.x;
const int bid = blockIdx.x;
const int block_loop = (dim_before_axis - 1) / gridDim.x + 1;
const int element_cnt = dim_before_axis * n;
__shared__ float s_mean[2048];
__shared__ float s_variance[2048];
float sum = 0.0f;
float variance = 0.0f;
for (int block = 0; block < block_loop; block++) {
float local_sum = 0.0f;
int mean_index = bid + block * gridDim.x;
int num_index = bid * n + block * gridDim.x * blockDim.x;
for (int i = tid; i < n; i += blockDim.x) {
if (num_index + i >= element_cnt) {
break;
}
local_sum += static_cast<float>(input[num_index + i]);
}
sum = blockReduceSum(local_sum);
if (tid == 0) {
s_mean[mean_index] = sum / n;
}
}
__syncthreads();
for (int block = 0; block < block_loop; block++) {
float local_var_sum = 0.0f;
int var_index = bid + block * gridDim.x;
int num_index = bid * n + block * gridDim.x * blockDim.x;
for (int i = tid; i < n; i += blockDim.x) {
if (num_index + i >= element_cnt) {
break;
}
float diff = static_cast<float>(input[num_index + i]) - s_mean[var_index];
local_var_sum += diff * diff;
}
variance = blockReduceSum(local_var_sum);
if (tid == 0) {
s_variance[var_index] = rsqrtf(variance / n + epsilon);
}
}
__syncthreads();
for (int block = 0; block < block_loop; block++) {
int var_index = bid + block * gridDim.x;
int num_index = bid * n + block * gridDim.x * blockDim.x;
for (int i = tid; i < n; i += blockDim.x) {
if (num_index + i >= element_cnt) {
break;
}
float beta_val = (beta == nullptr) ? 0.0f : static_cast<float>(beta[i]);
output[num_index + i] =
static_cast<T>(((static_cast<float>(input[num_index + i]) - s_mean[var_index]) * s_variance[var_index]) *
static_cast<float>(gamma[i]) +
beta_val);
}
}
}
template <typename T>
void Normalize(const T *input, const T *gamma, const T *beta, T *output, size_t dim_at_axis, float epsilon,
int element_cnt, cudaStream_t stream) {
int thread_num = GET_THREADS_CAL(dim_at_axis);
int block_num = GET_BLOCKS_CAL(element_cnt, thread_num);
int dim_before_axis = element_cnt / dim_at_axis;
NormalizeKernel<<<block_num, thread_num, 0, stream>>>(input, gamma, beta, output, dim_at_axis, epsilon,
dim_before_axis);
return;
}
template void Normalize(const float *input, const float *gamma, const float *beta, float *output, size_t dim_at_axis,
float epsilon, int element_cnt, cudaStream_t stream);
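NormalizeKernel above is effectively a layer-normalization kernel: each slice of n = dim_at_axis contiguous elements is reduced to a mean and variance with the block-wide reductions from utils.cuh, then normalized and scaled. In formula form (with beta_i taken as 0 when beta is null):
\[
\mu = \frac{1}{n}\sum_{i=1}^{n} x_i,\qquad
\sigma^{2} = \frac{1}{n}\sum_{i=1}^{n}\bigl(x_i-\mu\bigr)^{2},\qquad
y_i = \gamma_i\,\frac{x_i-\mu}{\sqrt{\sigma^{2}+\epsilon}} + \beta_i .
\]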

View File

@ -0,0 +1,24 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_NORMALIZE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_NORMALIZE_H_
#include <cuda_runtime.h>
template <typename T>
void Normalize(const T *input, const T *gamma, const T *beta, T *output, size_t dim_at_axis, float epsilon,
int element_cnt, cudaStream_t stream);
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_CUDA_IMPL_NORMALIZE_H_

View File

@ -0,0 +1,41 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cuda_runtime.h>
#include <curand_kernel.h>
#define FINAL_MASK 0xffffffff
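// Warp-level butterfly reduction: each __shfl_xor_sync step exchanges partial sums between lanes whose indices
// differ by mask, so after five halvings every lane of the warp holds the warp-wide sum.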
template <typename T>
__device__ T warpedReduceSum(T val) {
#pragma unroll
for (int mask = 16; mask > 0; mask >>= 1) {
val += __shfl_xor_sync(FINAL_MASK, val, mask, 32);
}
return val;
}
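// Block-level sum: reduce within each warp, stage one partial result per warp in shared memory, then have the
// first warp reduce those partials; the lanes of warp 0 end up holding the block-wide sum (thread 0 consumes it).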
template <typename T>
__device__ T blockReduceSum(T val) {
static __shared__ T shared[32];
int warped = threadIdx.x & 0x1f;
val = warpedReduceSum<T>(val);
if (warped == 0) shared[threadIdx.x >> 5] = val;
__syncthreads();
val = (threadIdx.x < (blockDim.x / 32.f)) ? shared[warped] : static_cast<T>(0.0);
val = warpedReduceSum<T>(val);
return val;
}

View File

@ -0,0 +1,23 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/distribution/distribution_base.h"
namespace mindspore::lite {
int GetGPUGroupSize() { return 1; }
int GetRankID() { return 0; }
} // namespace mindspore::lite

View File

@ -0,0 +1,31 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_BASE_H_
#include <string>
#include "src/common/log_adapter.h"
#include "include/errorcode.h"
namespace mindspore::lite {
constexpr char NCCL_WORLD_GROUP[] = "nccl_world_group";
int GetGPUGroupSize();
int GetRankID();
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_BASE_H_

View File

@ -0,0 +1,28 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/distribution/distribution_base.h"
#include <unistd.h>
#include <thread>
#include <string>
#include "plugin/device/gpu/hal/device/distribution/collective_wrapper.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
int GetGPUGroupSize() { return GetGroupSize(NCCL_WORLD_GROUP); }
int GetRankID() { return GetRankIDByGroup(NCCL_WORLD_GROUP); }
} // namespace mindspore::lite

View File

@ -0,0 +1,38 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/distribution/distribution_collective.h"
namespace mindspore::lite {
DistributionCollective::DistributionCollective() {}
DistributionCollective &DistributionCollective::instance() {
static DistributionCollective instance;
return instance;
}
int DistributionCollective::ReduceScatterWrapper(const void *input_addr, void *output_addr, size_t count,
nvinfer1::DataType data_type, schema::ReduceMode reduce_type,
cudaStream_t stream, const std::string &group) {
return RET_OK;
}
int DistributionCollective::AllGatherWrapper(const void *input_addr, void *output_addr, size_t count,
nvinfer1::DataType data_type, cudaStream_t stream,
const std::string &group_name) {
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -0,0 +1,45 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_COLLECTIVE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_COLLECTIVE_H_
#include <string>
#include "NvInfer.h"
#include "schema/ops_types_generated.h"
#include "src/runtime/delegate/tensorrt/distribution/distribution_base.h"
namespace mindspore::lite {
class DistributionCollective {
public:
DistributionCollective(DistributionCollective const &) = delete;
DistributionCollective &operator=(const DistributionCollective &) = delete;
static DistributionCollective &instance();
int ReduceScatterWrapper(const void *input_addr, void *output_addr, size_t count, nvinfer1::DataType data_type,
schema::ReduceMode reduce_type, cudaStream_t stream, const std::string &group);
int AllGatherWrapper(const void *input_addr, void *output_addr, size_t count, nvinfer1::DataType data_type,
cudaStream_t stream, const std::string &group_name);
private:
DistributionCollective();
~DistributionCollective() = default;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_COLLECTIVE_H_

View File

@ -0,0 +1,72 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/distribution/distribution_collective.h"
#include <unistd.h>
#include <thread>
#include <string>
#include "plugin/device/gpu/hal/device/distribution/collective_wrapper.h"
#include "src/runtime/delegate/tensorrt/distribution/distribution_utils.h"
#include "src/runtime/delegate/tensorrt/distribution/distribution_base.h"
namespace mindspore::lite {
DistributionCollective::DistributionCollective() {
InitMPI();
InitNCCLComm();
}
DistributionCollective &DistributionCollective::instance() {
static DistributionCollective instance;
return instance;
}
int DistributionCollective::ReduceScatterWrapper(const void *input_addr, void *output_addr, size_t count,
nvinfer1::DataType data_type, schema::ReduceMode reduce_type,
cudaStream_t stream, const std::string &group) {
int rank_id = GetRankID();
MS_LOG(DEBUG) << "ReduceScatter on rank: " << rank_id;
ncclResult_t ret = ReduceScatter(input_addr, output_addr, count, ConvertNCCLDataType(data_type),
ConvertNCCLReduceMode(reduce_type), stream, group);
if (ret != ncclSuccess) {
MS_LOG(ERROR) << "ReduceScatter failed: " << static_cast<int>(ret);
return RET_ERROR;
}
auto cuda_ret = cudaStreamSynchronize(stream);
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "cudaStreamSynchronize failed: " << static_cast<int>(cuda_ret);
return RET_ERROR;
}
return RET_OK;
}
int DistributionCollective::AllGatherWrapper(const void *input_addr, void *output_addr, size_t count,
nvinfer1::DataType data_type, cudaStream_t stream,
const std::string &group_name) {
int rank_id = GetRankID();
MS_LOG(DEBUG) << "AllGather on rank: " << rank_id;
ncclResult_t ret = AllGather(input_addr, output_addr, count, ConvertNCCLDataType(data_type), stream, group_name);
if (ret != ncclSuccess) {
MS_LOG(ERROR) << "AllGather failed: " << static_cast<int>(ret);
return RET_ERROR;
}
auto cuda_ret = cudaStreamSynchronize(stream);
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "cudaStreamSynchronize failed: " << static_cast<int>(cuda_ret);
return RET_ERROR;
}
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -0,0 +1,58 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/distribution/distribution_utils.h"
#include <unordered_map>
#include "src/common/log_adapter.h"
namespace mindspore::lite {
ncclDataType_t ConvertNCCLDataType(nvinfer1::DataType type_id) {
std::unordered_map<nvinfer1::DataType, ncclDataType_t> data_type_map = {
{nvinfer1::DataType::kINT8, ncclInt8},
{nvinfer1::DataType::kINT32, ncclInt32},
{nvinfer1::DataType::kFLOAT, ncclFloat32},
{nvinfer1::DataType::kHALF, ncclHalf},
};
auto iter = data_type_map.find(type_id);
ncclDataType_t data_type;
if (iter != data_type_map.end()) {
data_type = iter->second;
} else {
data_type = ncclFloat32;
MS_LOG(WARNING) << "invalid data_type for NCCL, need check: " << static_cast<int>(type_id);
}
return data_type;
}
ncclRedOp_t ConvertNCCLReduceMode(schema::ReduceMode mode) {
std::unordered_map<schema::ReduceMode, ncclRedOp_t> reduce_ops_ = {
// {schema::ReduceMode::ReduceMode_ReduceMean, ncclAvg} requires a newer NCCL version, so mean is not registered here.
{schema::ReduceMode::ReduceMode_ReduceMax, ncclMax},
{schema::ReduceMode::ReduceMode_ReduceMin, ncclMin},
{schema::ReduceMode::ReduceMode_ReduceProd, ncclProd},
{schema::ReduceMode::ReduceMode_ReduceSum, ncclSum},
};
auto iter = reduce_ops_.find(mode);
ncclRedOp_t nccl_mode;
if (iter != reduce_ops_.end()) {
nccl_mode = iter->second;
} else {
nccl_mode = ncclSum;
MS_LOG(WARNING) << "invalid reduce for NCCL, need check: " << static_cast<int>(mode);
}
return nccl_mode;
}
} // namespace mindspore::lite

View File

@ -0,0 +1,32 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_UTILS_H_
#include <nccl.h>
#include "include/errorcode.h"
#include "NvInfer.h"
#include "schema/ops_types_generated.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
namespace mindspore::lite {
ncclDataType_t ConvertNCCLDataType(nvinfer1::DataType type_id);
ncclRedOp_t ConvertNCCLReduceMode(schema::ReduceMode mode);
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_DISTRIBUTION_DISTRIBUTION_UTILS_H_

View File

@ -0,0 +1,116 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <vector>
#include <functional>
#include <unordered_map>
#include <algorithm>
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "NvInferRuntimeCommon.h"
#include "src/runtime/delegate/tensorrt/op/activation_opt_plugin.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/activation.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/swish_impl.cuh"
namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(ActivationOptPluginCreater);
template class TensorRTPluginCreater<ActivationOptPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int ActivationOptPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
return RunCudaActivation(inputDesc, inputs, outputs, stream);
}
bool ActivationOptPlugin::needResize(const int *current_dims, const int *last_dims) {
for (int i = 0; i < infer_dims_cnt_; i++) {
if (current_dims[i] != last_dims[i]) {
return true;
}
}
return false;
}
int ActivationOptPlugin::RunCuDNNActivation(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
if (needResize(infer_dims_, inputDesc[0].dims.d)) {
if (input_desc_ != nullptr) {
CUDNN_CHECK(cudnnDestroyTensorDescriptor(input_desc_));
input_desc_ = nullptr;
}
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc_));
for (int i = 0; i < inputDesc[0].dims.nbDims; i++) {
infer_dims_[i] = inputDesc[0].dims.d[i];
}
CUDNN_CHECK(cudnnSetTensorNdDescriptor(input_desc_, ConvertCudnnDataType(inputDesc[0].type), infer_dims_cnt_,
infer_dims_, infer_stride_));
}
CHECK_NULL_RETURN(cudnn_handle_);
CHECK_NULL_RETURN(activation_desc_);
CHECK_NULL_RETURN(input_desc_);
CUDNN_CHECK(cudnnSetStream(cudnn_handle_, stream));
auto ret = CudnnActivation(cudnn_handle_, activation_desc_, input_desc_, inputs[0], input_desc_, outputs[0]);
if (ret != RET_OK) {
MS_LOG(ERROR) << "cudnn activation func call failed " << layer_name_;
return ret;
}
return RET_OK;
}
int ActivationOptPlugin::RunCudaActivation(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
switch (activation_type_) {
case (schema::ActivationType::ActivationType_SIGMOID): {
Sigmoid(static_cast<const float *>(inputs[0]), static_cast<float *>(outputs[0]), GetDimsVolume(inputDesc[0].dims),
stream);
break;
}
case (schema::ActivationType::ActivationType_GELU): {
Gelu(static_cast<const float *>(inputs[0]), static_cast<float *>(outputs[0]), GetDimsVolume(inputDesc[0].dims),
stream);
break;
}
case (schema::ActivationType::ActivationType_SWISH): {
CalSwish(GetDimsVolume(inputDesc[0].dims), static_cast<const float *>(inputs[0]),
static_cast<float *>(outputs[0]), stream, device_id_);
break;
}
default: {
MS_LOG(ERROR) << "invalid activation type: " << static_cast<int>(activation_type_);
return RET_ERROR;
}
}
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *ActivationOptPlugin::clone() const noexcept {
auto *plugin = new ActivationOptPlugin(*this);
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
size_t ActivationOptPlugin::getSerializationSize() const noexcept { return sizeof(schema::ActivationType); }
void ActivationOptPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &activation_type_, sizeof(schema::ActivationType));
}
} // namespace mindspore::lite

View File

@ -0,0 +1,72 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_OPT_PLUGIN_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_OPT_PLUGIN_H_
#include <string>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/cudnn_utils.h"
namespace mindspore::lite {
constexpr const char *ACTIVATION_OPT_PLUGIN_NAME{"ActivationOptPlugin"};
class ActivationOptPlugin : public TensorRTPlugin {
public:
ActivationOptPlugin(const std::string name, schema::ActivationType activation_type, uint32_t device_id)
: TensorRTPlugin(name, std::string(ACTIVATION_OPT_PLUGIN_NAME), device_id), activation_type_(activation_type) {}
ActivationOptPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(ACTIVATION_OPT_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
activation_type_ = static_cast<const schema::ActivationType *>(fields[0].data)[0];
}
ActivationOptPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(ACTIVATION_OPT_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &activation_type_, sizeof(schema::ActivationType));
}
ActivationOptPlugin() = delete;
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void *buffer) const noexcept override;
private:
bool needResize(const int *current_dims, const int *last_dims);
int RunCudaActivation(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
cudaStream_t stream);
int RunCuDNNActivation(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
cudaStream_t stream);
const std::string layer_name_;
std::string name_space_;
schema::ActivationType activation_type_;
cudnnHandle_t cudnn_handle_{nullptr};
cudnnActivationDescriptor_t activation_desc_{nullptr};
cudnnTensorDescriptor_t input_desc_{nullptr};
int infer_dims_[5]{1, 1, 1, 1, 1};
int infer_stride_[5]{1, 1, 1, 1, 1};
int infer_dims_cnt_{0};
};
class ActivationOptPluginCreater : public TensorRTPluginCreater<ActivationOptPlugin> {
public:
ActivationOptPluginCreater() : TensorRTPluginCreater(std::string(ACTIVATION_OPT_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_OPT_PLUGIN_H_

View File

@ -0,0 +1,153 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
#include <cfloat>
#include <memory>
#include <unordered_set>
#include "src/runtime/delegate/tensorrt/op/cast_tensorrt.h"
#include "src/runtime/delegate/tensorrt/op/activation_opt_plugin.h"
namespace mindspore::lite {
namespace {
bool HasCustomActivationPlugin(schema::ActivationType type) {
std::unordered_set<schema::ActivationType> plugin_activation = {schema::ActivationType::ActivationType_SIGMOID,
schema::ActivationType::ActivationType_GELU,
schema::ActivationType::ActivationType_SWISH};
return plugin_activation.find(type) != plugin_activation.end();
}
} // namespace
int ActivationTensorRT::IsSupport(const schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
auto activation_op = this->op_primitive_->value_as_Activation();
if (activation_op == nullptr) {
MS_LOG(ERROR) << "op convert failed";
return RET_ERROR;
}
auto activation_params_opt = TryConvertActivationType(activation_op->activation_type());
bool has_custom_plugin = HasCustomActivationPlugin(activation_op->activation_type());
if (!activation_params_opt && !has_custom_plugin) {
MS_LOG(ERROR) << "Unsupported op action type for TensorRT: " << activation_op->activation_type();
return RET_ERROR;
}
return RET_OK;
}
int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx->network() == nullptr) {
MS_LOG(ERROR) << "network is invalid";
return RET_ERROR;
}
auto activation_op = this->op_primitive_->value_as_Activation();
if (activation_op == nullptr) {
MS_LOG(ERROR) << "op convert failed";
return RET_ERROR;
}
float alpha = activation_op->alpha();
nvinfer1::ITensor *activation_input = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getType() == nvinfer1::DataType::kINT32) {
activation_input =
TRTTensorCast(ctx, tensorrt_in_tensors_[0].trt_tensor_, nvinfer1::DataType::kFLOAT, op_name_ + "_cast_in");
}
auto activation_layer =
ActivationTensorRT::AddActivation(ctx, activation_op->activation_type(), alpha,
std::isfinite(activation_op->min_val()) ? activation_op->min_val() : FLT_MIN,
std::isfinite(activation_op->max_val()) ? activation_op->max_val() : FLT_MAX,
activation_input, device_id_, quant_type_);
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "add activation op failed for TensorRT.";
return RET_ERROR;
}
activation_layer->setName(op_name_.c_str());
// cast to origin type
nvinfer1::ITensor *out_tensor = activation_layer->getOutput(0);
if (out_tensor->getType() != ConvertDataType(out_tensors_[0].DataType())) {
out_tensor = TRTTensorCast(ctx, activation_layer->getOutput(0), ConvertDataType(out_tensors_[0].DataType()),
op_name_ + "_cast_out");
}
out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(
ITensorHelper{out_tensor, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
this->layer_ = activation_layer;
return RET_OK;
}
nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type,
float alpha, float min_value, float max_value,
nvinfer1::ITensor *trt_in_tensor, uint32_t device_id,
schema::QuantType quant_type) {
bool has_custom_plugin = HasCustomActivationPlugin(activation_type);
// TensorRT's native sigmoid has precision issues, so use the custom activation plugin for supported types.
if (quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
std::string layer_name = std::string(trt_in_tensor->getName()) + "_activation";
auto plugin = std::make_shared<ActivationOptPlugin>(layer_name.c_str(), activation_type, device_id);
MS_LOG(INFO) << "using opt plugin for " << layer_name;
if (plugin == nullptr) {
MS_LOG(ERROR) << "create ActivationOptPlugin failed for " << layer_name;
return nullptr;
}
nvinfer1::ITensor *inputTensors[] = {trt_in_tensor};
nvinfer1::IPluginV2Layer *activation_opt_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
activation_opt_layer->setName(layer_name.c_str());
return activation_opt_layer;
}
// Fall back to TensorRT's built-in activation layers; activation types without a known mapping are rejected.
auto action_param_opt = TryConvertActivationType(activation_type);
if (!action_param_opt) {
MS_LOG(ERROR) << "Unsupported op action type for TensorRT: " << activation_type;
return nullptr;
}
auto action_param = action_param_opt.value();
nvinfer1::IActivationLayer *activation_layer =
ctx->network()->addActivation(*trt_in_tensor, action_param.activation_type);
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "add activation op failed for TensorRT.";
return nullptr;
}
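  // For HARD_TANH the output is clamped to [min_value, max_value]; the layer's alpha/beta carry the lower/upper bounds.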
if (activation_type == schema::ActivationType_HARD_TANH) {
activation_layer->setAlpha(min_value);
activation_layer->setBeta(max_value);
return activation_layer;
}
if (action_param.has_alpha) {
activation_layer->setAlpha(alpha);
}
if (action_param.has_beta) {
activation_layer->setBeta(action_param.beta);
}
return activation_layer;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Activation, ActivationTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,43 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class ActivationTensorRT : public TensorRTOp {
public:
ActivationTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~ActivationTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
static nvinfer1::ILayer *AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type, float alpha,
float min_value, float max_value, nvinfer1::ITensor *trt_in_tensor,
uint32_t device_id = 0,
schema::QuantType quant_type = schema::QuantType_QUANT_NONE);
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_

View File

@ -0,0 +1,113 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/allgather_tensorrt.h"
#include <numeric>
#include "NvInferRuntimeCommon.h"
namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(AllGatherPluginCreater);
template class TensorRTPluginCreater<AllGatherPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int AllGatherTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
#ifndef LITE_CUDA_DISTRIBUTION
MS_LOG(ERROR)
<< "Unsupported package for gpu distribution feature, please recompile with MS_ENABLE_CUDA_DISTRIBUTION set to on.";
return RET_ERROR;
#else
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != 1) {
MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
#endif
}
int AllGatherTensorRT::AddInnerOp(TensorRTContext *ctx) {
nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_};
auto allgather_op = op_primitive_->value_as_AllGather();
if (allgather_op == nullptr) {
MS_LOG(ERROR) << "convert failed for " << op_name_;
return RET_ERROR;
}
int rank = GetGPUGroupSize();
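  // rank here is the communication group size; every rank contributes one slice that is gathered along dim 0.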
auto plugin = std::make_shared<AllGatherPlugin>(op_name_, rank, device_id_);
MS_LOG(INFO) << op_name_ << " group size: " << rank << ", rank id: " << GetRankID();
nvinfer1::IPluginV2Layer *allgather_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
if (allgather_layer == nullptr) {
MS_LOG(ERROR) << "create AllGather layer failed for: " << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *allgather_out = allgather_layer->getOutput(0);
allgather_layer->setName(op_name_.c_str());
allgather_out->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(
ITensorHelper{allgather_out, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
this->layer_ = allgather_layer;
return RET_OK;
}
// AllGatherPlugin
int AllGatherPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace,
cudaStream_t stream) noexcept {
MS_LOG(INFO) << "all gather run at rank id: " << GetRankID() << " stream: " << stream;
nvinfer1::Dims input_dims = inputDesc[0].dims;
int send_element_cnt = std::accumulate(input_dims.d, input_dims.d + input_dims.nbDims, 1, std::multiplies<int64_t>());
const void *input = inputs[0];
void *output = outputs[0];
auto ret = DistributionCollective::instance().AllGatherWrapper(input, output, send_element_cnt, inputDesc->type,
stream, NCCL_WORLD_GROUP);
if (ret != RET_OK) {
MS_LOG(ERROR) << "AllGather nccl run failed for " << layer_name_;
return ret;
}
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *AllGatherPlugin::clone() const noexcept {
auto *plugin = new AllGatherPlugin(*this);
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
nvinfer1::DimsExprs AllGatherPlugin::getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs,
int nbInputs, nvinfer1::IExprBuilder &exprBuilder) noexcept {
nvinfer1::DimsExprs out_dims{};
out_dims.nbDims = inputs->nbDims;
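  // AllGather concatenates the per-rank tensors along dim 0, so the output's first dim is the input dim scaled by the group size.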
auto rank_dim = exprBuilder.constant(rank_);
out_dims.d[0] = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs->d[0], *rank_dim);
for (int i = 1; i < inputs->nbDims; i++) {
out_dims.d[i] = inputs->d[i];
}
return out_dims;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_AllGather, AllGatherTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,75 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ALLGATHER_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ALLGATHER_TENSORRT_H_
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "src/runtime/delegate/tensorrt/distribution/distribution_collective.h"
namespace mindspore::lite {
constexpr const char *ALLGATHER_PLUGIN_NAME{"AllGatherPlugin"};
class AllGatherTensorRT : public TensorRTOp {
public:
AllGatherTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~AllGatherTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
};
class AllGatherPlugin : public TensorRTPlugin {
public:
AllGatherPlugin(const std::string name, int rank, uint32_t device_id)
: TensorRTPlugin(name, std::string(ALLGATHER_PLUGIN_NAME), device_id), rank_(rank) {}
AllGatherPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(ALLGATHER_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
rank_ = static_cast<const int *>(fields[0].data)[0];
}
AllGatherPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(ALLGATHER_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &rank_, sizeof(int));
}
AllGatherPlugin() = delete;
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
nvinfer1::IExprBuilder &exprBuilder) noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
private:
int rank_{0};
};
class AllGatherPluginCreater : public TensorRTPluginCreater<AllGatherPlugin> {
public:
AllGatherPluginCreater() : TensorRTPluginCreater(std::string(ALLGATHER_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ALLGATHER_TENSORRT_H_

View File

@ -0,0 +1,83 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/cast_plugin.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/cast.cuh"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <functional>
namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(CastPluginCreater);
template class TensorRTPluginCreater<CastPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int CastPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace,
cudaStream_t stream) noexcept {
nvinfer1::Dims input_dims = inputDesc[0].dims;
int element_cnt = std::accumulate(input_dims.d, input_dims.d + input_dims.nbDims, 1, std::multiplies<int64_t>());
if (inputDesc->type == outputDesc->type) {
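    // Source and destination types match: a plain device-to-device copy is enough, no cast kernel is needed.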
int element_size = (outputDesc->type == nvinfer1::DataType::kFLOAT)
? sizeof(float)
: ((outputDesc->type == nvinfer1::DataType::kINT32) ? sizeof(int) : 0);
auto cuda_ret = cudaMemcpy(outputs[0], inputs[0], element_cnt * element_size, cudaMemcpyDeviceToDevice);
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "copy mem failed for " << layer_name_;
return RET_ERROR;
}
return RET_OK;
}
if (inputDesc->type == nvinfer1::DataType::kINT32 && dest_datatype_ == nvinfer1::DataType::kFLOAT) {
auto input = static_cast<const int *>(inputs[0]);
auto output = static_cast<float *>(outputs[0]);
Cast(element_cnt, input, output, stream);
} else if (inputDesc->type == nvinfer1::DataType::kFLOAT && dest_datatype_ == nvinfer1::DataType::kINT32) {
auto input = static_cast<const float *>(inputs[0]);
auto output = static_cast<int *>(outputs[0]);
Cast(element_cnt, input, output, stream);
  } else {
    MS_LOG(ERROR) << "unsupported data type cast " << layer_name_;
    return RET_ERROR;
  }
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *CastPlugin::clone() const noexcept {
auto *plugin = new CastPlugin(*this);
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
nvinfer1::DataType CastPlugin::getOutputDataType(int index, const nvinfer1::DataType *inputTypes, int nbInputs) const
noexcept {
return dest_datatype_;
}
size_t CastPlugin::getSerializationSize() const noexcept {
// origin_datatype_ and dest_datatype_
return sizeof(nvinfer1::DataType) * 2;
}
void CastPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &origin_datatype_, sizeof(nvinfer1::DataType));
SerializeValue(&buffer, &dest_datatype_, sizeof(nvinfer1::DataType));
}
} // namespace mindspore::lite

View File

@ -0,0 +1,67 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_PLUGIN_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_PLUGIN_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
namespace mindspore::lite {
constexpr const char *CAST_PLUGIN_NAME{"CastPluginCreater"};
class CastPlugin : public TensorRTPlugin {
public:
CastPlugin(const std::string name, nvinfer1::DataType origin_datatype, nvinfer1::DataType dest_datatype,
uint32_t device_id = 0)
: TensorRTPlugin(name, std::string(CAST_PLUGIN_NAME), device_id),
origin_datatype_(origin_datatype),
dest_datatype_(dest_datatype) {}
CastPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(CAST_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
origin_datatype_ = static_cast<const nvinfer1::DataType *>(fields[0].data)[0];
dest_datatype_ = static_cast<const nvinfer1::DataType *>(fields[1].data)[0];
}
CastPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(CAST_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &origin_datatype_, sizeof(nvinfer1::DataType));
DeserializeValue(&serialData, &serialLength, &dest_datatype_, sizeof(nvinfer1::DataType));
}
CastPlugin() = delete;
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes, int nbInputs) const
noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void *buffer) const noexcept override;
private:
nvinfer1::DataType origin_datatype_;
nvinfer1::DataType dest_datatype_;
};
class CastPluginCreater : public TensorRTPluginCreater<CastPlugin> {
public:
CastPluginCreater() : TensorRTPluginCreater(std::string(CAST_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_PLUGIN_H_

View File

@ -0,0 +1,79 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/cast_tensorrt.h"
#include "src/runtime/delegate/tensorrt/op/cast_plugin.h"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <functional>
namespace mindspore::lite {
int CastTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int CastTensorRT::AddInnerOp(TensorRTContext *ctx) {
  // the second input tensor holds the destination data type
auto type_tensor = in_tensors_[1];
if (type_tensor.Data() == nullptr) {
MS_LOG(ERROR) << "unknown cast type of " << op_name_;
return RET_ERROR;
}
auto type_data = static_cast<const int *>(type_tensor.Data().get());
DataType data_type = static_cast<DataType>(type_data[0]);
  MS_LOG(DEBUG) << op_name_ << " cast to data type (43 is float): " << type_data[0];
nvinfer1::DataType dest_datatype = ConvertDataType(data_type);
auto trt_tensor = tensorrt_in_tensors_[0].trt_tensor_;
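  // TensorRT 7.2+ can cast through an identity layer with setOutputType; older versions fall back to the custom CastPlugin.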
#if TRT_VERSION_GE(7, 2)
dest_datatype = (dest_datatype == nvinfer1::DataType::kBOOL ? nvinfer1::DataType::kINT32 : dest_datatype);
auto cast_layer = ctx->network()->addIdentity(*trt_tensor);
#else
auto plugin = std::make_shared<CastPlugin>(op_name_, trt_tensor->getType(), dest_datatype);
nvinfer1::ITensor *inputTensors[] = {trt_tensor};
nvinfer1::IPluginV2Layer *cast_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
#endif
if (cast_layer == nullptr) {
MS_LOG(ERROR) << "create cast layer failed for: " << op_name_;
return RET_ERROR;
}
#if TRT_VERSION_GE(7, 2)
cast_layer->setOutputType(0, dest_datatype);
#endif
cast_layer->setName(op_name_.c_str());
nvinfer1::ITensor *cast_out = cast_layer->getOutput(0);
cast_out->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(
ITensorHelper{cast_out, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
this->layer_ = cast_layer;
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Cast, CastTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,43 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/cast.cuh"
namespace mindspore::lite {
class CastTensorRT : public TensorRTOp {
public:
CastTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~CastTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
// CastTensorRT
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CAST_TENSORRT_H_

View File

@ -0,0 +1,158 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/concate_tensorrt.h"
#include <experimental/optional>
#include <algorithm>
namespace mindspore::lite {
int ConcateTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (type_ != schema::PrimitiveType_Stack && type_ != schema::PrimitiveType_Concat) {
MS_LOG(ERROR) << "Unsupported op :" << op_name_ << " , type: " << type_;
return RET_ERROR;
}
  if (in_tensors.size() == 0 || (in_tensors.size() < INPUT_SIZE2 && type_ != schema::PrimitiveType_Stack)) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
int input_nbDims = in_tensors_[0].Shape().size();
if (axis_ == -1) {
axis_ = input_nbDims - 1;
}
  if (axis_ < 0 || axis_ > input_nbDims || (axis_ == input_nbDims && type_ != schema::PrimitiveType_Stack)) {
    MS_LOG(ERROR) << "concate_op invalid axis: " << axis_ << " , input dims : " << input_nbDims;
return RET_ERROR;
}
return RET_OK;
}
int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "context or network is invalid";
return RET_ERROR;
}
if (tensorrt_in_tensors_.size() != in_tensors_.size()) {
MS_LOG(ERROR) << "concate_op in tensor is invalid, trt tensor has " << tensorrt_in_tensors_.size()
<< ", but origin ms tensor has " << in_tensors_.size();
return RET_ERROR;
}
nvinfer1::ITensor *trt_input_tensors[tensorrt_in_tensors_.size()];
int ret = PreProcessInputs(ctx, trt_input_tensors);
if (ret != RET_OK) {
MS_LOG(ERROR) << "PreProcessInputs failed for " << op_name_;
return ret;
}
if (!same_format_) {
if (trt_input_tensors[0]->getDimensions().nbDims == DIMENSION_4D && out_format_ == Format::NCHW) {
// when inputs all NCHW, change axis
axis_ = ConvertAxisFromNHWC2NCHW(axis_);
MS_LOG(DEBUG) << "concate axis change to " << axis_ << " when using NCHW format.";
} else {
      MS_LOG(WARNING) << "input tensor formats are inconsistent; failed to convert concat axis for " << op_name_;
}
}
if (type_ == schema::PrimitiveType_Stack) {
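    // Stack is realized as an unsqueeze (insert a new axis at axis_) on every input followed by a concat along that axis.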
for (size_t i = 0; i != tensorrt_in_tensors_.size(); ++i) {
auto shuffle_layer = ctx->network()->addShuffle(*trt_input_tensors[i]);
if (shuffle_layer == nullptr) {
MS_LOG(ERROR) << "addShuffle failed for TensorRT.";
return RET_ERROR;
}
auto shuffer_dims_opt = UnsqueezeDims(trt_input_tensors[i]->getDimensions(), axis_, 1);
if (!shuffer_dims_opt) {
MS_LOG(ERROR) << "UnsqueezeDims failed.";
return RET_ERROR;
}
shuffle_layer->setReshapeDimensions(shuffer_dims_opt.value());
trt_input_tensors[i] = shuffle_layer->getOutput(0);
}
}
nvinfer1::IConcatenationLayer *concate_layer =
ctx->network()->addConcatenation(trt_input_tensors, static_cast<int>(tensorrt_in_tensors_.size()));
if (concate_layer == nullptr) {
MS_LOG(ERROR) << "addConcatenation failed for TensorRT.";
return RET_ERROR;
}
if (axis_ != RET_INVALID_OP_ATTR) {
concate_layer->setAxis(axis_);
}
concate_layer->setName(op_name_.c_str());
auto concat_output = concate_layer->getOutput(0);
concat_output->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{concat_output, out_format_, same_format_});
this->layer_ = concate_layer;
return RET_OK;
}
int ConcateTensorRT::PreProcessInputs(TensorRTContext *ctx, nvinfer1::ITensor *trt_input_tensors[]) {
int input_nbDims = tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims;
out_format_ = tensorrt_in_tensors_[0].format_;
same_format_ = tensorrt_in_tensors_[0].same_format_;
for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
if (tensorrt_in_tensors_[i].trt_tensor_->getDimensions().nbDims != input_nbDims) {
      MS_LOG(ERROR) << "dims of inputs are invalid for " << op_name_;
return RET_ERROR;
}
    // keep the origin format if all inputs share it; otherwise fall back to NHWC
if (input_nbDims == DIMENSION_4D && tensorrt_in_tensors_[i].format_ != out_format_) {
out_format_ = Format::NHWC;
}
}
// make sure all inputs are same format
if (input_nbDims == DIMENSION_4D) {
for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
if (tensorrt_in_tensors_[i].format_ == out_format_) {
trt_input_tensors[i] = tensorrt_in_tensors_[i].trt_tensor_;
MS_LOG(DEBUG) << "concate input " << GetTensorFormat(tensorrt_in_tensors_[i]);
} else {
nvinfer1::IShuffleLayer *transpose_layer = NCHW2NHWC(ctx, *tensorrt_in_tensors_[i].trt_tensor_);
if (transpose_layer == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
trt_input_tensors[i] = transpose_layer->getOutput(0);
this->transpose_layer_ = transpose_layer;
same_format_ = true;
MS_LOG(DEBUG) << "concate input " << GetTensorFormat(trt_input_tensors[i], Format::NHWC, true);
}
}
} else {
for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
trt_input_tensors[i] = tensorrt_in_tensors_[i].trt_tensor_;
MS_LOG(DEBUG) << "concate input " << GetTensorFormat(tensorrt_in_tensors_[i]);
}
}
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Concat, ConcateTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Stack, ConcateTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,50 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONCATE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONCATE_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class ConcateTensorRT : public TensorRTOp {
public:
ConcateTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {
type_ = primitive->value_type();
axis_ = (type_ == schema::PrimitiveType_Concat ? primitive->value_as_Concat()->axis()
: primitive->value_as_Stack()->axis());
}
~ConcateTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
int PreProcessInputs(TensorRTContext *ctx, nvinfer1::ITensor *trt_input_tensors[]);
Format out_format_{Format::NHWC};
bool same_format_{true};
schema::PrimitiveType type_;
int axis_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONCATE_TENSORRT_H_

View File

@ -0,0 +1,187 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/convolution_tensorrt.h"
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
namespace mindspore::lite {
constexpr int BIAS_INDEX = 2;
int ConvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
return RET_ERROR;
}
return RET_OK;
}
int ConvolutionTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "context or network is invalid";
return RET_ERROR;
}
const schema::Conv2DFusion *conv_op = this->op_primitive_->value_as_Conv2DFusion();
if (conv_op == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
nvinfer1::ITensor *conv_input = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
this->transpose_layer_ = transpose_layer_in;
conv_input = transpose_layer_in->getOutput(0);
}
// transpose weight
const mindspore::MSTensor &weight_tensor = in_tensors_[1];
nvinfer1::Weights kernelWeights = lite::TransposeWeight4D(weight_tensor, &pack_weight_);
// conv
int nbOutputMaps = weight_tensor.Shape()[0];
if (nbOutputMaps <= 0) {
MS_LOG(ERROR) << "out_channel is invalid";
return RET_ERROR;
}
auto kernel_size = conv_op->kernel_size();
if (kernel_size == nullptr) {
MS_LOG(ERROR) << "kernel_size is null";
return RET_ERROR;
}
nvinfer1::Dims kernelSize = lite::ConvertCudaDims(std::vector<int64_t>(kernel_size->begin(), kernel_size->end()));
if (kernelSize.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return RET_ERROR;
}
// bias
nvinfer1::Weights biasWeights{};
if (in_tensors_.size() >= INPUT_SIZE3) {
biasWeights = lite::ConvertWeight(in_tensors_[BIAS_INDEX]);
} else {
biasWeights.type = ConvertDataType(weight_tensor.DataType());
biasWeights.count = 0;
biasWeights.values = nullptr;
}
nvinfer1::IConvolutionLayer *conv_layer =
ctx->network()->addConvolutionNd(*conv_input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
if (conv_layer == nullptr) {
MS_LOG(ERROR) << "ConvolutionLayer failed";
return RET_ERROR;
}
conv_layer->setName((op_name_ + "_conv").c_str());
this->layer_ = conv_layer;
// add params
SetAttributes(conv_op, conv_layer);
// add activation
nvinfer1::ILayer *activation_layer = nullptr;
if (conv_op->activation_type() == schema::ActivationType::ActivationType_NO_ACTIVATION) {
activation_layer = conv_layer;
} else {
activation_layer =
ActivationTensorRT::AddActivation(ctx, conv_op->activation_type(), 0, 0, 0, conv_layer->getOutput(0), device_id_);
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "addActivation for conv failed";
return RET_ERROR;
}
activation_layer->setName((op_name_ + "_activation").c_str());
}
activation_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW, false});
return RET_OK;
}
void ConvolutionTensorRT::SetAttributes(const schema::Conv2DFusion *conv_op, nvinfer1::IConvolutionLayer *conv_layer) {
auto stride = conv_op->stride();
if (stride != nullptr) {
auto stride_val = std::vector<int64_t>(stride->begin(), stride->end());
auto dims = ConvertCudaDims(stride_val);
if (dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return;
}
conv_layer->setStrideNd(dims);
}
auto dilation = conv_op->dilation();
if (dilation != nullptr) {
auto dilation_val = std::vector<int64_t>(dilation->begin(), dilation->end());
auto dims = ConvertCudaDims(dilation_val);
if (dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return;
}
conv_layer->setDilationNd(dims);
}
int nbGroups = conv_op->group();
if (nbGroups > 0) {
conv_layer->setNbGroups(nbGroups);
}
schema::PadMode pad_mode = conv_op->pad_mode();
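  // SAME padding maps to TensorRT's kSAME_UPPER mode; otherwise the explicit pad_list values are applied.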
if (pad_mode == schema::PadMode::PadMode_SAME) {
conv_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
} else {
auto padding = conv_op->pad_list();
if (padding != nullptr && padding->size() == DIMENSION_4D) {
auto padding_val = std::vector<int64_t>(padding->begin(), padding->end());
if (padding_val[0] != padding_val[1] || padding_val[DIMENSION_2D] != padding_val[DIMENSION_3D]) {
        MS_LOG(WARNING) << op_name_ << " has asymmetric padding values";
}
nvinfer1::Dims2 dims(padding_val[0], padding_val[DIMENSION_2D]);
conv_layer->setPaddingNd(dims);
} else if (padding == nullptr || padding->size() == 0) {
nvinfer1::Dims2 dims;
conv_layer->setPaddingNd(dims);
} else {
MS_LOG(WARNING) << "pad list is invalid for " << op_name_;
}
}
}
ConvolutionTensorRT::~ConvolutionTensorRT() {
if (pack_weight_ != nullptr) {
free(pack_weight_);
pack_weight_ = nullptr;
}
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Conv2DFusion, ConvolutionTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,43 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONVOLUTION_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONVOLUTION_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class ConvolutionTensorRT : public TensorRTOp {
public:
ConvolutionTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~ConvolutionTensorRT() override;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
void SetAttributes(const schema::Conv2DFusion *ms_op, nvinfer1::IConvolutionLayer *current_layer_);
void *pack_weight_{nullptr};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_CONVOLUTION_TENSORRT_H_

View File

@ -0,0 +1,199 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/deconvolution_tensorrt.h"
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
#include "nnacl/pack.h"
namespace mindspore::lite {
int DeconvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
return RET_ERROR;
}
return RET_OK;
}
int DeconvolutionTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "context or network is invalid";
return RET_ERROR;
}
const schema::Conv2dTransposeFusion *deconv_op = this->op_primitive_->value_as_Conv2dTransposeFusion();
if (deconv_op == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
nvinfer1::ITensor *deconv_input = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
this->transpose_layer_ = transpose_layer_in;
deconv_input = transpose_layer_in->getOutput(0);
}
// transpose weight
const mindspore::MSTensor &weight_tensor = in_tensors_[1];
nvinfer1::Weights kernelWeights = lite::TransposeWeight4D(weight_tensor, &pack_weight_);
// deconv basic params
int nbOutputMaps = weight_tensor.Shape()[0];
if (nbOutputMaps <= 0) {
MS_LOG(ERROR) << "out_channel is invalid";
return RET_ERROR;
}
auto kernel_size = deconv_op->kernel_size();
if (kernel_size == nullptr) {
MS_LOG(ERROR) << "kernel_size is null";
return RET_ERROR;
}
nvinfer1::Dims kernelSize = lite::ConvertCudaDims(std::vector<int64_t>(kernel_size->begin(), kernel_size->end()));
if (kernelSize.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return RET_ERROR;
}
// bias
nvinfer1::Weights biasWeights{};
if (in_tensors_.size() >= INPUT_SIZE3) {
biasWeights = lite::ConvertWeight(in_tensors_[INPUT_SIZE3 - 1]);
} else {
biasWeights.type = ConvertDataType(weight_tensor.DataType());
biasWeights.count = 0;
biasWeights.values = nullptr;
}
nvinfer1::IDeconvolutionLayer *deconv_layer =
ctx->network()->addDeconvolutionNd(*deconv_input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
if (deconv_layer == nullptr) {
MS_LOG(ERROR) << "DeconvolutionLayer failed";
return RET_ERROR;
}
deconv_layer->setName((op_name_ + "_deconv").c_str());
this->layer_ = deconv_layer;
// set extra params
SetAttributes(deconv_op, deconv_layer);
// add activation
nvinfer1::ILayer *activation_layer = nullptr;
if (deconv_op->activation_type() == schema::ActivationType::ActivationType_NO_ACTIVATION) {
activation_layer = deconv_layer;
} else {
activation_layer = ActivationTensorRT::AddActivation(ctx, deconv_op->activation_type(), 0, 0, 0,
deconv_layer->getOutput(0), device_id_);
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "addActivation for conv failed";
return RET_ERROR;
}
activation_layer->setName((op_name_ + "_activation").c_str());
}
activation_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW, false});
return RET_OK;
}
void DeconvolutionTensorRT::SetAttributes(const schema::Conv2dTransposeFusion *ms_op,
nvinfer1::IDeconvolutionLayer *decon_layer) {
// kernel_size
auto kernel_size = ms_op->kernel_size();
if (kernel_size != nullptr) {
auto kernel_size_val = std::vector<int64_t>(kernel_size->begin(), kernel_size->end());
nvinfer1::Dims kernel_size_dims = lite::ConvertCudaDims(kernel_size_val);
if (kernel_size_dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return;
}
decon_layer->setKernelSizeNd(kernel_size_dims);
}
// nbOutputMaps
int32_t nbOutputMaps = static_cast<int32_t>(ms_op->out_channel());
decon_layer->setNbOutputMaps(nbOutputMaps);
// stride
auto stride = ms_op->stride();
if (stride != nullptr) {
auto stride_val = std::vector<int64_t>(stride->begin(), stride->end());
nvinfer1::Dims stride_dims = lite::ConvertCudaDims(stride_val);
if (stride_dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return;
}
decon_layer->setStrideNd(stride_dims);
}
// nbGroups
int32_t nbGroups = static_cast<int32_t>(ms_op->group());
decon_layer->setNbGroups(nbGroups);
// padding
schema::PadMode pad_mode = ms_op->pad_mode();
if (pad_mode == schema::PadMode::PadMode_SAME) {
decon_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
} else {
auto padding = ms_op->pad_list();
auto out_pad = ms_op->output_paddings();
if (padding == nullptr || out_pad == nullptr) {
      MS_LOG(WARNING) << "no pad value for " << op_name_;
return;
}
auto padding_val = std::vector<int64_t>(padding->begin(), padding->end());
auto out_pad_val = std::vector<int64_t>(out_pad->begin(), out_pad->end()); // h, w
if (out_pad_val.size() != DIMENSION_2D || padding_val.size() != DIMENSION_4D) {
MS_LOG(ERROR) << "invalid size of pad " << op_name_;
return;
}
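    // TensorRT pre-padding covers the top/left side and post-padding the bottom/right side; output_paddings shrinks the post side.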
nvinfer1::Dims dims_pre{};
dims_pre.nbDims = DIMENSION_2D;
dims_pre.d[0] = padding_val[0]; // up
dims_pre.d[1] = padding_val[2]; // left
decon_layer->setPrePadding(dims_pre);
nvinfer1::Dims dims_post{};
dims_post.nbDims = DIMENSION_2D;
dims_post.d[0] = padding_val[1] - out_pad_val[0]; // down
dims_post.d[1] = padding_val[3] - out_pad_val[1]; // right
decon_layer->setPostPadding(dims_post);
}
}
DeconvolutionTensorRT::~DeconvolutionTensorRT() {
if (pack_weight_ != nullptr) {
free(pack_weight_);
pack_weight_ = nullptr;
}
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Conv2dTransposeFusion, DeconvolutionTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,43 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_DECONVOLUTION_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_DECONVOLUTION_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class DeconvolutionTensorRT : public TensorRTOp {
public:
DeconvolutionTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~DeconvolutionTensorRT() override;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
void SetAttributes(const schema::Conv2dTransposeFusion *ms_op, nvinfer1::IDeconvolutionLayer *decon_layer);
void *pack_weight_{nullptr};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_DECONVOLUTION_TENSORRT_H_

View File

@ -0,0 +1,312 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cmath>
#include <unordered_map>
#include <unordered_set>
#include "src/runtime/delegate/tensorrt/op/elementwise_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
namespace mindspore::lite {
namespace {
std::unordered_map<schema::PrimitiveType, nvinfer1::ElementWiseOperation> NOT_BOOL_PRIM2NV_ELEM_OP = {
#if TRT_VERSION_GE(7, 2)
{schema::PrimitiveType_Less, nvinfer1::ElementWiseOperation::kLESS},
{schema::PrimitiveType_Greater, nvinfer1::ElementWiseOperation::kGREATER},
#endif
{schema::PrimitiveType_AddFusion, nvinfer1::ElementWiseOperation::kSUM},
{schema::PrimitiveType_PowFusion, nvinfer1::ElementWiseOperation::kPOW},
{schema::PrimitiveType_DivFusion, nvinfer1::ElementWiseOperation::kDIV},
{schema::PrimitiveType_RealDiv, nvinfer1::ElementWiseOperation::kDIV},
{schema::PrimitiveType_FloorDiv, nvinfer1::ElementWiseOperation::kFLOOR_DIV},
{schema::PrimitiveType_SubFusion, nvinfer1::ElementWiseOperation::kSUB},
{schema::PrimitiveType_MulFusion, nvinfer1::ElementWiseOperation::kPROD},
{schema::PrimitiveType_Minimum, nvinfer1::ElementWiseOperation::kMIN},
{schema::PrimitiveType_Maximum, nvinfer1::ElementWiseOperation::kMAX},
{schema::PrimitiveType_BiasAdd, nvinfer1::ElementWiseOperation::kSUM},
#if TRT_VERSION_GE(7, 2)
{schema::PrimitiveType_Equal, nvinfer1::ElementWiseOperation::kEQUAL},
#endif
};
} // namespace
int ElementWiseTensorRT::IsSupport(const schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE2) {
    MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
return RET_ERROR;
}
// if constant tensor is scalar, it needs to know another input tensor's shape to broadcast
if ((in_tensors[0].Shape().size() > 0 && in_tensors[0].Shape()[0] == -1 && in_tensors[1].Shape().size() == 0) ||
(in_tensors[1].Shape().size() > 0 && in_tensors[1].Shape()[0] == -1 && in_tensors[0].Shape().size() == 0)) {
    MS_LOG(ERROR) << "a scalar input cannot be broadcast against an unknown-shape input for: " << op_name_;
return RET_ERROR;
}
bool is_not_bool_arith = NOT_BOOL_PRIM2NV_ELEM_OP.find(type_) != NOT_BOOL_PRIM2NV_ELEM_OP.end();
if (is_not_bool_arith) {
if (std::any_of(in_tensors.begin(), in_tensors.end(),
[](const mindspore::MSTensor &tensor) { return tensor.DataType() == DataType::kNumberTypeBool; })) {
MS_LOG(ERROR) << "invalid input type for : " << op_name_;
return RET_ERROR;
}
element_wise_op_ = NOT_BOOL_PRIM2NV_ELEM_OP[type_];
}
if (!is_not_bool_arith) {
// PrimitiveType_Eltwise
auto eltwise_op = op_primitive_->value_as_Eltwise();
if (eltwise_op == nullptr) {
MS_LOG(ERROR) << "convert to Eltwise failed: " << op_name_;
return RET_ERROR;
}
schema::EltwiseMode eltwiseMode = eltwise_op->mode();
std::map<schema::EltwiseMode, nvinfer1::ElementWiseOperation> eltwise_modes = {
{schema::EltwiseMode::EltwiseMode_SUM, nvinfer1::ElementWiseOperation::kSUM},
{schema::EltwiseMode::EltwiseMode_PROD, nvinfer1::ElementWiseOperation::kPROD},
{schema::EltwiseMode::EltwiseMode_MAXIMUM, nvinfer1::ElementWiseOperation::kMAX},
};
auto iter_mode = eltwise_modes.find(eltwiseMode);
if (iter_mode != eltwise_modes.end()) {
element_wise_op_ = iter_mode->second;
} else {
      MS_LOG(ERROR) << "unsupported eltwise mode for ElementWise op " << op_name_;
return RET_ERROR;
}
}
return RET_OK;
}
int ElementWiseTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
    MS_LOG(ERROR) << "context or network is invalid";
return RET_ERROR;
}
ITensorHelper x_input;
ITensorHelper y_input;
int ret = PreprocessInputTensors(ctx, &x_input, &y_input);
if (ret != RET_OK) {
MS_LOG(ERROR) << "PreprocessInputTensors failed.";
return RET_ERROR;
}
nvinfer1::IElementWiseLayer *cal_layer =
ctx->network()->addElementWise(*x_input.trt_tensor_, *y_input.trt_tensor_, element_wise_op_);
if (cal_layer == nullptr) {
MS_LOG(ERROR) << "addElementWise failed for TensorRT.";
return RET_ERROR;
}
cal_layer->setName(op_name_.c_str());
this->layer_ = cal_layer;
nvinfer1::ITensor *op_out_tensor = cal_layer->getOutput(0);
if (op_out_tensor == nullptr) {
MS_LOG(ERROR) << "addElementWise out tensor is nullptr.";
return RET_ERROR;
}
// add activation
nvinfer1::ITensor *activation_out_tensor = AddActivation(ctx, op_out_tensor);
op_out_tensor = (activation_out_tensor == nullptr) ? op_out_tensor : activation_out_tensor;
// scale and shift
if (type_ == schema::PrimitiveType_PowFusion) {
auto pow_op = op_primitive_->value_as_PowFusion();
if (pow_op == nullptr) {
MS_LOG(ERROR) << "PowFusion convert failed.";
return RET_ERROR;
}
float scale = pow_op->scale();
float shift = pow_op->shift();
    if (std::fabs(scale - 1) >= 1.0e-05 || std::fabs(shift) >= 1.0e-05) {
      MS_LOG(WARNING) << "scale and shift of pow op are not applied yet for " << op_name_;
}
}
#if TRT_VERSION_GE(7, 2)
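  // Comparison ops produce kBOOL on TRT 7.2+; cast the result to INT32 so downstream ops and outputs see an integer tensor.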
std::unordered_set<schema::PrimitiveType> bool_producer_ops = {
schema::PrimitiveType_Equal, schema::PrimitiveType_Greater, schema::PrimitiveType_Less};
if (bool_producer_ops.find(type_) != bool_producer_ops.end()) {
auto cast_layer = ctx->network()->addIdentity(*op_out_tensor);
if (cast_layer == nullptr) {
MS_LOG(ERROR) << "create cast layer failed for: " << op_name_;
return RET_ERROR;
}
cast_layer->setOutputType(0, nvinfer1::DataType::kINT32);
op_out_tensor = cast_layer->getOutput(0);
MS_LOG(INFO) << "bool result cast to int32" << op_name_;
}
#endif
op_out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, x_input.format_, x_input.same_format_});
MS_LOG(DEBUG) << "output " << GetTensorFormat(tensorrt_out_tensors_[0]);
return RET_OK;
}
int ElementWiseTensorRT::PreprocessInputTensors(TensorRTContext *ctx, ITensorHelper *x_input, ITensorHelper *y_input) {
int input_x_index = SameTensor(tensorrt_in_tensors_[0].trt_tensor_, &in_tensors_[0]) ? 0 : 1;
if (in_tensors_[0].Shape() == in_tensors_[1].Shape() && in_tensors_[0].IsConst()) {
input_x_index = 1;
}
if (this->tensorrt_in_tensors_.size() != INPUT_SIZE2) {
int ret = AddConstTensor(ctx);
if (ret != RET_OK) {
return ret;
}
}
*x_input = tensorrt_in_tensors_[input_x_index];
*y_input = tensorrt_in_tensors_[1 - input_x_index];
MS_LOG(DEBUG) << "before transpose " << GetTensorFormat(*x_input);
MS_LOG(DEBUG) << "before transpose " << GetTensorFormat(*y_input);
if (x_input->trt_tensor_->getDimensions().nbDims == DIMENSION_4D && x_input->format_ != y_input->format_) {
// when inputs format are different, change to NHWC
auto need_trans = x_input->format_ == Format::NCHW ? x_input : y_input;
nvinfer1::IShuffleLayer *transpose_layer = NCHW2NHWC(ctx, *need_trans->trt_tensor_);
if (transpose_layer == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
transpose_layer->setName((op_name_ + "_input_transpose2NHWC").c_str());
need_trans->trt_tensor_ = transpose_layer->getOutput(0);
need_trans->format_ = Format::NHWC;
need_trans->same_format_ = true;
}
MS_LOG(DEBUG) << "after transpose " << GetTensorFormat(*x_input);
MS_LOG(DEBUG) << "after transpose " << GetTensorFormat(*y_input);
if (GetDimsVolume(x_input->trt_tensor_->getDimensions()) == GetDimsVolume(y_input->trt_tensor_->getDimensions()) &&
x_input->trt_tensor_->getDimensions().nbDims != y_input->trt_tensor_->getDimensions().nbDims) {
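    // Same element count but different rank: reshape the lower-rank tensor to the higher-rank shape so the element-wise op lines up.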
bool x_large = x_input->trt_tensor_->getDimensions().nbDims > y_input->trt_tensor_->getDimensions().nbDims;
auto input_tensor = x_large ? y_input : x_input;
auto output_dim = x_large ? x_input->trt_tensor_->getDimensions() : y_input->trt_tensor_->getDimensions();
auto reshape_layer = ctx->network()->addShuffle(*input_tensor->trt_tensor_);
if (reshape_layer == nullptr) {
MS_LOG(ERROR) << "add reshape failed for " << op_name_;
return RET_ERROR;
}
reshape_layer->setReshapeDimensions(output_dim);
input_tensor->trt_tensor_ = reshape_layer->getOutput(0);
}
return RET_OK;
}
nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(TensorRTContext *ctx, nvinfer1::ITensor *in_tensor) {
schema::ActivationType activation = schema::ActivationType::ActivationType_NO_ACTIVATION;
switch (type_) {
case schema::PrimitiveType_AddFusion: {
auto sum_op = op_primitive_->value_as_AddFusion();
if (sum_op == nullptr) {
MS_LOG(ERROR) << "AddFusion convert failed.";
return nullptr;
}
activation = sum_op->activation_type();
break;
}
case schema::PrimitiveType_DivFusion: {
auto div_op = op_primitive_->value_as_DivFusion();
if (div_op == nullptr) {
MS_LOG(ERROR) << "DivFusion convert failed.";
return nullptr;
}
activation = div_op->activation_type();
break;
}
case schema::PrimitiveType_SubFusion: {
auto sub_op = op_primitive_->value_as_SubFusion();
if (sub_op == nullptr) {
MS_LOG(ERROR) << "SubFusion convert failed.";
return nullptr;
}
activation = sub_op->activation_type();
break;
}
case schema::PrimitiveType_MulFusion: {
auto mul_op = op_primitive_->value_as_MulFusion();
if (mul_op == nullptr) {
MS_LOG(ERROR) << "MulFusion convert failed.";
return nullptr;
}
activation = mul_op->activation_type();
break;
}
default:
      MS_LOG(DEBUG) << "no activation needed for: " << op_name_;
}
nvinfer1::ITensor *activation_out_tensor = nullptr;
if (activation != schema::ActivationType::ActivationType_NO_ACTIVATION) {
auto activation_layer = ActivationTensorRT::AddActivation(ctx, activation, 0, 0, 0, in_tensor, device_id_);
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "addActivation for element wise failed";
return nullptr;
}
activation_layer->setName((op_name_ + "_activation").c_str());
activation_out_tensor = activation_layer->getOutput(0);
}
return activation_out_tensor;
}
int ElementWiseTensorRT::AddConstTensor(TensorRTContext *ctx) {
int const_tensor_index = (in_tensors_[0].Data() != nullptr && in_tensors_[0].IsConst()) ? 0 : 1;
nvinfer1::ITensor *constant_input = ConvertConstantTensorWithDims(
ctx, in_tensors_[const_tensor_index], in_tensors_[1 - const_tensor_index].Shape(), op_name_);
CHECK_NULL_RETURN(constant_input);
AddInnerInTensors(ITensorHelper{constant_input, tensorrt_in_tensors_[0].format_, true});
return RET_OK;
}
bool ElementWiseTensorRT::SameTensor(nvinfer1::ITensor *trt_tensor, mindspore::MSTensor *ms_tensor) {
if (SameDims(trt_tensor->getDimensions(), ms_tensor->Shape())) {
return true;
}
if (ms_tensor->Shape().size() == DIMENSION_4D) {
// nhwc nchw
auto nchw_shape = NHWC2NCHW(ms_tensor->Shape());
if (SameDims(trt_tensor->getDimensions(), nchw_shape)) {
return true;
}
}
auto str_name = strstr(trt_tensor->getName(), ms_tensor->Name().c_str());
if (str_name != nullptr) {
return true;
}
str_name = strstr(ms_tensor->Name().c_str(), trt_tensor->getName());
if (str_name != nullptr) {
return true;
}
return false;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_SubFusion, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_DivFusion, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_RealDiv, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_PowFusion, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_AddFusion, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_MulFusion, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Eltwise, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Minimum, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Maximum, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_BiasAdd, ElementWiseTensorRT)
#if TRT_VERSION_GE(7, 2)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Equal, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Less, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Greater, ElementWiseTensorRT)
#endif
} // namespace mindspore::lite
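// --- Illustrative sketch (standalone example, not part of the delegate) ---
// The reshape branch in PreprocessInputTensors above handles two inputs that
// hold the same number of elements but differ in rank: the lower-rank tensor
// is reshaped to the higher-rank tensor's dimensions so that IElementWiseLayer
// sees matching nbDims. A minimal host-side illustration of that rule, using
// std::vector shapes instead of nvinfer1::Dims:
#include <cassert>
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

static int Volume(const std::vector<int> &shape) {
  return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
}

int main() {
  std::vector<int> x_shape{1, 3, 4};  // higher-rank input
  std::vector<int> y_shape{3, 4};     // same element count, lower rank
  assert(Volume(x_shape) == Volume(y_shape));
  // Same rule as the TensorRT path above: the lower-rank tensor simply adopts
  // the higher-rank tensor's dimensions (valid because the volumes match).
  std::vector<int> y_reshaped = x_shape;
  for (int d : y_reshaped) printf("%d ", d);  // prints: 1 3 4
  printf("\n");
  return 0;
}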

View File

@ -0,0 +1,50 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ELEMENTWISE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ELEMENTWISE_TENSORRT_H_
#include <string>
#include <vector>
#include <map>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class ElementWiseTensorRT : public TensorRTOp {
public:
ElementWiseTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~ElementWiseTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
nvinfer1::ITensor *AddActivation(TensorRTContext *ctx, nvinfer1::ITensor *in_tensor);
int AddConstTensor(TensorRTContext *ctx);
bool SameTensor(nvinfer1::ITensor *trt_tensor, mindspore::MSTensor *ms_tensor);
int PreprocessInputTensors(TensorRTContext *ctx, ITensorHelper *x_input, ITensorHelper *y_input);
nvinfer1::ElementWiseOperation element_wise_op_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ELEMENTWISE_TENSORRT_H_

View File

@ -0,0 +1,96 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/equal_tensorrt.h"
#include <numeric>
#include <memory>
#include <functional>
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "NvInferRuntimeCommon.h"
namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(EqualPluginCreater);
template class TensorRTPluginCreater<EqualPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int EqualTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int EqualTensorRT::AddInnerOp(TensorRTContext *ctx) {
nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_, tensorrt_in_tensors_[1].trt_tensor_};
auto plugin = std::make_shared<EqualPlugin>(op_name_, device_id_);
nvinfer1::IPluginV2Layer *equal_layer = ctx->network()->addPluginV2(inputTensors, INPUT_SIZE2, *plugin);
if (equal_layer == nullptr) {
MS_LOG(ERROR) << "create equal layer failed for: " << op_name_;
return RET_ERROR;
}
layer_ = equal_layer;
nvinfer1::ITensor *equal_out = equal_layer->getOutput(0);
equal_layer->setName(op_name_.c_str());
equal_out->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(
ITensorHelper{equal_out, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
return RET_OK;
}
int EqualPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace,
cudaStream_t stream) noexcept {
nvinfer1::Dims input_dims = inputDesc[0].dims;
int element_cnt = std::accumulate(input_dims.d, input_dims.d + input_dims.nbDims, 1, std::multiplies<int64_t>());
if (inputDesc->type == nvinfer1::DataType::kINT32) {
const int *input1 = static_cast<const int *>(inputs[0]);
const int *input2 = static_cast<const int *>(inputs[1]);
int *output = static_cast<int *>(outputs[0]);
Equal(input1, input2, output, element_cnt, stream);
} else if (inputDesc->type == nvinfer1::DataType::kFLOAT) {
const float *input1 = static_cast<const float *>(inputs[0]);
const float *input2 = static_cast<const float *>(inputs[1]);
float *output = static_cast<float *>(outputs[0]);
Equal(input1, input2, output, element_cnt, stream);
} else {
MS_LOG(ERROR) << "unsupported equal data type";
}
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *EqualPlugin::clone() const noexcept {
auto *plugin = new EqualPlugin(*this);
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
#if TRT_VERSION_LS(7, 2)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Equal, EqualTensorRT)
#endif
} // namespace mindspore::lite
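// --- Illustrative sketch (standalone example, not part of the delegate) ---
// EqualPlugin::enqueue above flattens the input to element_cnt values and
// launches the Equal kernel from equal.cuh. A host-side reference of the
// elementwise semantics that kernel is expected to compute (output[i] is 1
// when the inputs match, 0 otherwise), shown here for int32 inputs:
#include <cstdio>
#include <vector>

template <typename T>
std::vector<T> EqualReference(const std::vector<T> &a, const std::vector<T> &b) {
  std::vector<T> out(a.size());
  for (size_t i = 0; i < a.size(); ++i) {
    out[i] = static_cast<T>(a[i] == b[i]);
  }
  return out;
}

int main() {
  std::vector<int> a{1, 2, 3, 4};
  std::vector<int> b{1, 0, 3, 9};
  for (int v : EqualReference(a, b)) printf("%d ", v);  // prints: 1 0 1 0
  printf("\n");
  return 0;
}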

View File

@ -0,0 +1,63 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_EQUAL_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_EQUAL_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/equal.cuh"
namespace mindspore::lite {
constexpr const char *EQUAL_PLUGIN_NAME{"EqualPlugin"};
class EqualTensorRT : public TensorRTOp {
public:
EqualTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~EqualTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
};
class EqualPlugin : public TensorRTPlugin {
public:
EqualPlugin(const std::string name, uint32_t device_id)
: TensorRTPlugin(name, std::string(EQUAL_PLUGIN_NAME), device_id) {}
EqualPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(EQUAL_PLUGIN_NAME)) {}
EqualPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(EQUAL_PLUGIN_NAME)) {}
EqualPlugin() = delete;
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
};
class EqualPluginCreater : public TensorRTPluginCreater<EqualPlugin> {
public:
EqualPluginCreater() : TensorRTPluginCreater(std::string(EQUAL_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_EQUAL_TENSORRT_H_

View File

@ -0,0 +1,106 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/fullyconnected_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
namespace mindspore::lite {
constexpr int BIAS_INDEX = 2;
int FullyConnectedTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int FullyConnectedTensorRT::AddInnerOp(TensorRTContext *ctx) {
auto primitive = op_primitive_->value_as_FullConnection();
CHECK_NULL_RETURN(primitive);
activation_ = primitive->activation_type();
int axis = primitive->axis();
  if (axis < 0 || axis >= static_cast<int>(out_tensors_[0].Shape().size())) {
MS_LOG(ERROR) << "axis: " << axis << " is invalid for " << op_name_;
return RET_ERROR;
}
ITensorHelper fc_input;
auto ret = PreprocessInputs(ctx, &fc_input);
if (ret != RET_OK) {
MS_LOG(ERROR) << "PreprocessInputs failed for " << op_name_;
return ret;
}
auto kernel_weight = ConvertWeight(in_tensors_[1].Data().get() == nullptr ? in_tensors_[0] : in_tensors_[1]);
nvinfer1::Weights bias_weight{};
if (primitive->has_bias()) {
bias_weight = ConvertWeight(in_tensors_[BIAS_INDEX]);
}
nvinfer1::IFullyConnectedLayer *fc_layer = ctx->network()->addFullyConnected(
*(fc_input.trt_tensor_), out_tensors_[0].Shape()[axis], kernel_weight, bias_weight);
if (fc_layer == nullptr) {
MS_LOG(ERROR) << "addFullyConnected failed for " << op_name_;
return RET_ERROR;
}
this->layer_ = fc_layer;
fc_layer->setName(op_name_.c_str());
nvinfer1::ITensor *out_tensor = fc_layer->getOutput(0);
if (out_tensor->getDimensions().nbDims != out_tensors_[0].Shape().size()) {
std::vector<int64_t> squeeze_dim(out_tensors_[0].Shape());
squeeze_dim[0] = out_tensor->getDimensions().d[0] == -1 ? -1 : squeeze_dim[0];
out_tensor = Reshape(ctx, out_tensor, squeeze_dim);
}
// add activation
if (activation_ != schema::ActivationType::ActivationType_NO_ACTIVATION) {
nvinfer1::ILayer *activation_layer =
ActivationTensorRT::AddActivation(ctx, activation_, 0, 0, 0, out_tensor, device_id_);
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "addActivation for matmul failed";
return RET_ERROR;
}
activation_layer->setName((op_name_ + "_activation").c_str());
out_tensor = activation_layer->getOutput(0);
}
out_tensor->setName((op_name_ + "_output").c_str());
MS_LOG(DEBUG) << "output " << GetTensorFormat(out_tensor);
this->AddInnerOutTensors(ITensorHelper{out_tensor, fc_input.format_});
return RET_OK;
}
int FullyConnectedTensorRT::PreprocessInputs(TensorRTContext *ctx, ITensorHelper *fc_input) {
auto ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[0], fc_input);
if (ret != RET_OK) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim failed for " << op_name_;
return ret;
}
auto origin_dims = fc_input->trt_tensor_->getDimensions();
if (origin_dims.nbDims != DIMENSION_4D) {
std::vector<int64_t> expand_dim(origin_dims.d, origin_dims.d + origin_dims.nbDims);
for (int i = 0; i < DIMENSION_4D - origin_dims.nbDims; i++) {
expand_dim.push_back(1);
}
fc_input->trt_tensor_ = Reshape(ctx, fc_input->trt_tensor_, expand_dim);
}
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_FullConnection, FullyConnectedTensorRT)
} // namespace mindspore::lite
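// --- Illustrative sketch (standalone example, not part of the delegate) ---
// PreprocessInputs above pads the input shape with trailing 1s up to 4D before
// calling addFullyConnected, and AddInnerOp squeezes the result back to the
// expected output rank. A host-side illustration of the padding step:
#include <cstdio>
#include <vector>

static std::vector<int64_t> ExpandTo4D(std::vector<int64_t> shape) {
  while (shape.size() < 4) {
    shape.push_back(1);  // trailing 1s, matching the loop in PreprocessInputs
  }
  return shape;
}

int main() {
  std::vector<int64_t> in_shape{8, 256};  // hypothetical [batch, in_features] input
  for (int64_t d : ExpandTo4D(in_shape)) printf("%lld ", static_cast<long long>(d));
  printf("\n");  // prints: 8 256 1 1
  return 0;
}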

View File

@ -0,0 +1,45 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_FULLYCONNECTED_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_FULLYCONNECTED_TENSORRT_H_
#include <string>
#include <vector>
#include <map>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class FullyConnectedTensorRT : public TensorRTOp {
public:
FullyConnectedTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~FullyConnectedTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
int PreprocessInputs(TensorRTContext *ctx, ITensorHelper *fc_input);
schema::ActivationType activation_{schema::ActivationType::ActivationType_NO_ACTIVATION};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_FULLYCONNECTED_TENSORRT_H_

View File

@ -0,0 +1,139 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/gather_d_tensorrt.h"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <functional>
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(GatherDPluginCreater);
template class TensorRTPluginCreater<GatherDPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int GatherDTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported gatherd input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "invalid gatherd input tensor size: " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "invalid gatherd output tensor size: " << out_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int GatherDTensorRT::AddInnerOp(TensorRTContext *ctx) {
nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_, tensorrt_in_tensors_[2].trt_tensor_};
auto dim_tensor = static_cast<const int *>(in_tensors_[1].Data().get());
if (dim_tensor == nullptr) {
MS_LOG(ERROR) << op_name_ << " gatherd dim_tensor is null!";
return RET_ERROR;
}
size_t dim = static_cast<size_t>(dim_tensor[0]);
auto plugin = std::make_shared<GatherDPlugin>(op_name_, dim, device_id_);
nvinfer1::IPluginV2Layer *gatherd_layer = ctx->network()->addPluginV2(inputTensors, INPUT_SIZE2, *plugin);
if (gatherd_layer == nullptr) {
MS_LOG(ERROR) << "create gatherd failed for: " << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *gatherd_out = gatherd_layer->getOutput(0);
gatherd_layer->setName(op_name_.c_str());
gatherd_out->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(
ITensorHelper{gatherd_out, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
this->layer_ = gatherd_layer;
return RET_OK;
}
int GatherDPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace,
cudaStream_t stream) noexcept {
nvinfer1::Dims input_dims = inputDesc[0].dims;
int dims = input_dims.nbDims;
if (axis_ < 0) {
axis_ += dims;
}
if (inputDesc->type == nvinfer1::DataType::kINT32) {
auto input = static_cast<const int *>(inputs[0]);
auto index = static_cast<const int *>(inputs[1]);
auto output = static_cast<int *>(outputs[0]);
Reshape(inputDesc, outputDesc);
Gather<int, int>(input, index, output, dim_before_axis_, dim_at_axis_input_, dim_at_axis_output_, dim_after_axis_,
stream, device_id_);
} else if (inputDesc->type == nvinfer1::DataType::kFLOAT) {
auto input = static_cast<const float *>(inputs[0]);
auto index = static_cast<const int *>(inputs[1]);
auto output = static_cast<float *>(outputs[0]);
Reshape(inputDesc, outputDesc);
Gather<float, int>(input, index, output, dim_before_axis_, dim_at_axis_input_, dim_at_axis_output_, dim_after_axis_,
stream, device_id_);
} else {
MS_LOG(ERROR) << "unsupported data type gatherd" << layer_name_;
}
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *GatherDPlugin::clone() const noexcept {
auto *plugin = new GatherDPlugin(*this);
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
nvinfer1::DimsExprs GatherDPlugin::getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
nvinfer1::IExprBuilder &exprBuilder) noexcept {
nvinfer1::DimsExprs out_dims{};
out_dims.nbDims = inputs[1].nbDims;
for (int i = 0; i < inputs[1].nbDims; i++) {
out_dims.d[i] = inputs[1].d[i];
}
return out_dims;
}
void GatherDPlugin::Reshape(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc) {
nvinfer1::Dims input_dims = inputDesc[0].dims;
nvinfer1::Dims output_dims = outputDesc[0].dims;
size_t dim_before_axis = 1;
for (size_t i = 0; i < IntToSize(axis_); i++) {
dim_before_axis *= output_dims.d[i];
}
size_t dim_at_axis_input = input_dims.d[IntToSize(axis_)];
size_t dim_at_axis_output = output_dims.d[IntToSize(axis_)];
size_t dim_after_axis = 1;
for (size_t i = IntToSize(axis_) + 1; i < output_dims.nbDims; i++) {
dim_after_axis *= output_dims.d[i];
}
dim_before_axis_ = dim_before_axis;
dim_at_axis_input_ = dim_at_axis_input;
dim_at_axis_output_ = dim_at_axis_output;
dim_after_axis_ = dim_after_axis;
return;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_GatherD, GatherDTensorRT)
} // namespace mindspore::lite
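// --- Illustrative sketch (standalone example, not part of the delegate) ---
// GatherDPlugin::Reshape above flattens the shapes into
// (dim_before_axis, dim_at_axis, dim_after_axis) blocks; the Gather kernel is
// then expected to compute, for every output position,
//   output[b][j][a] = input[b][ index[b][j][a] ][a]
// along the chosen axis. A host-side reference using the same flattening:
#include <cstdio>
#include <vector>

std::vector<float> GatherDReference(const std::vector<float> &input, const std::vector<int> &index,
                                    size_t dim_before, size_t dim_at_input, size_t dim_at_output,
                                    size_t dim_after) {
  std::vector<float> output(dim_before * dim_at_output * dim_after);
  for (size_t b = 0; b < dim_before; ++b) {
    for (size_t j = 0; j < dim_at_output; ++j) {
      for (size_t a = 0; a < dim_after; ++a) {
        size_t out_pos = (b * dim_at_output + j) * dim_after + a;
        size_t in_pos = (b * dim_at_input + static_cast<size_t>(index[out_pos])) * dim_after + a;
        output[out_pos] = input[in_pos];
      }
    }
  }
  return output;
}

int main() {
  // input shape [2, 3], gather along axis 1 with index shape [2, 2].
  std::vector<float> input{10, 11, 12, 20, 21, 22};
  std::vector<int> index{2, 0, 1, 1};
  auto out = GatherDReference(input, index, 2, 3, 2, 1);
  for (float v : out) printf("%.0f ", v);  // prints: 12 10 21 21
  printf("\n");
  return 0;
}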

View File

@ -0,0 +1,80 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_D_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_D_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/gather.cuh"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
constexpr const char *GATHER_D_PLUGIN_NAME{"GatherDPluginCreater"};
class GatherDTensorRT : public TensorRTOp {
public:
GatherDTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~GatherDTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
};
class GatherDPlugin : public TensorRTPlugin {
public:
GatherDPlugin(const std::string name, size_t dim, uint32_t device_id)
: TensorRTPlugin(name, std::string(GATHER_D_PLUGIN_NAME), device_id), axis_(dim) {}
GatherDPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(GATHER_D_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
axis_ = static_cast<const int *>(fields[0].data)[0];
}
GatherDPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(GATHER_D_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &axis_, sizeof(int));
}
GatherDPlugin() = delete;
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
nvinfer1::IExprBuilder &exprBuilder) noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
private:
int axis_;
size_t dim_before_axis_;
size_t dim_at_axis_input_;
size_t dim_at_axis_output_;
size_t dim_after_axis_;
void Reshape(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc);
};
class GatherDPluginCreater : public TensorRTPluginCreater<GatherDPlugin> {
public:
GatherDPluginCreater() : TensorRTPluginCreater(std::string(GATHER_D_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_D_TENSORRT_H_

View File

@ -0,0 +1,108 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/gather_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
constexpr int AXIS_INDEX = 2;
int GatherTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
return RET_ERROR;
}
if (in_tensors[1].DataType() != DataType::kNumberTypeInt32) {
MS_LOG(ERROR) << "Gather indices only support Int32";
return RET_ERROR;
}
if (in_tensors[AXIS_INDEX].ElementNum() == 1) {
MS_ASSERT(in_tensors[AXIS_INDEX].Data().get());
axis_ = static_cast<const int *>(in_tensors[AXIS_INDEX].Data().get())[0];
} else {
MS_LOG(ERROR) << "TensorRT axis is attribute.";
return RET_ERROR;
}
return RET_OK;
}
int GatherTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "context or network is invalid";
return RET_ERROR;
}
if (tensorrt_in_tensors_.size() < INPUT_SIZE2 && in_tensors_.size() >= INPUT_SIZE2) {
int const_ms_tensor_index = in_tensors_[0].IsConst() ? 0 : 1;
auto const_input = ConvertConstantTensor(ctx, in_tensors_[const_ms_tensor_index], op_name_);
if (const_input == nullptr) {
MS_LOG(ERROR) << "add const input tensor failed for " << op_name_;
return RET_ERROR;
}
tensorrt_in_tensors_.push_back(ITensorHelper{const_input});
}
int indices_tensor_index = tensorrt_in_tensors_[0].trt_tensor_->getType() == nvinfer1::DataType::kINT32 ? 0 : 1;
ITensorHelper gather_input;
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[1 - indices_tensor_index], &gather_input);
if (ret != RET_OK || gather_input.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim gather failed for " << op_name_;
return RET_ERROR;
}
ITensorHelper indices_tensor;
ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[indices_tensor_index], &indices_tensor);
if (ret != RET_OK || indices_tensor.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim indices failed for " << op_name_;
return RET_ERROR;
}
nvinfer1::IGatherLayer *gather_layer =
ctx->network()->addGather(*gather_input.trt_tensor_, *indices_tensor.trt_tensor_, axis_);
if (gather_layer == nullptr) {
MS_LOG(ERROR) << "addGather failed for TensorRT.";
return RET_ERROR;
}
this->layer_ = gather_layer;
gather_layer->setName(op_name_.c_str());
nvinfer1::ITensor *op_output = gather_layer->getOutput(0);
// keep shape
if (in_tensors_[1].Shape().empty()) {
auto squeeze = ctx->network()->addShuffle(*op_output);
if (squeeze == nullptr) {
MS_LOG(ERROR) << "add output squeeze failed for " << op_name_;
return RET_ERROR;
}
squeeze->setName((op_name_ + "_squeeze_out").c_str());
auto old_shape = ConvertMSShape(op_output->getDimensions());
old_shape.erase(old_shape.begin() + axis_);
squeeze->setReshapeDimensions(ConvertCudaDims(old_shape));
op_output = squeeze->getOutput(0);
}
op_output->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{op_output, gather_input.format_, gather_input.same_format_});
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Gather, GatherTensorRT)
} // namespace mindspore::lite
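// --- Illustrative sketch (standalone example, not part of the delegate) ---
// When the indices tensor is a scalar (empty shape), the shuffle added above
// removes the gathered axis so the TensorRT output rank matches the MS Lite
// output. A host-side illustration of dropping that axis from the shape:
#include <cstdio>
#include <vector>

int main() {
  std::vector<int64_t> gathered_shape{4, 1, 16};  // hypothetical gather output, axis_ == 1
  int axis = 1;
  gathered_shape.erase(gathered_shape.begin() + axis);  // same erase as in AddInnerOp
  for (int64_t d : gathered_shape) printf("%lld ", static_cast<long long>(d));
  printf("\n");  // prints: 4 16
  return 0;
}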

View File

@ -0,0 +1,42 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class GatherTensorRT : public TensorRTOp {
public:
GatherTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~GatherTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
int axis_{0};
mindspore::MSTensor indices_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_GATHER_TENSORRT_H_

View File

@ -0,0 +1,119 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <vector>
#include <functional>
#include <unordered_map>
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "NvInferRuntimeCommon.h"
#include "src/runtime/delegate/tensorrt/op/logical_not_tensorrt.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/logical.cuh"
namespace mindspore::lite {
int LogicalNotTensorRT::IsSupport(const schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
  if (in_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
    return RET_ERROR;
  }
return RET_OK;
}
int LogicalNotTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr || this->tensorrt_in_tensors_.size() != 1) {
MS_LOG(ERROR) << "network or input tensor is invalid";
return RET_ERROR;
}
if (tensorrt_in_tensors_[0].trt_tensor_->getType() != nvinfer1::DataType::kINT32) {
auto cast_layer = ctx->network()->addIdentity(*tensorrt_in_tensors_[0].trt_tensor_);
if (cast_layer == nullptr) {
MS_LOG(ERROR) << "create cast layer failed for: " << op_name_;
return RET_ERROR;
}
cast_layer->setOutputType(0, nvinfer1::DataType::kINT32);
tensorrt_in_tensors_[0].trt_tensor_ = cast_layer->getOutput(0);
}
auto plugin = std::make_shared<LogicalNotPlugin>(op_name_, op_primitive_->value_type());
if (plugin == nullptr) {
MS_LOG(ERROR) << "create ActivationOptPlugin failed for " << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_};
  nvinfer1::IPluginV2Layer *logical_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
  if (logical_layer == nullptr) {
    MS_LOG(ERROR) << "add logical not plugin layer failed for: " << op_name_;
    return RET_ERROR;
  }
  this->layer_ = logical_layer;
  nvinfer1::ITensor *op_out_tensor = logical_layer->getOutput(0);
  if (op_out_tensor == nullptr) {
    MS_LOG(ERROR) << "logical not plugin output tensor is nullptr.";
    return RET_ERROR;
  }
op_out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(
ITensorHelper{op_out_tensor, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
return RET_OK;
}
REGISTER_TENSORRT_PLUGIN(LogicalNotPluginCreater);
template class TensorRTPluginCreater<LogicalNotPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int LogicalNotPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace,
cudaStream_t stream) noexcept {
return RunCudaLogical(inputDesc, inputs, outputs, stream);
}
int LogicalNotPlugin::RunCudaLogical(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
switch (primitive_type_) {
case (schema::PrimitiveType_LogicalNot): {
LogicalNot(static_cast<const int *>(inputs[0]), static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims),
stream);
break;
}
default: {
MS_LOG(ERROR) << "invalid logical type: " << static_cast<int>(primitive_type_);
return RET_ERROR;
}
}
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *LogicalNotPlugin::clone() const noexcept {
auto *plugin = new LogicalNotPlugin(*this);
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
size_t LogicalNotPlugin::getSerializationSize() const noexcept { return sizeof(schema::PrimitiveType); }
void LogicalNotPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &primitive_type_, sizeof(schema::PrimitiveType));
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_LogicalNot, LogicalNotTensorRT)
} // namespace mindspore::lite
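// --- Illustrative sketch (standalone example, not part of the delegate) ---
// The input is cast to int32 before the plugin runs, and the LogicalNot kernel
// from logical.cuh is expected to produce 1 where the input is 0 and 0
// elsewhere. A host-side reference of that semantics:
#include <cstdio>
#include <vector>

std::vector<int> LogicalNotReference(const std::vector<int> &in) {
  std::vector<int> out(in.size());
  for (size_t i = 0; i < in.size(); ++i) {
    out[i] = (in[i] == 0) ? 1 : 0;
  }
  return out;
}

int main() {
  std::vector<int> in{0, 1, 5, 0};
  for (int v : LogicalNotReference(in)) printf("%d ", v);  // prints: 1 0 0 1
  printf("\n");
  return 0;
}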

View File

@ -0,0 +1,78 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_NOT_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_NOT_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class LogicalNotTensorRT : public TensorRTOp {
public:
LogicalNotTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~LogicalNotTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
};
constexpr const char *LOGICAL_NOT_PLUGIN_NAME{"LogicalNotPlugin"};
class LogicalNotPlugin : public TensorRTPlugin {
public:
LogicalNotPlugin(const std::string name, schema::PrimitiveType primitive_type)
: TensorRTPlugin(name, std::string(LOGICAL_NOT_PLUGIN_NAME)), primitive_type_(primitive_type) {}
LogicalNotPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(LOGICAL_NOT_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
primitive_type_ = static_cast<const schema::PrimitiveType *>(fields[0].data)[0];
}
LogicalNotPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(LOGICAL_NOT_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &primitive_type_, sizeof(schema::PrimitiveType));
}
LogicalNotPlugin() = delete;
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void *buffer) const noexcept override;
private:
int RunCudaLogical(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
cudaStream_t stream);
schema::PrimitiveType primitive_type_;
};
class LogicalNotPluginCreater : public TensorRTPluginCreater<LogicalNotPlugin> {
public:
LogicalNotPluginCreater() : TensorRTPluginCreater(std::string(LOGICAL_NOT_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_NOT_TENSORRT_H_

View File

@ -0,0 +1,129 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <vector>
#include <functional>
#include <unordered_map>
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "NvInferRuntimeCommon.h"
#include "src/runtime/delegate/tensorrt/op/logical_tensorrt.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/logical.cuh"
namespace mindspore::lite {
int LogicalTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int LogicalTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "network or input tensor is invalid";
return RET_ERROR;
}
  for (size_t i = 0; i < tensorrt_in_tensors_.size(); ++i) {
    if (tensorrt_in_tensors_[i].trt_tensor_->getType() != nvinfer1::DataType::kINT32) {
      auto cast_layer = ctx->network()->addIdentity(*tensorrt_in_tensors_[i].trt_tensor_);
      if (cast_layer == nullptr) {
        MS_LOG(ERROR) << "create cast layer failed for: " << op_name_;
        return RET_ERROR;
      }
      cast_layer->setOutputType(0, nvinfer1::DataType::kINT32);
      tensorrt_in_tensors_[i].trt_tensor_ = cast_layer->getOutput(0);
    }
  }
auto plugin = std::make_shared<LogicalPlugin>(op_name_, op_primitive_->value_type());
if (plugin == nullptr) {
MS_LOG(ERROR) << "create ActivationOptPlugin failed for " << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_, tensorrt_in_tensors_[1].trt_tensor_};
  nvinfer1::IPluginV2Layer *logical_layer = ctx->network()->addPluginV2(inputTensors, 2, *plugin);
  if (logical_layer == nullptr) {
    MS_LOG(ERROR) << "add logical plugin layer failed for: " << op_name_;
    return RET_ERROR;
  }
  this->layer_ = logical_layer;
  nvinfer1::ITensor *op_out_tensor = logical_layer->getOutput(0);
  if (op_out_tensor == nullptr) {
    MS_LOG(ERROR) << "logical plugin output tensor is nullptr.";
    return RET_ERROR;
  }
op_out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(
ITensorHelper{op_out_tensor, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
return RET_OK;
}
REGISTER_TENSORRT_PLUGIN(LogicalPluginCreater);
template class TensorRTPluginCreater<LogicalPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int LogicalPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace,
cudaStream_t stream) noexcept {
return RunCudaLogical(inputDesc, inputs, outputs, stream);
}
int LogicalPlugin::RunCudaLogical(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
switch (primitive_type_) {
case (schema::PrimitiveType_LogicalAnd): {
LogicalAnd(static_cast<const int *>(inputs[0]), static_cast<const int *>(inputs[1]),
static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
break;
}
case (schema::PrimitiveType_LogicalOr): {
LogicalOr(static_cast<const int *>(inputs[0]), static_cast<const int *>(inputs[1]),
static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
break;
}
default: {
MS_LOG(ERROR) << "invalid logical type: " << static_cast<int>(primitive_type_);
return RET_ERROR;
}
}
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *LogicalPlugin::clone() const noexcept {
auto *plugin = new LogicalPlugin(*this);
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
size_t LogicalPlugin::getSerializationSize() const noexcept { return sizeof(schema::PrimitiveType); }
void LogicalPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &primitive_type_, sizeof(schema::PrimitiveType));
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_LogicalOr, LogicalTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_LogicalAnd, LogicalTensorRT)
} // namespace mindspore::lite
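// --- Illustrative sketch (standalone example, not part of the delegate) ---
// RunCudaLogical above dispatches to the LogicalAnd / LogicalOr kernels from
// logical.cuh on int32 buffers. A host-side reference of the expected
// elementwise semantics (non-zero is treated as true):
#include <cstdio>
#include <vector>

std::vector<int> LogicalBinaryReference(const std::vector<int> &a, const std::vector<int> &b, bool is_and) {
  std::vector<int> out(a.size());
  for (size_t i = 0; i < a.size(); ++i) {
    bool lhs = a[i] != 0;
    bool rhs = b[i] != 0;
    out[i] = (is_and ? (lhs && rhs) : (lhs || rhs)) ? 1 : 0;
  }
  return out;
}

int main() {
  std::vector<int> a{0, 1, 1, 0};
  std::vector<int> b{0, 0, 1, 1};
  for (int v : LogicalBinaryReference(a, b, true)) printf("%d ", v);   // AND: 0 0 1 0
  printf("\n");
  for (int v : LogicalBinaryReference(a, b, false)) printf("%d ", v);  // OR:  0 1 1 1
  printf("\n");
  return 0;
}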

View File

@ -0,0 +1,78 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_PLUGIN_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_PLUGIN_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
namespace mindspore::lite {
class LogicalTensorRT : public TensorRTOp {
public:
LogicalTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~LogicalTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
};
constexpr const char *LOGICAL_PLUGIN_NAME{"LogicalPlugin"};
class LogicalPlugin : public TensorRTPlugin {
public:
LogicalPlugin(const std::string name, schema::PrimitiveType primitive_type)
: TensorRTPlugin(name, std::string(LOGICAL_PLUGIN_NAME)), primitive_type_(primitive_type) {}
LogicalPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(LOGICAL_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
primitive_type_ = static_cast<const schema::PrimitiveType *>(fields[0].data)[0];
}
LogicalPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(LOGICAL_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &primitive_type_, sizeof(schema::PrimitiveType));
}
LogicalPlugin() = delete;
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void *buffer) const noexcept override;
private:
int RunCudaLogical(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
cudaStream_t stream);
schema::PrimitiveType primitive_type_;
};
class LogicalPluginCreater : public TensorRTPluginCreater<LogicalPlugin> {
public:
LogicalPluginCreater() : TensorRTPluginCreater(std::string(LOGICAL_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LOGICAL_PLUGIN_H_

View File

@ -0,0 +1,493 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/lstm_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_runtime.h"
namespace mindspore::lite {
int LSTMTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
#if TRT_VERSION_GE(7, 0)
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() < INPUT_TENSOR_SIZE) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != OUTPUT_TENSOR_SIZE) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
hidden_init_name_ = hidden_in_init.Name() + "_hidden_init";
mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
cell_init_name_ = cell_in_init.Name() + "_cell_init";
dynamic_shape_params_.support_dynamic_ = false;
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
#else
MS_LOG(WARNING) << "low TensorRT version don't support LSTM op, please upgrade TensorRT version to 7 or higher";
return RET_ERROR;
#endif
}
int LSTMTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "context or network is invalid";
return RET_ERROR;
}
int input_data_dims_cnt = tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims;
if (input_data_dims_cnt != DIMENSION_3D) {
MS_LOG(ERROR) << "invalid input data shape dims for " << op_name_;
return RET_ERROR;
}
network_ = ctx->network();
int ret = PreProcess();
if (ret != RET_OK) {
MS_LOG(ERROR) << "PreProcess for " << op_name_;
return ret;
}
ret = AddLSTMLayers();
if (ret != RET_OK) {
MS_LOG(ERROR) << "AddLSTMLayers for " << op_name_;
return RET_ERROR;
}
if (op_data_out_ == nullptr) {
MS_LOG(ERROR) << "layers final output tensor is invalid for " << op_name_;
return RET_ERROR;
}
op_data_out_->setName((op_name_ + "_output").c_str());
MS_LOG(DEBUG) << "lstm op_data_out_ " << GetTensorFormat(op_data_out_);
MS_LOG(DEBUG) << "lstm op_hidden_out_ " << GetTensorFormat(op_hidden_out_);
MS_LOG(DEBUG) << "lstm op_cell_out_ " << GetTensorFormat(op_cell_out_);
this->AddInnerOutTensors(ITensorHelper{op_data_out_});
this->AddInnerOutTensors(ITensorHelper{op_hidden_out_});
this->AddInnerOutTensors(ITensorHelper{op_cell_out_});
return RET_OK;
}
int LSTMTensorRT::PreProcess() {
auto ms_input_shape = in_tensors_[0].Shape();
params_.sequence_size_ = ms_input_shape[0];
params_.batch_size_ = ms_input_shape[1];
params_.input_data_size_ = ms_input_shape[INPUT_SIZE_INDEX];
if (params_.batch_size_ != 1) {
MS_LOG(WARNING) << op_name_ << " lstm has batchsize " << params_.batch_size_ << ", needs further verify";
}
// ms: 0 sequence size, 1 batch size, 2 input size -> tensorrt: 0 batch size, 1 sequence size, 2 input size
auto transpose_in_layer = network_->addShuffle(*tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_in_layer == nullptr) {
MS_LOG(ERROR) << "create transpose_in_layer failed for " << op_name_;
return RET_ERROR;
}
nvinfer1::Permutation transpose_perm{{1, 0, INPUT_SIZE_INDEX}};
transpose_in_layer->setFirstTranspose(transpose_perm);
transpose_in_layer->setName((op_name_ + "transpose_in").c_str());
input_data_ = transpose_in_layer->getOutput(0);
MS_LOG(DEBUG) << "lstm input " << GetTensorFormat(input_data_);
auto lstm_op = op_primitive_->value_as_LSTM();
params_.layer_count_ = lstm_op->num_layers() == 0 ? 1 : lstm_op->num_layers();
params_.hidden_size_ = lstm_op->hidden_size();
params_.directional_cnt_ = lstm_op->bidirectional() ? BIDIRECTIONAL : 1;
params_.data_type_ = ConvertDataType(in_tensors_[1].DataType());
return RET_OK;
}
int LSTMTensorRT::AddLSTMLayers() {
mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
nvinfer1::ITensor *data_out{nullptr};
nvinfer1::ITensor *hidden_init = network_->addInput(
hidden_init_name_.c_str(), nvinfer1::DataType::kFLOAT,
nvinfer1::Dims3(params_.layer_count_ * params_.directional_cnt_, params_.batch_size_, params_.hidden_size_));
if (hidden_init == nullptr) {
MS_LOG(ERROR) << "add hidden_init input tensor failed for " << op_name_;
return RET_ERROR;
}
op_binding_tensor_.push_back(BindingHelper{hidden_init_name_, hidden_in_init.MutableData(),
nvinfer1::DataType::kFLOAT, hidden_in_init.DataSize()});
nvinfer1::ITensor *cell_init = network_->addInput(
cell_init_name_.c_str(), nvinfer1::DataType::kFLOAT,
nvinfer1::Dims3(params_.layer_count_ * params_.directional_cnt_, params_.batch_size_, params_.hidden_size_));
if (cell_init == nullptr) {
MS_LOG(ERROR) << "add cell_init input tensor failed for " << op_name_;
return RET_ERROR;
}
op_binding_tensor_.push_back(
BindingHelper{cell_init_name_, cell_in_init.MutableData(), nvinfer1::DataType::kFLOAT, cell_in_init.DataSize()});
sequence_size_input_ =
network_->addInput((op_name_ + "_seq_input").c_str(), nvinfer1::DataType::kINT32, nvinfer1::Dims{});
if (sequence_size_input_ == nullptr) {
MS_LOG(ERROR) << "add sequence_size_input_ input tensor failed for " << op_name_;
return RET_ERROR;
}
op_binding_tensor_.push_back(
BindingHelper{(op_name_ + "_seq_input"), &params_.sequence_size_, nvinfer1::DataType::kINT32, sizeof(int)});
nvinfer1::ITensor *max_sequence_size =
network_->addConstant(nvinfer1::Dims{}, nvinfer1::Weights{nvinfer1::DataType::kINT32, &params_.sequence_size_, 1})
->getOutput(0);
if (max_sequence_size == nullptr) {
MS_LOG(ERROR) << "add max_sequence_size constant tensor failed for " << op_name_;
return RET_ERROR;
}
LstmState next_state{input_data_, nullptr, nullptr}; // init states
std::vector<nvinfer1::ITensor *> hidden_outputs;
std::vector<nvinfer1::ITensor *> cell_outputs;
int input_weight_offset = 0;
int state_weight_offset = 0;
int bias_offset = 0;
if (params_.layer_count_ != 1) {
MS_LOG(WARNING) << op_name_ << " needs verify for layer cnt: " << params_.layer_count_;
}
for (int i = 0; i < params_.layer_count_; i++) {
LstmState layer_input_states[BIDIRECTIONAL];
LstmWeights layer_weights[BIDIRECTIONAL];
layer_weights[0].max_seq_size_ = max_sequence_size;
int ret = ParseLSTMCellInputs(i, hidden_init, cell_init, layer_input_states, &input_weight_offset,
&state_weight_offset, &bias_offset, layer_weights, next_state);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ParseLSTMCellInputs failed for " << op_name_;
return RET_ERROR;
}
data_out = AddLSTMCell(layer_input_states, layer_weights, &next_state);
hidden_outputs.push_back(next_state.hidden_);
cell_outputs.push_back(next_state.cell_);
if (data_out == nullptr || next_state.hidden_ == nullptr || next_state.cell_ == nullptr) {
MS_LOG(ERROR) << "AddLSTMCell failed for " << op_name_;
return RET_ERROR;
}
}
op_hidden_out_ = ConcateAll(hidden_outputs);
if (op_hidden_out_ == nullptr) {
MS_LOG(ERROR) << "concat hidden output failed for " << op_name_;
return RET_ERROR;
}
op_hidden_out_->setName(out_tensors_[OUTPUT_HIDDEN_INDEX].Name().c_str());
op_cell_out_ = ConcateAll(cell_outputs);
if (op_cell_out_ == nullptr) {
MS_LOG(ERROR) << "concat cell output failed for " << op_name_;
return RET_ERROR;
}
op_cell_out_->setName(out_tensors_[OUTPUT_CELL_INDEX].Name().c_str());
op_data_out_ = data_out;
return RET_OK;
}
int LSTMTensorRT::ParseLSTMCellInputs(int layer_index, nvinfer1::ITensor *hidden_init, nvinfer1::ITensor *cell_init,
LstmState *layer_input_states, int *input_weight_offset, int *state_weight_offset,
int *bias_offset, LstmWeights *layer_weights, const LstmState &next_state) {
nvinfer1::Dims2 dim_input_weight(LSTM_GATE_NUM * params_.hidden_size_, params_.input_data_size_);
nvinfer1::Dims2 dim_state_weight(LSTM_GATE_NUM * params_.hidden_size_, params_.hidden_size_);
nvinfer1::Dims dim_bias{1, {LSTM_GATE_NUM * params_.hidden_size_}};
mindspore::MSTensor &input_weight = in_tensors_[INPUT_WEIGHT];
mindspore::MSTensor &state_weight = in_tensors_[STATE_WEIGHT];
mindspore::MSTensor &bias = in_tensors_[BIAS];
nvinfer1::Dims dimW = layer_index == 0 ? dim_input_weight : dim_state_weight;
for (int direction_index = 0; direction_index < params_.directional_cnt_; direction_index++) {
nvinfer1::ITensor *index =
network_
->addConstant(nvinfer1::Dims{},
nvinfer1::Weights{nvinfer1::DataType::kINT32,
&INDICES[layer_index * params_.directional_cnt_ + direction_index], 1})
->getOutput(0);
MS_ASSERT(index);
layer_input_states[direction_index].data_ = next_state.data_;
layer_input_states[direction_index].hidden_ = network_->addGather(*hidden_init, *index, 0)->getOutput(0);
layer_input_states[direction_index].cell_ = network_->addGather(*cell_init, *index, 0)->getOutput(0);
MS_ASSERT(layer_input_states[direction_index].hidden_);
MS_ASSERT(layer_input_states[direction_index].cell_);
// weight order: input, output, forget, cell
if (params_.data_type_ != nvinfer1::DataType::kFLOAT) {
MS_LOG(WARNING) << "more data type need to be done";
return RET_ERROR;
}
const float *input_weight_ptr = static_cast<const float *>(input_weight.Data().get());
const float *state_weight_ptr = static_cast<const float *>(state_weight.Data().get());
const float *bias_ptr = static_cast<const float *>(bias.Data().get());
nvinfer1::Weights slice_input_weight{params_.data_type_, input_weight_ptr + *input_weight_offset,
GetDimsVolume(dimW)};
(*input_weight_offset) += slice_input_weight.count;
nvinfer1::Weights slice_state_weight{params_.data_type_, state_weight_ptr + *state_weight_offset,
GetDimsVolume(dim_state_weight)};
(*state_weight_offset) += slice_state_weight.count;
layer_weights[direction_index].input_weights_ = network_->addConstant(dimW, slice_input_weight)->getOutput(0);
layer_weights[direction_index].state_weights_ =
network_->addConstant(dim_state_weight, slice_state_weight)->getOutput(0);
MS_ASSERT(layer_weights[direction_index].input_weights_);
MS_ASSERT(layer_weights[direction_index].state_weights_);
// bias
nvinfer1::Weights slice_input_bias{params_.data_type_, bias_ptr + *bias_offset, GetDimsVolume(dim_bias)};
(*bias_offset) += slice_input_bias.count;
nvinfer1::Weights slice_state_bias{params_.data_type_, bias_ptr + *bias_offset, GetDimsVolume(dim_bias)};
(*bias_offset) += slice_state_bias.count;
layer_weights[direction_index].input_bias_ = network_->addConstant(dim_bias, slice_input_bias)->getOutput(0);
layer_weights[direction_index].state_bias_ = network_->addConstant(dim_bias, slice_state_bias)->getOutput(0);
MS_ASSERT(layer_weights[direction_index].input_bias_);
MS_ASSERT(layer_weights[direction_index].state_bias_);
}
if (params_.directional_cnt_ == BIDIRECTIONAL) {
layer_weights[1].max_seq_size_ = layer_weights[0].max_seq_size_;
}
return RET_OK;
}
nvinfer1::ITensor *LSTMTensorRT::Reshape(nvinfer1::ITensor *tensor, nvinfer1::Dims dims) {
nvinfer1::IShuffleLayer *shuffle = network_->addShuffle(*tensor);
shuffle->setReshapeDimensions(dims);
return shuffle->getOutput(0);
}
nvinfer1::ITensor *LSTMTensorRT::ConcateAll(std::vector<nvinfer1::ITensor *> all_tensor, int axis) {
if (all_tensor.size() == 1) {
return all_tensor[0];
}
nvinfer1::IConcatenationLayer *concat = network_->addConcatenation(all_tensor.data(), all_tensor.size());
if (concat == nullptr) {
MS_LOG(ERROR) << "addConcatenation failed for " << op_name_;
return nullptr;
}
if (axis >= all_tensor[0]->getDimensions().nbDims) {
MS_LOG(ERROR) << op_name_ << " concat axis is " << axis << ", larger than tensor dims "
<< all_tensor[0]->getDimensions().nbDims;
return nullptr;
}
concat->setAxis(axis);
return concat->getOutput(0);
}
nvinfer1::ITensor *LSTMTensorRT::AddLSTMCell(const LstmState *layer_input_states, const LstmWeights *layer_weights,
LstmState *next_state) {
nvinfer1::ITensor *backward_output = nullptr;
nvinfer1::ITensor *backward_hidden_out = nullptr;
nvinfer1::ITensor *backward_cell_out = nullptr;
nvinfer1::ITensor *forward_hidden_out = nullptr;
nvinfer1::ITensor *forward_cell_out = nullptr;
nvinfer1::ITensor *forward_output =
AddLSTMCalculation(layer_input_states[0], layer_weights[0], &forward_hidden_out, &forward_cell_out);
if (params_.directional_cnt_ == BIDIRECTIONAL) {
backward_output =
AddLSTMCalculation(layer_input_states[1], layer_weights[1], &backward_hidden_out, &backward_cell_out, true);
}
// concate forward and backward
nvinfer1::ITensor *output_tensor = forward_output;
nvinfer1::ITensor *cell_out = forward_cell_out;
nvinfer1::ITensor *hidden_out = forward_hidden_out;
if (backward_output != nullptr && backward_hidden_out != nullptr && backward_cell_out != nullptr) {
nvinfer1::ITensor *output_concat_input[BIDIRECTIONAL] = {forward_output, backward_output};
    auto output_out_layer = network_->addConcatenation(output_concat_input, BIDIRECTIONAL);
    this->layer_ = output_out_layer;
    if (output_out_layer == nullptr) {
      MS_LOG(ERROR) << "create one loop output concat failed for " << op_name_;
      return nullptr;
    }
    output_out_layer->setAxis(1);  // ms: 0 sequence size, 1 layer * direction, 2 batchsize, 3 hidden
    output_tensor = output_out_layer->getOutput(0);
nvinfer1::ITensor *hidden_concat_input[BIDIRECTIONAL] = {forward_hidden_out, backward_hidden_out};
auto hidden_out_layer = network_->addConcatenation(hidden_concat_input, BIDIRECTIONAL);
hidden_out_layer->setAxis(0);
hidden_out = hidden_out_layer->getOutput(0);
nvinfer1::ITensor *cell_concat_input[BIDIRECTIONAL] = {forward_cell_out, backward_cell_out};
auto cell_out_layer = network_->addConcatenation(cell_concat_input, BIDIRECTIONAL);
cell_out_layer->setAxis(0);
cell_out = cell_out_layer->getOutput(0);
}
if (hidden_out == nullptr || cell_out == nullptr) {
MS_LOG(ERROR) << "get one loop hidden_out and cell_out failed for " << op_name_;
return nullptr;
}
*next_state = LstmState{output_tensor, hidden_out, cell_out};
return output_tensor;
}
nvinfer1::ITensor *LSTMTensorRT::AddLSTMCalculation(const LstmState &input_state, const LstmWeights &lstm_weights,
nvinfer1::ITensor **hidden_out, nvinfer1::ITensor **cell_out,
bool is_backward) {
std::vector<nvinfer1::ITensor *> all_batch_outputs;
std::vector<nvinfer1::ITensor *> all_batch_hidden;
std::vector<nvinfer1::ITensor *> all_batch_cell;
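  // unroll over the batch: gather one batch element, run a single LSTM loop on it, then concatenate the per-batch results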
for (int batch_index = 0; batch_index < params_.batch_size_; batch_index++) {
LstmState one_batch_input_state;
nvinfer1::ITensor *batch_index_tensor =
network_->addConstant(nvinfer1::Dims{}, nvinfer1::Weights{nvinfer1::DataType::kINT32, &INDICES[batch_index], 1})
->getOutput(0);
one_batch_input_state.data_ = network_->addGather(*input_state.data_, *batch_index_tensor, 0)->getOutput(0);
one_batch_input_state.hidden_ = network_->addGather(*input_state.hidden_, *batch_index_tensor, 0)->getOutput(0);
one_batch_input_state.cell_ = network_->addGather(*input_state.cell_, *batch_index_tensor, 0)->getOutput(0);
nvinfer1::ITensor *one_batch_hidden = nullptr;
nvinfer1::ITensor *one_batch_cell = nullptr;
nvinfer1::ITensor *one_batch_output =
AddLSTMOneLoop(one_batch_input_state, lstm_weights, &one_batch_hidden, &one_batch_cell, is_backward);
if (one_batch_output == nullptr || one_batch_cell == nullptr || one_batch_hidden == nullptr) {
MS_LOG(ERROR) << "AddLSTMOneLoop failed for " << op_name_ << " at batch index " << batch_index;
return nullptr;
}
all_batch_outputs.push_back(one_batch_output);
all_batch_hidden.push_back(one_batch_hidden);
all_batch_cell.push_back(one_batch_cell);
}
*hidden_out = ConcateAll(all_batch_hidden, 1);
*cell_out = ConcateAll(all_batch_cell, 1);
return ConcateAll(all_batch_outputs, BATCH_SIZE_INDEX);
}
nvinfer1::ITensor *LSTMTensorRT::AddLSTMOneLoop(const LstmState &input_state, const LstmWeights &lstm_weights,
nvinfer1::ITensor **hidden_out, nvinfer1::ITensor **cell_out,
bool is_backward) {
#if TRT_VERSION_GE(7, 0)
nvinfer1::ILoop *sequence_loop = network_->addLoop();
if (sequence_loop == nullptr) {
MS_LOG(ERROR) << "add sequence_loop layer failed for " << op_name_;
return nullptr;
}
std::string loop_name = op_name_ + "_loop" + (is_backward ? "_backward" : "_forward");
sequence_loop->setName(loop_name.c_str());
sequence_loop->addTripLimit(*sequence_size_input_, nvinfer1::TripLimit::kCOUNT);
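  // the loop runs once per timestep; the iterator feeds one sequence element per trip and reverses the order when is_backward is set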
nvinfer1::ITensor *input = sequence_loop->addIterator(*input_state.data_, 0, is_backward)->getOutput(0);
nvinfer1::ILayer *hidden_mid = sequence_loop->addRecurrence(*input_state.hidden_);
if (hidden_mid == nullptr) {
MS_LOG(ERROR) << "add hidden layer failed for " << op_name_;
return nullptr;
}
nvinfer1::ILayer *cell_mid = sequence_loop->addRecurrence(*input_state.cell_);
if (cell_mid == nullptr) {
MS_LOG(ERROR) << "add cell layer failed for " << op_name_;
return nullptr;
}
nvinfer1::ITensor *input_matmul =
network_
->addMatrixMultiply(*input, nvinfer1::MatrixOperation::kVECTOR, *lstm_weights.input_weights_,
nvinfer1::MatrixOperation::kTRANSPOSE)
->getOutput(0);
nvinfer1::ITensor *hidden_matmul =
network_
->addMatrixMultiply(*hidden_mid->getOutput(0), nvinfer1::MatrixOperation::kVECTOR, *lstm_weights.state_weights_,
nvinfer1::MatrixOperation::kTRANSPOSE)
->getOutput(0);
nvinfer1::ITensor *weights_add =
network_->addElementWise(*input_matmul, *hidden_matmul, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
nvinfer1::ITensor *bias =
network_->addElementWise(*lstm_weights.input_bias_, *lstm_weights.state_bias_, nvinfer1::ElementWiseOperation::kSUM)
->getOutput(0);
nvinfer1::ITensor *gates_calculate =
network_->addElementWise(*weights_add, *bias, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
const auto isolateGate = [&](nvinfer1::ITensor &gates, int gateIndex) -> nvinfer1::ITensor * {
nvinfer1::ISliceLayer *slice =
network_->addSlice(gates, nvinfer1::Dims{1, {gateIndex * params_.hidden_size_}},
nvinfer1::Dims{1, {params_.hidden_size_}}, nvinfer1::Dims{1, {1}});
return Reshape(slice->getOutput(0), nvinfer1::Dims{1, {params_.hidden_size_}});
};
// weight order: input, output, forget, cell
nvinfer1::ITensor *i =
network_->addActivation(*isolateGate(*gates_calculate, 0), nvinfer1::ActivationType::kSIGMOID)->getOutput(0);
nvinfer1::ITensor *o =
network_->addActivation(*isolateGate(*gates_calculate, 1), nvinfer1::ActivationType::kSIGMOID)->getOutput(0);
nvinfer1::ITensor *f =
network_->addActivation(*isolateGate(*gates_calculate, FORGET_GATE), nvinfer1::ActivationType::kSIGMOID)
->getOutput(0);
nvinfer1::ITensor *c =
network_->addActivation(*isolateGate(*gates_calculate, CELL_GATE), nvinfer1::ActivationType::kTANH)->getOutput(0);
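  // cell state update: C_t = f * C_{t-1} + i * c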
nvinfer1::ITensor *C =
network_
->addElementWise(
*network_->addElementWise(*f, *cell_mid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD)->getOutput(0),
*network_->addElementWise(*i, *c, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0),
nvinfer1::ElementWiseOperation::kSUM)
->getOutput(0);
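  // hidden state: H_t = o * tanh(C_t)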
nvinfer1::ITensor *H =
network_
->addElementWise(*o, *network_->addActivation(*C, nvinfer1::ActivationType::kTANH)->getOutput(0),
nvinfer1::ElementWiseOperation::kPROD)
->getOutput(0);
// Recurrent backedge input for hidden and cell.
cell_mid->setInput(1, *C);
hidden_mid->setInput(1, *H);
// outputs
nvinfer1::LoopOutput output_mode = is_backward ? nvinfer1::LoopOutput::kREVERSE : nvinfer1::LoopOutput::kCONCATENATE;
nvinfer1::ILoopOutputLayer *output_layer = sequence_loop->addLoopOutput(*H, output_mode);
output_layer->setInput(1, *lstm_weights.max_seq_size_);
*hidden_out =
Reshape(sequence_loop->addLoopOutput(*hidden_mid->getOutput(0), nvinfer1::LoopOutput::kLAST_VALUE)->getOutput(0),
nvinfer1::Dims3(1, 1, params_.hidden_size_));
*cell_out =
Reshape(sequence_loop->addLoopOutput(*cell_mid->getOutput(0), nvinfer1::LoopOutput::kLAST_VALUE)->getOutput(0),
nvinfer1::Dims3(1, 1, params_.hidden_size_));
return Reshape(output_layer->getOutput(0), nvinfer1::Dims4(params_.sequence_size_, 1, 1, params_.hidden_size_));
#else
MS_LOG(ERROR) << "low TensorRT version don't support LSTM op, please upgrade TensorRT version to 7 or higher";
return nullptr;
#endif
}
int LSTMTensorRT::Prepare(void **network_tensor_bindings, nvinfer1::ICudaEngine *engine) {
if (op_binding_tensor_.size() == 0) {
MS_LOG(DEBUG) << "unsing serialized engine, add input tensor for " << op_name_;
mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
op_binding_tensor_.push_back(BindingHelper{hidden_init_name_, hidden_in_init.MutableData(),
nvinfer1::DataType::kFLOAT, hidden_in_init.DataSize()});
op_binding_tensor_.push_back(
BindingHelper{cell_init_name_, cell_in_init.MutableData(), nvinfer1::DataType::kFLOAT, cell_in_init.DataSize()});
params_.sequence_size_ = in_tensors_[0].Shape()[0];
op_binding_tensor_.push_back(
BindingHelper{(op_name_ + "_seq_input"), &params_.sequence_size_, nvinfer1::DataType::kINT32, sizeof(int)});
}
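  // for every extra binding: allocate device memory, register it in the engine's binding table, and sync the host data to device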
for (auto tensor : op_binding_tensor_) {
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor.name_, tensor.size_, tensor.data_type_);
if (device_ptr == nullptr) {
MS_LOG(ERROR) << "malloc for inputs tensor device memory failed " << tensor.name_;
return RET_ERROR;
}
int index = engine->getBindingIndex(tensor.name_.c_str());
network_tensor_bindings[index] = device_ptr;
runtime_->GetAllocator()->SyncMemInHostAndDevice(tensor.data_, tensor.name_, tensor.size_, true);
runtime_->GetAllocator()->MarkMemValid(tensor.name_, true);
}
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_LSTM, LSTMTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,115 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LSTM_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LSTM_TENSORRT_H_
#include <string>
#include <vector>
#include <array>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
constexpr int INPUT_TENSOR_SIZE = 6;
constexpr int OUTPUT_TENSOR_SIZE = 3;
constexpr int INPUT_WEIGHT = 1;
constexpr int STATE_WEIGHT = 2;
constexpr int BIAS = 3;
constexpr int HIDDEN_IN_TENSOR_INIT = 4;
constexpr int CELL_IN_TENSOR_INIT = 5;
constexpr int LSTM_GATE_NUM = 4;
constexpr int BIDIRECTIONAL = 2;
constexpr int OUTPUT_HIDDEN_INDEX = 1;
constexpr int OUTPUT_CELL_INDEX = 2;
constexpr int INPUT_SIZE_INDEX = 2;
constexpr int FORGET_GATE = 2;
constexpr int CELL_GATE = 3;
constexpr int BATCH_SIZE_INDEX = 2;
static const std::array<int, 4> INDICES{0, 1, 2, 3};
struct LSTMParams {
int sequence_size_;
int input_data_size_;
int batch_size_;
int layer_count_;
int hidden_size_;
nvinfer1::DataType data_type_;
int directional_cnt_;
};
struct LstmState {
nvinfer1::ITensor *data_{nullptr};
nvinfer1::ITensor *hidden_{nullptr};
nvinfer1::ITensor *cell_{nullptr};
};
struct LstmWeights {
nvinfer1::ITensor *input_weights_{nullptr};
nvinfer1::ITensor *state_weights_{nullptr};
nvinfer1::ITensor *input_bias_{nullptr};
nvinfer1::ITensor *state_bias_{nullptr};
nvinfer1::ITensor *max_seq_size_{nullptr};
};
class LSTMTensorRT : public TensorRTOp {
public:
LSTMTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~LSTMTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
int Prepare(void **network_tensor_bindings, nvinfer1::ICudaEngine *engine) override;
private:
int PreProcess();
int AddLSTMLayers();
nvinfer1::ITensor *AddLSTMCell(const LstmState *layer_input_states, const LstmWeights *layer_weights,
LstmState *next_state);
nvinfer1::ITensor *Reshape(nvinfer1::ITensor *tensor, nvinfer1::Dims dims);
  nvinfer1::ITensor *ConcateAll(std::vector<nvinfer1::ITensor *> all_tensor, int axis = 0);
nvinfer1::ITensor *AddLSTMCalculation(const LstmState &input_state, const LstmWeights &lstm_weights,
nvinfer1::ITensor **hidden_out, nvinfer1::ITensor **cell_out,
bool is_backward = false);
nvinfer1::ITensor *AddLSTMOneLoop(const LstmState &input_state, const LstmWeights &lstm_weights,
nvinfer1::ITensor **hidden_out, nvinfer1::ITensor **cell_out,
bool is_backward = false);
int ParseLSTMCellInputs(int layer_index, nvinfer1::ITensor *hidden_init, nvinfer1::ITensor *cell_init,
LstmState *input_state, int *input_weight_offset, int *state_weight_offset, int *bias_offset,
LstmWeights *lstm_weights, const LstmState &next_state);
nvinfer1::INetworkDefinition *network_{nullptr};
nvinfer1::ITensor *input_data_{nullptr};
nvinfer1::ITensor *sequence_size_input_{nullptr};
nvinfer1::ITensor *op_data_out_{nullptr};
nvinfer1::ITensor *op_hidden_out_{nullptr};
nvinfer1::ITensor *op_cell_out_{nullptr};
LSTMParams params_;
std::string hidden_init_name_;
std::string cell_init_name_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_LSTM_TENSORRT_H_

View File

@ -0,0 +1,202 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/matmul_opt_plugin.h"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <functional>
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
#include "NvInferRuntimeCommon.h"
namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(MatmulOptPluginCreater);
template class TensorRTPluginCreater<MatmulOptPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
// MatmulOptPlugin
int MatmulOptPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace,
cudaStream_t stream) noexcept {
CHECK_NULL_RETURN(cublas_handle_);
CUBLAS_CHECK(cublasSetStream(cublas_handle_, stream));
const nvinfer1::PluginTensorDesc desc_a = inputDesc[0];
const nvinfer1::PluginTensorDesc desc_b = inputDesc[1];
const nvinfer1::PluginTensorDesc desc_c = outputDesc[0];
if (desc_a.dims.nbDims == DIMENSION_2D) {
// a: m * k, b: k * n, c: m * n
int m = desc_c.dims.d[0];
int n = desc_c.dims.d[1];
int k = b_trans_ ? desc_b.dims.d[1] : desc_b.dims.d[0];
const int mm_params[]{m, n, k};
CublasMM1Batch(inputs[0], inputs[1], outputs[0], mm_params, operations_, data_types_, cublas_handle_);
} else if (desc_a.dims.nbDims == DIMENSION_3D) {
return RunBatchedMatmul(inputDesc, outputDesc, inputs, outputs, workspace, stream);
} else {
MS_LOG(ERROR) << layer_name_ << " input dims needs check a: " << desc_a.dims.nbDims;
return RET_ERROR;
}
return RET_OK;
}
int MatmulOptPlugin::RunBatchedMatmul(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workspace, cudaStream_t stream) {
const nvinfer1::PluginTensorDesc desc_b = inputDesc[1];
const nvinfer1::PluginTensorDesc desc_c = outputDesc[0];
int batch = desc_c.dims.d[0];
int m = desc_c.dims.d[1];
int n = desc_c.dims.d[DIMENSION_2D];
int k = b_trans_ ? desc_b.dims.d[DIMENSION_2D] : desc_b.dims.d[1];
const int mm_params[]{m, n, k, batch};
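  // collect per-batch matrix addresses on the host; the batched GEMM below consumes arrays of device pointers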
for (int i = 0; i < batch; i++) {
    a_addrs_[i] = static_cast<const char *>(inputs[0]) + i * m * k * sizeof(float);
    b_addrs_[i] = static_cast<const char *>(inputs[1]) + i * k * n * sizeof(float);
    c_addrs_[i] = static_cast<char *>(outputs[0]) + i * m * n * sizeof(float);
}
int data_size = batch * sizeof(void *);
int max_batchsize = a_addrs_.size();
if (a_device_addrs_ == nullptr) {
CUDA_CHECK(cudaMalloc(&a_device_addrs_, sizeof(void *) * max_batchsize));
}
if (b_device_addrs_ == nullptr) {
CUDA_CHECK(cudaMalloc(&b_device_addrs_, sizeof(void *) * max_batchsize));
}
if (c_device_addrs_ == nullptr) {
CUDA_CHECK(cudaMalloc(&c_device_addrs_, sizeof(void *) * max_batchsize));
}
CUDA_CHECK(cudaMemcpy(a_device_addrs_, a_addrs_.data(), data_size, cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(b_device_addrs_, b_addrs_.data(), data_size, cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(c_device_addrs_, c_addrs_.data(), data_size, cudaMemcpyHostToDevice));
CublasMMBatched(a_device_addrs_, b_device_addrs_, c_device_addrs_, mm_params, operations_, data_types_,
cublas_handle_);
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *MatmulOptPlugin::clone() const noexcept {
auto *plugin = new MatmulOptPlugin(*this);
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
nvinfer1::DimsExprs MatmulOptPlugin::getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs,
int nbInputs, nvinfer1::IExprBuilder &exprBuilder) noexcept {
nvinfer1::DimsExprs out_dims{};
if (nbInputs != INPUT_SIZE2 && nbInputs != INPUT_SIZE3) {
MS_LOG(ERROR) << "invalid input size " << nbInputs << " of " << layer_name_;
return out_dims;
}
out_dims.nbDims = inputs[0].nbDims;
if (out_dims.nbDims == DIMENSION_2D) {
out_dims.d[0] = a_trans_ ? inputs[0].d[1] : inputs[0].d[0];
out_dims.d[1] = b_trans_ ? inputs[1].d[0] : inputs[1].d[1];
return out_dims;
} else if (out_dims.nbDims == DIMENSION_3D) {
out_dims.d[0] = inputs[0].d[0];
out_dims.d[1] = a_trans_ ? inputs[0].d[DIMENSION_2D] : inputs[0].d[1];
out_dims.d[DIMENSION_2D] = b_trans_ ? inputs[1].d[1] : inputs[1].d[DIMENSION_2D];
return out_dims;
}
MS_LOG(ERROR) << "invalid input dims " << out_dims.nbDims << " of " << layer_name_;
return out_dims;
}
void MatmulOptPlugin::configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs,
const nvinfer1::DynamicPluginTensorDesc *out, int nbOutputs) noexcept {
operations_[0] = a_trans_ ? CUBLAS_OP_T : CUBLAS_OP_N;
operations_[1] = b_trans_ ? CUBLAS_OP_T : CUBLAS_OP_N;
data_types_[0] = ConvertDataType(in[0].desc.type); // input a
data_types_[1] = ConvertDataType(in[1].desc.type); // input b
data_types_[THIRD_INPUT] = ConvertDataType(out[0].desc.type); // output c
data_types_[FOURTH_INPUT] =
(in[0].desc.type == nvinfer1::DataType::kHALF || in[1].desc.type == nvinfer1::DataType::kHALF)
? CUDA_R_16F
: CUDA_R_32F; // compute type
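  // for 3D (batched) inputs, pre-allocate host pointer vectors and device pointer arrays sized for the maximum batch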
if (in[0].max.nbDims == DIMENSION_3D) {
int max_batchsize = in[0].max.d[0];
a_addrs_.resize(max_batchsize);
b_addrs_.resize(max_batchsize);
c_addrs_.resize(max_batchsize);
if (a_device_addrs_ == nullptr) {
CUDA_CHECK_VOID(cudaMalloc(&a_device_addrs_, sizeof(void *) * max_batchsize));
}
if (b_device_addrs_ == nullptr) {
CUDA_CHECK_VOID(cudaMalloc(&b_device_addrs_, sizeof(void *) * max_batchsize));
}
if (c_device_addrs_ == nullptr) {
CUDA_CHECK_VOID(cudaMalloc(&c_device_addrs_, sizeof(void *) * max_batchsize));
}
}
}
int MatmulOptPlugin::initialize() noexcept {
if (cublas_handle_ == nullptr) {
CUBLAS_CHECK(cublasCreate(&cublas_handle_));
}
for (int i = 0; i < DIMENSION_4D; i++) {
if (data_types_[i] != CUDA_R_32F) {
MS_LOG(ERROR) << layer_name_ << " only support fp32";
return RET_ERROR;
}
}
  return RET_OK;
}
void MatmulOptPlugin::terminate() noexcept {
if (cublas_handle_ != nullptr) {
auto cublas_ret = cublasDestroy(cublas_handle_);
if (cublas_ret != CUBLAS_STATUS_SUCCESS) {
MS_LOG(ERROR) << "cublasDestroy failed: " << cublas_ret;
} else {
cublas_handle_ = nullptr;
}
}
cudaError_t err;
if (a_device_addrs_ != nullptr) {
err = cudaFree(a_device_addrs_);
if (err != cudaSuccess) {
MS_LOG(ERROR) << layer_name_ << " free cuda device mem failed " << err;
}
a_device_addrs_ = nullptr;
}
if (b_device_addrs_ != nullptr) {
err = cudaFree(b_device_addrs_);
if (err != cudaSuccess) {
MS_LOG(ERROR) << layer_name_ << " free cuda device mem failed " << err;
}
b_device_addrs_ = nullptr;
}
if (c_device_addrs_ != nullptr) {
err = cudaFree(c_device_addrs_);
if (err != cudaSuccess) {
MS_LOG(ERROR) << layer_name_ << " free cuda device mem failed " << err;
}
c_device_addrs_ = nullptr;
}
}
size_t MatmulOptPlugin::getSerializationSize() const noexcept { return 2 * sizeof(bool); }
void MatmulOptPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &a_trans_, sizeof(bool));
SerializeValue(&buffer, &b_trans_, sizeof(bool));
}
} // namespace mindspore::lite

View File

@ -0,0 +1,80 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_OPT_PLUGIN_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_OPT_PLUGIN_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/cublas_utils.h"
namespace mindspore::lite {
constexpr const char *MATMUL_OPT_PLUGIN_NAME{"MatmulOptPlugin"};
class MatmulOptPlugin : public TensorRTPlugin {
public:
MatmulOptPlugin(const std::string name, bool a_trans, bool b_trans, uint32_t device_id)
: TensorRTPlugin(name, std::string(MATMUL_OPT_PLUGIN_NAME), device_id), a_trans_(a_trans), b_trans_(b_trans) {}
MatmulOptPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(MATMUL_OPT_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
a_trans_ = static_cast<const bool *>(fields[0].data)[0];
b_trans_ = static_cast<const bool *>(fields[1].data)[0];
}
MatmulOptPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(MATMUL_OPT_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &a_trans_, sizeof(bool));
DeserializeValue(&serialData, &serialLength, &b_trans_, sizeof(bool));
}
MatmulOptPlugin() = delete;
// IPluginV2DynamicExt Methods
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
nvinfer1::IExprBuilder &exprBuilder) noexcept override;
void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs,
const nvinfer1::DynamicPluginTensorDesc *out, int nbOutputs) noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
int initialize() noexcept override;
void terminate() noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void *buffer) const noexcept override;
private:
int RunBatchedMatmul(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream);
bool a_trans_{false};
bool b_trans_{false};
cublasHandle_t cublas_handle_{nullptr};
cublasOperation_t operations_[2]{CUBLAS_OP_N, CUBLAS_OP_N};
cudaDataType data_types_[4]{CUDA_R_32F, CUDA_R_32F, CUDA_R_32F, CUDA_R_32F};
std::vector<const void *> a_addrs_;
std::vector<const void *> b_addrs_;
std::vector<void *> c_addrs_;
void **a_device_addrs_{nullptr};
void **b_device_addrs_{nullptr};
void **c_device_addrs_{nullptr};
};
class MatmulOptPluginCreater : public TensorRTPluginCreater<MatmulOptPlugin> {
public:
MatmulOptPluginCreater() : TensorRTPluginCreater(std::string(MATMUL_OPT_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_OPT_PLUGIN_H_

View File

@ -0,0 +1,310 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/matmul_tensorrt.h"
#include <memory>
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
#include "src/runtime/delegate/tensorrt/op/matmul_opt_plugin.h"
#include "src/runtime/delegate/tensorrt/tensorrt_runtime.h"
namespace mindspore::lite {
MatMulTensorRT::~MatMulTensorRT() {
if (weight_ptr_ != nullptr) {
free(weight_ptr_);
weight_ptr_ = nullptr;
}
}
int MatMulTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int MatMulTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (type_ == schema::PrimitiveType_MatMulFusion) {
auto primitive = this->GetPrimitive()->value_as_MatMulFusion();
if (primitive == nullptr) {
MS_LOG(ERROR) << "convert to primitive matmul failed for " << op_name_;
return RET_ERROR;
}
transpose_a_ = primitive->transpose_a();
transpose_b_ = primitive->transpose_b();
activation_ = primitive->activation_type();
}
nvinfer1::ITensor *out_tensor = nullptr;
if (RunOptPlugin()) {
out_tensor = AddAsOptPlugin(ctx);
} else if (RunFullConnect()) {
MS_LOG(DEBUG) << "use fully connected instead of matmul for " << op_name_;
out_tensor = AddAsFullConnect(ctx);
} else {
MS_LOG(DEBUG) << "use origin tensorrt matmul for " << op_name_;
out_tensor = AddAsMatmul(ctx);
}
if (out_tensor == nullptr) {
MS_LOG(ERROR) << "add matmul failed for " << op_name_;
return RET_ERROR;
}
// add activation
if (activation_ != schema::ActivationType::ActivationType_NO_ACTIVATION) {
nvinfer1::ILayer *activation_layer =
ActivationTensorRT::AddActivation(ctx, activation_, 0, 0, 0, out_tensor, device_id_);
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "addActivation for matmul failed";
return RET_ERROR;
}
activation_layer->setName((op_name_ + "_activation").c_str());
out_tensor = activation_layer->getOutput(0);
}
out_tensor->setName((op_name_ + "_output").c_str());
MS_LOG(DEBUG) << "output " << GetTensorFormat(out_tensor, out_format_, true);
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format_});
return RET_OK;
}
int MatMulTensorRT::PreprocessMatMulInputs(TensorRTContext *ctx, ITensorHelper *matmul_a, ITensorHelper *matmul_b) {
if (tensorrt_in_tensors_.size() == INPUT_SIZE2) {
int a_index =
GetDimsVolume(tensorrt_in_tensors_[0].trt_tensor_->getDimensions()) == GetDimsVolume(in_tensors_[0].Shape()) ? 0
: 1;
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[a_index], matmul_a);
ret += PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[1 - a_index], matmul_b);
if (ret != RET_OK || matmul_a->trt_tensor_ == nullptr || matmul_b->trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim of matmul inputs failed for " << op_name_;
return ret;
}
out_format_ = matmul_a->format_;
if (matmul_a->format_ != matmul_b->format_) {
MS_LOG(WARNING) << "matmul input tensor has different format " << op_name_;
out_format_ = Format::NHWC;
}
} else if (tensorrt_in_tensors_.size() == 1) {
auto weight = ProcessWeightTensor(ctx);
if (weight == nullptr) {
MS_LOG(ERROR) << "create constant weight tensor failed for " << op_name_;
return RET_ERROR;
}
int weight_index = in_tensors_[1].Data() != nullptr ? 1 : 0;
ITensorHelper *weight_helper = (weight_index == 1) ? matmul_b : matmul_a;
ITensorHelper *var_helper = (weight_index == 1) ? matmul_a : matmul_b;
weight_helper->trt_tensor_ = weight;
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[1 - weight_index], var_helper);
if (ret != RET_OK || var_helper->trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim of matmul input var_helper failed for " << op_name_;
return ret;
}
out_format_ = var_helper->format_;
} else {
MS_LOG(ERROR) << op_name_ << " tensorrt in tensor size is invalid " << tensorrt_in_tensors_.size();
return RET_ERROR;
}
return RET_OK;
}
nvinfer1::ITensor *MatMulTensorRT::ProcessWeightTensor(TensorRTContext *ctx) {
nvinfer1::ITensor *weight = nullptr;
int weight_index = in_tensors_[1].Data() != nullptr ? 1 : 0;
if (in_tensors_[weight_index].Shape().size() <
static_cast<size_t>(tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims)) {
std::vector<int64_t> expect_shape(in_tensors_[1 - weight_index].Shape().size(), 1);
auto origin_shape = in_tensors_[weight_index].Shape();
for (int i = 0; i < origin_shape.size(); i++) {
expect_shape[expect_shape.size() - 1 - i] = origin_shape[origin_shape.size() - 1 - i];
}
weight = ConvertTensorWithExpandDims(ctx, in_tensors_[weight_index], expect_shape, op_name_);
} else if (in_tensors_[weight_index].Shape().size() ==
static_cast<size_t>(tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims)) {
weight = ConvertConstantTensor(ctx, in_tensors_[weight_index], op_name_);
} else {
MS_LOG(ERROR) << "input tensor shape is invalid for " << op_name_;
return nullptr;
}
return weight;
}
nvinfer1::ITensor *MatMulTensorRT::AddAsMatmul(TensorRTContext *ctx) {
ITensorHelper matmul_a;
ITensorHelper matmul_b;
int ret = PreprocessMatMulInputs(ctx, &matmul_a, &matmul_b);
if (ret != RET_OK || matmul_a.trt_tensor_ == nullptr || matmul_b.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessMatMulInputs matmul failed for " << op_name_;
return nullptr;
}
MS_LOG(DEBUG) << "matmul input a " << GetTensorFormat(matmul_a);
MS_LOG(DEBUG) << "matmul input b " << GetTensorFormat(matmul_b);
auto matmul_layer = ctx->network()->addMatrixMultiply(
*matmul_a.trt_tensor_, transpose_a_ ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE,
*matmul_b.trt_tensor_, transpose_b_ ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE);
if (matmul_layer == nullptr) {
MS_LOG(ERROR) << "addMatrixMultiply failed for " << op_name_;
return nullptr;
}
this->layer_ = matmul_layer;
matmul_layer->setName(op_name_.c_str());
return AddBias(ctx, matmul_layer->getOutput(0));
}
nvinfer1::ITensor *MatMulTensorRT::AddAsFullConnect(TensorRTContext *ctx) {
nvinfer1::Weights weight;
nvinfer1::Weights bias = ConvertWeight(in_tensors_[kBiasIndex]);
nvinfer1::ITensor *input_a = tensorrt_in_tensors_[0].trt_tensor_;
out_format_ = tensorrt_in_tensors_[0].format_;
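  // expand a lower-rank activation to 4D (trailing dims set to 1) so the fully connected layer sees a consistent layout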
if (input_a->getDimensions().nbDims != DIMENSION_4D) {
nvinfer1::Dims in_dims(input_a->getDimensions());
in_dims.nbDims = DIMENSION_4D;
for (int i = input_a->getDimensions().nbDims; i < DIMENSION_4D; i++) {
in_dims.d[i] = 1;
}
input_a = Reshape(ctx, input_a, in_dims);
if (input_a == nullptr) {
MS_LOG(ERROR) << "reshape input failed for " << op_name_;
return nullptr;
}
MS_LOG(DEBUG) << "full connect expand input a to " << GetTensorFormat(input_a);
} else {
ITensorHelper tmp_input;
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[0], &tmp_input);
if (ret != RET_OK || tmp_input.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "rPreprocessInputs2SameDim failed for " << op_name_;
return nullptr;
}
input_a = tmp_input.trt_tensor_;
out_format_ = tmp_input.format_;
MS_LOG(DEBUG) << "full connect preprocess input a to " << GetTensorFormat(tmp_input);
}
if (!transpose_b_) {
// transpose weight
weight = TransposeWeight2D(in_tensors_[1], &weight_ptr_);
if (weight.values == nullptr || weight_ptr_ == nullptr) {
MS_LOG(ERROR) << "TransposeWeight2D input weight failed for " << op_name_;
return nullptr;
}
} else {
weight = ConvertWeight(in_tensors_[1]);
}
int output_cnt = in_tensors_[kBiasIndex].Shape()[0];
auto fc_layer = ctx->network()->addFullyConnected(*input_a, output_cnt, weight, bias);
if (fc_layer == nullptr) {
MS_LOG(ERROR) << "add fully connected layer failed for " << op_name_;
return nullptr;
}
this->layer_ = fc_layer;
fc_layer->setName((op_name_ + "_fullyconnected").c_str());
nvinfer1::ITensor *out_tensor = fc_layer->getOutput(0);
if (out_tensor->getDimensions().nbDims != out_tensors_[0].Shape().size()) {
std::vector<int64_t> out_dims(out_tensors_[0].Shape());
out_dims[0] = out_tensor->getDimensions().d[0];
out_tensor = Reshape(ctx, out_tensor, out_dims);
}
return out_tensor;
}
nvinfer1::ITensor *MatMulTensorRT::AddAsOptPlugin(TensorRTContext *ctx) {
nvinfer1::ITensor *weight_tensor = nullptr;
if (tensorrt_in_tensors_.size() >= INPUT_SIZE2) {
weight_tensor = tensorrt_in_tensors_[1].trt_tensor_;
} else {
weight_tensor = ConvertConstantTensor(ctx, in_tensors_[1], op_name_);
}
auto plugin = std::make_shared<MatmulOptPlugin>(op_name_, transpose_a_, transpose_b_, device_id_);
if (plugin == nullptr) {
MS_LOG(ERROR) << "create MatmulOptPlugin failed for " << op_name_;
return nullptr;
}
nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_, weight_tensor};
nvinfer1::IPluginV2Layer *matmul_layer = ctx->network()->addPluginV2(inputTensors, INPUT_SIZE2, *plugin);
if (matmul_layer == nullptr) {
MS_LOG(ERROR) << "add matmul opt plugin layer failed for " << op_name_;
return nullptr;
}
layer_ = matmul_layer;
return AddBias(ctx, matmul_layer->getOutput(0));
}
nvinfer1::ITensor *MatMulTensorRT::AddBias(TensorRTContext *ctx, nvinfer1::ITensor *input_tensor) {
nvinfer1::ITensor *out_tensor = input_tensor;
if (in_tensors_.size() == kBiasIndex + 1) {
nvinfer1::ITensor *bias = nullptr;
if (in_tensors_[kBiasIndex].Shape().size() < static_cast<size_t>(out_tensor->getDimensions().nbDims)) {
std::vector<int64_t> expect_dims(out_tensors_[0].Shape());
expect_dims[0] = out_tensor->getDimensions().d[0];
bias = ConvertTensorWithExpandDims(ctx, in_tensors_[kBiasIndex], expect_dims, op_name_);
} else if (in_tensors_[kBiasIndex].Shape().size() == static_cast<size_t>(out_tensor->getDimensions().nbDims)) {
bias = ConvertConstantTensor(ctx, in_tensors_[kBiasIndex], op_name_);
} else {
MS_LOG(ERROR) << "input tensor shape is invalid for " << op_name_;
return nullptr;
}
if (bias == nullptr) {
MS_LOG(ERROR) << "create constant bias tensor failed for " << op_name_;
return nullptr;
}
auto bias_layer = ctx->network()->addElementWise(*out_tensor, *bias, nvinfer1::ElementWiseOperation::kSUM);
if (bias_layer == nullptr) {
MS_LOG(ERROR) << "add bias add layer failed for " << op_name_;
return nullptr;
}
auto bias_layer_name = op_name_ + "_bias";
bias_layer->setName(bias_layer_name.c_str());
out_tensor = bias_layer->getOutput(0);
}
return out_tensor;
}
bool MatMulTensorRT::RunOptPlugin() {
if (quant_type_ == schema::QuantType_QUANT_NONE &&
runtime_->GetRuntimePrecisionMode() == RuntimePrecisionMode::RuntimePrecisionMode_FP32) {
if (in_tensors_[0].Shape().size() == DIMENSION_2D && in_tensors_[1].Shape().size() == DIMENSION_2D &&
in_tensors_[0].Shape()[0] > 1 && tensorrt_in_tensors_[0].trt_tensor_->getDimensions().d[0] == -1) {
MS_LOG(INFO) << op_name_ << " uses optimize matmul plugin for 2D dynamic batchsize";
return true;
} else if (in_tensors_[0].Shape().size() == DIMENSION_3D && in_tensors_[1].Shape().size() == DIMENSION_3D) {
// batched matmul using opt
MS_LOG(INFO) << op_name_ << " uses optimize matmul plugin for 3D batchsized";
return true;
}
}
return false;
}
bool MatMulTensorRT::RunFullConnect() {
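  // the fully connected path needs constant 2D weight and bias data, no transpose on input a, and a 2D or 4D activation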
if (in_tensors_.size() == INPUT_SIZE3 && in_tensors_[1].Data() != nullptr &&
in_tensors_[kBiasIndex].Data() != nullptr && !transpose_a_ && in_tensors_[1].Shape().size() == DIMENSION_2D &&
(in_tensors_[0].Shape().size() == DIMENSION_2D || in_tensors_[0].Shape().size() == DIMENSION_4D)) {
return true;
}
return false;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_MatMulFusion, MatMulTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,62 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_TENSORRT_H_
#include <utility>
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class MatMulTensorRT : public TensorRTOp {
public:
MatMulTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~MatMulTensorRT() override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
int AddInnerOp(TensorRTContext *ctx) override;
private:
int PreprocessMatMulInputs(TensorRTContext *ctx, ITensorHelper *matmul_a, ITensorHelper *matmul_b);
nvinfer1::ITensor *ProcessWeightTensor(TensorRTContext *ctx);
nvinfer1::ITensor *AddAsMatmul(TensorRTContext *ctx);
nvinfer1::ITensor *AddAsFullConnect(TensorRTContext *ctx);
nvinfer1::ITensor *AddAsOptPlugin(TensorRTContext *ctx);
nvinfer1::ITensor *AddBias(TensorRTContext *ctx, nvinfer1::ITensor *input_tensor);
bool RunOptPlugin();
bool RunFullConnect();
bool transpose_a_{false};
bool transpose_b_{false};
Format out_format_{Format::NHWC};
schema::ActivationType activation_{schema::ActivationType::ActivationType_NO_ACTIVATION};
void *weight_ptr_{nullptr};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_MATMUL_TENSORRT_H_

View File

@ -0,0 +1,59 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/normalize_opt_plugin.h"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <functional>
#include "src/runtime/delegate/tensorrt/cuda_impl/cuda_helper.h"
#include "NvInferRuntimeCommon.h"
#include "src/runtime/delegate/tensorrt/cuda_impl/normalize.cuh"
namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(NormalizeOptPluginCreater);
template class TensorRTPluginCreater<NormalizeOptPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int NormalizeOptPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
auto input = static_cast<const float *>(inputs[0]);
auto gamma = static_cast<const float *>(inputs[1]);
auto beta = static_cast<const float *>(inputs[2]);
auto output = static_cast<float *>(outputs[0]);
auto input_dims = inputDesc[0].dims;
size_t dim_at_axis = input_dims.d[axis_];
int element_cnt = std::accumulate(input_dims.d, input_dims.d + input_dims.nbDims, 1, std::multiplies<int64_t>());
  Normalize(input, gamma, beta, output, dim_at_axis, epsilion_, element_cnt, stream);
  return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *NormalizeOptPlugin::clone() const noexcept {
auto *plugin = new NormalizeOptPlugin(*this);
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
size_t NormalizeOptPlugin::getSerializationSize() const noexcept { return sizeof(size_t) + sizeof(float); }
void NormalizeOptPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &axis_, sizeof(size_t));
SerializeValue(&buffer, &epsilion_, sizeof(float));
}
} // namespace mindspore::lite

View File

@ -0,0 +1,61 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_OPT_PLUGIN_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_OPT_PLUGIN_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
namespace mindspore::lite {
constexpr const char *NORMALIZE_OPT_PLUGIN_NAME{"NormalizeOptPlugin"};
class NormalizeOptPlugin : public TensorRTPlugin {
public:
NormalizeOptPlugin(const std::string name, size_t axis, float epsilion, uint32_t device_id)
: TensorRTPlugin(name, std::string(NORMALIZE_OPT_PLUGIN_NAME), device_id), axis_(axis), epsilion_(epsilion) {}
NormalizeOptPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(NORMALIZE_OPT_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
axis_ = static_cast<const size_t *>(fields[0].data)[0];
epsilion_ = static_cast<const float *>(fields[1].data)[0];
}
NormalizeOptPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(NORMALIZE_OPT_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &axis_, sizeof(size_t));
DeserializeValue(&serialData, &serialLength, &epsilion_, sizeof(float));
}
NormalizeOptPlugin() = delete;
// IPluginV2DynamicExt Methods
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void *buffer) const noexcept override;
private:
size_t axis_{0};
float epsilion_{0.0f};
};
class NormalizeOptPluginCreater : public TensorRTPluginCreater<NormalizeOptPlugin> {
public:
NormalizeOptPluginCreater() : TensorRTPluginCreater(std::string(NORMALIZE_OPT_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_OPT_PLUGIN_H_

View File

@ -0,0 +1,178 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/normalize_tensorrt.h"
#include <functional>
#include <memory>
#include <numeric>
#include "src/runtime/delegate/tensorrt/op/normalize_opt_plugin.h"
namespace mindspore::lite {
int NormalizeTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE3 && in_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != INPUT_SIZE3 && out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
auto norm_op = primitive->value_as_LayerNormFusion();
CHECK_NULL_RETURN(norm_op);
  int begin_norm_axis = norm_op->begin_norm_axis();
  begin_norm_axis = begin_norm_axis >= 0 ? begin_norm_axis : in_tensors[0].Shape().size() + begin_norm_axis;
  int begin_params_axis = norm_op->begin_params_axis();
  begin_params_axis = begin_params_axis >= 0 ? begin_params_axis : in_tensors[0].Shape().size() + begin_params_axis;
  if (begin_params_axis != begin_norm_axis || begin_params_axis != in_tensors[0].Shape().size() - 1) {
    MS_LOG(ERROR) << "only supports normalization on the last dim, begin_norm_axis is " << begin_norm_axis << " for "
                  << op_name_;
return RET_ERROR;
}
axis_ = begin_params_axis;
epsilon_ = norm_op->epsilon();
return RET_OK;
}
int NormalizeTensorRT::AddInnerOp(TensorRTContext *ctx) {
CHECK_NULL_RETURN(ctx->network());
int ret = PreprocessInputs(ctx);
if (ret != RET_OK) {
MS_LOG(ERROR) << "preprocess input failed for " << op_name_;
return ret;
}
return RunOptPlugin() ? RunAsOptPlugin(ctx) : RunAsTrtOps(ctx);
}
int NormalizeTensorRT::PreprocessInputs(TensorRTContext *ctx) {
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[0], &norm_input_);
if (ret != RET_OK || norm_input_.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim norm_input failed for " << op_name_;
return RET_ERROR;
}
if (in_tensors_.size() == BETA_INDEX + 1) {
gamma_ = ConvertTensorWithExpandDims(ctx, in_tensors_[1], in_tensors_[0].Shape(), op_name_ + in_tensors_[1].Name());
CHECK_NULL_RETURN(gamma_);
beta_ = ConvertTensorWithExpandDims(ctx, in_tensors_[BETA_INDEX], in_tensors_[0].Shape(),
op_name_ + in_tensors_[BETA_INDEX].Name());
CHECK_NULL_RETURN(beta_);
}
return RET_OK;
}
int NormalizeTensorRT::RunAsOptPlugin(TensorRTContext *ctx) {
auto plugin = std::make_shared<NormalizeOptPlugin>(op_name_, axis_, epsilon_, device_id_);
if (plugin == nullptr) {
MS_LOG(ERROR) << "create NormalizeOptPlugin failed for " << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *inputTensors[] = {norm_input_.trt_tensor_, gamma_, beta_};
nvinfer1::IPluginV2Layer *norm_layer = ctx->network()->addPluginV2(inputTensors, INPUT_SIZE3, *plugin);
if (norm_layer == nullptr) {
MS_LOG(ERROR) << "add norm opt plugin layer failed for " << op_name_;
return RET_ERROR;
}
layer_ = norm_layer;
layer_->setName(op_name_.c_str());
AddInnerOutTensors(ITensorHelper{norm_layer->getOutput(0), norm_input_.format_, norm_input_.same_format_});
return RET_OK;
}
int NormalizeTensorRT::RunAsTrtOps(TensorRTContext *ctx) {
size_t axis = 1u << axis_;
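  // layer norm built from TensorRT primitives: y = gamma * (x - mean) / sqrt(var + epsilon) + beta, reduced over the chosen axis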
// first output, add later
AddInnerOutTensors(ITensorHelper{nullptr, norm_input_.format_, norm_input_.same_format_});
// mean
auto mean =
ctx->network()->addReduce(*(norm_input_.trt_tensor_), nvinfer1::ReduceOperation::kAVG, axis, true)->getOutput(0);
CHECK_NULL_RETURN(mean);
if (out_tensors_.size() == INPUT_SIZE3) {
AddInnerOutTensors(ITensorHelper{mean, norm_input_.format_, norm_input_.same_format_});
}
// x - mean
auto sub_mean = ctx->network()
->addElementWise(*(norm_input_.trt_tensor_), *mean, nvinfer1::ElementWiseOperation::kSUB)
->getOutput(0);
CHECK_NULL_RETURN(sub_mean);
// (x - mean)^2
auto const_two =
ConvertScalarToITensor(ctx, in_tensors_[0].Shape().size(), &two_, DataType::kNumberTypeFloat32, op_name_ + "_two");
CHECK_NULL_RETURN(const_two);
auto pow = ctx->network()->addElementWise(*sub_mean, *const_two, nvinfer1::ElementWiseOperation::kPOW)->getOutput(0);
CHECK_NULL_RETURN(pow);
// mean of (x - mean)^2
auto var = ctx->network()->addReduce(*pow, nvinfer1::ReduceOperation::kAVG, axis, true)->getOutput(0);
CHECK_NULL_RETURN(var);
if (out_tensors_.size() == INPUT_SIZE3) {
AddInnerOutTensors(ITensorHelper{var, norm_input_.format_, norm_input_.same_format_});
}
// var + min epsilon
auto const_epsilon = ConvertScalarToITensor(ctx, in_tensors_[0].Shape().size(), &epsilon_,
DataType::kNumberTypeFloat32, op_name_ + "_epsilion");
CHECK_NULL_RETURN(const_epsilon);
auto var_epsilon =
ctx->network()->addElementWise(*var, *const_epsilon, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
CHECK_NULL_RETURN(var_epsilon);
// standard deviation
auto std_dev = ctx->network()->addUnary(*var_epsilon, nvinfer1::UnaryOperation::kSQRT)->getOutput(0);
CHECK_NULL_RETURN(std_dev);
// sub_mean / std_dev
auto norm_layer = ctx->network()->addElementWise(*sub_mean, *std_dev, nvinfer1::ElementWiseOperation::kDIV);
CHECK_NULL_RETURN(norm_layer);
this->layer_ = norm_layer;
auto norm = norm_layer->getOutput(0);
CHECK_NULL_RETURN(norm);
// scale with gamma and beta
if (gamma_ != nullptr && beta_ != nullptr) {
auto gamma_out =
ctx->network()->addElementWise(*norm, *gamma_, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0);
CHECK_NULL_RETURN(gamma_out);
auto beta_out =
ctx->network()->addElementWise(*gamma_out, *beta_, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
CHECK_NULL_RETURN(beta_out);
tensorrt_out_tensors_[0].trt_tensor_ = beta_out;
} else {
tensorrt_out_tensors_[0].trt_tensor_ = norm;
}
return RET_OK;
}
bool NormalizeTensorRT::RunOptPlugin() {
if (out_tensors_.size() == 1 && in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
in_tensors_[0].Shape()[axis_] < GET_THREADS) {
// insufficient shared memory
int dim_sum = std::accumulate(in_tensors_[0].Shape().begin(), in_tensors_[0].Shape().begin() + axis_, 1,
std::multiplies<int>());
const int kSharedMemoryThreshold = 2048;
if (dim_sum > kSharedMemoryThreshold) {
return false;
}
MS_LOG(INFO) << op_name_ << " use opt plugin";
return true;
}
return false;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_LayerNormFusion, NormalizeTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,56 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
constexpr int BETA_INDEX = 2;
class NormalizeTensorRT : public TensorRTOp {
public:
NormalizeTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~NormalizeTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
int PreprocessInputs(TensorRTContext *ctx);
int RunAsOptPlugin(TensorRTContext *ctx);
int RunAsTrtOps(TensorRTContext *ctx);
bool RunOptPlugin();
ITensorHelper norm_input_;
nvinfer1::ITensor *gamma_{nullptr};
nvinfer1::ITensor *beta_{nullptr};
size_t axis_{0};
const float two_{2.0f};
float epsilon_{0.0f};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_NORMALIZE_TENSORRT_H_

View File

@ -0,0 +1,140 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <numeric>
#include <functional>
#include "src/runtime/delegate/tensorrt/op/pad_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
int PadTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
if (in_tensors_[1].Data() == nullptr) {
MS_LOG(ERROR) << "invalid pad tensor for: " << op_name_;
return RET_ERROR;
}
auto pad_primitive = this->GetPrimitive()->value_as_PadFusion();
if (pad_primitive == nullptr) {
MS_LOG(ERROR) << "convert PadFusion failed: " << op_name_;
return RET_ERROR;
}
schema::PaddingMode padding_mode = pad_primitive->padding_mode();
if (padding_mode != schema::PaddingMode::PaddingMode_CONSTANT) {
MS_LOG(ERROR) << "Unsupported padding mode: " << schema::PaddingMode(padding_mode) << ", for op: " << op_name_;
return RET_ERROR;
}
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
return RET_ERROR;
}
constant_value_ = pad_primitive->constant_value();
return RET_OK;
}
int PadTensorRT::AddInnerOp(TensorRTContext *ctx) {
mindspore::MSTensor &pad_tensor = in_tensors_[1];
int element_cnt = std::accumulate(pad_tensor.Shape().begin(), pad_tensor.Shape().end(), 1, std::multiplies<int>());
if (element_cnt != tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims * INPUT_SIZE2) {
MS_LOG(ERROR) << "pad tensor cnt is invalid. cnt: " << element_cnt
<< ", input tensor dims cnt: " << tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims;
return RET_ERROR;
}
nvinfer1::ITensor *pad_input = tensorrt_in_tensors_[0].trt_tensor_;
MS_LOG(DEBUG) << "before transpose "
<< GetTensorFormat(pad_input, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_);
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
this->transpose_layer_ = transpose_layer_in;
pad_input = transpose_layer_in->getOutput(0);
MS_LOG(DEBUG) << "after transpose " << GetTensorFormat(pad_input, Format::NCHW, false);
}
  // TensorRT 6 only supports 2D padding
const int *padding_data = reinterpret_cast<const int *>(in_tensors_[1].Data().get());
MS_ASSERT(padding_data);
nvinfer1::IPaddingLayer *padding_layer = nullptr;
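  // a 4D tensor carries one (pre, post) pad pair per dimension, i.e. 4 * 2 = 8 pad values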
if (element_cnt == index_NHWC_ * INPUT_SIZE2) {
// only support pad at HW index
int h_pre;
int h_post;
int w_pre;
int w_post;
if (SameDims(pad_input->getDimensions(), in_tensors_[0].Shape())) {
// NCHW: 0: N_pre, 1: N_post, 2: C_pre, 3: C_post, 4: H_pre, 5: H_post, 6: W_pre, 7: W_post
if (*padding_data != 0 || *(padding_data + 1) != 0 || *(padding_data + 2) != 0 || *(padding_data + 3) != 0) {
MS_LOG(WARNING) << "tensorrt padding only support pad at HW index, unsupported padding value of: " << op_name_;
}
h_pre = 4;
h_post = 5;
w_pre = 6;
w_post = 7;
} else {
// NHWC: 0: N_pre, 1: N_post, 2: H_pre, 3: H_post, 4: W_pre, 5: W_post, 6: C_pre, 7: C_post
if (*padding_data != 0 || *(padding_data + 1) != 0 || *(padding_data + 6) != 0 || *(padding_data + 7) != 0) {
MS_LOG(WARNING) << "tensorrt padding only support pad at HW index, unsupported padding value of: " << op_name_;
}
h_pre = 2;
h_post = 3;
w_pre = 4;
w_post = 5;
}
nvinfer1::DimsHW prePadding{*(padding_data + h_pre), *(padding_data + w_pre)};
nvinfer1::DimsHW postPadding{*(padding_data + h_post), *(padding_data + w_post)};
MS_LOG(DEBUG) << op_name_ << " prePadding: " << prePadding.d[0] << ", " << prePadding.d[1]
<< "; postPadding: " << postPadding.d[0] << ", " << postPadding.d[1];
padding_layer = ctx->network()->addPadding(*pad_input, prePadding, postPadding);
} else {
MS_LOG(ERROR) << "need check for pad_tensor dims: " << op_name_
<< ", pad_tensor ElementNum: " << pad_tensor.ElementNum();
return RET_ERROR;
}
if (padding_layer == nullptr) {
MS_LOG(ERROR) << "add padding layer failed for " << op_name_;
return RET_ERROR;
}
this->layer_ = padding_layer;
padding_layer->setName(op_name_.c_str());
padding_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
bool same_format = SameDims(padding_layer->getOutput(0)->getDimensions(), out_tensors_[0].Shape()) &&
SameDims(tensorrt_in_tensors_[0].trt_tensor_->getDimensions(), in_tensors_[0].Shape());
this->AddInnerOutTensors(ITensorHelper{padding_layer->getOutput(0), Format::NCHW, same_format});
MS_LOG(DEBUG) << "after transpose " << GetTensorFormat(tensorrt_out_tensors_[0]);
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_PadFusion, PadTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,42 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_PAD_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_PAD_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class PadTensorRT : public TensorRTOp {
public:
PadTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~PadTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
const int index_NHWC_ = 4;
float constant_value_ = 0.0f;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_PAD_TENSORRT_H_

View File

@ -0,0 +1,220 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/pool_tensorrt.h"
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
int PoolTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
return RET_ERROR;
}
return RET_OK;
}
int PoolTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (tensorrt_in_tensors_.size() != 1) {
MS_LOG(ERROR) << "invalid input tensor size: " << tensorrt_in_tensors_.size();
return RET_ERROR;
}
MS_LOG(DEBUG) << "before transpose " << GetTensorFormat(tensorrt_in_tensors_[0]);
int ret = ParseParams();
if (ret != RET_OK) {
MS_LOG(ERROR) << "ParseParams failed for : " << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *pool_input = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
this->transpose_layer_ = transpose_layer_in;
pool_input = transpose_layer_in->getOutput(0);
}
// pooling layer
nvinfer1::Dims windowSize = lite::ConvertCudaDims(kernel_size_);
if (windowSize.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return RET_ERROR;
}
nvinfer1::IPoolingLayer *pooling_layer = ctx->network()->addPoolingNd(*pool_input, pooling_type_, windowSize);
if (pooling_layer == nullptr) {
MS_LOG(ERROR) << "addPoolingNd failed for TensorRT.";
return RET_ERROR;
}
AddParams(pooling_layer);
pooling_layer->setName(op_name_.c_str());
this->layer_ = pooling_layer;
// add activation
nvinfer1::ILayer *activation_layer = nullptr;
if (activation_type_ == schema::ActivationType::ActivationType_NO_ACTIVATION) {
activation_layer = pooling_layer;
} else {
activation_layer =
ActivationTensorRT::AddActivation(ctx, activation_type_, 0, 0, 0, pooling_layer->getOutput(0), device_id_);
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "addActivation for pool failed";
return RET_ERROR;
}
activation_layer->setName((op_name_ + "_activation").c_str());
}
nvinfer1::ITensor *out_trt_tensor = activation_layer->getOutput(0);
out_trt_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{out_trt_tensor, Format::NCHW, false});
MS_LOG(DEBUG) << "output " << GetTensorFormat(tensorrt_out_tensors_[0]);
return RET_OK;
}
int PoolTensorRT::ParseParams() {
int in_h = in_tensors_[0].Shape()[kNHWC_H];
int in_w = in_tensors_[0].Shape()[kNHWC_W];
int out_h = out_tensors_[0].Shape()[kNHWC_H];
int out_w = out_tensors_[0].Shape()[kNHWC_W];
int kernel_h;
int kernel_w;
switch (type_) {
case (schema::PrimitiveType_AvgPoolFusion): {
const schema::AvgPoolFusion *pool_primitive = this->GetPrimitive()->value_as_AvgPoolFusion();
if (pool_primitive == nullptr) {
MS_LOG(ERROR) << "convert PoolFusion failed: " << op_name_;
return RET_ERROR;
}
pooling_type_ = nvinfer1::PoolingType::kAVERAGE;
auto stride = pool_primitive->strides();
if (stride == nullptr) {
MS_LOG(ERROR) << "get stride failed: " << op_name_;
return RET_ERROR;
}
stride_ = std::vector<int64_t>(stride->begin(), stride->end());
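      // back-compute the kernel size from the shapes: kernel = in - (out - 1) * stride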
kernel_h = in_h - (out_h - 1) * stride_[0];
kernel_w = in_w - (out_w - 1) * stride_[1];
auto kernel_size = pool_primitive->kernel_size();
if (kernel_size == nullptr) {
kernel_size_.push_back(kernel_h);
kernel_size_.push_back(kernel_w);
        MS_LOG(WARNING) << op_name_ << " has no kernel size, deriving it from tensor shapes: kernel_h is " << kernel_h
                        << ", kernel_w is " << kernel_w;
} else {
kernel_size_ = std::vector<int64_t>(kernel_size->begin(), kernel_size->end());
}
auto padding = pool_primitive->pad();
if (padding != nullptr && padding->size() != DIMENSION_4D) {
        MS_LOG(ERROR) << op_name_ << " has invalid pad dims: " << padding->size();
return RET_ERROR;
} else if (padding == nullptr || padding->size() == 0) {
padding_ = std::vector<int64_t>(DIMENSION_4D, 0);
} else {
padding_ = std::vector<int64_t>(padding->begin(), padding->end());
}
pad_mode_ = pool_primitive->pad_mode();
activation_type_ = pool_primitive->activation_type();
break;
}
case (schema::PrimitiveType_MaxPoolFusion): {
const schema::MaxPoolFusion *pool_primitive = this->GetPrimitive()->value_as_MaxPoolFusion();
if (pool_primitive == nullptr) {
MS_LOG(ERROR) << "convert PoolFusion failed: " << op_name_;
return RET_ERROR;
}
pooling_type_ = nvinfer1::PoolingType::kMAX;
auto kernel_size = pool_primitive->kernel_size();
if (kernel_size == nullptr) {
MS_LOG(ERROR) << "get kernel size failed: " << op_name_;
return RET_ERROR;
}
kernel_size_ = std::vector<int64_t>(kernel_size->begin(), kernel_size->end());
auto stride = pool_primitive->strides();
if (stride == nullptr) {
MS_LOG(ERROR) << "get stride failed: " << op_name_;
return RET_ERROR;
}
stride_ = std::vector<int64_t>(stride->begin(), stride->end());
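      // kernel_h/kernel_w are back-computed here only to clamp an oversized kernel_size_ below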
kernel_h = in_h - (out_h - 1) * stride_[0];
kernel_w = in_w - (out_w - 1) * stride_[1];
auto padding = pool_primitive->pad();
if (padding == nullptr) {
MS_LOG(INFO) << "get padding is null, set to default 0: " << op_name_;
padding_ = {0, 0, 0, 0};
} else {
padding_ = std::vector<int64_t>(padding->begin(), padding->end());
}
pad_mode_ = pool_primitive->pad_mode();
activation_type_ = pool_primitive->activation_type();
break;
}
default: {
MS_LOG(ERROR) << "unsupported primitive type of " << type_ << " for node: " << op_name_;
return RET_ERROR;
}
}
  // some models have a kernel size larger than the input H/W; clamp it
if (kernel_size_[0] > in_h || kernel_size_[1] > in_w) {
MS_LOG(WARNING) << op_name_ << " kernel size is larger than input size";
kernel_size_[0] = kernel_size_[0] > kernel_h ? kernel_h : kernel_size_[0];
kernel_size_[1] = kernel_size_[1] > kernel_w ? kernel_w : kernel_size_[1];
}
return RET_OK;
}
void PoolTensorRT::AddParams(nvinfer1::IPoolingLayer *pooling_layer) {
nvinfer1::Dims stride_dims = ConvertCudaDims(stride_);
if (stride_dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return;
}
pooling_layer->setStrideNd(stride_dims);
if (pad_mode_ == schema::PadMode::PadMode_SAME) {
pooling_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
} else {
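    // padding_ holds {up, down, left, right}; the up/left values are applied symmetrically via setPaddingNd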
nvinfer1::Dims dims{};
dims.nbDims = DIMENSION_2D;
dims.d[0] = padding_[0];
dims.d[1] = padding_[DIMENSION_2D];
pooling_layer->setPaddingNd(dims);
}
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_AvgPoolFusion, PoolTensorRT)
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_MaxPoolFusion, PoolTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,55 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_POOL_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_POOL_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class PoolTensorRT : public TensorRTOp {
public:
PoolTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~PoolTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
int ParseParams();
void AddParams(nvinfer1::IPoolingLayer *pooling_layer);
std::vector<int64_t> kernel_size_;
std::vector<int64_t> stride_;
std::vector<int64_t> padding_;
nvinfer1::PoolingType pooling_type_;
schema::PadMode pad_mode_;
schema::ActivationType activation_type_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_POOL_TENSORRT_H_

View File

@ -0,0 +1,79 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <numeric>
#include "src/runtime/delegate/tensorrt/op/prelu_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
int PReluTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
return RET_OK;
}
int PReluTensorRT::AddInnerOp(TensorRTContext *ctx) {
ITensorHelper prelu_input;
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[0], &prelu_input);
if (ret != RET_OK || prelu_input.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim input tensor failed for " << op_name_;
return ret;
}
int input_nbdims = prelu_input.trt_tensor_->getDimensions().nbDims;
int slope_nbdims = in_tensors_[1].Shape().size();
auto slope = tensorrt_in_tensors_[1].trt_tensor_;
if (input_nbdims != slope_nbdims) {
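    // broadcast the slope to the input rank so that addParametricReLU sees matching dimensions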
slope = ConvertTensorWithExpandDims(ctx, in_tensors_[1], in_tensors_[0].Shape(), op_name_ + "_slope");
tensorrt_in_tensors_[1].trt_tensor_ = slope;
}
if (slope == nullptr) {
MS_LOG(ERROR) << "add const input tensor failed for " << op_name_;
return RET_ERROR;
}
ITensorHelper slope_helper;
ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[1], &slope_helper);
if (ret != RET_OK || slope_helper.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim slope tensor failed for " << op_name_;
return ret;
}
auto *prelu_layer = ctx->network()->addParametricReLU(*prelu_input.trt_tensor_, *slope_helper.trt_tensor_);
if (prelu_layer == nullptr) {
MS_LOG(ERROR) << "addParameticReLU failed for TensorRT : " << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *out_tensor = prelu_layer->getOutput(0);
out_tensor->setName((op_name_ + "_0").c_str());
this->AddInnerOutTensors(ITensorHelper{out_tensor, prelu_input.format_, prelu_input.same_format_});
this->layer_ = prelu_layer;
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_PReLUFusion, PReluTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,39 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_PRELU_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_PRELU_TENSORRT_H_
#include <string>
#include <vector>
#include <algorithm>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class PReluTensorRT : public TensorRTOp {
public:
PReluTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~PReluTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
};
} // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_PRELU_TENSORRT_H_

View File

@ -0,0 +1,139 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <valarray>
#include "src/runtime/delegate/tensorrt/op/reduce_tensorrt.h"
namespace mindspore::lite {
int ReduceTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
  if (in_tensors.size() != INPUT_SIZE2) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
    return RET_ERROR;
  }
return RET_OK;
}
int ReduceTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "context or network is invalid";
return RET_ERROR;
}
auto reduce_op = op_primitive_->value_as_ReduceFusion();
if (reduce_op == nullptr) {
MS_LOG(ERROR) << "convert failed";
return RET_ERROR;
}
bool keep_dims = reduce_op->keep_dims();
out_format_ = tensorrt_in_tensors_[0].format_;
nvinfer1::ITensor *reduce_input = tensorrt_in_tensors_[0].trt_tensor_;
MS_LOG(DEBUG) << "origin input " << GetTensorFormat(tensorrt_in_tensors_[0]);
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
!SameDims(tensorrt_in_tensors_[0].trt_tensor_->getDimensions(), in_tensors_[0].Shape())) {
if (tensorrt_in_tensors_[0].format_ == Format::NCHW) {
// NCHW->NHWC
nvinfer1::IShuffleLayer *transpose_layer = NCHW2NHWC(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer == nullptr) {
MS_LOG(ERROR) << "create transpose layer failed for " << op_name_;
return RET_ERROR;
}
transpose_layer->setName((op_name_ + "_transpose_in").c_str());
reduce_input = transpose_layer->getOutput(0);
out_format_ = Format::NHWC;
this->transpose_layer_ = transpose_layer;
} else if (tensorrt_in_tensors_[0].format_ == Format::NHWC) {
// NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer == nullptr) {
MS_LOG(ERROR) << "create transpose layer failed for " << op_name_;
return RET_ERROR;
}
transpose_layer->setName((op_name_ + "_transpose_in").c_str());
reduce_input = transpose_layer->getOutput(0);
out_format_ = Format::NCHW;
this->transpose_layer_ = transpose_layer;
} else {
MS_LOG(WARNING) << "input tensor format needs check: " << op_name_;
}
}
MS_LOG(DEBUG) << "after transpose input " << GetTensorFormat(reduce_input, out_format_, true);
if (reduce_op->mode() == schema::ReduceMode::ReduceMode_ReduceL2) {
// x^2
auto *pow2_layer =
ctx->network()->addElementWise(*reduce_input, *reduce_input, nvinfer1::ElementWiseOperation::kPROD);
CHECK_NULL_RETURN(pow2_layer);
pow2_layer->setName((op_name_ + "_pow2").c_str());
reduce_input = pow2_layer->getOutput(0);
CHECK_NULL_RETURN(reduce_input);
}
uint32_t reduceAxis = GetAxis();
auto reduce_operation_opt = TryConvertTRTReduceMode(reduce_op->mode());
if (!reduce_operation_opt) {
MS_LOG(WARNING) << "invalid reduce for TensorRT, need check: " << static_cast<int>(reduce_op->mode());
return RET_ERROR;
}
nvinfer1::IReduceLayer *layer =
ctx->network()->addReduce(*reduce_input, reduce_operation_opt.value(), reduceAxis, keep_dims);
CHECK_NULL_RETURN(layer);
layer->setName(op_name_.c_str());
this->layer_ = layer;
nvinfer1::ITensor *out_tensor = layer->getOutput(0);
CHECK_NULL_RETURN(out_tensor);
if (reduce_op->mode() == schema::ReduceMode::ReduceMode_ReduceL2) {
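    // sqrt(sum(x^2)) completes the L2 reduction started with the element-wise square above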
auto sqrt_layer = ctx->network()->addUnary(*out_tensor, nvinfer1::UnaryOperation::kSQRT);
CHECK_NULL_RETURN(sqrt_layer);
sqrt_layer->setName((op_name_ + "_sqrt").c_str());
out_tensor = sqrt_layer->getOutput(0);
}
out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format_, true});
MS_LOG(DEBUG) << "output " << GetTensorFormat(tensorrt_out_tensors_[0]);
return RET_OK;
}
uint32_t ReduceTensorRT::GetAxis() {
// axis
uint32_t reduceAxis = 0;
mindspore::MSTensor axis_tensor = this->in_tensors_[1];
if (axis_tensor.Data() == nullptr) {
MS_LOG(ERROR) << "invalid axis_tensor";
return reduceAxis;
}
if (axis_tensor.DataType() != DataType::kNumberTypeInt32) {
MS_LOG(WARNING) << "not int data type";
}
int *axis_data = reinterpret_cast<int *>(axis_tensor.MutableData());
CHECK_NULL_RETURN(axis_data);
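  // TensorRT encodes reduce axes as a bitmask: set one bit per axis index to be reduced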
for (int i = 0; i < axis_tensor.ElementNum(); i++) {
int format_axis_data = (*axis_data == -1) ? in_tensors_[0].Shape().size() - 1 : *axis_data;
MS_LOG(DEBUG) << op_name_ << " reduceAxis at index : " << *axis_data;
reduceAxis |= 1u << format_axis_data;
axis_data++;
}
return reduceAxis;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_ReduceFusion, ReduceTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,44 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCE_TENSORRT_H_
#include <string>
#include <vector>
#include <map>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class ReduceTensorRT : public TensorRTOp {
public:
ReduceTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~ReduceTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
uint32_t GetAxis();
Format out_format_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCE_TENSORRT_H_

View File

@ -0,0 +1,126 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/reducescatter_tensorrt.h"
#include <numeric>
#include <thread>
#include "NvInferRuntimeCommon.h"
namespace mindspore::lite {
REGISTER_TENSORRT_PLUGIN(ReduceScatterPluginCreater);
template class TensorRTPluginCreater<ReduceScatterPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int ReduceScatterTensorRT::IsSupport(const schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
#ifndef LITE_CUDA_DISTRIBUTION
MS_LOG(ERROR)
<< "Unsupported package for gpu distribution feature, please recompile with MS_ENABLE_CUDA_DISTRIBUTION set to on.";
return RET_ERROR;
#else
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != 1) {
MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
#endif
}
int ReduceScatterTensorRT::AddInnerOp(TensorRTContext *ctx) {
nvinfer1::ITensor *inputTensors[] = {tensorrt_in_tensors_[0].trt_tensor_};
auto reduce_op = op_primitive_->value_as_ReduceScatter();
if (reduce_op == nullptr) {
MS_LOG(ERROR) << "convert failed for " << op_name_;
return RET_ERROR;
}
auto reduce_mode = reduce_op->mode();
auto rank = GetGPUGroupSize();
auto plugin = std::make_shared<ReduceScatterPlugin>(op_name_, reduce_mode, rank, device_id_);
MS_LOG(INFO) << op_name_ << " group size: " << rank << ", rank id: " << GetRankID();
nvinfer1::IPluginV2Layer *reduce_scatter_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
if (reduce_scatter_layer == nullptr) {
MS_LOG(ERROR) << "create ReduceScatter layer failed for: " << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *reduce_scatter_out = reduce_scatter_layer->getOutput(0);
reduce_scatter_layer->setName(op_name_.c_str());
reduce_scatter_out->setName((op_name_ + "_output").c_str());
this->layer_ = reduce_scatter_layer;
this->AddInnerOutTensors(
ITensorHelper{reduce_scatter_out, tensorrt_in_tensors_[0].format_, tensorrt_in_tensors_[0].same_format_});
return RET_OK;
}
// ReduceScatterPlugin
int ReduceScatterPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
MS_LOG(INFO) << "ReduceScatter run at rank id: " << GetRankID() << " stream: " << stream;
nvinfer1::Dims output_dims = outputDesc[0].dims;
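  // the output tensor holds only this rank's shard, so its element count is the per-rank receive count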
  int receive_element_cnt =
    std::accumulate(output_dims.d, output_dims.d + output_dims.nbDims, 1, std::multiplies<int64_t>());
const void *input = inputs[0];
void *output = outputs[0];
auto data_type = inputDesc->type;
  auto ret = DistributionCollective::instance().ReduceScatterWrapper(input, output, receive_element_cnt, data_type,
                                                                     red_mode_, stream, NCCL_WORLD_GROUP);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ReduceScatter nccl run failed for " << layer_name_;
return ret;
}
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *ReduceScatterPlugin::clone() const noexcept {
auto *plugin = new ReduceScatterPlugin(*this);
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
nvinfer1::DimsExprs ReduceScatterPlugin::getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs,
int nbInputs,
nvinfer1::IExprBuilder &exprBuilder) noexcept {
nvinfer1::DimsExprs out_dims{};
out_dims.nbDims = inputs->nbDims;
auto rank_dim = exprBuilder.constant(rank_);
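  // ReduceScatter splits the first dimension evenly across the group, so divide dim 0 by the group size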
out_dims.d[0] = exprBuilder.operation(nvinfer1::DimensionOperation::kCEIL_DIV, *inputs->d[0], *rank_dim);
for (int i = 1; i < inputs->nbDims; i++) {
out_dims.d[i] = inputs->d[i];
}
return out_dims;
}
size_t ReduceScatterPlugin::getSerializationSize() const noexcept { return sizeof(schema::ReduceMode); }
void ReduceScatterPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &red_mode_, sizeof(schema::ReduceMode));
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_ReduceScatter, ReduceScatterTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,83 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCESCATTER_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCESCATTER_TENSORRT_H_
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
#include "src/runtime/delegate/tensorrt/op/tensorrt_plugin.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
#include "src/runtime/delegate/tensorrt/distribution/distribution_collective.h"
namespace mindspore::lite {
constexpr const char *REDUCESCATTER_PLUGIN_NAME{"ReduceScatterPlugin"};
class ReduceScatterTensorRT : public TensorRTOp {
public:
ReduceScatterTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~ReduceScatterTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
};
class ReduceScatterPlugin : public TensorRTPlugin {
public:
ReduceScatterPlugin(const std::string name, schema::ReduceMode red_mode, int rank, uint32_t device_id)
: TensorRTPlugin(name, std::string(REDUCESCATTER_PLUGIN_NAME), device_id), red_mode_(red_mode), rank_(rank) {}
ReduceScatterPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(REDUCESCATTER_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
red_mode_ = static_cast<const schema::ReduceMode *>(fields[0].data)[0];
rank_ = static_cast<const int *>(fields[1].data)[0];
}
ReduceScatterPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(REDUCESCATTER_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &red_mode_, sizeof(schema::ReduceMode));
DeserializeValue(&serialData, &serialLength, &rank_, sizeof(int));
}
ReduceScatterPlugin() = delete;
// IPluginV2DynamicExt Methods
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
nvinfer1::IExprBuilder &exprBuilder) noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void *buffer) const noexcept override;
private:
int rank_{0};
schema::ReduceMode red_mode_;
};
class ReduceScatterPluginCreater : public TensorRTPluginCreater<ReduceScatterPlugin> {
public:
ReduceScatterPluginCreater() : TensorRTPluginCreater(std::string(REDUCESCATTER_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_REDUCESCATTER_TENSORRT_H_

View File

@ -0,0 +1,230 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include "src/runtime/delegate/tensorrt/op/resize_tensorrt.h"
#include "nnacl/nnacl_common.h"
namespace mindspore::lite {
int ResizeTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
  if (in_tensors.size() != 1 && in_tensors.size() != INPUT_SIZE2) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
    return RET_ERROR;
  }
resize_op_ = op_primitive_->value_as_Resize();
if (resize_op_ == nullptr) {
MS_LOG(ERROR) << "convert failed " << op_name_;
return RET_ERROR;
}
if (resize_op_->method() == schema::ResizeMethod_LINEAR) {
MS_LOG(WARNING) << "TensorRT linear resize has precision issue, using cpu instead for " << op_name_;
return RET_ERROR;
}
  // a resize with constant new height/width cannot support dynamic HW input
  dynamic_shape_params_.support_hw_dynamic_ = !(resize_op_->new_height() > 0 && resize_op_->new_width() > 0);
return RET_OK;
}
int ResizeTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "context or network is invalid";
return RET_ERROR;
}
nvinfer1::ITensor *resize_in_tensor = tensorrt_in_tensors_[0].trt_tensor_;
MS_LOG(DEBUG) << "origin input " << GetTensorFormat(tensorrt_in_tensors_[0]);
if (resize_in_tensor->getDimensions().nbDims == DIMENSION_4D && tensorrt_in_tensors_[0].format_ == Format::NHWC) {
// NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer == nullptr) {
MS_LOG(ERROR) << "create transpose layer failed for " << op_name_;
return RET_ERROR;
}
transpose_layer->setName((op_name_ + "_transpose_in").c_str());
resize_in_tensor = transpose_layer->getOutput(0);
this->transpose_layer_ = transpose_layer;
}
MS_LOG(DEBUG) << "after transpose input " << GetTensorFormat(resize_in_tensor, Format::NCHW, false);
nvinfer1::IResizeLayer *resize_layer = ctx->network()->addResize(*resize_in_tensor);
if (resize_layer == nullptr) {
MS_LOG(ERROR) << "create resize layer failed for " << op_name_;
return RET_ERROR;
}
int ret = SetOutputDims(resize_in_tensor, resize_layer);
if (ret != RET_OK) {
MS_LOG(ERROR) << "SetOutputDims failed for " << op_name_;
return RET_ERROR;
}
ret = SetParams(resize_layer);
if (ret != RET_OK) {
MS_LOG(ERROR) << "SetParams failed for " << op_name_;
return RET_ERROR;
}
resize_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{resize_layer->getOutput(0), Format::NCHW, false});
MS_LOG(DEBUG) << "output " << GetTensorFormat(tensorrt_out_tensors_[0]);
this->layer_ = resize_layer;
return RET_OK;
}
int ResizeTensorRT::SetOutputDims(nvinfer1::ITensor *resize_in_tensor, nvinfer1::IResizeLayer *resize_layer) {
nvinfer1::Dims in_dims = resize_in_tensor->getDimensions();
if (in_tensors_.size() == 1 && !dynamic_shape_params_.support_dynamic_ && in_dims.nbDims == DIMENSION_4D) {
nvinfer1::Dims4 new_dims(in_dims.d[0], in_dims.d[1], resize_op_->new_height(), resize_op_->new_width()); // nchw
resize_layer->setOutputDimensions(new_dims); // static shape
} else if (in_tensors_.size() == 1 && !dynamic_shape_params_.support_hw_dynamic_ &&
dynamic_shape_params_.support_dynamic_ && in_dims.nbDims == DIMENSION_4D) {
// hw is static, but has dynamic batch size
float scales[DIMENSION_4D]{1, 1, 1, 1};
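    // scales follow the NCHW layout of the transposed input: keep N and C at 1, scale only H and W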
scales[kNCHW_H] = static_cast<float>(resize_op_->new_height()) / static_cast<float>(in_dims.d[kNCHW_H]);
scales[kNCHW_W] = static_cast<float>(resize_op_->new_width()) / static_cast<float>(in_dims.d[kNCHW_W]);
resize_layer->setScales(scales, DIMENSION_4D);
} else {
auto shape_value_tensor = in_tensors_[1];
if (shape_value_tensor.Data() == nullptr && tensorrt_in_tensors_.size() >= INPUT_SIZE2) {
// dynamic output shape
resize_layer->setInput(1, *tensorrt_in_tensors_[1].trt_tensor_);
} else {
std::vector<float> out_shape;
ParseValueFromShapeTensor(shape_value_tensor, &out_shape);
if (SameDims(out_shape, out_tensors_[0].Shape())) {
// static dims
if (out_shape.size() == DIMENSION_4D) {
// convert nhwc to nchw
auto channel = out_shape[out_shape.size() - 1];
out_shape.insert(out_shape.begin() + 1, channel);
out_shape.erase(out_shape.begin() + out_shape.size() - 1);
}
resize_layer->setOutputDimensions(ConvertCudaDims(out_shape));
} else if (IsScaleOutputDim(in_tensors_[0].Shape(), out_tensors_[0].Shape(), out_shape)) {
// scale dims
float scales[DIMENSION_4D]{1, 1, 1, 1};
scales[kNCHW_H] =
static_cast<float>(out_tensors_[0].Shape()[kNHWC_H]) / static_cast<float>(in_tensors_[0].Shape()[kNHWC_H]);
scales[kNCHW_W] =
static_cast<float>(out_tensors_[0].Shape()[kNHWC_W]) / static_cast<float>(in_tensors_[0].Shape()[kNHWC_W]);
resize_layer->setScales(scales, DIMENSION_4D);
} else if (out_tensors_[0].Shape().size() == DIMENSION_4D) {
MS_LOG(DEBUG) << op_name_ << " output shape tensor value is const, but set to scales for dynamic input shape.";
        float scales[DIMENSION_4D];  // the branch condition guarantees a 4D shape, so avoid a variable-length array
for (size_t i = 0; i < out_tensors_[0].Shape().size(); i++) {
scales[i] = static_cast<float>(out_tensors_[0].Shape()[i]) / static_cast<float>(in_tensors_[0].Shape()[i]);
}
// change to nchw
scales[kNCHW_W] = scales[kNHWC_W];
scales[kNCHW_H] = scales[kNHWC_H];
scales[kNCHW_C] = 1;
        MS_LOG(DEBUG) << op_name_ << " scale at H " << kNCHW_H << ": " << scales[kNCHW_H] << ", W " << kNCHW_W << ": "
<< scales[kNCHW_W];
resize_layer->setScales(scales, out_tensors_[0].Shape().size());
} else {
MS_LOG(ERROR) << "resize dims needs check for " << op_name_;
return RET_ERROR;
}
}
}
return RET_OK;
}
void ResizeTensorRT::ParseValueFromShapeTensor(const mindspore::MSTensor &shape_value_tensor,
std::vector<float> *out_shape) {
switch (shape_value_tensor.DataType()) {
case DataType::kNumberTypeFloat32: {
const float *shape_data_fp32 = static_cast<const float *>(shape_value_tensor.Data().get());
for (int i = 0; i < shape_value_tensor.ElementNum(); i++) {
out_shape->push_back(*(shape_data_fp32 + i));
}
break;
}
case DataType::kNumberTypeFloat16: {
const uint16_t *shape_data_fp16 = static_cast<const uint16_t *>(shape_value_tensor.Data().get());
for (int i = 0; i < shape_value_tensor.ElementNum(); i++) {
out_shape->push_back(ShortToFloat32(*(shape_data_fp16 + i)));
}
break;
}
case DataType::kNumberTypeInt32: {
      const int *shape_data_int32 = static_cast<const int *>(shape_value_tensor.Data().get());
      for (int i = 0; i < shape_value_tensor.ElementNum(); i++) {
        out_shape->push_back(*(shape_data_int32 + i));
}
break;
}
default:
MS_LOG(WARNING) << op_name_
<< " more datatype need to check: " << static_cast<int>(shape_value_tensor.DataType());
break;
}
if (out_shape->size() == DIMENSION_2D &&
tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D) {
// out_shape: origin_n, out_shape[0], out_shape[1], origin_c
out_shape->insert(out_shape->begin(),
tensorrt_in_tensors_[0].trt_tensor_->getDimensions().d[0]); // batch size is dynamic
out_shape->push_back(in_tensors_[0].Shape()[kNHWC_C]); // channel is const
}
}
bool ResizeTensorRT::IsScaleOutputDim(const std::vector<int64_t> &in_shape, const std::vector<int64_t> &out_shape,
const std::vector<float> &shape_tensor_val) {
if (out_shape.size() != DIMENSION_4D) {
MS_LOG(WARNING) << "dims count needs check for " << op_name_;
return false;
}
if (in_shape.size() != out_shape.size() || shape_tensor_val.size() != in_shape.size()) {
MS_LOG(WARNING) << "tensor shape is not same for " << op_name_;
return false;
}
for (size_t i = 0; i < in_shape.size(); i++) {
if (std::abs(in_shape[i] * shape_tensor_val[i] - out_shape[i]) > 1e-6) {
return false;
}
}
return true;
}
int ResizeTensorRT::SetParams(nvinfer1::IResizeLayer *resize_layer) {
auto method = resize_op_->method();
std::map<schema::ResizeMethod, nvinfer1::ResizeMode> method_map = {
{schema::ResizeMethod_LINEAR, nvinfer1::ResizeMode::kLINEAR},
{schema::ResizeMethod_NEAREST, nvinfer1::ResizeMode::kNEAREST}};
if (method_map.find(method) == method_map.end()) {
MS_LOG(ERROR) << op_name_ << " unsupported resize mode " << EnumNameResizeMethod(method);
return RET_ERROR;
}
resize_layer->setResizeMode(method_map.at(method));
  // not supported in TensorRT 6; TensorRT 8 supports setCoordinateTransformation()
auto coordinate_transform_mode = resize_op_->coordinate_transform_mode();
if (coordinate_transform_mode != schema::CoordinateTransformMode_ASYMMETRIC) {
MS_LOG(WARNING) << op_name_ << " has coordinate_transform_mode may not supported: "
<< EnumNameCoordinateTransformMode(coordinate_transform_mode);
}
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Resize, ResizeTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,52 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_RESIZE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_RESIZE_TENSORRT_H_
#include <string>
#include <vector>
#include <map>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class ResizeTensorRT : public TensorRTOp {
public:
ResizeTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~ResizeTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
int SetOutputDims(nvinfer1::ITensor *resize_in_tensor, nvinfer1::IResizeLayer *resize_layer);
void ParseValueFromShapeTensor(const mindspore::MSTensor &shape_value_tensor, std::vector<float> *out_shape);
bool IsScaleOutputDim(const std::vector<int64_t> &in_shape, const std::vector<int64_t> &out_shape,
const std::vector<float> &shape_tensor_val);
int SetParams(nvinfer1::IResizeLayer *resize_layer);
const schema::Resize *resize_op_{nullptr};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_RESIZE_TENSORRT_H_

View File

@ -0,0 +1,227 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <numeric>
#include <functional>
#include "src/runtime/delegate/tensorrt/op/scale_tensorrt.h"
#include "src/runtime/delegate/tensorrt/op/activation_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
constexpr int SCALE_INDEX = 1;
constexpr int SHIFT_INDEX = 2;
constexpr int POWER_INDEX = 3;
int ScaleTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3 && in_tensors.size() != INPUT_SIZE4) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is: " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is: " << out_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int ScaleTensorRT::AddInnerOp(TensorRTContext *ctx) {
CHECK_NULL_RETURN(ctx);
auto scale_op = op_primitive_->value_as_ScaleFusion();
CHECK_NULL_RETURN(scale_op);
schema::ActivationType activation_type = scale_op->activation_type();
// mode of scale
axis_ = scale_op->axis();
axis_ = axis_ < 0 ? static_cast<int64_t>(in_tensors_[0].Shape().size() + axis_) : axis_;
out_format_ = tensorrt_in_tensors_[0].format_;
out_same_format_ = tensorrt_in_tensors_[0].same_format_;
mode_ = GetScaleMode(axis_);
MS_LOG(DEBUG) << "before transpose " << GetTensorFormat(tensorrt_in_tensors_[0]);
nvinfer1::ITensor *scale_in_tensor = PreProcessInputTensor(ctx);
if (scale_in_tensor == nullptr) {
MS_LOG(ERROR) << "PreProcessInputTensor failed: " << op_name_;
return RET_ERROR;
}
MS_LOG(DEBUG) << "after transpose " << GetTensorFormat(scale_in_tensor, out_format_, out_same_format_);
nvinfer1::ITensor *op_out_tensor{nullptr};
if (scale_in_tensor->getDimensions().nbDims == DIMENSION_4D) {
op_out_tensor = RunAs4DimsScale(ctx, scale_in_tensor);
} else {
op_out_tensor = RunAsMutiDimsScale(ctx, scale_in_tensor);
}
CHECK_NULL_RETURN(op_out_tensor);
// add activation
if (activation_type != schema::ActivationType::ActivationType_NO_ACTIVATION) {
auto activation_layer = ActivationTensorRT::AddActivation(ctx, activation_type, 0, 0, 0, op_out_tensor, device_id_);
CHECK_NULL_RETURN(activation_layer);
activation_layer->setName((op_name_ + "_activation").c_str());
op_out_tensor = activation_layer->getOutput(0);
}
op_out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, out_format_, out_same_format_});
MS_LOG(DEBUG) << "output " << GetTensorFormat(tensorrt_out_tensors_[0]);
return RET_OK;
}
nvinfer1::ITensor *ScaleTensorRT::PreProcessInputTensor(TensorRTContext *ctx) {
nvinfer1::ITensor *scale_in_tensor = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
mode_ == nvinfer1::ScaleMode::kCHANNEL) {
// per channel input format should be nchw, otherwise should be same with scale nhwc
// transpose: NHWC->NCHW
if ((tensorrt_in_tensors_[0].format_ == Format::NHWC && axis_ == kNHWC_C) ||
(tensorrt_in_tensors_[0].same_format_ == true && axis_ == kNHWC_C)) {
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return nullptr;
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
scale_in_tensor = transpose_layer_in->getOutput(0);
out_format_ = Format::NCHW;
out_same_format_ = !out_same_format_;
} else if (out_format_ != Format::NCHW && axis_ != kNCHW_C) {
MS_LOG(WARNING) << op_name_ << " out format (NHWC:1, NCHW:0) infer as " << out_format_ << ", and axis is "
<< axis_;
}
} else if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NCHW && mode_ == nvinfer1::ScaleMode::kELEMENTWISE) {
// transpose: NCHW->NHWC
nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return nullptr;
}
transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
scale_in_tensor = transpose_layer_in->getOutput(0);
out_format_ = Format::NHWC;
out_same_format_ = true;
}
return scale_in_tensor;
}
nvinfer1::ScaleMode ScaleTensorRT::GetScaleMode(int64_t axis) {
nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kUNIFORM;
auto input_data_shape = in_tensors_[0].Shape();
auto input_weight_shape = in_tensors_[1].Shape();
int total = std::accumulate(input_data_shape.begin(), input_data_shape.end(), 1, std::multiplies<int>());
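  // UNIFORM: scalar weight; CHANNEL: weight length equals the size of the scale axis;
  // ELEMENTWISE: weight length equals the total element count of the input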
if (input_weight_shape.size() == 0 || (input_weight_shape.size() == 1 && input_weight_shape[0] == 1)) {
mode = nvinfer1::ScaleMode::kUNIFORM;
} else if ((axis < static_cast<int64_t>(input_data_shape.size()) && input_weight_shape.size() == 1 &&
input_data_shape[axis] == input_weight_shape[0]) ||
(input_data_shape.size() == DIMENSION_4D && axis == DIMENSION_3D)) {
mode = nvinfer1::ScaleMode::kCHANNEL;
} else if (input_weight_shape.size() == 1 && input_weight_shape[0] == total) {
mode = nvinfer1::ScaleMode::kELEMENTWISE;
} else {
MS_LOG(ERROR) << "ScaleMode create failed: " << op_name_;
return mode;
}
MS_LOG(DEBUG) << op_name_ << " ScaleMode(UNIFORM 0, CHANNEL 1, ELEMENTWISE 2): " << static_cast<int>(mode);
return mode;
}
nvinfer1::ITensor *ScaleTensorRT::RunAs4DimsScale(TensorRTContext *ctx, nvinfer1::ITensor *scale_in_tensor) {
bool nd = false;
// (input * scale + shift) ^ power
nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, 0};
nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, 0};
nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, 0};
if (in_tensors_.size() > SCALE_INDEX) {
scale.values = in_tensors_[SCALE_INDEX].MutableData();
MS_ASSERT(scale.values);
scale.count = in_tensors_[SCALE_INDEX].ElementNum();
scale.type = ConvertDataType(in_tensors_[SCALE_INDEX].DataType());
shift.type = scale.type;
power.type = scale.type;
nd = in_tensors_[1].Shape().size() == 1 ? false : true;
}
if (in_tensors_.size() > SHIFT_INDEX) {
shift.values = in_tensors_[SHIFT_INDEX].MutableData();
MS_ASSERT(shift.values);
shift.count = in_tensors_[SHIFT_INDEX].ElementNum();
}
if (in_tensors_.size() > POWER_INDEX) {
power.values = in_tensors_[POWER_INDEX].MutableData();
MS_ASSERT(power.values);
power.count = in_tensors_[POWER_INDEX].ElementNum();
}
nvinfer1::IScaleLayer *cal_layer = nullptr;
if (nd) {
MS_LOG(WARNING) << "multi dims ScaleMode enter";
cal_layer = ctx->network()->addScaleNd(*scale_in_tensor, mode_, shift, scale, power, axis_);
} else {
cal_layer = ctx->network()->addScale(*scale_in_tensor, mode_, shift, scale, power);
}
if (cal_layer == nullptr) {
MS_LOG(ERROR) << "addScaleNd failed for: " << op_name_;
return nullptr;
}
cal_layer->setName(op_name_.c_str());
this->layer_ = cal_layer;
return cal_layer->getOutput(0);
}
nvinfer1::ITensor *ScaleTensorRT::RunAsMutiDimsScale(TensorRTContext *ctx, nvinfer1::ITensor *scale_in_tensor) {
auto scale_tensor = ConvertConstantTensorWithDims(ctx, in_tensors_[1], in_tensors_[0].Shape(), op_name_);
if (scale_tensor == nullptr) {
MS_LOG(ERROR) << "ConvertConstantTensorWithDims failed for " << op_name_;
return nullptr;
}
auto mul_layer =
ctx->network()->addElementWise(*scale_in_tensor, *scale_tensor, nvinfer1::ElementWiseOperation::kPROD);
if (mul_layer == nullptr) {
MS_LOG(ERROR) << "add mul failed for " << op_name_;
return nullptr;
}
mul_layer->setName((op_name_ + "_scale").c_str());
layer_ = mul_layer;
nvinfer1::ITensor *out_tensor = mul_layer->getOutput(0);
// add shift
if (in_tensors_.size() >= INPUT_SIZE3) {
auto shift_tensor = ConvertConstantTensorWithDims(ctx, in_tensors_[SHIFT_INDEX], in_tensors_[0].Shape(), op_name_);
if (shift_tensor == nullptr) {
MS_LOG(ERROR) << "ConvertConstantTensorWithDims failed for " << op_name_;
return nullptr;
}
auto shift_layer = ctx->network()->addElementWise(*out_tensor, *shift_tensor, nvinfer1::ElementWiseOperation::kSUM);
if (shift_layer == nullptr) {
MS_LOG(ERROR) << "add bias failed for " << op_name_;
return nullptr;
}
shift_layer->setName((op_name_ + "_shift").c_str());
out_tensor = shift_layer->getOutput(0);
}
if (in_tensors_.size() == INPUT_SIZE4) {
    MS_LOG(WARNING) << op_name_ << " has a power input, which is not supported in multi-dims scale";
return nullptr;
}
return out_tensor;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_ScaleFusion, ScaleTensorRT)
} // namespace mindspore::lite
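The mode selection in GetScaleMode above maps the weight tensor's shape onto TensorRT's three scale modes. As a reference, here is a minimal standalone sketch of that decision, written with a plain enum so it compiles without TensorRT; ShapeToScaleMode and the SketchScaleMode names are hypothetical and exist only for illustration, they are not part of the delegate.
// Sketch only: reproduces the shape-to-mode decision of GetScaleMode with plain types.
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

enum class SketchScaleMode { kUniform, kChannel, kElementwise, kInvalid };

SketchScaleMode ShapeToScaleMode(const std::vector<int64_t> &data_shape,
                                 const std::vector<int64_t> &weight_shape, int64_t axis) {
  int64_t total = std::accumulate(data_shape.begin(), data_shape.end(), static_cast<int64_t>(1),
                                  std::multiplies<int64_t>());
  if (weight_shape.empty() || (weight_shape.size() == 1 && weight_shape[0] == 1)) {
    return SketchScaleMode::kUniform;  // single scalar applied to every element
  }
  if ((axis < static_cast<int64_t>(data_shape.size()) && weight_shape.size() == 1 &&
       data_shape[axis] == weight_shape[0]) ||
      (data_shape.size() == 4 && axis == 3)) {
    return SketchScaleMode::kChannel;  // one value per channel along `axis`
  }
  if (weight_shape.size() == 1 && weight_shape[0] == total) {
    return SketchScaleMode::kElementwise;  // one value per element
  }
  return SketchScaleMode::kInvalid;
}

int main() {
  // NHWC data of shape {1, 32, 32, 16} with a 16-element weight on axis 3 -> channel mode.
  auto mode = ShapeToScaleMode({1, 32, 32, 16}, {16}, 3);
  std::cout << static_cast<int>(mode) << std::endl;  // prints 1 (kChannel)
  return 0;
}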

View File

@ -0,0 +1,57 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_SCALE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_SCALE_TENSORRT_H_
#include <string>
#include <vector>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
namespace mindspore::lite {
class ScaleTensorRT : public TensorRTOp {
public:
ScaleTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~ScaleTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
nvinfer1::ScaleMode GetScaleMode(int64_t axis);
nvinfer1::ITensor *PreProcessInputTensor(TensorRTContext *ctx);
nvinfer1::ITensor *RunAs4DimsScale(TensorRTContext *ctx, nvinfer1::ITensor *scale_in_tensor);
nvinfer1::ITensor *RunAsMutiDimsScale(TensorRTContext *ctx, nvinfer1::ITensor *scale_in_tensor);
Format out_format_;
bool out_same_format_{false};
nvinfer1::ScaleMode mode_;
int64_t axis_{0};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_SCALE_TENSORRT_H_
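The REGISTER_TENSORRT_CREATOR line at the bottom of each op source file registers a factory keyed on the schema primitive type, so the subgraph builder can construct the op by type at runtime. Below is a minimal sketch of that kind of static-registration pattern; every name in it (OpBase, Registry, Registrar, ScaleOpSketch, the key 42) is hypothetical and only illustrates the idea, it is not the actual macro expansion in this codebase.
// Sketch of a static-registration factory pattern similar in spirit to REGISTER_TENSORRT_CREATOR.
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct OpBase {
  virtual ~OpBase() = default;
  virtual std::string Name() const = 0;
};

using Creator = std::function<std::unique_ptr<OpBase>()>;

std::map<int, Creator> &Registry() {
  static std::map<int, Creator> registry;  // keyed by primitive type id in the real delegate
  return registry;
}

struct Registrar {
  Registrar(int type, Creator creator) { Registry()[type] = std::move(creator); }
};

struct ScaleOpSketch : OpBase {
  std::string Name() const override { return "ScaleFusion"; }
};

// Analogous in spirit to REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_ScaleFusion, ScaleTensorRT).
static Registrar g_scale_registrar(42, [] { return std::unique_ptr<OpBase>(new ScaleOpSketch()); });

int main() {
  auto it = Registry().find(42);
  if (it != Registry().end()) {
    std::cout << it->second()->Name() << std::endl;  // prints "ScaleFusion"
  }
  return 0;
}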

View File

@ -0,0 +1,99 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <numeric>
#include "src/runtime/delegate/tensorrt/op/scatternd_tensorrt.h"
#include "src/runtime/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
int ScatterNdTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
#if TRT_VERSION_GE(8, 2)
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
return RET_OK;
#else
MS_LOG(WARNING) << "low TensorRT version don't support Scatter op, please upgrade TensorRT version to 8.2 or higher";
return RET_ERROR;
#endif
}
int ScatterNdTensorRT::AddInnerOp(TensorRTContext *ctx) {
#if TRT_VERSION_GE(8, 2)
ITensorHelper scatter_input;
int ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[0], &scatter_input);
if (ret != RET_OK || scatter_input.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim input tensor failed for " << op_name_;
return ret;
}
if (tensorrt_in_tensors_.size() < INPUT_SIZE3) {
auto indices = ConvertConstantTensor(ctx, in_tensors_[1], op_name_ + "_indice");
if (indices == nullptr) {
MS_LOG(ERROR) << "add const input tensor failed for " << op_name_;
return RET_ERROR;
}
tensorrt_in_tensors_.push_back(ITensorHelper{indices});
auto updates = ConvertConstantTensor(ctx, in_tensors_[INPUT_SIZE2], op_name_ + "_update");
if (updates == nullptr) {
MS_LOG(ERROR) << "add const input tensor failed for " << op_name_;
return RET_ERROR;
}
tensorrt_in_tensors_.push_back(ITensorHelper{updates});
}
ITensorHelper indices_helper;
ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[1], &indices_helper);
if (ret != RET_OK || indices_helper.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim indices tensor failed for " << op_name_;
return ret;
}
ITensorHelper updates_helper;
ret = PreprocessInputs2SameDim(ctx, tensorrt_in_tensors_[INPUT_SIZE2], &updates_helper);
if (ret != RET_OK || updates_helper.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim update tensor failed for " << op_name_;
return ret;
}
nvinfer1::IScatterLayer *scatter_layer = ctx->network()->addScatter(
*scatter_input.trt_tensor_, *indices_helper.trt_tensor_, *updates_helper.trt_tensor_, nvinfer1::ScatterMode::kND);
if (scatter_layer == nullptr) {
MS_LOG(ERROR) << "addScatter failed for TensorRT.";
return RET_ERROR;
}
nvinfer1::ITensor *out_tensor = scatter_layer->getOutput(0);
out_tensor->setName((op_name_ + "_0").c_str());
this->AddInnerOutTensors(ITensorHelper{out_tensor, scatter_input.format_, scatter_input.same_format_});
this->layer_ = scatter_layer;
return RET_OK;
#else
MS_LOG(WARNING) << "low TensorRT version don't support Scatter op, please upgrade TensorRT version to 8.2 or higher";
return RET_ERROR;
#endif
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_ScatterNdUpdate, ScatterNdTensorRT)
} // namespace mindspore::lite
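addScatter with nvinfer1::ScatterMode::kND applies indexed updates: each row of the indices tensor selects a location in the data tensor, which is overwritten with the corresponding entry of the updates tensor. The following self-contained sketch shows that semantics for full-index rows on a 2-D tensor using plain std::vector; ScatterNdUpdate2D is a hypothetical helper written only to illustrate the data flow, not the nvinfer1 call.
// Sketch of ScatterND-update semantics for full-index rows on a 2-D tensor.
#include <array>
#include <iostream>
#include <vector>

void ScatterNdUpdate2D(std::vector<float> *data, int cols,
                       const std::vector<std::array<int, 2>> &indices,
                       const std::vector<float> &updates) {
  // indices[i] = {row, col} picks one element of data; updates[i] replaces it.
  for (size_t i = 0; i < indices.size(); ++i) {
    (*data)[indices[i][0] * cols + indices[i][1]] = updates[i];
  }
}

int main() {
  std::vector<float> data(2 * 3, 0.0f);                        // 2x3 tensor of zeros
  std::vector<std::array<int, 2>> indices = {{0, 1}, {1, 2}};  // two element positions
  std::vector<float> updates = {5.0f, 7.0f};
  ScatterNdUpdate2D(&data, 3, indices, updates);
  for (float v : data) std::cout << v << " ";  // prints: 0 5 0 0 0 7
  std::cout << std::endl;
  return 0;
}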

View File

@ -0,0 +1,39 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_SCATTERND_TENSORRT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_SCATTERND_TENSORRT_H_
#include <string>
#include <vector>
#include <algorithm>
#include "src/runtime/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class ScatterNdTensorRT : public TensorRTOp {
public:
ScatterNdTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name,
const schema::QuantType &quant_type)
: TensorRTOp(primitive, in_tensors, out_tensors, name, quant_type) {}
~ScatterNdTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
};
} // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_SCATTERND_TENSORRT_H_

View File

@ -0,0 +1,69 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/delegate/tensorrt/op/shape_tensorrt.h"
namespace mindspore::lite {
int ShapeTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_dynamic_ = false;
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
}
int ShapeTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "context or network is invalid";
return RET_ERROR;
}
nvinfer1::ITensor *shape_input = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
// transpose: NCHW->NHWC
nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(ctx, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NCHW->NHWC failed for " << op_name_;
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
shape_input = transpose_layer_in->getOutput(0);
this->transpose_layer_ = transpose_layer_in;
}
nvinfer1::IShapeLayer *shape_layer = ctx->network()->addShape(*shape_input);
if (shape_layer == nullptr) {
MS_LOG(ERROR) << "add shape op failed for TensorRT.";
return RET_ERROR;
}
shape_layer->setName(op_name_.c_str());
shape_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{shape_layer->getOutput(0), Format::NHWC, true});
this->layer_ = shape_layer;
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(schema::PrimitiveType_Shape, ShapeTensorRT)
} // namespace mindspore::lite
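Because the delegate may keep 4-D activations in NCHW internally, the Shape op transposes back to NHWC before adding the shape layer, so the reported dimensions match the framework-level layout. A small sketch of the effect on the reported shape, with no TensorRT dependency; PermuteNchwToNhwc is a hypothetical helper used only for illustration.
// Sketch: the shape reported for an NCHW-held tensor after the NCHW->NHWC transpose above.
#include <array>
#include <cstdint>
#include <iostream>

std::array<int64_t, 4> PermuteNchwToNhwc(const std::array<int64_t, 4> &nchw) {
  // {N, C, H, W} -> {N, H, W, C}
  return {nchw[0], nchw[2], nchw[3], nchw[1]};
}

int main() {
  std::array<int64_t, 4> nchw = {1, 16, 32, 32};
  auto nhwc = PermuteNchwToNhwc(nchw);
  for (auto d : nhwc) std::cout << d << " ";  // prints: 1 32 32 16
  std::cout << std::endl;
  return 0;
}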

Some files were not shown because too many files have changed in this diff