Fix dropout and matrix_inverse op bugs.

This commit is contained in:
linqingke 2021-06-08 16:05:40 +08:00
parent 77e562db4f
commit c68ba5ac6a
2 changed files with 20 additions and 11 deletions

View File

@ -39,17 +39,22 @@ class MatrixInverseGpuKernel : public GpuKernel {
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
auto lu_batch_addr = GetDeviceAddress<T *>(workspace, 0);
auto inv_batch_addr = GetDeviceAddress<T *>(workspace, 1);
auto pivo_addr = GetDeviceAddress<int>(workspace, 2);
auto info_addr = GetDeviceAddress<int>(workspace, 3);
auto compute_input_addr = GetDeviceAddress<T>(workspace, 0);
auto lu_batch_addr = GetDeviceAddress<T *>(workspace, 1);
auto inv_batch_addr = GetDeviceAddress<T *>(workspace, 2);
auto pivo_addr = GetDeviceAddress<int>(workspace, 3);
auto info_addr = GetDeviceAddress<int>(workspace, 4);
int len = SizeToInt(size_);
int batchsize = SizeToInt(batch_size_);
for (size_t i = 0; i < batch_size_; i++) {
lu_addr_[i] = input_addr + i * len * len;
lu_addr_[i] = compute_input_addr + i * len * len;
inv_addr_[i] = output_addr + i * len * len;
}
CHECK_CUDA_RET_WITH_ERROR(kernel_node_,
cudaMemcpyAsync(compute_input_addr, input_addr, input_size_, cudaMemcpyDeviceToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"cuda memcopy Fail");
CHECK_CUDA_RET_WITH_ERROR(kernel_node_,
cudaMemcpyAsync(lu_batch_addr, lu_addr_.data(), sizeof(T *) * batch_size_,
cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
@ -114,16 +119,17 @@ class MatrixInverseGpuKernel : public GpuKernel {
protected:
void InitSizeLists() override {
input_size_list_.push_back(input_size_);
output_size_list_.push_back(input_size_);
input_size_list_.emplace_back(input_size_);
output_size_list_.emplace_back(input_size_);
workspace_size_list_.emplace_back(input_size_);
size_t lu_size = batch_size_ * sizeof(T *);
workspace_size_list_.push_back(lu_size);
workspace_size_list_.emplace_back(lu_size);
size_t inv_size = batch_size_ * sizeof(T *);
workspace_size_list_.push_back(inv_size);
workspace_size_list_.emplace_back(inv_size);
size_t pivo_size = batch_size_ * size_ * sizeof(int);
workspace_size_list_.push_back(pivo_size);
workspace_size_list_.emplace_back(pivo_size);
size_t info_size = batch_size_ * sizeof(int);
workspace_size_list_.push_back(info_size);
workspace_size_list_.emplace_back(info_size);
}
private:

View File

@ -87,6 +87,9 @@ class DropoutGpuFwdKernel : public GpuKernel {
int64_t seed = GetAttr<int64_t>(kernel_node, "Seed0");
if (seed == 0) {
seed = GetAttr<int64_t>(kernel_node, "Seed1");
if (seed == 0) {
seed = time(NULL);
}
}
seed_ = static_cast<uint64_t>(seed);