!29609 update tensorarray stack for static size

Merge pull request !29609 from VectorSL/static_tensorarray_stack
i-robot 2022-02-14 09:19:20 +00:00 committed by Gitee
commit 705134e01a
GPG Key ID: 173E9B9CA92EEF8F
13 changed files with 145 additions and 62 deletions
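
In short: TensorArrayStack previously always treated the array as dynamically sized and inferred an output shape bounded by max_element. This commit threads a dynamic_size flag and an explicit size from nn.TensorArray through the TensorArrayStack primitive down to the CPU and GPU kernels, so a statically sized array stacks to a fixed (size, *element_shape) output with unwritten slots zero-filled. A minimal usage sketch mirroring the updated tests below (the positional argument order of nn.TensorArray is assumed from the diff):

import mindspore
import mindspore.nn as nn
from mindspore import Tensor, context

class StackNet(nn.Cell):
    # Hypothetical wrapper, modelled on TensorArrayNet in the updated tests.
    def __init__(self):
        super(StackNet, self).__init__()
        # dynamic_size=False, size=12 (positional order taken from the diff)
        self.ta = nn.TensorArray(mindspore.int64, (), False, 12)

    def construct(self, index, value):
        self.ta.write(index, value)
        v = self.ta.read(index)
        s = self.ta.stack()  # fixed shape (12,); unwritten slots are zeros
        return v, s

context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
v, s = StackNet()(Tensor(0, mindspore.int64), Tensor(5, mindspore.int64))
# s == [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]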

View File

@@ -25,15 +25,18 @@ namespace kernel {
using mindspore::device::TensorArrayMgr;
using mindspore::device::TensorArrayPtr;
TensorArrayStackCpuKernelMod::TensorArrayStackCpuKernelMod()
: handle_(0), value_size_(0), ele_size_(0), type_(nullptr) {
: handle_(0), value_size_(0), ele_size_(0), type_(nullptr), is_dynamic_(true) {
ResetResource();
}
void TensorArrayStackCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_node_ = kernel_node;
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto shape = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "element_shape");
auto max_element = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "max_element");
is_dynamic_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "is_dynamic_shape");
auto size = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "size");
for (auto i : shape) {
shapes_.push_back(LongToSize(i));
}
@@ -42,7 +45,11 @@ void TensorArrayStackCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
for (auto i : shapes_) {
ele_size_ *= i;
}
value_size_ = ele_size_ * LongToSize(max_element);
if (is_dynamic_) {
value_size_ = ele_size_ * LongToSize(max_element);
} else {
value_size_ = ele_size_ * LongToSize(size);
}
output_size_list_.push_back(value_size_);
input_size_list_.push_back(sizeof(int64_t));
}
@@ -61,6 +68,7 @@ void TensorArrayStackCpuKernelMod::ResetResource() noexcept {
handle_ = 0;
value_size_ = 0;
ele_size_ = 0;
is_dynamic_ = true;
shapes_.clear();
input_size_list_.clear();
output_size_list_.clear();
@@ -73,6 +81,14 @@ bool TensorArrayStackCpuKernelMod::Launch(const std::vector<AddressPtr> &inputs,
auto out_value = GetDeviceAddress<unsigned char>(outputs, 0);
MS_EXCEPTION_IF_NULL(out_value);
MS_EXCEPTION_IF_NULL(handle_addr);
// Zero out_value when the TensorArray has a static size.
if (!is_dynamic_) {
auto ret = memset_s(out_value, outputs[0]->size, 0, value_size_);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memset failed, errorno(" << ret << ")";
}
}
handle_ = handle_addr[0];
TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle_);
MS_EXCEPTION_IF_NULL(tensors_);
@@ -85,10 +101,12 @@ bool TensorArrayStackCpuKernelMod::Launch(const std::vector<AddressPtr> &inputs,
MS_EXCEPTION_IF_NULL(src_addr);
auto ret = memcpy_s(out_value + ele_size_ * i, out_ele_size, src_addr, ele_size_);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Memcpy failed, errorno(" << ret << ")";
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memcpy failed, errorno(" << ret << ")";
}
}
PostExecute();
if (is_dynamic_) {
PostExecute();
}
return true;
}
} // namespace kernel
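
Because a static-size array must produce all size slots even when only some were written, Launch now zeroes the whole output buffer up front and skips PostExecute (the dynamic-shape update) in static mode. A NumPy sketch of the resulting stack semantics, for illustration only:

import numpy as np

def stack_static(written, size, element_shape, dtype=np.int64):
    """Mimic static-size TensorArrayStack: zero-fill, then copy written slots."""
    out = np.zeros((size,) + tuple(element_shape), dtype=dtype)  # the memset_s
    for index, value in written.items():  # the per-element memcpy_s loop
        out[index] = value
    return out

print(stack_static({0: 5}, size=12, element_shape=()))
# [5 0 0 0 0 0 0 0 0 0 0 0]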

View File

@@ -45,6 +45,7 @@ class TensorArrayStackCpuKernelMod : public NativeCpuKernelMod {
size_t ele_size_;
std::vector<size_t> shapes_;
TypePtr type_;
bool is_dynamic_;
};
MS_REG_CPU_KERNEL(TensorArrayStack, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),

View File

@@ -42,9 +42,7 @@ bool TensorArrayClearKernelMod::Launch(const std::vector<AddressPtr> &inputs, co
auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0);
MS_ERROR_IF_NULL(handle_addr);
int64_t handle = 0;
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream)),
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
"Get handle to host failed");
TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle);
MS_ERROR_IF_NULL(tensors_);

View File

@@ -42,9 +42,7 @@ bool TensorArrayCloseKernelMod::Launch(const std::vector<AddressPtr> &inputs, co
auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0);
MS_ERROR_IF_NULL(handle_addr);
int64_t handle = 0;
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream)),
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
"Get handle to host failed");
GPUTensorArrayPtr tensors_ =
std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle));

View File

@@ -57,9 +57,7 @@ bool TensorArrayReadKernelMod::Launch(const std::vector<AddressPtr> &inputs, con
reinterpret_cast<cudaStream_t>(stream)),
"Get index to host failed");
int64_t handle = 0;
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream)),
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
"Get handle to host failed");
TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle);
MS_ERROR_IF_NULL(tensors_);

View File

@@ -27,7 +27,7 @@ namespace kernel {
using mindspore::device::TensorArrayMgr;
using mindspore::device::TensorArrayPtr;
TensorArrayStackKernelMod::TensorArrayStackKernelMod()
: handle_(0), value_size_(0), ele_size_(0), stream_ptr_(nullptr), type_(nullptr) {
: handle_(0), value_size_(0), ele_size_(0), stream_ptr_(nullptr), type_(nullptr), is_dynamic_(true) {
ResetResource();
}
@@ -36,6 +36,8 @@ bool TensorArrayStackKernelMod::Init(const CNodePtr &kernel_node) {
kernel_node_ = kernel_node;
auto shape = GetAttr<std::vector<int64_t>>(kernel_node, "element_shape");
auto max_element = GetAttr<int64_t>(kernel_node, "max_element");
is_dynamic_ = GetAttr<bool>(kernel_node, "is_dynamic_shape");
auto size = GetAttr<int64_t>(kernel_node, "size");
for (auto i : shape) {
shapes_.push_back(LongToSize(i));
}
@@ -44,7 +46,14 @@ bool TensorArrayStackKernelMod::Init(const CNodePtr &kernel_node) {
for (auto i : shapes_) {
ele_size_ *= i;
}
value_size_ = ele_size_ * LongToSize(max_element);
if (is_dynamic_) {
value_size_ = ele_size_ * LongToSize(max_element);
} else {
if (size <= 0) {
MS_LOG(EXCEPTION) << "Size should larger than 0 when is_dynamic_shape = false, but get " << size;
}
value_size_ = ele_size_ * LongToSize(size);
}
InitSizeLists();
return true;
}
@@ -84,9 +93,14 @@ bool TensorArrayStackKernelMod::Launch(const std::vector<AddressPtr> &inputs, co
auto out_value = GetDeviceAddress<unsigned char>(outputs, 0);
MS_ERROR_IF_NULL(out_value);
MS_ERROR_IF_NULL(handle_addr);
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&handle_, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream_ptr)),
// Zero out_value when the TensorArray has a static size.
if (!is_dynamic_) {
CHECK_CUDA_RET_WITH_EXCEPT(
kernel_node_, cudaMemsetAsync(out_value, 0, outputs[0]->size, reinterpret_cast<cudaStream_t>(stream_ptr)),
"Cudamemset output value failed");
}
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle_, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
"Get handle to host failed");
TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle_);
MS_ERROR_IF_NULL(tensors_);

View File

@@ -46,6 +46,7 @@ class TensorArrayStackKernelMod : public NativeGpuKernelMod {
void *stream_ptr_;
std::vector<size_t> shapes_;
TypePtr type_;
bool is_dynamic_;
};
MS_REG_GPU_KERNEL(TensorArrayStack, TensorArrayStackKernelMod)

View File

@@ -60,9 +60,7 @@ bool TensorArrayWriteKernelMod::Launch(const std::vector<AddressPtr> &inputs, co
reinterpret_cast<cudaStream_t>(stream)),
"Get indexd failed");
int64_t handle = 0;
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream)),
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
"Get handle to host failed");
GPUTensorArrayPtr tensors_ =
std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle));

View File

@@ -42,19 +42,37 @@ AbstractBasePtr InferImplTensorArrayStack(const AnalysisEnginePtr &, const Primi
if (attr_dtype == nullptr) {
MS_LOG(EXCEPTION) << "No attribute [dtype] in " << op_name;
}
auto attr_is_dynamic = primitive->GetAttr("is_dynamic_shape");
if (attr_is_dynamic == nullptr) {
MS_LOG(EXCEPTION) << "No attribute [is_dynamic_shape] in " << op_name;
}
auto attr_size = primitive->GetAttr("size");
if (attr_size == nullptr) {
MS_LOG(EXCEPTION) << "No attribute [size] in " << op_name;
}
auto is_dynamic = GetValue<bool>(attr_is_dynamic);
auto size = GetValue<int64_t>(attr_size);
auto ele_shape = GetValue<std::vector<int64_t>>(attr_shape);
auto type = GetValue<TypePtr>(attr_dtype);
primitive->set_attr("max_element", MakeValue(kMaxElement));
auto max_shape_ = ele_shape;
auto min_shape_ = ele_shape;
auto out_shape_ = ele_shape;
(void)max_shape_.insert(max_shape_.begin(), kMaxElement);
(void)min_shape_.insert(min_shape_.begin(), 1);
(void)out_shape_.insert(out_shape_.begin(), -1);
ShapeVector out_shape = out_shape_;
ShapeVector min_shape = min_shape_;
ShapeVector max_shape = max_shape_;
auto output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape, min_shape, max_shape));
std::shared_ptr<mindspore::abstract::AbstractTensor> output;
if (is_dynamic) {
auto max_shape_ = ele_shape;
auto min_shape_ = ele_shape;
auto out_shape_ = ele_shape;
(void)max_shape_.insert(max_shape_.begin(), kMaxElement);
(void)min_shape_.insert(min_shape_.begin(), 1);
(void)out_shape_.insert(out_shape_.begin(), -1);
ShapeVector out_shape = out_shape_;
ShapeVector min_shape = min_shape_;
ShapeVector max_shape = max_shape_;
output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape, min_shape, max_shape));
} else {
auto out_shape_ = ele_shape;
(void)out_shape_.insert(out_shape_.begin(), size);
ShapeVector out_shape = out_shape_;
output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape));
}
return output;
}
} // namespace abstract
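
The net effect on shape inference: a dynamic array reports an unknown leading dimension (-1) with [1, kMaxElement] min/max hints, while a static array reports exactly size elements. A Python sketch of the rule (the kMaxElement value here is a placeholder):

K_MAX_ELEMENT = 64  # placeholder; the real kMaxElement is defined on the C++ side

def infer_stack_shape(ele_shape, is_dynamic, size):
    if is_dynamic:
        # Unknown leading dim, plus min/max shapes for downstream propagation.
        return [-1] + ele_shape, [1] + ele_shape, [K_MAX_ELEMENT] + ele_shape
    # Static: the exact output shape, no min/max hints needed.
    return [size] + ele_shape, None, None

print(infer_stack_shape([2, 3], True, 0))    # ([-1, 2, 3], [1, 2, 3], [64, 2, 3])
print(infer_stack_shape([2, 3], False, 12))  # ([12, 2, 3], None, None)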

View File

@@ -66,7 +66,7 @@ class TensorArray(Cell):
self.tensor_array_read = ta.TensorArrayRead(dtype, element_shape)
self.tensor_array_close = ta.TensorArrayClose()
self.tensor_array_clear = ta.TensorArrayClear()
self.tensor_array_stack = ta.TensorArrayStack(dtype, element_shape)
self.tensor_array_stack = ta.TensorArrayStack(dtype, element_shape, dynamic_size, size)
self.tensor_array_size = ta.TensorArraySize()
def write(self, index, value):

View File

@@ -250,12 +250,13 @@ class TensorArrayStack(Primitive):
[1 2]
"""
@prim_attr_register
def __init__(self, dtype, element_shape):
def __init__(self, dtype, element_shape, dynamic_size, size):
"""Initialize TensorArrayStack"""
self.init_prim_io_names(inputs=[''], outputs=['output'])
self.add_prim_attr('dtype', dtype)
self.add_prim_attr('element_shape', element_shape)
self.add_prim_attr('is_dynamic_shape', True)
self.add_prim_attr('is_dynamic_shape', dynamic_size)
self.add_prim_attr('size', size)
self.add_prim_attr('side_effect_mem', True)
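
For reference, the primitive now takes the flag and the size at construction and records them as attributes; a hedged instantiation sketch (assumes TensorArrayStack is imported from its ops module):

# Static-size stack: records is_dynamic_shape=False and size=12 as attrs.
stack_op = TensorArrayStack(mindspore.int64, (), dynamic_size=False, size=12)
# Dynamic (previous behaviour): leading output dim stays unknown (-1).
stack_op_dyn = TensorArrayStack(mindspore.int64, (), dynamic_size=True, size=0)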

View File

@@ -21,9 +21,9 @@ import mindspore.nn as nn
from mindspore import Tensor
class TensorArrayNet(nn.Cell):
def __init__(self, dtype, element_shape):
def __init__(self, dtype, element_shape, is_dynamic_shape=True, size=0):
super(TensorArrayNet, self).__init__()
self.ta = nn.TensorArray(dtype, element_shape)
self.ta = nn.TensorArray(dtype, element_shape, is_dynamic_shape, size)
def construct(self, index, value):
self.ta.write(index, value)
@@ -92,3 +92,22 @@ def test_tensorarray():
assert np.allclose(v.asnumpy(), expect_s[1])
assert np.allclose(s.asnumpy(), expect_s)
ta.close()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_static_tensorarray():
"""
Feature: TensorArray CPU TEST.
Description: Test the static tensorarray.
Expectation: success.
"""
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
index = Tensor(0, mindspore.int64)
value = Tensor(5, mindspore.int64)
ta = TensorArrayNet(dtype=mindspore.int64, element_shape=(), is_dynamic_shape=False, size=12)
v, s = ta(index, value)
expect_v = 5
expect_s = [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
assert np.allclose(s.asnumpy(), expect_s)
assert np.allclose(v.asnumpy(), expect_v)

View File

@@ -21,9 +21,9 @@ import mindspore.nn as nn
from mindspore import Tensor
class TensorArrayNet(nn.Cell):
def __init__(self, dtype, element_shape):
def __init__(self, dtype, element_shape, is_dynamic_shape=True, size=0):
super(TensorArrayNet, self).__init__()
self.ta = nn.TensorArray(dtype, element_shape)
self.ta = nn.TensorArray(dtype, element_shape, is_dynamic_shape, size)
def construct(self, index, value):
for i in range(2):
@@ -59,43 +59,62 @@ def test_tensorarray():
assert np.allclose(v.asnumpy(), expect_v)
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
ta = nn.TensorArray(mindspore.int64, ())
tb = nn.TensorArray(mindspore.int64, ())
for i in range(5):
ta.write(i, 99)
v = ta.read(0)
s = ta.stack()
tb.write(i, 99)
v = tb.read(0)
s = tb.stack()
expect_v = 99
expect_s = [99, 99, 99, 99, 99]
assert np.allclose(s.asnumpy(), expect_s)
assert np.allclose(v.asnumpy(), expect_v)
ta_size = ta.size()
assert np.allclose(ta_size.asnumpy(), 5)
ta.clear()
ta_size = ta.size()
assert np.allclose(ta_size.asnumpy(), 0)
ta.write(0, 88)
v = ta.read(0)
s = ta.stack()
ta.close()
tb_size = tb.size()
assert np.allclose(tb_size.asnumpy(), 5)
tb.clear()
tb_size = tb.size()
assert np.allclose(tb_size.asnumpy(), 0)
tb.write(0, 88)
v = tb.read(0)
s = tb.stack()
tb.close()
expect_v = 88
expect_s = [88]
assert np.allclose(s.asnumpy(), expect_s)
assert np.allclose(v.asnumpy(), expect_v)
ta = nn.TensorArray(mindspore.float32, ())
ta.write(5, 1.)
s = ta.stack()
tc = nn.TensorArray(mindspore.float32, ())
tc.write(5, 1.)
s = tc.stack()
expect_s = [0., 0., 0., 0., 0., 1.]
assert np.allclose(s.asnumpy(), expect_s)
ta.write(2, 1.)
s = ta.stack()
tc.write(2, 1.)
s = tc.stack()
expect_s = [0., 0., 1., 0., 0., 1.]
assert np.allclose(s.asnumpy(), expect_s)
ta.close()
ta = nn.TensorArray(mindspore.bool_, ())
ta.write(1, Tensor(True, mindspore.bool_))
s = ta.stack()
v = ta.read(1)
tc.close()
td = nn.TensorArray(mindspore.bool_, ())
td.write(1, Tensor(True, mindspore.bool_))
s = td.stack()
v = td.read(1)
expect_s = [False, True]
assert np.allclose(v.asnumpy(), expect_s[1])
assert np.allclose(s.asnumpy(), expect_s)
ta.close()
td.close()
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_static_tensorarray():
"""
Feature: TensorArray GPU TEST.
Description: Test the static tensorarray.
Expectation: success.
"""
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
index = Tensor(0, mindspore.int64)
value = Tensor(5, mindspore.int64)
ta = TensorArrayNet(dtype=mindspore.int64, element_shape=(), is_dynamic_shape=False, size=12)
v, s = ta(index, value)
expect_v = 24
expect_s = [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 0]
assert np.allclose(s.asnumpy(), expect_s)
assert np.allclose(v.asnumpy(), expect_v)