!29609 update tensorarray stack for static size

Merge pull request !29609 from VectorSL/static_tensorarray_stack

Commit 705134e01a

TensorArrayStack gains a static-size mode: the primitive now carries is_dynamic_shape and size attributes, the CPU and GPU stack kernels size and zero-initialize the output buffer from `size` when the array is static, shape inference returns a fully known output shape in that case, and the handle reads in the GPU kernels switch from cudaMemcpyAsync to the synchronous cudaMemcpy.
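For orientation, a minimal usage sketch of the static mode this PR introduces, modeled on the tests at the end of the diff (same positional argument order as the tests use for nn.TensorArray); illustrative only, not part of the commit:

```python
import mindspore
import mindspore.nn as nn
from mindspore import context

context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")

# Static array: is_dynamic_shape=False, fixed capacity of 12 scalar slots.
ta = nn.TensorArray(mindspore.int64, (), False, 12)
ta.write(0, 5)
s = ta.stack()  # shape (12,): the written value, then zeros for unwritten slots
ta.close()
```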
@@ -25,15 +25,18 @@ namespace kernel {
 using mindspore::device::TensorArrayMgr;
 using mindspore::device::TensorArrayPtr;
 TensorArrayStackCpuKernelMod::TensorArrayStackCpuKernelMod()
-    : handle_(0), value_size_(0), ele_size_(0), type_(nullptr) {
+    : handle_(0), value_size_(0), ele_size_(0), type_(nullptr), is_dynamic_(true) {
   ResetResource();
 }
 
 void TensorArrayStackCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_node_ = kernel_node;
   kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
   auto shape = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "element_shape");
   auto max_element = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "max_element");
+  is_dynamic_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "is_dynamic_shape");
+  auto size = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "size");
   for (auto i : shape) {
     shapes_.push_back(LongToSize(i));
   }
@@ -42,7 +45,11 @@ void TensorArrayStackCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   for (auto i : shapes_) {
     ele_size_ *= i;
   }
-  value_size_ = ele_size_ * LongToSize(max_element);
+  if (is_dynamic_) {
+    value_size_ = ele_size_ * LongToSize(max_element);
+  } else {
+    value_size_ = ele_size_ * LongToSize(size);
+  }
   output_size_list_.push_back(value_size_);
   input_size_list_.push_back(sizeof(int64_t));
 }
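The sizing rule above, restated as a small Python sketch (names are illustrative; per the elided context, ele_size_ starts at the element type's byte width before the shape dimensions are multiplied in):

```python
def stack_output_bytes(element_shape, type_bytes, is_dynamic, size, max_element):
    """Mirror of value_size_: a dynamic array reserves max_element entries,
    a static one exactly `size` entries."""
    ele_bytes = type_bytes
    for dim in element_shape:
        ele_bytes *= dim
    return ele_bytes * (max_element if is_dynamic else size)

# int64 scalars in a static array of 12 slots -> 8 * 12 = 96 bytes
assert stack_output_bytes((), 8, False, 12, 64) == 96
```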
@@ -61,6 +68,7 @@ void TensorArrayStackCpuKernelMod::ResetResource() noexcept {
   handle_ = 0;
   value_size_ = 0;
   ele_size_ = 0;
+  is_dynamic_ = true;
   shapes_.clear();
   input_size_list_.clear();
   output_size_list_.clear();
@@ -73,6 +81,14 @@ bool TensorArrayStackCpuKernelMod::Launch(const std::vector<AddressPtr> &inputs,
   auto out_value = GetDeviceAddress<unsigned char>(outputs, 0);
   MS_EXCEPTION_IF_NULL(out_value);
   MS_EXCEPTION_IF_NULL(handle_addr);
+
+  // Set out_value to zeros when the TensorArray has a static size.
+  if (!is_dynamic_) {
+    auto ret = memset_s(out_value, outputs[0]->size, 0, value_size_);
+    if (ret != EOK) {
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memset failed, errorno(" << ret << ")";
+    }
+  }
   handle_ = handle_addr[0];
   TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle_);
   MS_EXCEPTION_IF_NULL(tensors_);
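The memset is what makes unwritten slots observable as zeros: a static stack always returns all `size` slots, so the buffer is cleared before the per-element copies. A NumPy emulation of the resulting behavior (illustrative; it matches the CPU test added at the bottom of this diff):

```python
import numpy as np

def stack_static(written, size, element_shape, dtype):
    out = np.zeros((size, *element_shape), dtype=dtype)  # the memset_s step
    for idx, value in written.items():
        out[idx] = value                                 # the per-element memcpy_s
    return out

# Write 5 at index 0 into a static array of 12 scalars:
assert stack_static({0: 5}, 12, (), np.int64).tolist() == [5] + [0] * 11
```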
@@ -85,10 +101,12 @@ bool TensorArrayStackCpuKernelMod::Launch(const std::vector<AddressPtr> &inputs,
     MS_EXCEPTION_IF_NULL(src_addr);
     auto ret = memcpy_s(out_value + ele_size_ * i, out_ele_size, src_addr, ele_size_);
     if (ret != EOK) {
-      MS_LOG(EXCEPTION) << "Memcpy failed, errorno(" << ret << ")";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memcpy failed, errorno(" << ret << ")";
     }
   }
-  PostExecute();
+  if (is_dynamic_) {
+    PostExecute();
+  }
   return true;
 }
 } // namespace kernel
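PostExecute() itself is not shown in this diff; the new guard suggests it only propagates the runtime element count into the output shape of a dynamic array, a step a static output with a fixed, compile-time-known shape does not need.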
@@ -45,6 +45,7 @@ class TensorArrayStackCpuKernelMod : public NativeCpuKernelMod {
   size_t ele_size_;
   std::vector<size_t> shapes_;
   TypePtr type_;
+  bool is_dynamic_;
 };
 
 MS_REG_CPU_KERNEL(TensorArrayStack, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
@@ -42,9 +42,7 @@ bool TensorArrayClearKernelMod::Launch(const std::vector<AddressPtr> &inputs, co
   auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0);
   MS_ERROR_IF_NULL(handle_addr);
   int64_t handle = 0;
-  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                             cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
-                                             reinterpret_cast<cudaStream_t>(stream)),
+  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
                              "Get handle to host failed");
   TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle);
   MS_ERROR_IF_NULL(tensors_);
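The same async-to-synchronous switch for reading the handle recurs in the Close, Read, Write, and Stack kernels below. A plausible reason: the host uses `handle` immediately after the call, but cudaMemcpyAsync into pageable host memory is not guaranteed to have completed until the stream is synchronized, whereas the plain cudaMemcpy blocks until the value has actually landed on the host.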
@@ -42,9 +42,7 @@ bool TensorArrayCloseKernelMod::Launch(const std::vector<AddressPtr> &inputs, co
   auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0);
   MS_ERROR_IF_NULL(handle_addr);
   int64_t handle = 0;
-  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                             cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
-                                             reinterpret_cast<cudaStream_t>(stream)),
+  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
                              "Get handle to host failed");
   GPUTensorArrayPtr tensors_ =
     std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle));
@@ -57,9 +57,7 @@ bool TensorArrayReadKernelMod::Launch(const std::vector<AddressPtr> &inputs, con
                                            reinterpret_cast<cudaStream_t>(stream)),
                              "Get index to host failed");
   int64_t handle = 0;
-  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                             cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
-                                             reinterpret_cast<cudaStream_t>(stream)),
+  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
                              "Get handle to host failed");
   TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle);
   MS_ERROR_IF_NULL(tensors_);
@@ -27,7 +27,7 @@ namespace kernel {
 using mindspore::device::TensorArrayMgr;
 using mindspore::device::TensorArrayPtr;
 TensorArrayStackKernelMod::TensorArrayStackKernelMod()
-    : handle_(0), value_size_(0), ele_size_(0), stream_ptr_(nullptr), type_(nullptr) {
+    : handle_(0), value_size_(0), ele_size_(0), stream_ptr_(nullptr), type_(nullptr), is_dynamic_(true) {
   ResetResource();
 }
@@ -36,6 +36,8 @@ bool TensorArrayStackKernelMod::Init(const CNodePtr &kernel_node) {
   kernel_node_ = kernel_node;
   auto shape = GetAttr<std::vector<int64_t>>(kernel_node, "element_shape");
   auto max_element = GetAttr<int64_t>(kernel_node, "max_element");
+  is_dynamic_ = GetAttr<bool>(kernel_node, "is_dynamic_shape");
+  auto size = GetAttr<int64_t>(kernel_node, "size");
   for (auto i : shape) {
     shapes_.push_back(LongToSize(i));
   }
@@ -44,7 +46,14 @@ bool TensorArrayStackKernelMod::Init(const CNodePtr &kernel_node) {
   for (auto i : shapes_) {
     ele_size_ *= i;
   }
-  value_size_ = ele_size_ * LongToSize(max_element);
+  if (is_dynamic_) {
+    value_size_ = ele_size_ * LongToSize(max_element);
+  } else {
+    if (size <= 0) {
+      MS_LOG(EXCEPTION) << "Size should be larger than 0 when is_dynamic_shape = false, but got " << size;
+    }
+    value_size_ = ele_size_ * LongToSize(size);
+  }
   InitSizeLists();
   return true;
 }
@@ -84,9 +93,14 @@ bool TensorArrayStackKernelMod::Launch(const std::vector<AddressPtr> &inputs, co
   auto out_value = GetDeviceAddress<unsigned char>(outputs, 0);
   MS_ERROR_IF_NULL(out_value);
   MS_ERROR_IF_NULL(handle_addr);
-  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                             cudaMemcpyAsync(&handle_, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
-                                             reinterpret_cast<cudaStream_t>(stream_ptr)),
+
+  // Set out_value to zeros when the TensorArray has a static size.
+  if (!is_dynamic_) {
+    CHECK_CUDA_RET_WITH_EXCEPT(
+      kernel_node_, cudaMemsetAsync(out_value, 0, outputs[0]->size, reinterpret_cast<cudaStream_t>(stream_ptr)),
+      "Cudamemset output value failed");
+  }
+  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle_, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
                              "Get handle to host failed");
   TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle_);
   MS_ERROR_IF_NULL(tensors_);
@@ -46,6 +46,7 @@ class TensorArrayStackKernelMod : public NativeGpuKernelMod {
   void *stream_ptr_;
   std::vector<size_t> shapes_;
   TypePtr type_;
+  bool is_dynamic_;
 };
 
 MS_REG_GPU_KERNEL(TensorArrayStack, TensorArrayStackKernelMod)
@@ -60,9 +60,7 @@ bool TensorArrayWriteKernelMod::Launch(const std::vector<AddressPtr> &inputs, co
                                            reinterpret_cast<cudaStream_t>(stream)),
                              "Get index failed");
   int64_t handle = 0;
-  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                             cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
-                                             reinterpret_cast<cudaStream_t>(stream)),
+  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
                              "Get handle to host failed");
   GPUTensorArrayPtr tensors_ =
     std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle));
@@ -42,19 +42,37 @@ AbstractBasePtr InferImplTensorArrayStack(const AnalysisEnginePtr &, const Primi
   if (attr_dtype == nullptr) {
     MS_LOG(EXCEPTION) << "No attribute [dtype] in " << op_name;
   }
+  auto attr_is_dynamic = primitive->GetAttr("is_dynamic_shape");
+  if (attr_is_dynamic == nullptr) {
+    MS_LOG(EXCEPTION) << "No attribute [is_dynamic_shape] in " << op_name;
+  }
+  auto attr_size = primitive->GetAttr("size");
+  if (attr_size == nullptr) {
+    MS_LOG(EXCEPTION) << "No attribute [size] in " << op_name;
+  }
+  auto is_dynamic = GetValue<bool>(attr_is_dynamic);
+  auto size = GetValue<int64_t>(attr_size);
   auto ele_shape = GetValue<std::vector<int64_t>>(attr_shape);
   auto type = GetValue<TypePtr>(attr_dtype);
   primitive->set_attr("max_element", MakeValue(kMaxElement));
-  auto max_shape_ = ele_shape;
-  auto min_shape_ = ele_shape;
-  auto out_shape_ = ele_shape;
-  (void)max_shape_.insert(max_shape_.begin(), kMaxElement);
-  (void)min_shape_.insert(min_shape_.begin(), 1);
-  (void)out_shape_.insert(out_shape_.begin(), -1);
-  ShapeVector out_shape = out_shape_;
-  ShapeVector min_shape = min_shape_;
-  ShapeVector max_shape = max_shape_;
-  auto output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape, min_shape, max_shape));
+  std::shared_ptr<mindspore::abstract::AbstractTensor> output;
+  if (is_dynamic) {
+    auto max_shape_ = ele_shape;
+    auto min_shape_ = ele_shape;
+    auto out_shape_ = ele_shape;
+    (void)max_shape_.insert(max_shape_.begin(), kMaxElement);
+    (void)min_shape_.insert(min_shape_.begin(), 1);
+    (void)out_shape_.insert(out_shape_.begin(), -1);
+    ShapeVector out_shape = out_shape_;
+    ShapeVector min_shape = min_shape_;
+    ShapeVector max_shape = max_shape_;
+    output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape, min_shape, max_shape));
+  } else {
+    auto out_shape_ = ele_shape;
+    (void)out_shape_.insert(out_shape_.begin(), size);
+    ShapeVector out_shape = out_shape_;
+    output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape));
+  }
   return output;
 }
 } // namespace abstract
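The two inference branches, restated in Python (K_MAX_ELEMENT stands in for the framework's kMaxElement constant; illustrative only). A dynamic stack reports an unknown leading dimension plus min/max bounds; a static stack reports a fully known shape:

```python
K_MAX_ELEMENT = 64  # placeholder for the framework constant kMaxElement

def infer_stack_shape(element_shape, is_dynamic, size):
    if is_dynamic:
        out_shape = [-1] + list(element_shape)             # unknown leading dim
        min_shape = [1] + list(element_shape)
        max_shape = [K_MAX_ELEMENT] + list(element_shape)
        return out_shape, min_shape, max_shape
    return [size] + list(element_shape), None, None        # fully static shape

assert infer_stack_shape((2, 3), False, 12)[0] == [12, 2, 3]
```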
@@ -66,7 +66,7 @@ class TensorArray(Cell):
         self.tensor_array_read = ta.TensorArrayRead(dtype, element_shape)
         self.tensor_array_close = ta.TensorArrayClose()
         self.tensor_array_clear = ta.TensorArrayClear()
-        self.tensor_array_stack = ta.TensorArrayStack(dtype, element_shape)
+        self.tensor_array_stack = ta.TensorArrayStack(dtype, element_shape, dynamic_size, size)
         self.tensor_array_size = ta.TensorArraySize()
 
     def write(self, index, value):
@@ -250,12 +250,13 @@ class TensorArrayStack(Primitive):
         [1 2]
     """
     @prim_attr_register
-    def __init__(self, dtype, element_shape):
+    def __init__(self, dtype, element_shape, dynamic_size, size):
         """Initialize TensorArrayStack"""
         self.init_prim_io_names(inputs=[''], outputs=['output'])
         self.add_prim_attr('dtype', dtype)
         self.add_prim_attr('element_shape', element_shape)
-        self.add_prim_attr('is_dynamic_shape', True)
+        self.add_prim_attr('is_dynamic_shape', dynamic_size)
+        self.add_prim_attr('size', size)
         self.add_prim_attr('side_effect_mem', True)
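A hypothetical direct construction of the updated primitive (the import path is an assumption; in the nn wrapper above the module is aliased as `ta`):

```python
import mindspore
# Assumed location of the TensorArray primitives; adjust to the actual module.
from mindspore.ops.operations import _tensor_array as ta

# dynamic_size=False is recorded as the is_dynamic_shape attribute, and the
# concrete size becomes the `size` attribute read by the kernels above.
stack_op = ta.TensorArrayStack(mindspore.int64, (), dynamic_size=False, size=12)
```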
@@ -21,9 +21,9 @@ import mindspore.nn as nn
 from mindspore import Tensor
 
 class TensorArrayNet(nn.Cell):
-    def __init__(self, dtype, element_shape):
+    def __init__(self, dtype, element_shape, is_dynamic_shape=True, size=0):
         super(TensorArrayNet, self).__init__()
-        self.ta = nn.TensorArray(dtype, element_shape)
+        self.ta = nn.TensorArray(dtype, element_shape, is_dynamic_shape, size)
 
     def construct(self, index, value):
         self.ta.write(index, value)
@@ -92,3 +92,22 @@ def test_tensorarray():
     assert np.allclose(v.asnumpy(), expect_s[1])
     assert np.allclose(s.asnumpy(), expect_s)
     ta.close()
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_static_tensorarray():
+    """
+    Feature: TensorArray cpu TEST.
+    Description: Test the static tensorarray.
+    Expectation: success.
+    """
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+    index = Tensor(0, mindspore.int64)
+    value = Tensor(5, mindspore.int64)
+    ta = TensorArrayNet(dtype=mindspore.int64, element_shape=(), is_dynamic_shape=False, size=12)
+    v, s = ta(index, value)
+    expect_v = 5
+    expect_s = [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+    assert np.allclose(s.asnumpy(), expect_s)
+    assert np.allclose(v.asnumpy(), expect_v)
@@ -21,9 +21,9 @@ import mindspore.nn as nn
 from mindspore import Tensor
 
 class TensorArrayNet(nn.Cell):
-    def __init__(self, dtype, element_shape):
+    def __init__(self, dtype, element_shape, is_dynamic_shape=True, size=0):
         super(TensorArrayNet, self).__init__()
-        self.ta = nn.TensorArray(dtype, element_shape)
+        self.ta = nn.TensorArray(dtype, element_shape, is_dynamic_shape, size)
 
     def construct(self, index, value):
         for i in range(2):
@@ -59,43 +59,62 @@ def test_tensorarray():
     assert np.allclose(v.asnumpy(), expect_v)
 
     context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
-    ta = nn.TensorArray(mindspore.int64, ())
+    tb = nn.TensorArray(mindspore.int64, ())
     for i in range(5):
-        ta.write(i, 99)
-    v = ta.read(0)
-    s = ta.stack()
+        tb.write(i, 99)
+    v = tb.read(0)
+    s = tb.stack()
     expect_v = 99
     expect_s = [99, 99, 99, 99, 99]
     assert np.allclose(s.asnumpy(), expect_s)
     assert np.allclose(v.asnumpy(), expect_v)
-    ta_size = ta.size()
-    assert np.allclose(ta_size.asnumpy(), 5)
-    ta.clear()
-    ta_size = ta.size()
-    assert np.allclose(ta_size.asnumpy(), 0)
-    ta.write(0, 88)
-    v = ta.read(0)
-    s = ta.stack()
-    ta.close()
+    tb_size = tb.size()
+    assert np.allclose(tb_size.asnumpy(), 5)
+    tb.clear()
+    tb_size = tb.size()
+    assert np.allclose(tb_size.asnumpy(), 0)
+    tb.write(0, 88)
+    v = tb.read(0)
+    s = tb.stack()
+    tb.close()
     expect_v = 88
     expect_s = [88]
     assert np.allclose(s.asnumpy(), expect_s)
     assert np.allclose(v.asnumpy(), expect_v)
-    ta = nn.TensorArray(mindspore.float32, ())
-    ta.write(5, 1.)
-    s = ta.stack()
+    tc = nn.TensorArray(mindspore.float32, ())
+    tc.write(5, 1.)
+    s = tc.stack()
     expect_s = [0., 0., 0., 0., 0., 1.]
     assert np.allclose(s.asnumpy(), expect_s)
-    ta.write(2, 1.)
-    s = ta.stack()
+    tc.write(2, 1.)
+    s = tc.stack()
     expect_s = [0., 0., 1., 0., 0., 1.]
    assert np.allclose(s.asnumpy(), expect_s)
-    ta.close()
-    ta = nn.TensorArray(mindspore.bool_, ())
-    ta.write(1, Tensor(True, mindspore.bool_))
-    s = ta.stack()
-    v = ta.read(1)
+    tc.close()
+    td = nn.TensorArray(mindspore.bool_, ())
+    td.write(1, Tensor(True, mindspore.bool_))
+    s = td.stack()
+    v = td.read(1)
     expect_s = [False, True]
     assert np.allclose(v.asnumpy(), expect_s[1])
     assert np.allclose(s.asnumpy(), expect_s)
-    ta.close()
+    td.close()
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_static_tensorarray():
+    """
+    Feature: TensorArray gpu TEST.
+    Description: Test the static tensorarray.
+    Expectation: success.
+    """
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    index = Tensor(0, mindspore.int64)
+    value = Tensor(5, mindspore.int64)
+    ta = TensorArrayNet(dtype=mindspore.int64, element_shape=(), is_dynamic_shape=False, size=12)
+    v, s = ta(index, value)
+    expect_v = 24
+    expect_s = [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 0]
+    assert np.allclose(s.asnumpy(), expect_s)
+    assert np.allclose(v.asnumpy(), expect_v)