From 89837bcdfdbef80b0da710228a873c84c0c31e12 Mon Sep 17 00:00:00 2001
From: He Wei
Date: Mon, 24 Aug 2020 17:20:27 +0800
Subject: [PATCH] Avoid copy when create Tensor from numpy array

---
 mindspore/ccsrc/pybind_api/ir/tensor_py.cc | 88 ++++++++++++++++++++--
 mindspore/common/parameter.py              |  2 +-
 mindspore/core/ir/tensor.cc                |  8 +-
 mindspore/core/ir/tensor.h                 | 17 ++++-
 4 files changed, 107 insertions(+), 8 deletions(-)

diff --git a/mindspore/ccsrc/pybind_api/ir/tensor_py.cc b/mindspore/ccsrc/pybind_api/ir/tensor_py.cc
index 7596d75c28..7a0f189796 100644
--- a/mindspore/ccsrc/pybind_api/ir/tensor_py.cc
+++ b/mindspore/ccsrc/pybind_api/ir/tensor_py.cc
@@ -117,6 +117,75 @@ static bool IsCContiguous(const py::array &input) {
   return (flags & pybind11::detail::npy_api::NPY_ARRAY_C_CONTIGUOUS_) != 0;
 }
 
+// TensorDataNumpy implements TensorData using numpy array.
+class TensorDataNumpy : public TensorData {
+ public:
+  explicit TensorDataNumpy(const py::array &input) : data_(input) {}
+
+  /// Total number of elements.
+  ssize_t size() const override { return data_.size(); }
+
+  /// Byte size of a single element.
+  ssize_t itemsize() const override { return data_.itemsize(); }
+
+  /// Total number of bytes.
+  ssize_t nbytes() const override { return data_.nbytes(); }
+
+  /// Number of dimensions.
+  ssize_t ndim() const override { return data_.ndim(); }
+
+  /// Data pointer.
+  void *data() override {
+    EnsureDataContiguous();
+    return data_.request(true).ptr;
+  }
+
+  const void *const_data() const override {
+    EnsureDataContiguous();
+    return data_.request(false).ptr;
+  }
+
+  /// Is data equals.
+  bool equals(const TensorData &other) const override {
+    auto ptr = dynamic_cast<const TensorDataNumpy *>(&other);
+    if (ptr == nullptr) {
+      // Not same type, compare data byte by byte.
+      return TensorData::equals(other);
+    }
+    return NumpyEquals(*ptr);
+  }
+
+  bool NumpyEquals(const TensorDataNumpy &other) const {
+    auto all_data_equal = [&other, this]() -> bool {
+      auto np = py::module::import("numpy");
+      auto equal = np.attr("equal")(data_, other.data_);
+      auto all_equal = np.attr("all")(equal);
+      return all_equal.cast<bool>();
+    };
+    return this == &other || data_.is(other.data_) || all_data_equal();
+  }
+
+  /// To string.
+  std::string ToString(const TypeId type, const std::vector<int64_t> &shape) const override {
+    return std::string(py::str(data_));
+  }
+
+  /// Data.
+  py::array data() const { return data_; }
+
+ private:
+  void EnsureDataContiguous() const {
+    if (!IsCContiguous(data_)) {
+      // Call numpy.ascontiguousarray() to convert data to C contiguous if it is not.
+      auto np = py::module::import("numpy");
+      auto convert = np.attr("ascontiguousarray");
+      data_ = convert(data_);
+    }
+  }
+
+  mutable py::array data_;
+};
+
 TensorPtr TensorPy::MakeTensor(const py::array &input, const TypePtr &type_ptr) {
   // Get input buffer info.
   py::buffer_info buf = input.request();
@@ -130,6 +199,13 @@ TensorPtr TensorPy::MakeTensor(const py::array &input, const TypePtr &type_ptr)
   if (data_type == TypeId::kTypeUnknown) {
     data_type = buf_type;
   }
+  // Get tensor shape.
+  std::vector<int64_t> shape(buf.shape.begin(), buf.shape.end());
+  if (data_type == buf_type) {
+    // Make a tensor with shared data with numpy if no type convertion needed.
+    auto tensor_data = std::make_shared<TensorDataNumpy>(input);
+    return std::make_shared<Tensor>(data_type, shape, tensor_data);
+  }
   // Convert input array to C contiguous if need.
   std::unique_ptr<char[]> tmp_buf;
   if (!IsCContiguous(input)) {
@@ -144,8 +220,6 @@ TensorPtr TensorPy::MakeTensor(const py::array &input, const TypePtr &type_ptr)
     PyBuffer_Release(&pybuf);
     buf.ptr = tmp_buf.get();
   }
-  // Get tensor shape.
-  std::vector<int64_t> shape(buf.shape.begin(), buf.shape.end());
   if (data_type == buf_type) {
     // Use memory copy if input data type is the same as the required type.
     return std::make_shared<Tensor>(data_type, shape, buf.ptr, buf.size * buf.itemsize);
@@ -186,12 +260,16 @@ py::tuple TensorPy::GetPyTupleShape(const Tensor &tensor) {
 
 py::array TensorPy::SyncAsNumpy(const Tensor &tensor) {
   tensor.data_sync();
-  auto info = GetPyBufferInfo(tensor);
-  py::object self = py::cast(&tensor);
-  return py::array(py::dtype(info), info.shape, info.strides, info.ptr, self);
+  return AsNumpy(tensor);
 }
 
 py::array TensorPy::AsNumpy(const Tensor &tensor) {
+  auto data_numpy = dynamic_cast<const TensorDataNumpy *>(&tensor.data());
+  if (data_numpy) {
+    // Return internal numpy array if tensor data is implemented base on it.
+    return data_numpy->data();
+  }
+  // Otherwise, create numpy array by buffer protocol.
   auto info = GetPyBufferInfo(tensor);
   py::object self = py::cast(&tensor);
   return py::array(py::dtype(info), info.shape, info.strides, info.ptr, self);
diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py
index ee41fdd165..83ae60e6f5 100644
--- a/mindspore/common/parameter.py
+++ b/mindspore/common/parameter.py
@@ -145,7 +145,7 @@ class Parameter(MetaTensor):
             data = data.to_tensor()
         if isinstance(data, Tensor):
             # make a copy of Tensor to init the parameter
-            return (Tensor, data.asnumpy(),)
+            return (Tensor, data.asnumpy().copy(),)
         if isinstance(data, int):
             return (Tensor, data, mstype.int32)
         if isinstance(data, float):
diff --git a/mindspore/core/ir/tensor.cc b/mindspore/core/ir/tensor.cc
index efffc2ba26..f216493c63 100644
--- a/mindspore/core/ir/tensor.cc
+++ b/mindspore/core/ir/tensor.cc
@@ -198,10 +198,16 @@
     return data_.get();
   }
 
+  const void *const_data() const override {
+    // May return nullptr if data not initialized.
+    return data_.get();
+  }
+
   bool equals(const TensorData &other) const override {
     auto ptr = dynamic_cast<const TensorDataImpl<T> *>(&other);
     if (ptr == nullptr) {
-      return false;
+      // Not same type, compare data byte by byte.
+      return TensorData::equals(other);
     }
     if (ptr == this) {
       return true;
diff --git a/mindspore/core/ir/tensor.h b/mindspore/core/ir/tensor.h
index 752edb50da..69aa3f0650 100644
--- a/mindspore/core/ir/tensor.h
+++ b/mindspore/core/ir/tensor.h
@@ -49,8 +49,23 @@ class TensorData {
   virtual ssize_t ndim() const = 0;
   /// Data pointer.
   virtual void *data() = 0;
+  /// Const Data pointer.
+  virtual const void *const_data() const = 0;
   /// Is data equals.
-  virtual bool equals(const TensorData &other) const = 0;
+  virtual bool equals(const TensorData &other) const {
+    if (this == &other) {
+      return true;
+    }
+    // By default, compare data byte by byte.
+    auto this_data = static_cast<const char *>(const_data());
+    auto other_data = static_cast<const char *>(other.const_data());
+    if (this_data == nullptr || other_data == nullptr) {
+      // null means data not initialized, compare uninitialized data always return false.
+      return false;
+    }
+    return (this_data == other_data) || (ndim() == other.ndim() && nbytes() == other.nbytes() &&
+                                         std::equal(this_data, this_data + nbytes(), other_data));
+  }
   /// To string.
   virtual std::string ToString(const TypeId type, const std::vector<int64_t> &shape) const = 0;
 };