From 4a1641bfd2be918867cd4de7dd1467e46c0e48ca Mon Sep 17 00:00:00 2001
From: lilei
Date: Sat, 25 Jul 2020 17:13:15 +0800
Subject: [PATCH] init random normal

---
 .../cpu/random_op_cpu_kernel.h            | 156 ++++++++++++++++++
 mindspore/ccsrc/pipeline/jit/init.cc      |   2 +
 mindspore/ccsrc/pipeline/jit/pipeline.cc  |  46 +++++
 mindspore/ccsrc/pipeline/jit/pipeline.h   |   4 +
 mindspore/common/initializer.py           |  12 +-
 5 files changed, 217 insertions(+), 3 deletions(-)
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/random_op_cpu_kernel.h

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/random_op_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_op_cpu_kernel.h
new file mode 100644
index 00000000000..a5a7e3940eb
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_op_cpu_kernel.h
@@ -0,0 +1,156 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_RANDOM_OP_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_RANDOM_OP_CPU_KERNEL_H_
+#include <securec.h>
+#include <math.h>
+#include <array>
+#include <iostream>
+
+namespace mindspore {
+namespace kernel {
+static constexpr int gResultNum = 4;
+class PhiloxGenerator {
+ public:
+  explicit PhiloxGenerator(uint64_t seed) {
+    key_var_[0] = static_cast<uint32_t>(seed);
+    key_var_[1] = static_cast<uint32_t>(seed >> 32);
+    counter_[0] = 0;
+    counter_[1] = 0;
+    counter_[2] = static_cast<uint32_t>(seed);
+    counter_[3] = static_cast<uint32_t>(seed >> 32);
+  }
+
+  void Jump() {
+    if ((++counter_[0] == 0) && (++counter_[1] == 0) && (++counter_[2] == 0)) {
+      ++counter_[3];
+    }
+  }
+
+  void JumpStep(uint64_t step) {
+    uint64_t min_counter, max_counter;
+    min_counter = static_cast<uint64_t>(counter_[1]);
+    min_counter = min_counter << 32;
+    min_counter += counter_[0];
+
+    max_counter = static_cast<uint64_t>(counter_[3]);
+    max_counter = max_counter << 32;
+    max_counter += counter_[2];
+    min_counter += step;
+    if (min_counter < step) {
+      max_counter++;
+    }
+    counter_[0] = static_cast<uint32_t>(min_counter);
+    counter_[1] = static_cast<uint32_t>(min_counter >> 32);
+    counter_[2] = static_cast<uint32_t>(max_counter);
+    counter_[3] = static_cast<uint32_t>(max_counter >> 32);
+  }
+
+  static std::array<uint32_t, gResultNum> Compute(const std::array<uint32_t, gResultNum> &counter_,
+                                                  const std::array<uint32_t, 2> &key_var_) {
+    std::array<uint32_t, gResultNum> min_value;
+    std::array<uint32_t, gResultNum> max_value;
+    for (uint32_t i = 0; i < gResultNum; i += 2) {
+      uint64_t temp = static_cast<uint64_t>(keyConstant[i]) * counter_[i];
+      min_value[i] = static_cast<uint32_t>(temp);
+      max_value[i] = static_cast<uint32_t>(temp >> 32);
+    }
+    std::array<uint32_t, gResultNum> result;
+    result[0] = (max_value[2] ^ counter_[1] ^ key_var_[0]);
+    result[1] = min_value[2];
+    result[2] = (max_value[0] ^ counter_[3] ^ key_var_[1]);
+    result[3] = min_value[0];
+    return result;
+  }
+
+  std::array<uint32_t, gResultNum> operator()() {
+    for (uint32_t i = 0; i < 10; i++) {
+      counter_ = Compute(counter_, key_var_);
+      key_var_[0] += keyConstant[1];
+      key_var_[1] += keyConstant[3];
+    }
+    Jump();
+    return counter_;
+  }
+
+ private:
+  std::array<uint32_t, gResultNum> counter_;
+  std::array<uint32_t, 2> key_var_;
+  static constexpr std::array<uint32_t, gResultNum> keyConstant = {0xD2511F53, 0x9E3779B9, 0xCD9E8D57, 0xBB67AE85};
+};
+
+template <typename T, typename vartype>
+class NormalDistribution;
+template <typename T>
+class NormalDistribution<T, float> {
+ public:
+  std::array<float, gResultNum> result;
+
+  bool UInt32ToFloat32(uint32_t input, float *output) {
+    const uint32_t temp_value = input & 0x7fffffu;
+    const uint32_t exp = static_cast<uint32_t>(127);
+    const uint32_t val = (exp << 23) | temp_value;
+    errno_t mem_ret;
+    mem_ret = memcpy_s(output, sizeof(val), &val, sizeof(val));
+    if (mem_ret != EOK) {
+      std::cout << "UInt32ToFloat32 memcpy failed" << std::endl;
+      return false;
+    }
+    *output = *output - 1.0f;
+    return true;
+  }
+
+  std::array<float, gResultNum> operator()(T *generator) {
+    std::array<uint32_t, gResultNum> generate_value = (*generator)();
+    const float PI = 3.14159265358979f;
+    for (uint32_t i = 0; i < gResultNum; i += 2) {
+      float temp[2];
+      UInt32ToFloat32(generate_value[i], &temp[0]);
+      UInt32ToFloat32(generate_value[i + 1], &temp[1]);
+      const float threshold = 1.0e-7f;
+      temp[0] = temp[0] < threshold ? threshold : temp[0];
+      temp[1] = temp[1] < threshold ? threshold : temp[1];
+      result[i] = sqrt(-2.0 * log(temp[0])) * sin(2 * PI * temp[1]);
+      result[i + 1] = sqrt(-2.0 * log(temp[0])) * cos(2 * PI * temp[1]);
+    }
+    return result;
+  }
+};
+
+template <typename T>
+bool FillRandoms(PhiloxGenerator generator, float *output, int64_t vet_size, int64_t thread_Id) {
+  T distribution;
+  errno_t mem_ret;
+  generator.JumpStep((vet_size * thread_Id + gResultNum - 1) / gResultNum);
+  for (int64_t i = 0; i < vet_size; i += gResultNum) {
+    auto outputResult = distribution(&generator);
+    if (vet_size - i >= gResultNum) {
+      mem_ret = memcpy_s(&output[i], gResultNum * sizeof(float), &outputResult[0], gResultNum * sizeof(float));
+    } else {
+      mem_ret = memcpy_s(&output[i], (vet_size - i) * sizeof(float), &outputResult[0], (vet_size - i) * sizeof(float));
+    }
+    if (mem_ret != EOK) {
+      std::cout << "FillRandoms memcpy failed" << std::endl;
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace kernel
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_RANDOM_OP_CPU_KERNEL_H_
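Note: the header above pairs a Philox-style counter-based generator with the Box-Muller transform: each Philox draw yields four 32-bit words, UInt32ToFloat32 turns each word into a uniform in [0, 1) by forcing the float exponent to 127, and each uniform pair becomes two independent N(0, 1) samples. The following NumPy sketch (illustrative only, not part of the patch; the input bits stand in for Philox output) reproduces that bit manipulation and transform:

import numpy as np

def bits_to_normals(bits):
    # Mirror UInt32ToFloat32: keep 23 mantissa bits, force exponent 127 so the
    # float lies in [1, 2), then subtract 1.0 to land in [0, 1).
    u = ((bits & np.uint32(0x7FFFFF)) | np.uint32(127 << 23)).view(np.float32) - 1.0
    u = np.maximum(u, 1.0e-7)  # same clamp as the kernel, keeps log() finite
    u1, u2 = u[0::2], u[1::2]
    radius = np.sqrt(-2.0 * np.log(u1))
    # Box-Muller: each uniform pair yields a sin/cos pair of N(0, 1) samples.
    return radius * np.sin(2 * np.pi * u2), radius * np.cos(2 * np.pi * u2)

bits = np.array([0x9E3779B9, 0xBB67AE85, 0xD2511F53, 0xCD9E8D57], dtype=np.uint32)
z_sin, z_cos = bits_to_normals(bits)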
diff --git a/mindspore/ccsrc/pipeline/jit/init.cc b/mindspore/ccsrc/pipeline/jit/init.cc
index 65adebb6e25..c72baadc34c 100644
--- a/mindspore/ccsrc/pipeline/jit/init.cc
+++ b/mindspore/ccsrc/pipeline/jit/init.cc
@@ -94,6 +94,8 @@ PYBIND11_MODULE(_c_expression, m) {
   (void)m.def("init_exec_dataset", &mindspore::pipeline::InitExecDataset, py::arg("queue_name"), py::arg("size"),
               py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"),
               py::arg("phase") = py::str("dataset"), py::arg("need_run") = py::bool_(true), "Init and exec dataset.");
+  (void)m.def("random_normal", &mindspore::pipeline::InitRandomNormal, py::arg("mean"), py::arg("stddev"),
+              py::arg("outshape"), py::arg("seed"), py::arg("outputtensor"), "Init random normal.");
   (void)m.def("_set_dataset_mode_config", &mindspore::ConfigManager::SetDatasetModeConfig,
               "API for set dataset mode.");
   (void)m.def("init_backend", &mindspore::pipeline::InitBackend, "Init Backend.");
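The random_normal binding above maps straight onto InitRandomNormal in pipeline.cc below. Note that InitRandomNormal accepts mean and stddev but does not currently use them: the kernel always emits standard-normal samples and the Python caller applies the scaling (see the initializer.py hunk at the end of this patch). A hedged usage sketch of the binding, assuming a built MindSpore where _c_expression exposes it:

import numpy as np
from mindspore.common.tensor import Tensor
from mindspore._c_expression import random_normal

shape = (2, 3)
out = Tensor(np.zeros(shape, dtype=np.float32))  # caller pre-allocates the output
random_normal(0.0, 1.0, shape, 1234, out)        # fills `out` in place from seed 1234
samples = out.asnumpy()                          # standard-normal; scale/shift in Python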
diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc
index 76e1ad53a5d..898b02b0cca 100644
--- a/mindspore/ccsrc/pipeline/jit/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc
@@ -41,6 +41,7 @@
 #include "pipeline/pynative/pynative_execute.h"
 #include "frontend/optimizer/py_pass_manager.h"
 #include "pybind_api/pybind_patch.h"
+#include "backend/kernel_compiler/cpu/random_op_cpu_kernel.h"
 
 #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
 #include "frontend/parallel/ps/common.h"
@@ -878,6 +879,51 @@ bool InitExecDatasetVm(const std::string &queue_name, int64_t size, int64_t batc
   return true;
 }
 
+bool InitRandomNormal(float mean, float stddev, std::vector<int64_t> out_shape, int64_t seed,
+                      const py::object &output_tensor) {
+  if (out_shape.size() == 0) {
+    std::cout << "output data shape is invalid" << std::endl;
+    return false;
+  }
+  int64_t total_count = 1;
+  for (uint32_t i = 0; i < out_shape.size(); i++) {
+    total_count *= out_shape[i];
+  }
+  uint32_t thread_num = 16;
+  if (total_count <= thread_num) {
+    thread_num = 1;
+  }
+  auto temp = py::cast<std::shared_ptr<mindspore::tensor::Tensor>>(output_tensor);
+  float *start_ptr = reinterpret_cast<float *>(temp->data_c());
+  if (start_ptr == nullptr) {
+    std::cout << "start_ptr is nullptr" << std::endl;
+    return false;
+  }
+  int64_t batchSize = total_count / thread_num;
+  std::vector<std::thread> threads(thread_num);
+  mindspore::kernel::PhiloxGenerator generator = mindspore::kernel::PhiloxGenerator(seed);
+  if (thread_num != 1) {
+    for (uint32_t i = 0; i < thread_num - 1; i++) {
+      float *offset_ptr = start_ptr + batchSize * i;
+      threads[i] = std::thread(
+        mindspore::kernel::FillRandoms<mindspore::kernel::NormalDistribution<mindspore::kernel::PhiloxGenerator, float>>,
+        generator, offset_ptr, batchSize, i);
+    }
+    float *offset_ptr = start_ptr + batchSize * (thread_num - 1);
+    threads[thread_num - 1] = std::thread(
+      mindspore::kernel::FillRandoms<mindspore::kernel::NormalDistribution<mindspore::kernel::PhiloxGenerator, float>>,
+      generator, offset_ptr, total_count - (thread_num - 1) * batchSize, thread_num - 1);
+  } else {
+    threads[0] = std::thread(
+      mindspore::kernel::FillRandoms<mindspore::kernel::NormalDistribution<mindspore::kernel::PhiloxGenerator, float>>,
+      generator, start_ptr, total_count, 0);
+  }
+  for (uint32_t i = 0; i < thread_num; i++) {
+    threads[i].join();
+  }
+  return true;
+}
+
 void ResetOpId() { mindspore::id_generator::reset_id(); }
 
 void InitHccl() {
diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.h b/mindspore/ccsrc/pipeline/jit/pipeline.h
index f4fabd6115b..ce276aa06fc 100644
--- a/mindspore/ccsrc/pipeline/jit/pipeline.h
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.h
@@ -139,6 +139,10 @@ bool InitExecDatasetVm(const std::string &queue_name, int64_t size, int64_t batc
                        const std::vector<TypePtr> &types, const std::vector<std::vector<int64_t>> &shapes,
                        const std::vector<int64_t> &input_indexes, bool need_run);
 
+// Init random normal.
+bool InitRandomNormal(float mean, float stddev, std::vector<int64_t> outshape, int64_t seed,
+                      const py::object &outputTensor);
+
 void ProcessVmArgInner(const py::tuple &args, const ResourcePtr &res, VectorRef *const arg_list);
 
 }  // namespace pipeline
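InitRandomNormal shards the flat output across up to 16 threads: every thread gets total_count / thread_num elements, the last thread also takes the remainder, and JumpStep advances each thread's Philox counter so the streams do not overlap (each Philox draw produces gResultNum = 4 floats). A small Python sketch of that partitioning (illustrative, mirrors the C++ above):

def partition(total_count, thread_num=16):
    if total_count <= thread_num:
        thread_num = 1
    batch = total_count // thread_num
    plan = []
    for i in range(thread_num):
        length = batch if i < thread_num - 1 else total_count - (thread_num - 1) * batch
        # mirrors JumpStep((vet_size * thread_Id + gResultNum - 1) / gResultNum)
        jump = (length * i + 3) // 4
        plan.append((i * batch, length, jump))  # (offset, element count, counter jump)
    return plan

print(partition(100))  # threads 0-14 fill 6 elements each, thread 15 fills the last 10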
diff --git a/mindspore/common/initializer.py b/mindspore/common/initializer.py
index 48a142a23f2..d4ae63977eb 100644
--- a/mindspore/common/initializer.py
+++ b/mindspore/common/initializer.py
@@ -23,6 +23,7 @@
 from mindspore import log as logger
 from . import dtype as mstype
 from .tensor import Tensor
+from .._c_expression import random_normal
 
 _INITIALIZER_ALIAS = dict()
 
@@ -279,9 +280,12 @@ class Normal(Initializer):
         self.sigma = sigma
 
     def _initialize(self, arr):
-        tmp = np.random.normal(0, self.sigma, arr.shape)
-        _assignment(arr, tmp)
-
+        seed = np.random.get_state()[1][0]
+        output_tensor = Tensor(np.zeros(arr.shape, dtype=np.float32))
+        random_normal(0, 1, arr.shape, seed, output_tensor)
+        output_data = output_tensor.asnumpy()
+        output_data *= self.sigma
+        _assignment(arr, output_data)
 
 @_register()
 class TruncatedNormal(Initializer):
@@ -327,6 +331,8 @@ def initializer(init, shape=None, dtype=mstype.float32):
 
     Examples:
         >>> tensor = initializer('ones', [1, 2, 3], mindspore.float32)
+        >>> tensor = initializer(One(), [1, 2, 3], mindspore.float32)
+        >>> tensor = initializer(0, [1, 2, 3], mindspore.float32)
     """
     if not isinstance(init, (Tensor, numbers.Number, str, Initializer)):
         raise TypeError("Unsupported init type '{}'.".format(type(init)))
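With this change, Normal draws its samples through the new C++ Philox/Box-Muller path (the kernel emits standard-normal values, and _initialize scales them by sigma), but the public API is unchanged. A usage sketch, assuming a standard MindSpore build:

import mindspore
from mindspore.common.initializer import initializer, Normal

# Samples come from the C++ kernel seeded by NumPy's global RNG state,
# then are scaled by sigma in Python before assignment.
weights = initializer(Normal(sigma=0.02), [64, 128], mindspore.float32)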