Optimize parameter random initializer

This commit is contained in:
He Wei 2022-08-16 14:46:04 +08:00
parent b456ec7b87
commit d7ca9b9942
17 changed files with 998 additions and 471 deletions

View File

@ -196,6 +196,9 @@ if(CMAKE_SYSTEM_NAME MATCHES "Windows")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF")
endif()
# Set compile flags to ensure float compute consistency.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math")
if(ENABLE_MPI)
add_compile_definitions(ENABLE_MPI)
endif()

View File

@ -20,33 +20,12 @@
#include "utils/log_adapter.h"
#include "utils/convert_utils_base.h"
#include "utils/ms_exception.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace common {
constexpr size_t kDeviceNum = 8;
constexpr size_t kMaxThreadNum = 23;
constexpr size_t kYieldThreshold = 1000;
ThreadPool::ThreadPool() {
size_t process_core_num = std::thread::hardware_concurrency() - 1;
if (process_core_num < 1) {
process_core_num = 1;
}
auto ms_context = MsContext::GetInstance();
auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
if (device_target == kAscendDevice || device_target == kGPUDevice) {
max_thread_num_ = process_core_num / kDeviceNum;
} else {
max_thread_num_ = process_core_num;
}
if (max_thread_num_ < 1) {
max_thread_num_ = 1;
}
if (max_thread_num_ > kMaxThreadNum) {
max_thread_num_ = kMaxThreadNum;
}
}
ThreadPool::ThreadPool() : max_thread_num_(std::thread::hardware_concurrency()) {}
void ThreadPool::SyncRunLoop(const std::shared_ptr<ThreadContext> &context) {
if (context == nullptr) {

View File

@ -0,0 +1,546 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_INCLUDE_COMMON_RANDOM_H_
#define MINDSPORE_CCSRC_INCLUDE_COMMON_RANDOM_H_
#include <cstdint>
#include <cmath>
#include <array>
#include <limits>
#include <random>
#include <vector>
#include <optional>
#include <algorithm>
#include <utility>
#include "include/common/thread_pool.h"
#include "include/common/utils/utils.h"
#include "utils/log_adapter.h"
namespace mindspore::random {
//
// Generate random numbers into a buffer.
//
template <typename T, typename Generator, typename Distribution, typename... Args>
void GenerateRandoms(std::uint64_t seed, size_t skip, T *buf, size_t size, Args... args) {
MS_EXCEPTION_IF_NULL(buf);
Generator gen{seed};
gen.discard(skip);
Distribution dis{args...};
for (size_t i = 0; i < size; ++i) {
buf[i] = T(dis(gen));
}
}
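// Note: generating `size` values with skip = 0 and then `size` more values
// with skip = size yields the same stream as a single call for 2 * size
// values; this property is what makes block-wise parallel generation
// reproducible (it is checked in the unit test added by this commit).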
// Compute the number of tasks and the batch size for each task.
static inline std::pair<size_t, size_t> ComputeTaskNumSize(size_t total_size, size_t thread_num) {
constexpr size_t min_parallel_size = 1024;
if (thread_num == 0 || total_size <= min_parallel_size) {
return {1, total_size};
}
constexpr size_t block_size = 4;
const size_t block_count = (total_size + block_size - 1) / block_size;
if (block_count <= thread_num) {
return {block_count, block_size};
}
const size_t blocks_per_thread = (block_count + thread_num - 1) / thread_num;
const size_t task_num = (block_count + blocks_per_thread - 1) / blocks_per_thread;
const size_t batch_size = blocks_per_thread * block_size;
return {task_num, batch_size};
}
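// Worked example (the values also appear in the unit test added by this
// commit): for total_size = 2021 and thread_num = 10, block_count = 506 and
// blocks_per_thread = 51, so task_num = 10 and batch_size = 204; the last
// task then covers the remaining 2021 - 9 * 204 = 185 elements.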
//
// Generate random numbers into a buffer in parallel.
//
template <typename T, typename Generator, typename Distribution, typename... Args>
void GenerateRandomsParallel(std::uint64_t input_seed, T *buf, size_t buf_size, Args... args) {
MS_EXCEPTION_IF_NULL(buf);
// Calculate number of tasks and batch size.
auto &thread_pool = common::ThreadPool::GetInstance();
auto [task_num, batch_size] = ComputeTaskNumSize(buf_size, thread_pool.GetSyncRunThreadNum());
// Generate random seed if required.
std::uint64_t seed = input_seed;
if (seed == 0) {
std::random_device rd;
seed = rd();
}
if (task_num == 1) {
// Use single thread for small data size.
GenerateRandoms<T, Generator, Distribution>(seed, 0, buf, buf_size, args...);
return;
}
// Prepare parallel tasks.
std::vector<common::Task> tasks;
tasks.reserve(task_num);
T *task_buf = buf;
size_t skip = 0;
for (size_t i = 0; i < task_num; ++i) {
const auto task_size = ((i == task_num - 1) ? (buf_size - (task_num - 1) * batch_size) : batch_size);
(void)tasks.emplace_back([seed, skip, task_buf, task_size, args...]() {
GenerateRandoms<T, Generator, Distribution>(seed, skip, task_buf, task_size, args...);
return common::SUCCESS;
});
skip += task_size;
task_buf += task_size;
}
// Execute the tasks in parallel via the thread pool.
thread_pool.SyncRun(tasks);
}
//
// Philox is a counter-based random number generator that is suitable for parallel random number generation.
//
class Philox {
public:
explicit Philox(uint64_t seed)
: key_({static_cast<uint32_t>(seed), static_cast<uint32_t>(seed >> kShift32)}),
counter_({0, 0, static_cast<uint32_t>(seed), static_cast<uint32_t>(seed >> kShift32)}) {}
Philox(uint64_t seed, uint64_t seed2)
: key_({static_cast<uint32_t>(seed), static_cast<uint32_t>(seed >> kShift32)}),
counter_({0, 0, static_cast<uint32_t>(seed2), static_cast<uint32_t>(seed2 >> kShift32)}) {}
~Philox() = default;
uint32_t operator()() {
if (index_ == kCounterNum) {
results_ = next();
index_ = 0;
}
return results_[index_++];
}
void discard(uint64_t step) {
if (index_ == kCounterNum) {
const auto count = (step / kCounterNum);
skip(count);
const auto remain = (step % kCounterNum);
if (remain > 0) {
results_ = next();
index_ = remain;
}
} else {
const auto pos = index_ + step;
if (pos <= kCounterNum) {
index_ = pos;
} else {
const auto count = (pos - kCounterNum) / kCounterNum;
skip(count);
const auto remain = (pos % kCounterNum);
if (remain > 0) {
results_ = next();
index_ = remain;
} else {
index_ = kCounterNum;
}
}
}
}
static constexpr uint32_t min() { return 0; }
static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); }
private:
static constexpr int kShift32 = 32;
static constexpr size_t kCounterNum = 4;
static constexpr size_t kKeyNum = 2;
static constexpr size_t kIndex0 = 0;
static constexpr size_t kIndex1 = 1;
static constexpr size_t kIndex2 = 2;
static constexpr size_t kIndex3 = 3;
static constexpr uint32_t kMagic0 = 0xD2511F53;
static constexpr uint32_t kMagic1 = 0xCD9E8D57;
static constexpr uint32_t kKeyStep0 = 0x9E3779B9;
static constexpr uint32_t kKeyStep1 = 0xBB67AE85;
using Counter = std::array<uint32_t, kCounterNum>;
using Key = std::array<uint32_t, kKeyNum>;
Key key_;
Counter counter_;
Counter results_;
size_t index_ = kCounterNum;
static void compute(uint32_t *counter, const uint32_t *key) {
const uint64_t t0 = static_cast<uint64_t>(kMagic0) * counter[kIndex0];
const uint32_t l0 = static_cast<uint32_t>(t0);
const uint32_t h0 = static_cast<uint32_t>(t0 >> kShift32);
const uint64_t t1 = static_cast<uint64_t>(kMagic1) * counter[kIndex2];
const uint32_t l1 = static_cast<uint32_t>(t1);
const uint32_t h1 = static_cast<uint32_t>(t1 >> kShift32);
counter[kIndex0] = (h1 ^ counter[kIndex1] ^ key[kIndex0]);
counter[kIndex1] = l1;
counter[kIndex2] = (h0 ^ counter[kIndex3] ^ key[kIndex1]);
counter[kIndex3] = l0;
}
static void raise_key(uint32_t *key) {
key[kIndex0] += kKeyStep0;
key[kIndex1] += kKeyStep1;
}
// Generate the next 4 random numbers and advance the counter.
Counter next() {
Counter result = counter_;
Key key = key_;
// For performance reasons, we do not use a loop here
// but manually call compute() ten times.
compute(result.data(), key.data());
raise_key(key.data());
compute(result.data(), key.data());
raise_key(key.data());
compute(result.data(), key.data());
raise_key(key.data());
compute(result.data(), key.data());
raise_key(key.data());
compute(result.data(), key.data());
raise_key(key.data());
compute(result.data(), key.data());
raise_key(key.data());
compute(result.data(), key.data());
raise_key(key.data());
compute(result.data(), key.data());
raise_key(key.data());
compute(result.data(), key.data());
raise_key(key.data());
compute(result.data(), key.data());
skip_one();
return result;
}
// Advance the counter by one step.
void skip_one() {
if ((++counter_[kIndex0] == 0) && (++counter_[kIndex1] == 0) && (++counter_[kIndex2] == 0)) {
++counter_[kIndex3];
}
}
// Skip the given number of 4-uint32 sample blocks.
void skip(uint64_t count) {
const uint32_t lo = static_cast<uint32_t>(count);
uint32_t hi = static_cast<uint32_t>(count >> kShift32);
counter_[kIndex0] += lo;
if (counter_[kIndex0] < lo) {
++hi;
}
counter_[kIndex1] += hi;
if (counter_[kIndex1] < hi && (++counter_[kIndex2] == 0)) {
++counter_[kIndex3];
}
}
};
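// Illustrative sketch (not part of the original patch; the function name is
// hypothetical): discard() advances the counter deterministically, so a
// generator that skips n values produces the same value as one that has
// already emitted n values from the same seed.
inline uint32_t PhiloxSkipAheadExample(uint64_t seed, uint64_t n) {
  Philox rng(seed);
  rng.discard(n);  // skip the first n values of the stream
  return rng();    // equals the (n+1)-th value of a fresh Philox(seed)
}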
//
// Uniform distribution.
//
template <typename T>
class UniformDistribution {
public:
UniformDistribution(T a, T b) : a_(a), b_(b) {}
~UniformDistribution() = default;
template <typename Generator>
T operator()(Generator &&g) {
const auto min_num = g.min();
const auto max_num = g.max();
const long double range = static_cast<long double>(max_num) - static_cast<long double>(min_num) + 1.0L;
T s = T(g() - min_num) / range;
if (__builtin_expect(s >= T(1), 0)) {
s = std::nextafter(T(1), T(0));
}
return (b_ - a_) * s + a_;
}
private:
T a_;
T b_;
};
//
// Normal distribution.
//
template <typename T>
class NormalDistribution {
public:
NormalDistribution(T mean, T sigma) : mean_(mean), sigma_(sigma) {}
~NormalDistribution() = default;
template <typename Generator>
T operator()(Generator &&g) {
if (has_next_) {
has_next_ = false;
return next_;
}
// Box-Muller transform algorithm:
// z1 = sqrt(-2 * ln(u1)) * cos(2 * pi * u2)
// z2 = sqrt(-2 * ln(u1)) * sin(2 * pi * u2)
constexpr T pi = 3.1415926f;
constexpr T threshold = 1.0e-7f;
const T u1 = std::max(to_float(g()), threshold);
const T u2 = std::max(to_float(g()), threshold);
const T x = std::sqrt(-2.0f * std::log(u1)) * sigma_;
const T y = 2.0f * pi * u2;
next_ = mean_ + (x * std::sin(y));
has_next_ = true;
return mean_ + (x * std::cos(y));
}
private:
T mean_;
T sigma_;
T next_ = 0;
bool has_next_ = false;
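  // Keep the low 23 random bits as the float mantissa with the exponent field
  // fixed at 127, which yields a value in [1.0, 2.0); subtracting 1.0f then
  // maps the result into [0.0, 1.0).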
static T to_float(uint32_t input) {
constexpr uint32_t mask = 0x7fffffu;
constexpr uint32_t exp = (127 << 23);
union {
uint32_t int_val;
float float_val;
} val;
val.int_val = (input & mask) | exp;
return T(val.float_val - 1.0f);
}
};
//
// Truncated normal distribution.
//
template <typename T>
class TruncatedNormal {
public:
TruncatedNormal(T a, T b, T mean, T sigma) : lower_(a), upper_(b), mean_(mean), sigma_(sigma) {
if (sigma == 0) {
MS_LOG(EXCEPTION) << "TruncatedNormal: 'sigma' cannot be zero.";
} else {
alpha_ = (a - mean) / sigma;
beta_ = (b - mean) / sigma;
}
}
~TruncatedNormal() = default;
template <typename Generator>
T operator()(Generator &&g) {
// Inverse CDF (Cumulative Distribution Function) method.
const T u = std_uniform_(g);
const T cdf_a = cdf(alpha_);
const T cdf_b = cdf(beta_);
const T p = cdf_a + u * (cdf_b - cdf_a);
const T x = quantile(p);
return mean_ + x * sigma_;
}
private:
UniformDistribution<T> std_uniform_{0.0f, 1.0f};
T lower_;
T upper_;
T mean_;
T sigma_;
T alpha_;
T beta_;
static constexpr T kRootTwo = 1.4142135f;
static T cdf(T x) {
const T diff = x / kRootTwo;
return std::erfc(-diff) / 2.0f;
}
static T quantile(T p) {
auto z = 2.0f * p;
const T x = erfc_inv(z);
return -x * kRootTwo;
}
static T erfc_inv(T z) {
// Keep z in range (0, 2).
if (__builtin_expect((z <= 0), 0)) {
z = std::nextafterf(0.0f, 2.0f);
} else if (__builtin_expect((z >= 2.0f), 0)) {
z = std::nextafterf(2.0f, 0.0f);
}
T p, q, s;
if (z > 1.0f) {
q = 2.0f - z;
p = 1.0f - q;
s = -1;
} else {
p = 1.0f - z;
q = z;
s = 1;
}
return s * erf_inv_imp(p, q);
}
// The algorithm and polynomial constants are borrowed from Boost.
static T erf_inv_imp(T p, T q) {
if (p <= 0.5f) {
constexpr float Y = 0.0891314744949340820313f;
constexpr T P[] = {T(-0.000508781949658280665617), T(-0.00836874819741736770379), T(0.0334806625409744615033),
T(-0.0126926147662974029034), T(-0.0365637971411762664006), T(0.0219878681111168899165),
T(0.00822687874676915743155), T(-0.00538772965071242932965)};
constexpr T Q[] = {T(1.0),
T(-0.970005043303290640362),
T(-1.56574558234175846809),
T(1.56221558398423026363),
T(0.662328840472002992063),
T(-0.71228902341542847553),
T(-0.0527396382340099713954),
T(0.0795283687341571680018),
T(-0.00233393759374190016776),
T(0.000886216390456424707504)};
T g = p * (p + 10.0f);
T r = eval_polynomial(P, p) / eval_polynomial(Q, p);
return g * Y + g * r;
}
if (q >= 0.25f) {
constexpr float Y = 2.249481201171875f;
constexpr T P[] = {T(-0.202433508355938759655), T(0.105264680699391713268), T(8.37050328343119927838),
T(17.6447298408374015486), T(-18.8510648058714251895), T(-44.6382324441786960818),
T(17.445385985570866523), T(21.1294655448340526258), T(-3.67192254707729348546)};
constexpr T Q[] = {T(1.0),
T(6.24264124854247537712),
T(3.9713437953343869095),
T(-28.6608180499800029974),
T(-20.1432634680485188801),
T(48.5609213108739935468),
T(10.8268667355460159008),
T(-22.6436933413139721736),
T(1.72114765761200282724)};
T g = std::sqrt(-2.0f * std::log(q));
T xs = q - 0.25f;
T r = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
return g / (Y + r);
}
// Avoid static check warning for 'function body too long'.
return erf_inv_imp2(p, q);
}
static T erf_inv_imp2(T p, T q) {
T x = std::sqrt(-std::log(q));
if (x < 3.0f) {
constexpr float Y = 0.807220458984375f;
constexpr T P[] = {T(-0.131102781679951906451), T(-0.163794047193317060787), T(0.117030156341995252019),
T(0.387079738972604337464), T(0.337785538912035898924), T(0.142869534408157156766),
T(0.0290157910005329060432), T(0.00214558995388805277169), T(-0.679465575181126350155e-6),
T(0.285225331782217055858e-7), T(-0.681149956853776992068e-9)};
constexpr T Q[] = {T(1.0),
T(3.46625407242567245975),
T(5.38168345707006855425),
T(4.77846592945843778382),
T(2.59301921623620271374),
T(0.848854343457902036425),
T(0.152264338295331783612),
T(0.01105924229346489121)};
T xs = x - 1.125f;
T R = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
return Y * x + R * x;
}
if (x < 6.0f) {
constexpr float Y = 0.93995571136474609375f;
constexpr T P[] = {T(-0.0350353787183177984712), T(-0.00222426529213447927281), T(0.0185573306514231072324),
T(0.00950804701325919603619), T(0.00187123492819559223345), T(0.000157544617424960554631),
T(0.460469890584317994083e-5), T(-0.230404776911882601748e-9), T(0.266339227425782031962e-11)};
constexpr T Q[] = {T(1.0),
T(1.3653349817554063097),
T(0.762059164553623404043),
T(0.220091105764131249824),
T(0.0341589143670947727934),
T(0.00263861676657015992959),
T(0.764675292302794483503e-4)};
T xs = x - 3.0f;
T R = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
return Y * x + R * x;
}
if (x < 18.0f) {
constexpr float Y = 0.98362827301025390625f;
constexpr T P[] = {T(-0.0167431005076633737133), T(-0.00112951438745580278863), T(0.00105628862152492910091),
T(0.000209386317487588078668), T(0.149624783758342370182e-4), T(0.449696789927706453732e-6),
T(0.462596163522878599135e-8), T(-0.281128735628831791805e-13), T(0.99055709973310326855e-16)};
constexpr T Q[] = {T(1.0),
T(0.591429344886417493481),
T(0.138151865749083321638),
T(0.0160746087093676504695),
T(0.000964011807005165528527),
T(0.275335474764726041141e-4),
T(0.282243172016108031869e-6)};
T xs = x - 6.0f;
T R = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
return Y * x + R * x;
}
if (x < 44.0f) {
constexpr float Y = 0.99714565277099609375f;
constexpr T P[] = {T(-0.0024978212791898131227), T(-0.779190719229053954292e-5), T(0.254723037413027451751e-4),
T(0.162397777342510920873e-5), T(0.396341011304801168516e-7), T(0.411632831190944208473e-9),
T(0.145596286718675035587e-11), T(-0.116765012397184275695e-17)};
constexpr T Q[] = {T(1.0),
T(0.207123112214422517181),
T(0.0169410838120975906478),
T(0.000690538265622684595676),
T(0.145007359818232637924e-4),
T(0.144437756628144157666e-6),
T(0.509761276599778486139e-9)};
T xs = x - 18.0f;
T R = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
return Y * x + R * x;
}
constexpr float Y = 0.99941349029541015625f;
constexpr T P[] = {T(-0.000539042911019078575891), T(-0.28398759004727721098e-6), T(0.899465114892291446442e-6),
T(0.229345859265920864296e-7), T(0.225561444863500149219e-9), T(0.947846627503022684216e-12),
T(0.135880130108924861008e-14), T(-0.348890393399948882918e-21)};
constexpr T Q[] = {T(1.0),
T(0.0845746234001899436914),
T(0.00282092984726264681981),
T(0.468292921940894236786e-4),
T(0.399968812193862100054e-6),
T(0.161809290887904476097e-8),
T(0.231558608310259605225e-11)};
T xs = x - 44.0f;
T R = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
return Y * x + R * x;
}
// We use template functions to unroll polynomial evaluation
// at compile time to improve performance.
template <size_t N>
static T eval_polynomial(const T (&arr)[N], T x) {
T sum = arr[N - 1];
if constexpr (N > 1) {
eval_polynomial_loop<N - 2>(arr, x, &sum);
}
return sum;
}
template <size_t Index>
static void eval_polynomial_loop(const T *arr, T x, T *sum) {
*sum *= x;
*sum += arr[Index];
if constexpr (Index > 0) {
eval_polynomial_loop<Index - 1>(arr, x, sum);
}
}
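  // For example, with N = 3 the recursion above unrolls to Horner's scheme:
  //   sum = arr[2];
  //   sum = sum * x + arr[1];
  //   sum = sum * x + arr[0];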
};
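// Illustrative usage sketch (not part of the original patch; the function
// name is hypothetical): fill a float buffer with normal samples using the
// generator and distributions declared above, as random_normal.cc does.
inline void ExampleFillNormal(std::uint64_t seed, float *buf, size_t size) {
  GenerateRandomsParallel<float, Philox, NormalDistribution<float>>(seed, buf, size, 0.0f, 1.0f);
}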
} // namespace mindspore::random
#endif // MINDSPORE_CCSRC_INCLUDE_COMMON_RANDOM_H_

View File

@ -1,89 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_PHILOX_GENERATOR_H_
#define MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_PHILOX_GENERATOR_H_
#include <securec.h>
#include <math.h>
#include <array>
#include "utils/log_adapter.h"
#include "utils/convert_utils_base.h"
#include "include/common/visible.h"
namespace mindspore {
static constexpr int kResultNum = 4;
class COMMON_EXPORT PhiloxGenerator {
public:
explicit PhiloxGenerator(uint64_t seed_) {
key_var_[0] = static_cast<uint32_t>(seed_);
key_var_[1] = static_cast<uint32_t>(seed_ >> 32);
counter_[0] = 0;
counter_[1] = 0;
counter_[2] = static_cast<uint32_t>(seed_);
counter_[3] = static_cast<uint32_t>(seed_ >> 32);
}
explicit PhiloxGenerator(uint64_t seed_, uint64_t seed2_) {
key_var_[0] = static_cast<uint32_t>(seed_);
key_var_[1] = static_cast<uint32_t>(seed_ >> 32);
counter_[0] = 0;
counter_[1] = 0;
counter_[2] = static_cast<uint32_t>(seed2_);
counter_[3] = static_cast<uint32_t>(seed2_ >> 32);
}
~PhiloxGenerator() = default;
void Jump();
void JumpStep(uint64_t step);
std::array<uint32_t, kResultNum> Compute(const std::array<uint32_t, kResultNum> &counter,
const std::array<uint32_t, 2> &key_var) const;
std::array<uint32_t, kResultNum> operator()();
private:
std::array<uint32_t, kResultNum> counter_;
std::array<uint32_t, 2> key_var_;
static constexpr std::array<uint32_t, kResultNum> keyConstant = {0xD2511F53, 0x9E3779B9, 0xCD9E8D57, 0xBB67AE85};
};
template <class T>
bool FillRandoms(PhiloxGenerator generator, float *output, int64_t vet_size, int64_t thread_Id) {
T distribution;
errno_t mem_ret;
generator.JumpStep(LongToSize((vet_size * thread_Id + kResultNum - 1) / kResultNum));
for (int32_t i = 0; i < vet_size; i += kResultNum) {
auto outputResult = distribution(&generator);
size_t max_length = 0;
if (vet_size - i >= kResultNum) {
max_length = kResultNum * sizeof(float);
mem_ret = memcpy_s(&output[i], max_length, &outputResult[0], max_length);
} else {
max_length = LongToSize(vet_size - i) * sizeof(float);
mem_ret = memcpy_s(&output[i], max_length, &outputResult[0], max_length);
}
if (mem_ret != EOK) {
MS_LOG(ERROR) << "FillRandoms memcpy is failed";
return false;
}
}
return true;
}
} // namespace mindspore
#endif // MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_PHILOX_GENERATOR_H_

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -13,102 +13,36 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ps/random_normal/random_normal.h"
#include <iostream>
#include <thread>
#include <memory>
#include <algorithm>
#include "utils/convert_utils_base.h"
#include "pybind_api/random_normal/random_cpu_kernel.h"
namespace mindspore {
namespace ps {
static const uint32_t kMaxThreadNum = 16;
static const uint32_t kCPUCoreNum = std::thread::hardware_concurrency();
namespace {
// Update standard deviation to parameter: stddev
void UpdateStandardDeviation(float stddev, size_t total_count, float *output) {
MS_EXCEPTION_IF_NULL(output);
auto update_stddev_task = [](float stddev, size_t task_len, float *data) {
if (data == nullptr) {
MS_LOG(ERROR) << "The pointer data is nullptr";
return;
}
for (size_t i = 0; i < task_len; i++) {
data[i] *= stddev;
}
};
uint32_t thread_num = std::max(kMaxThreadNum, kCPUCoreNum);
if (total_count <= thread_num) {
thread_num = 1;
}
std::vector<std::thread> threads(thread_num);
size_t task_offset = 0;
for (size_t i = 0; i < thread_num; ++i) {
size_t task_len = total_count / thread_num + (i < (total_count % thread_num) ? 1 : 0);
threads[i] = std::thread(update_stddev_task, stddev, task_len, output + task_offset);
task_offset += task_len;
}
for (size_t i = 0; i < thread_num; i++) {
threads[i].join();
}
}
} // namespace
#include <random>
#include "include/common/random.h"
#include "utils/log_adapter.h"
namespace mindspore::ps {
bool InitRandomNormal(float mean, float stddev, std::vector<size_t> out_shape, size_t global_seed, size_t op_seed,
float *output_data) {
MS_ERROR_IF_NULL_W_RET_VAL(output_data, false);
if (out_shape.size() == 0) {
std::cout << "output data shape is error" << std::endl;
}
int64_t total_count = 1;
for (uint32_t i = 0; i < out_shape.size(); i++) {
total_count *= SizeToLong(out_shape[i]);
}
uint32_t thread_num = std::max(kMaxThreadNum, kCPUCoreNum);
if (total_count <= thread_num) {
thread_num = 1;
}
float *start_ptr = output_data;
if (start_ptr == nullptr) {
std::cout << "start_ptr is nullptr" << std::endl;
// Check output data pointer.
if (output_data == nullptr) {
MS_LOG(ERROR) << "output data is null.";
return false;
}
// The value of thread_num is >= 1.
int64_t batchSize = total_count / thread_num;
std::vector<std::thread> threads(thread_num);
int64_t seed = SizeToLong(global_seed);
int64_t seed2 = SizeToLong(op_seed);
seed = (seed == 0 && seed2 == 0) ? clock() : seed;
PhiloxGenerator generator = PhiloxGenerator(seed, seed2);
if (thread_num != 1) {
float *offset_ptr = nullptr;
for (uint32_t i = 0; i < thread_num - 1; i++) {
offset_ptr = start_ptr + batchSize * i;
threads[i] =
std::thread(FillRandoms<NormalDistribution<PhiloxGenerator, float>>, generator, offset_ptr, batchSize, i);
}
offset_ptr = start_ptr + batchSize * (thread_num - 1);
threads[thread_num - 1] = std::thread(FillRandoms<NormalDistribution<PhiloxGenerator, float>>, generator,
offset_ptr, total_count - (thread_num - 1) * batchSize, thread_num - 1);
} else {
threads[0] =
std::thread(FillRandoms<NormalDistribution<PhiloxGenerator, float>>, generator, start_ptr, total_count, 0);
// Check shape.
if (out_shape.size() == 0) {
MS_LOG(ERROR) << "output data shape is empty.";
return false;
}
for (uint32_t i = 0; i < thread_num; i++) {
threads[i].join();
// Calculate data size from shape.
size_t data_size = 1;
for (size_t i = 0; i < out_shape.size(); ++i) {
data_size *= out_shape[i];
}
UpdateStandardDeviation(stddev, total_count, output_data);
// Generate randoms parallel.
constexpr int seed_shift = 32;
const uint64_t seed = (global_seed << seed_shift) + op_seed;
using Generator = random::Philox;
using Distribution = random::NormalDistribution<float>;
random::GenerateRandomsParallel<float, Generator, Distribution>(seed, output_data, data_size, mean, stddev);
return true;
}
} // namespace ps
} // namespace mindspore
} // namespace mindspore::ps

View File

@ -0,0 +1,67 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstdint>
#include <random>
#include "include/common/random.h"
#include "include/common/pybind_api/api_register.h"
#include "utils/log_adapter.h"
namespace mindspore::initializer {
//
// Generate float random numbers into a python buffer.
//
template <typename Generator, typename Distribution, typename... Args>
void GenerateFloatRandoms(std::uint64_t seed, const py::buffer &py_buf, Args... args) {
// Check buffer info.
py::buffer_info info = py_buf.request();
if (info.format != py::format_descriptor<float>::format()) {
MS_LOG(EXCEPTION) << "Unsupported data type '" << info.format << "'.";
}
// Get buffer pointer and size.
const size_t buf_size = info.size;
float *buf = reinterpret_cast<float *>(info.ptr);
MS_EXCEPTION_IF_NULL(buf);
// Parallel generate randoms into buffer.
random::GenerateRandomsParallel<float, Generator, Distribution>(seed, buf, buf_size, args...);
}
void RandomUniform(std::uint64_t seed, const py::buffer &py_buf, float a, float b) {
using Generator = random::Philox;
using Distribution = random::UniformDistribution<double>;
GenerateFloatRandoms<Generator, Distribution>(seed, py_buf, a, b);
}
void RandomNormal(std::uint64_t seed, const py::buffer &py_buf, float mean, float sigma) {
using Generator = random::Philox;
using Distribution = random::NormalDistribution<double>;
GenerateFloatRandoms<Generator, Distribution>(seed, py_buf, mean, sigma);
}
void TruncatedNormal(std::uint64_t seed, const py::buffer &py_buf, float a, float b, float mean, float sigma) {
using Generator = random::Philox;
using Distribution = random::TruncatedNormal<double>;
GenerateFloatRandoms<Generator, Distribution>(seed, py_buf, a, b, mean, sigma);
}
REGISTER_PYBIND_DEFINE(init_random, ([](py::module *const m) {
(void)m->def("_random_uniform", RandomUniform);
(void)m->def("_random_normal", RandomNormal);
(void)m->def("_truncated_normal", TruncatedNormal);
}));
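// From Python these bindings fill a float32 numpy buffer in place, e.g. (as
// used by mindspore.common.initializer):
//   data = np.ndarray(shape=shape, dtype=np.float32)
//   _random_normal(seed, data, mean, sigma)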
} // namespace mindspore::initializer

View File

@ -1,68 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "pybind_api/random_normal/random_cpu_kernel.h"
#include <memory>
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "ir/tensor.h"
namespace mindspore {
bool InitRandomNormal(std::vector<int64_t> out_shape, int64_t seed, int64_t seed2, const py::object &output_tensor) {
if (out_shape.size() == 0) {
std::cout << "output data shape is error" << std::endl;
}
int64_t total_count = 1;
for (uint32_t i = 0; i < out_shape.size(); i++) {
total_count *= out_shape[i];
}
uint32_t thread_num = 16;
if (total_count <= thread_num) {
thread_num = 1;
}
auto temp = py::cast<std::shared_ptr<mindspore::tensor::Tensor>>(output_tensor);
float *start_ptr = static_cast<float *>(temp->data_c());
if (start_ptr == nullptr) {
std::cout << "start_ptr is nullptr" << std::endl;
return false;
}
int64_t batchSize = total_count / thread_num;
std::vector<std::thread> threads(thread_num);
seed = (seed == 0 && seed2 == 0) ? clock() : seed;
mindspore::PhiloxGenerator generator = mindspore::PhiloxGenerator(seed, seed2);
float *offset_ptr = nullptr;
if (thread_num != 1) {
for (uint32_t i = 0; i < thread_num - 1; i++) {
offset_ptr = start_ptr + batchSize * i;
threads[i] = std::thread(mindspore::FillRandoms<mindspore::NormalDistribution<mindspore::PhiloxGenerator, float>>,
generator, offset_ptr, batchSize, i);
}
offset_ptr = start_ptr + batchSize * (thread_num - 1);
threads[thread_num - 1] =
std::thread(mindspore::FillRandoms<mindspore::NormalDistribution<mindspore::PhiloxGenerator, float>>, generator,
offset_ptr, total_count - (thread_num - 1) * batchSize, thread_num - 1);
} else {
threads[0] = std::thread(mindspore::FillRandoms<mindspore::NormalDistribution<mindspore::PhiloxGenerator, float>>,
generator, start_ptr, total_count, 0);
}
for (uint32_t i = 0; i < thread_num; i++) {
threads[i].join();
}
return true;
}
REGISTER_PYBIND_DEFINE(random_normal,
([](py::module *const m) { (void)m->def("random_normal", &InitRandomNormal, "testnormal"); }));
} // namespace mindspore

View File

@ -1,68 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PYBIND_API_API_IR_RANDOM_NORMAL_RANDOM_CPU_KERNEL_H_
#define PYBIND_API_API_IR_RANDOM_NORMAL_RANDOM_CPU_KERNEL_H_
#include <vector>
#include "include/common/utils/philox_generator.h"
#include "include/common/pybind_api/api_register.h"
#include "pybind11/pytypes.h"
#include "utils/log_adapter.h"
namespace py = pybind11;
namespace mindspore {
template <class T, typename vartype>
class NormalDistribution;
template <class T>
class NormalDistribution<T, float> {
public:
bool UInt32ToFloat32(uint32_t input, float *output) const {
const uint32_t temp_value = input & 0x7fffffu;
const uint32_t exp = static_cast<uint32_t>(127);
const uint32_t val = (exp << 23) | temp_value;
errno_t mem_ret = memcpy_s(output, sizeof(float), &val, sizeof(uint32_t));
if (mem_ret != EOK) {
MS_LOG(ERROR) << "UInt32ToFloat32 memcpy is failed";
return false;
}
*output = *output - 1.0f;
return true;
}
std::array<float, kResultNum> operator()(T *generator) {
std::array<uint32_t, 4> generate_value = (*generator)();
const float PI = 3.14;
for (uint32_t i = 0; i < kResultNum; i += 2) {
float temp[2];
UInt32ToFloat32(generate_value[i], &temp[0]);
UInt32ToFloat32(generate_value[i + 1], &temp[1]);
const float threshold = 1.0e-7f;
temp[0] = temp[0] < threshold ? threshold : temp[0];
temp[1] = temp[1] < threshold ? threshold : temp[1];
result_[i] = sqrt(-2.0 * log(temp[0])) * sin(2 * PI * temp[1]);
result_[i + 1] = sqrt(-2.0 * log(temp[0])) * cos(2 * PI * temp[1]);
}
return result_;
}
private:
std::array<float, kResultNum> result_;
};
bool InitRandomNormal(std::vector<int64_t> out_shape, int64_t seed, int64_t seed2, const py::object &output_tensor);
} // namespace mindspore
#endif // PYBIND_API_API_IR_RANDOM_NORMAL_RANDOM_CPU_KERNEL_H_

View File

@ -1,73 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "include/common/utils/philox_generator.h"
namespace mindspore {
static constexpr uint64_t kShiftNum = 32;
static constexpr uint64_t kGenerateNum = 10;
void PhiloxGenerator::Jump() {
if ((++counter_[0] == 0) && (++counter_[1] == 0) && (++counter_[2] == 0)) {
++counter_[3];
}
}
void PhiloxGenerator::JumpStep(uint64_t step) {
uint64_t min_counter, max_counter;
min_counter = static_cast<uint64_t>(counter_[1]);
min_counter = min_counter << kShiftNum;
min_counter += counter_[0];
max_counter = static_cast<uint64_t>(counter_[3]);
max_counter = max_counter << kShiftNum;
max_counter += counter_[2];
min_counter += step;
if (min_counter < step) {
max_counter++;
}
counter_[0] = static_cast<uint32_t>(min_counter);
counter_[1] = static_cast<uint32_t>(min_counter >> kShiftNum);
counter_[2] = static_cast<uint32_t>(max_counter);
counter_[3] = static_cast<uint32_t>(max_counter >> kShiftNum);
}
std::array<uint32_t, kResultNum> PhiloxGenerator::Compute(const std::array<uint32_t, kResultNum> &counter,
const std::array<uint32_t, 2> &key_var) const {
std::array<uint32_t, kResultNum> min_value;
std::array<uint32_t, kResultNum> max_value;
constexpr auto step = 2;
for (size_t i = 0; i < kResultNum; i += step) {
uint64_t temp = static_cast<uint64_t>(keyConstant[i]) * counter[i];
min_value[i] = static_cast<uint32_t>(temp);
max_value[i] = static_cast<uint32_t>(temp >> kShiftNum);
}
std::array<uint32_t, kResultNum> result;
result[0] = (max_value[2] ^ counter[1] ^ key_var[0]);
result[1] = min_value[2];
result[2] = (max_value[0] ^ counter[3] ^ key_var[0]);
result[3] = min_value[0];
return result;
}
std::array<uint32_t, kResultNum> PhiloxGenerator::operator()() {
for (size_t i = 0; i < kGenerateNum; i++) {
counter_ = Compute(counter_, key_var_);
key_var_[0] += keyConstant[1];
key_var_[1] += keyConstant[3];
}
Jump();
return counter_;
}
} // namespace mindspore

View File

@ -1,4 +1,4 @@
# Copyright 2020-2021 Huawei Technologies Co., Ltd
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -20,11 +20,10 @@ import math
from functools import reduce
import numpy as np
from scipy.stats import truncnorm
from mindspore.common.seed import get_seed, _get_graph_seed
from mindspore.common import dtype as mstype
from mindspore.common.tensor import Tensor
from mindspore._c_expression import random_normal
from mindspore._c_expression import _random_normal, _random_uniform, _truncated_normal
_INITIALIZER_ALIAS = dict()
@ -36,6 +35,7 @@ class Initializer:
Args:
kwargs (dict): Keyword arguments for Initializer.
"""
def __init__(self, **kwargs):
self._kwargs = kwargs
self._seed = None
@ -58,6 +58,7 @@ class Initializer:
def __call__(self, arr):
return self._initialize(arr)
def _register(*aliases):
"""Return the alias register."""
def alias_reg(cls):
@ -89,6 +90,29 @@ def _assignment(arr, num):
return arr
def _numpy_seed():
# This produces the same value after calling numpy.random.seed() with the same seed.
return np.random.randint(low=1, high=(1 << 63))
def _init_random_normal(mean, sigma, shape):
data = np.ndarray(shape=shape, dtype=np.float32)
_random_normal(_numpy_seed(), data, mean, sigma)
return data
def _init_random_uniform(a, b, shape):
data = np.ndarray(shape=shape, dtype=np.float32)
_random_uniform(_numpy_seed(), data, a, b)
return data
def _init_truncated_normal(a, b, mean, sigma, shape):
data = np.ndarray(shape=shape, dtype=np.float32)
_truncated_normal(_numpy_seed(), data, a, b, mean, sigma)
return data
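# Illustrative example: each helper above allocates an uninitialized float32
# buffer and fills it in place through the C++ bindings, e.g.
#   data = _init_random_normal(0.0, 1.0, (2, 3))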
@_register('zeros')
class Zero(Initializer):
"""
@ -100,8 +124,9 @@ class Zero(Initializer):
>>> tensor1 = initializer(Zero(), [1, 2, 3], mindspore.float32)
>>> tensor2 = initializer('zeros', [1, 2, 3], mindspore.float32)
"""
def _initialize(self, arr):
_assignment(arr, 0)
arr.fill(0)
@_register('ones')
@ -115,8 +140,9 @@ class One(Initializer):
>>> tensor1 = initializer(One(), [1, 2, 3], mindspore.float32)
>>> tensor2 = initializer('ones', [1, 2, 3], mindspore.float32)
"""
def _initialize(self, arr):
_assignment(arr, 1)
arr.fill(1)
def _calculate_fan_in_and_fan_out(shape):
@ -253,16 +279,15 @@ class XavierUniform(Initializer):
>>> tensor1 = initializer(XavierUniform(), [1, 2, 3], mindspore.float32)
>>> tensor2 = initializer('xavier_uniform', [1, 2, 3], mindspore.float32)
"""
def __init__(self, gain=1):
super(XavierUniform, self).__init__(gain=gain)
self.gain = gain
def _initialize(self, arr):
n_in, n_out = _calculate_fan_in_and_fan_out(arr.shape)
boundary = self.gain * math.sqrt(6.0 / (n_in + n_out))
data = np.random.uniform(-boundary, boundary, arr.shape)
data = _init_random_uniform(-boundary, boundary, arr.shape)
_assignment(arr, data)
@ -297,6 +322,7 @@ class HeUniform(Initializer):
>>> tensor1 = initializer(HeUniform(), [1, 2, 3], mindspore.float32)
>>> tensor2 = initializer('he_uniform', [1, 2, 3], mindspore.float32)
"""
def __init__(self, negative_slope=0, mode='fan_in', nonlinearity='leaky_relu'):
super(HeUniform, self).__init__(negative_slope=negative_slope, mode=mode, nonlinearity=nonlinearity)
self.negative_slope = negative_slope
@ -308,8 +334,7 @@ class HeUniform(Initializer):
gain = _calculate_gain(self.nonlinearity, self.negative_slope)
std = gain / math.sqrt(fan)
boundary = math.sqrt(3.0) * std
data = np.random.uniform(-boundary, boundary, arr.shape)
data = _init_random_uniform(-boundary, boundary, arr.shape)
_assignment(arr, data)
@ -343,6 +368,7 @@ class HeNormal(Initializer):
>>> tensor1 = initializer(HeNormal(), [1, 2, 3], mindspore.float32)
>>> tensor2 = initializer('he_normal', [1, 2, 3], mindspore.float32)
"""
def __init__(self, negative_slope=0, mode='fan_in', nonlinearity='leaky_relu'):
super(HeNormal, self).__init__(negative_slope=negative_slope, mode=mode, nonlinearity=nonlinearity)
self.negative_slope = negative_slope
@ -353,8 +379,7 @@ class HeNormal(Initializer):
fan = _calculate_correct_fan(arr.shape, self.mode)
gain = _calculate_gain(self.nonlinearity, self.negative_slope)
std = gain / math.sqrt(fan)
data = np.random.normal(0, std, arr.shape)
data = _init_random_normal(0, std, arr.shape)
_assignment(arr, data)
@ -372,12 +397,13 @@ class Constant(Initializer):
>>> tensor1 = initializer(0, [1, 2, 3], mindspore.float32)
>>> tensor2 = initializer(5, [1, 2, 3], mindspore.float32)
"""
def __init__(self, value):
super(Constant, self).__init__(value=value)
self.value = value
def _initialize(self, arr):
_assignment(arr, self.value)
arr.fill(self.value)
@_register()
@ -394,6 +420,7 @@ class Identity(Initializer):
>>> tensor1 = initializer(Identity(), [2, 3], mindspore.float32)
>>> tensor2 = initializer('identity', [2, 3], mindspore.float32)
"""
def _initialize(self, arr):
if len(arr.shape) != 2:
raise ValueError('For Identity initializer, the dimension of the initialized tensor should be 2, '
@ -420,6 +447,7 @@ class Sparse(Initializer):
>>> from mindspore.common.initializer import initializer, Sparse
>>> tensor1 = initializer(Sparse(sparsity=0.1, sigma=0.01), [5, 8], mindspore.float32)
"""
def __init__(self, sparsity, sigma=0.01):
super(Sparse, self).__init__()
self.sparsity = sparsity
@ -431,7 +459,7 @@ class Sparse(Initializer):
'but got {}.'.format(len(arr.shape)))
rows, cols = arr.shape
zero_num = int(np.ceil(self.sparsity * rows))
data = np.random.normal(0, self.sigma, arr.shape)
data = _init_random_normal(0, self.sigma, arr.shape)
for col_idx in range(cols):
row_idx = np.random.permutation(list(range(rows)))[: zero_num]
data[row_idx, col_idx] = 0.
@ -509,6 +537,7 @@ class Orthogonal(Initializer):
>>> tensor1 = initializer(Orthogonal(gain=2.), [2, 3, 4], mindspore.float32)
>>> tensor2 = initializer('orthogonal', [2, 3, 4], mindspore.float32)
"""
def __init__(self, gain=1.):
super(Orthogonal, self).__init__(gain=gain)
self.gain = gain
@ -520,7 +549,7 @@ class Orthogonal(Initializer):
rows = arr.shape[0]
cols = np.prod(arr.shape) // rows
data = np.random.normal(0, 1, size=(rows, cols))
data = _init_random_normal(0, 1, (rows, cols))
if rows < cols:
data = data.T
@ -565,6 +594,7 @@ class VarianceScaling(Initializer):
... distribution='untruncated_normal'), [2, 3], mindspore.float32)
>>> tensor2 = initializer('varianceScaling', [2, 3], mindspore.float32)
"""
def __init__(self, scale=1.0, mode='fan_in', distribution='truncated_normal'):
super(VarianceScaling, self).__init__(scale=scale, mode=mode, distribution=distribution)
if scale <= 0.:
@ -595,13 +625,13 @@ class VarianceScaling(Initializer):
if self.distribution == 'truncated_norm':
stddev = np.sqrt(scale) / 0.87962566103423978
data = truncnorm.rvs(-2, 2, loc=0, scale=stddev, size=arr.shape, random_state=None)
data = _init_truncated_normal(-2, 2, 0, stddev, arr.shape)
elif self.distribution == 'untruncated_normal':
stddev = np.sqrt(scale)
data = np.random.normal(0, stddev, arr.shape)
data = _init_random_normal(0, stddev, arr.shape)
else:
limit = np.sqrt(3.0 * scale)
data = np.random.uniform(-limit, limit, arr.shape)
data = _init_random_uniform(-limit, limit, arr.shape)
_assignment(arr, data)
@ -621,12 +651,13 @@ class Uniform(Initializer):
>>> tensor1 = initializer(Uniform(), [1, 2, 3], mindspore.float32)
>>> tensor2 = initializer('uniform', [1, 2, 3], mindspore.float32)
"""
def __init__(self, scale=0.07):
super(Uniform, self).__init__(scale=scale)
self.scale = scale
def _initialize(self, arr):
tmp = np.random.uniform(-self.scale, self.scale, arr.shape)
tmp = _init_random_uniform(-self.scale, self.scale, arr.shape)
_assignment(arr, tmp)
@ -649,18 +680,16 @@ class Normal(Initializer):
>>> tensor1 = initializer(Normal(), [1, 2, 3], mindspore.float32)
>>> tensor2 = initializer('normal', [1, 2, 3], mindspore.float32)
"""
def __init__(self, sigma=0.01, mean=0.0):
super(Normal, self).__init__(sigma=sigma, mean=mean)
self.sigma = sigma
self.mean = mean
def _initialize(self, arr):
seed, seed2 = self.seed
output_tensor = Tensor(np.zeros(arr.shape, dtype=np.float32))
random_normal(arr.shape, seed, seed2, output_tensor)
output_data = output_tensor.asnumpy()
output_data = output_data * self.sigma + self.mean
_assignment(arr, output_data)
data = _init_random_normal(self.mean, self.sigma, arr.shape)
_assignment(arr, data)
@_register()
class TruncatedNormal(Initializer):
@ -677,12 +706,13 @@ class TruncatedNormal(Initializer):
>>> tensor1 = initializer(TruncatedNormal(), [1, 2, 3], mindspore.float32)
>>> tensor2 = initializer('truncatedNormal', [1, 2, 3], mindspore.float32)
"""
def __init__(self, sigma=0.01):
super(TruncatedNormal, self).__init__(sigma=sigma)
self.sigma = sigma
def _initialize(self, arr):
tmp = truncnorm.rvs(-2, 2, loc=0, scale=self.sigma, size=arr.shape, random_state=None)
tmp = _init_truncated_normal(-2, 2, 0, self.sigma, arr.shape)
_assignment(arr, tmp)
@ -759,6 +789,7 @@ def initializer(init, shape=None, dtype=mstype.float32):
init_obj = Tensor(dtype=dtype, shape=shape, init=init)
return init_obj
__all__ = [
'Initializer',
'initializer',

View File

@ -378,6 +378,6 @@ def test_train_feed(num_classes=65536):
model = Model(net, loss_fn=loss, optimizer=opt)
model.train(3, dataset, dataset_sink_mode=False, callbacks=parallel_callback)
loss_value = np.array(parallel_callback.loss_list)
expect_out = [11.087254, 10.876551, 10.057782]
expect_out = [11.344119, 10.747661, 11.134097]
print(loss_value)
assert np.allclose(loss_value, expect_out, 0.0001, 0.0001)

View File

@ -232,10 +232,7 @@ def test_train():
# For graph mode
set_seed(0)
graph_loss = get_train_loss(numeric_columns, sparse_columns, data_list, context.GRAPH_MODE)
expect_loss = np.array([16.476381, 2425.9783, 8769.053], dtype=graph_loss[0].dtype)
assert np.allclose(graph_loss, expect_loss, 0.01, 0.01)
# For PyNative mode
set_seed(0)
pynative_loss = get_train_loss(numeric_columns, sparse_columns, data_list, context.PYNATIVE_MODE)
expect_loss = np.array([16.476381, 2425.9783, 8769.053], dtype=pynative_loss[0].dtype)
assert np.allclose(pynative_loss, expect_loss, 0.01, 0.01)
assert np.allclose(pynative_loss, graph_loss)

View File

@ -1817,5 +1817,5 @@ def test_train():
)
train_loss = callback.loss
expect_loss = 113.286
expect_loss = 114.664
assert np.allclose(train_loss, expect_loss, 0.001, 0.001)

View File

@ -237,7 +237,7 @@ def test_bert_thor_8p():
print("End training...")
assert mean_cost < 96
assert mean_loss < 8.125
assert mean_loss < 8.15
if __name__ == '__main__':

View File

@ -312,18 +312,22 @@ class TestSummary:
@pytest.mark.env_onecard
@security_off_wrap
def test_summary_collector_landscape(self):
"""Test summary collector with landscape."""
"""
Feature: Summary collector with landscape.
Description: Test summary collector with landscape.
Expectation: Landscape data collected with expected value.
"""
set_seed(1)
interval_1 = [1, 2, 3]
num_samples = 6
summary_dir = self._train_network(epoch=3, num_samples=num_samples,
collect_specified_data={'collect_landscape':
{'landscape_size': 4,
'unit': 'epoch',
'create_landscape': {'train': True,
'result': True},
'num_samples': num_samples,
'intervals': [interval_1]}})
{'landscape_size': 4,
'unit': 'epoch',
'create_landscape': {'train': True,
'result': True},
'num_samples': num_samples,
'intervals': [interval_1]}})
tag_list = self._list_summary_collect_landscape_tags(summary_dir)
expected_tags = {'epoch_group', 'model_params_file_map', 'step_per_epoch', 'unit', 'num_samples',
@ -332,23 +336,9 @@ class TestSummary:
device_id = int(os.getenv('DEVICE_ID')) if os.getenv('DEVICE_ID') else 0
summary_landscape = SummaryLandscape(summary_dir)
summary_landscape.gen_landscapes_with_multi_process(callback_fn, device_ids=[device_id])
expected_pca_value = np.array([2.2795451, 2.2795504, 2.2795559, 2.2795612, 2.2795450, 2.2795503, 2.2795557,
2.2795612, 2.2795449, 2.2795503, 2.2795557, 2.2795610, 2.2795449, 2.2795502,
2.2795555, 2.2795610])
expe_pca_value_asc = np.array([2.2795452, 2.2795503, 2.2795557, 2.2795612, 2.2795450, 2.2795503, 2.2795557,
2.2795612, 2.2795449, 2.2795502, 2.2795555, 2.2795609, 2.2795449, 2.2795502,
2.2795554, 2.2795610])
expected_random_value = np.array([2.2729474, 2.2777648, 2.2829195, 2.2884243, 2.2724223, 2.2771732, 2.2822458,
2.2875971, 2.2725493, 2.2771329, 2.2819973, 2.2875895, 2.2730918, 2.2774068,
2.2822349, 2.2881028])
expe_random_value_asc = np.array([2.2729466, 2.2777647, 2.2829201, 2.2884242, 2.2724224, 2.2771732, 2.2822458,
2.2875975, 2.2725484, 2.2771326, 2.2819972, 2.2875896, 2.2730910, 2.2774070,
2.2822352, 2.2881035])
tag_list_landscape = self._list_landscape_tags(summary_dir)
assert np.all(abs(expected_pca_value - tag_list_landscape[0]) < 1.e-6) or \
np.all(abs(expe_pca_value_asc - tag_list_landscape[0]) < 1.e-6)
assert np.all(abs(expected_random_value - tag_list_landscape[1]) < 1.e-6) or \
np.all(abs(expe_random_value_asc - tag_list_landscape[1]) < 1.e-6)
assert np.allclose(tag_list_landscape[0], 2.28, atol=0.03)
assert np.allclose(tag_list_landscape[1], 2.28, atol=0.03)
@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training

View File

@ -0,0 +1,172 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <random>
#include <vector>
#include <iostream>
#include "common/common_test.h"
#include "include/common/random.h"
namespace mindspore {
class TestRandom : public UT::Common {};
/// Feature: Philox random number generator.
/// Description: Test Philox random number generator.
/// Expectation: Random number generator works as expected.
TEST_F(TestRandom, test_philox_generator) {
const uint64_t seed = 1234;
auto rng = random::Philox(seed);
std::vector<uint32_t> numbers;
for (size_t i = 0; i < 20; ++i) {
numbers.push_back(rng());
}
// Discard.
rng = random::Philox(seed);
rng.discard(0);
EXPECT_EQ(rng(), numbers[0]);
rng = random::Philox(seed);
rng.discard(8);
EXPECT_EQ(rng(), numbers[8]);
rng = random::Philox(seed);
rng.discard(9);
EXPECT_EQ(rng(), numbers[9]);
rng = random::Philox(seed);
rng.discard(10);
EXPECT_EQ(rng(), numbers[10]);
rng = random::Philox(seed);
rng.discard(11);
EXPECT_EQ(rng(), numbers[11]);
rng = random::Philox(seed);
rng.discard(12);
EXPECT_EQ(rng(), numbers[12]);
rng = random::Philox(seed);
rng.discard(13);
EXPECT_EQ(rng(), numbers[13]);
// Discard after generating.
rng = random::Philox(seed);
rng();
rng.discard(8 - 1);
EXPECT_EQ(rng(), numbers[8]);
rng.discard(1);
EXPECT_EQ(rng(), numbers[10]);
rng = random::Philox(seed);
rng();
rng.discard(9 - 1);
EXPECT_EQ(rng(), numbers[9]);
rng.discard(2);
EXPECT_EQ(rng(), numbers[12]);
rng = random::Philox(seed);
rng();
rng.discard(10 - 1);
EXPECT_EQ(rng(), numbers[10]);
rng.discard(3);
EXPECT_EQ(rng(), numbers[14]);
rng = random::Philox(seed);
rng();
rng.discard(11 - 1);
EXPECT_EQ(rng(), numbers[11]);
rng.discard(4);
EXPECT_EQ(rng(), numbers[16]);
rng = random::Philox(seed);
rng();
rng.discard(12 - 1);
EXPECT_EQ(rng(), numbers[12]);
rng.discard(5);
EXPECT_EQ(rng(), numbers[18]);
rng = random::Philox(seed);
rng();
rng.discard(13 - 1);
EXPECT_EQ(rng(), numbers[13]);
}
/// Feature: Random distributions.
/// Description: Test random distributions.
/// Expectation: Distributions work as expected.
TEST_F(TestRandom, test_distributions) {
using Rng = random::Philox;
using Uniform = random::UniformDistribution<float>;
using Normal = random::NormalDistribution<float>;
using TruncNormal = random::TruncatedNormal<float>;
const uint64_t seed = 4321;
const size_t length = 10000;
const size_t half_len = length / 2;
std::vector<float> randoms1(length);
std::vector<float> randoms2(length);
random::GenerateRandoms<float, Rng, Uniform>(seed, 0, randoms1.data(), length, 0.0f, 1.0f);
random::GenerateRandoms<float, Rng, Uniform>(seed, 0, randoms2.data(), half_len, 0.0f, 1.0f);
random::GenerateRandoms<float, Rng, Uniform>(seed, half_len, randoms2.data() + half_len, half_len, 0.0f, 1.0f);
EXPECT_EQ(randoms1, randoms2);
random::GenerateRandoms<float, Rng, Normal>(seed, 0, randoms1.data(), length, 0.0f, 1.0f);
random::GenerateRandoms<float, Rng, Normal>(seed, 0, randoms2.data(), half_len, 0.0f, 1.0f);
random::GenerateRandoms<float, Rng, Normal>(seed, half_len, randoms2.data() + half_len, half_len, 0.0f, 1.0f);
EXPECT_EQ(randoms1, randoms2);
random::GenerateRandoms<float, Rng, TruncNormal>(seed, 0, randoms1.data(), length, -2.0f, 2.0f, 0.0f, 1.0f);
random::GenerateRandoms<float, Rng, TruncNormal>(seed, 0, randoms2.data(), half_len, -2.0f, 2.0f, 0.0f, 1.0f);
random::GenerateRandoms<float, Rng, TruncNormal>(seed, half_len, randoms2.data() + half_len, half_len, -2.0f, 2.0f,
0.0f, 1.0f);
EXPECT_EQ(randoms1, randoms2);
}
/// Feature: Parallel task size computation.
/// Description: Test parallel task size computation.
/// Expectation: Result parallel task size is correct.
TEST_F(TestRandom, test_parallel_task_size) {
auto result = random::ComputeTaskNumSize(1000, 10);
EXPECT_EQ(result.first, 1);
EXPECT_EQ(result.second, 1000);
result = random::ComputeTaskNumSize(1024, 10);
EXPECT_EQ(result.first, 1);
EXPECT_EQ(result.second, 1024);
result = random::ComputeTaskNumSize(1025, 10);
EXPECT_EQ(result.first, 10);
EXPECT_EQ(result.second, 104);
result = random::ComputeTaskNumSize(2020, 10);
EXPECT_EQ(result.first, 10);
EXPECT_EQ(result.second, 204);
result = random::ComputeTaskNumSize(2021, 10);
EXPECT_EQ(result.first, 10);
EXPECT_EQ(result.second, 204);
result = random::ComputeTaskNumSize(2040, 10);
EXPECT_EQ(result.first, 10);
EXPECT_EQ(result.second, 204);
result = random::ComputeTaskNumSize(2041, 10);
EXPECT_EQ(result.first, 10);
EXPECT_EQ(result.second, 208);
}
} // namespace mindspore

View File

@ -14,6 +14,7 @@
# ============================================================================
""" test_initializer """
import math
import unittest
from functools import reduce
import numpy as np
import pytest as py
@ -27,6 +28,7 @@ from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn import Conv2d
from mindspore.ops import operations as P
from mindspore._c_expression import _random_normal, _random_uniform, _truncated_normal
from ..ut_filter import non_graph_engine
@ -279,7 +281,12 @@ def test_init_variancescaling():
def test_conv2d_abnormal_kernel_negative():
kernel = np.random.randn(64, 3, 7, 7).astype(np.float32)
"""
Feature: Random initializers implemented in C++.
Description: Test random initializers implemented in C++.
Expectation: Data is initialized successfully.
"""
kernel = init.initializer(init.Normal(sigma=1.0), [64, 3, 7, 7], ms.float32).init_data()
with py.raises(ValueError):
ms.Model(
Conv2d(in_channels=3, out_channels=64, kernel_size=-7, stride=3,
@ -288,17 +295,27 @@ def test_conv2d_abnormal_kernel_negative():
@non_graph_engine
def test_conv2d_abnormal_kernel_normal():
kernel = np.random.randn(64, 3, 7, 7).astype(np.float32)
input_data = np.random.randn(32, 3, 224, 112).astype(np.float32)
"""
Feature: Random initializers implemented in C++.
Description: Test random initializers implemented in C++.
Expectation: Data is initialized successfully.
"""
kernel = init.initializer(init.Normal(sigma=1.0), [64, 3, 7, 7], ms.float32).init_data()
input_data = init.initializer(init.Normal(sigma=1.0), [32, 3, 224, 112], ms.float32).init_data()
context.set_context(mode=context.GRAPH_MODE)
model = ms.Model(
Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=3,
padding=0, weight_init=ms.Tensor(kernel)))
model.predict(ms.Tensor(input_data))
padding=0, weight_init=kernel))
model.predict(input_data)
@non_graph_engine
def test_conv2d_abnormal_kernel_truncated_normal():
"""
Feature: Random initializers implemented in C++.
Description: Test random initializers implemented in C++.
Expectation: Data is initialized successfully.
"""
input_data = init.initializer(init.TruncatedNormal(), [64, 3, 7, 7], ms.float32).init_data()
context.set_context(mode=context.GRAPH_MODE)
model = ms.Model(
@ -327,3 +344,92 @@ def test_weight_shape():
net = Net()
out = net(t)
print(out)
def test_init_with_same_numpy_seed():
"""
Feature: Random initializers that depend on numpy random seed.
Description: Test random initializers with same numpy random seed.
Expectation: Initialized data is same with same numpy random seed.
"""
shape = [12, 34]
np.random.seed(1234)
uniform1 = init.initializer('uniform', shape, ms.float32).init_data()
normal1 = init.initializer('normal', shape, ms.float32).init_data()
truncnorm1 = init.initializer('truncatednormal', shape, ms.float32).init_data()
np.random.seed(1234)
uniform2 = init.initializer('uniform', shape, ms.float32).init_data()
normal2 = init.initializer('normal', shape, ms.float32).init_data()
truncnorm2 = init.initializer('truncatednormal', shape, ms.float32).init_data()
assert np.allclose(uniform1.asnumpy(), uniform2.asnumpy())
assert np.allclose(normal1.asnumpy(), normal2.asnumpy())
assert np.allclose(truncnorm1.asnumpy(), truncnorm2.asnumpy())
# Reset numpy random seed after test.
np.random.seed()
def test_cpp_random_initializer():
"""
Feature: Random initializers implemented in C++.
Description: Test random initializers implemented in C++.
Expectation: Data is initialized successfully.
"""
ut = unittest.TestCase()
shape = (11, 512)
# Random normal.
data = np.ndarray(shape=shape, dtype=np.float32)
_random_normal(0, data, 0.0, 1.0)
ut.assertAlmostEqual(np.mean(data), 0.0, delta=0.1)
ut.assertAlmostEqual(np.std(data), 1.0, delta=0.1)
# Random uniform.
data = np.ndarray(shape=shape, dtype=np.float32)
_random_uniform(0, data, -1.0, 1.0)
ut.assertAlmostEqual(np.mean(data), 0.0, delta=0.1)
ut.assertGreater(np.std(data), 0.0)
# Truncated random.
data = np.ndarray(shape=shape, dtype=np.float32)
_truncated_normal(0, data, -2.0, 2.0, 0.0, 1.0)
ut.assertAlmostEqual(np.mean(data), 0.0, delta=0.1)
ut.assertGreaterEqual(np.min(data), -2.0)
ut.assertLessEqual(np.max(data), 2.0)
# Same seeds, same results.
data1 = np.ndarray(shape=shape, dtype=np.float32)
_random_normal(12345678, data1, 0.0, 1.0)
data2 = np.ndarray(shape=shape, dtype=np.float32)
_random_normal(12345678, data2, 0.0, 1.0)
assert np.allclose(data1, data2)
# Different seeds, different results.
data3 = np.ndarray(shape=shape, dtype=np.float32)
_random_normal(12345679, data3, 0.0, 1.0)
assert not np.allclose(data1, data3)
# Check distributions by K-S test.
np.random.seed(42)
seed = np.random.randint(low=1, high=(1 << 63))
count = 10000
data = np.ndarray(shape=(count), dtype=np.float32)
_random_uniform(seed, data, 0.0, 1.0)
data2 = np.random.uniform(0.0, 1.0, size=count)
_, p = stats.kstest(data, data2, N=count)
assert p > 0.05
_random_normal(seed, data, 0.0, 1.0)
data2 = np.random.normal(0.0, 1.0, size=count)
_, p = stats.kstest(data, data2, N=count)
assert p > 0.05
_truncated_normal(seed, data, -2, 2, 0.0, 1.0)
data2 = stats.truncnorm.rvs(-2, 2, loc=0.0, scale=1.0, size=count, random_state=None)
_, p = stats.kstest(data, data2, N=count)
assert p > 0.05
# Reset numpy random seed after test.
np.random.seed()