forked from mindspore-Ecosystem/mindspore
Optimize parameter random initializer
parent b456ec7b87
commit d7ca9b9942
@@ -196,6 +196,9 @@ if(CMAKE_SYSTEM_NAME MATCHES "Windows")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF")
endif()

# Set compile flags to ensure float compute consistency.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math")

if(ENABLE_MPI)
    add_compile_definitions(ENABLE_MPI)
endif()
@@ -20,33 +20,12 @@
#include "utils/log_adapter.h"
#include "utils/convert_utils_base.h"
#include "utils/ms_exception.h"
#include "utils/ms_context.h"

namespace mindspore {
namespace common {
constexpr size_t kDeviceNum = 8;
constexpr size_t kMaxThreadNum = 23;
constexpr size_t kYieldThreshold = 1000;

ThreadPool::ThreadPool() {
  size_t process_core_num = std::thread::hardware_concurrency() - 1;
  if (process_core_num < 1) {
    process_core_num = 1;
  }
  auto ms_context = MsContext::GetInstance();
  auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
  if (device_target == kAscendDevice || device_target == kGPUDevice) {
    max_thread_num_ = process_core_num / kDeviceNum;
  } else {
    max_thread_num_ = process_core_num;
  }
  if (max_thread_num_ < 1) {
    max_thread_num_ = 1;
  }
  if (max_thread_num_ > kMaxThreadNum) {
    max_thread_num_ = kMaxThreadNum;
  }
}
ThreadPool::ThreadPool() : max_thread_num_(std::thread::hardware_concurrency()) {}

void ThreadPool::SyncRunLoop(const std::shared_ptr<ThreadContext> &context) {
  if (context == nullptr) {
@@ -0,0 +1,546 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_INCLUDE_COMMON_RANDOM_H_
#define MINDSPORE_CCSRC_INCLUDE_COMMON_RANDOM_H_

#include <cstdint>
#include <cmath>
#include <array>
#include <limits>
#include <random>
#include <vector>
#include <optional>
#include <algorithm>
#include <utility>
#include "include/common/thread_pool.h"
#include "include/common/utils/utils.h"
#include "utils/log_adapter.h"

namespace mindspore::random {
//
// Generate random numbers into a buffer.
//
template <typename T, typename Generator, typename Distribution, typename... Args>
void GenerateRandoms(std::uint64_t seed, size_t skip, T *buf, size_t size, Args... args) {
  MS_EXCEPTION_IF_NULL(buf);
  Generator gen{seed};
  gen.discard(skip);
  Distribution dis{args...};
  for (size_t i = 0; i < size; ++i) {
    buf[i] = T(dis(gen));
  }
}

// Compute the number of tasks and the batch size of each task.
static inline std::pair<size_t, size_t> ComputeTaskNumSize(size_t total_size, size_t thread_num) {
  constexpr size_t min_parallel_size = 1024;
  if (thread_num == 0 || total_size <= min_parallel_size) {
    return {1, total_size};
  }
  constexpr size_t block_size = 4;
  const size_t block_count = (total_size + block_size - 1) / block_size;
  if (block_count <= thread_num) {
    return {block_count, block_size};
  }
  const size_t blocks_per_thread = (block_count + thread_num - 1) / thread_num;
  const size_t task_num = (block_count + blocks_per_thread - 1) / blocks_per_thread;
  const size_t batch_size = blocks_per_thread * block_size;
  return {task_num, batch_size};
}
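For orientation, a standalone sketch of the split this function computes, with hypothetical sizes (not part of the patch): 10000 elements on 8 threads give 2500 four-element blocks, 313 blocks per thread, hence 8 tasks of batch size 1252, the last task taking the 1236-element remainder.

// Standalone sketch (hypothetical numbers) of the split computed above.
#include <cstddef>
#include <iostream>

int main() {
  const size_t total_size = 10000, thread_num = 8, block_size = 4;
  const size_t block_count = (total_size + block_size - 1) / block_size;             // 2500
  const size_t blocks_per_thread = (block_count + thread_num - 1) / thread_num;      // 313
  const size_t task_num = (block_count + blocks_per_thread - 1) / blocks_per_thread; // 8
  const size_t batch_size = blocks_per_thread * block_size;                          // 1252
  std::cout << task_num << " tasks of " << batch_size << " elements; last task handles "
            << total_size - (task_num - 1) * batch_size << ".\n";                    // 1236
}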
//
// Generate random numbers into a buffer in parallel.
//
template <typename T, typename Generator, typename Distribution, typename... Args>
void GenerateRandomsParallel(std::uint64_t input_seed, T *buf, size_t buf_size, Args... args) {
  MS_EXCEPTION_IF_NULL(buf);

  // Calculate number of tasks and batch size.
  auto &thread_pool = common::ThreadPool::GetInstance();
  auto [task_num, batch_size] = ComputeTaskNumSize(buf_size, thread_pool.GetSyncRunThreadNum());

  // Generate a random seed if required.
  std::uint64_t seed = input_seed;
  if (seed == 0) {
    std::random_device rd;
    seed = rd();
  }

  if (task_num == 1) {
    // Use a single thread for small data sizes.
    GenerateRandoms<T, Generator, Distribution>(seed, 0, buf, buf_size, args...);
    return;
  }

  // Prepare parallel tasks.
  std::vector<common::Task> tasks;
  tasks.reserve(task_num);
  T *task_buf = buf;
  size_t skip = 0;
  for (size_t i = 0; i < task_num; ++i) {
    const auto task_size = ((i == task_num - 1) ? (buf_size - (task_num - 1) * batch_size) : batch_size);
    (void)tasks.emplace_back([seed, skip, task_buf, task_size, args...]() {
      GenerateRandoms<T, Generator, Distribution>(seed, skip, task_buf, task_size, args...);
      return common::SUCCESS;
    });
    skip += task_size;
    task_buf += task_size;
  }
  // Execute the tasks in parallel through the thread pool.
  thread_pool.SyncRun(tasks);
}
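The scheme works because each task discards exactly the number of variates produced by the tasks before it, so the parallel output matches a serial run with the same seed; it also relies on the distributions defined below consuming a fixed number of generator draws per variate, something std:: distributions do not guarantee. A minimal standalone sketch of the same skip-ahead idea, using std::mt19937_64 and raw std::thread instead of the MindSpore thread pool (names and sizes hypothetical):

// Standalone sketch: identical engines plus discard() reproduce a serial fill.
#include <algorithm>
#include <cstdint>
#include <random>
#include <thread>
#include <vector>

int main() {
  const std::uint64_t seed = 42;
  const std::size_t n = 100000, workers = 4, chunk = (n + workers - 1) / workers;
  std::vector<double> out(n);
  std::vector<std::thread> threads;
  for (std::size_t w = 0; w < workers; ++w) {
    threads.emplace_back([&out, seed, w, chunk, n]() {
      std::mt19937_64 gen{seed};  // every worker starts from the same seed,
      gen.discard(w * chunk);     // then skips the slices of earlier workers
      const std::size_t end = std::min(n, (w + 1) * chunk);
      for (std::size_t i = w * chunk; i < end; ++i) {
        // Map one raw draw to [0, 1) so each element costs exactly one call.
        out[i] = (gen() >> 11) * (1.0 / 9007199254740992.0);  // divide by 2^53
      }
    });
  }
  for (auto &t : threads) t.join();  // out[] now matches a single-threaded fill
}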
//
// Philox is a random number generator that is suitable for parallel random number generation.
//
class Philox {
 public:
  explicit Philox(uint64_t seed)
      : key_({static_cast<uint32_t>(seed), static_cast<uint32_t>(seed >> kShift32)}),
        counter_({0, 0, static_cast<uint32_t>(seed), static_cast<uint32_t>(seed >> kShift32)}) {}

  Philox(uint64_t seed, uint64_t seed2)
      : key_({static_cast<uint32_t>(seed), static_cast<uint32_t>(seed >> kShift32)}),
        counter_({0, 0, static_cast<uint32_t>(seed2), static_cast<uint32_t>(seed2 >> kShift32)}) {}

  ~Philox() = default;

  uint32_t operator()() {
    if (index_ == kCounterNum) {
      results_ = next();
      index_ = 0;
    }
    return results_[index_++];
  }

  void discard(uint64_t step) {
    if (index_ == kCounterNum) {
      const auto count = (step / kCounterNum);
      skip(count);
      const auto remain = (step % kCounterNum);
      if (remain > 0) {
        results_ = next();
        index_ = remain;
      }
    } else {
      const auto pos = index_ + step;
      if (pos <= kCounterNum) {
        index_ = pos;
      } else {
        const auto count = (pos - kCounterNum) / kCounterNum;
        skip(count);
        const auto remain = (pos % kCounterNum);
        if (remain > 0) {
          results_ = next();
          index_ = remain;
        } else {
          index_ = kCounterNum;
        }
      }
    }
  }

  static constexpr uint32_t min() { return 0; }
  static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); }

 private:
  static constexpr int kShift32 = 32;
  static constexpr size_t kCounterNum = 4;
  static constexpr size_t kKeyNum = 2;
  static constexpr size_t kIndex0 = 0;
  static constexpr size_t kIndex1 = 1;
  static constexpr size_t kIndex2 = 2;
  static constexpr size_t kIndex3 = 3;
  static constexpr uint32_t kMagic0 = 0xD2511F53;
  static constexpr uint32_t kMagic1 = 0xCD9E8D57;
  static constexpr uint32_t kKeyStep0 = 0x9E3779B9;
  static constexpr uint32_t kKeyStep1 = 0xBB67AE85;

  using Counter = std::array<uint32_t, kCounterNum>;
  using Key = std::array<uint32_t, kKeyNum>;

  Key key_;
  Counter counter_;
  Counter results_;
  size_t index_ = kCounterNum;

  static void compute(uint32_t *counter, const uint32_t *key) {
    const uint64_t t0 = static_cast<uint64_t>(kMagic0) * counter[kIndex0];
    const uint32_t l0 = static_cast<uint32_t>(t0);
    const uint32_t h0 = static_cast<uint32_t>(t0 >> kShift32);
    const uint64_t t1 = static_cast<uint64_t>(kMagic1) * counter[kIndex2];
    const uint32_t l1 = static_cast<uint32_t>(t1);
    const uint32_t h1 = static_cast<uint32_t>(t1 >> kShift32);
    counter[kIndex0] = (h1 ^ counter[kIndex1] ^ key[kIndex0]);
    counter[kIndex1] = l1;
    counter[kIndex2] = (h0 ^ counter[kIndex3] ^ key[kIndex1]);
    counter[kIndex3] = l0;
  }

  static void raise_key(uint32_t *key) {
    key[kIndex0] += kKeyStep0;
    key[kIndex1] += kKeyStep1;
  }

  // Generate the next 4 random numbers and advance the counter.
  Counter next() {
    Counter result = counter_;
    Key key = key_;
    // For performance reasons, we do not use a loop here,
    // but manually call compute() 10 times.
    compute(result.data(), key.data());
    raise_key(key.data());
    compute(result.data(), key.data());
    raise_key(key.data());
    compute(result.data(), key.data());
    raise_key(key.data());
    compute(result.data(), key.data());
    raise_key(key.data());
    compute(result.data(), key.data());
    raise_key(key.data());
    compute(result.data(), key.data());
    raise_key(key.data());
    compute(result.data(), key.data());
    raise_key(key.data());
    compute(result.data(), key.data());
    raise_key(key.data());
    compute(result.data(), key.data());
    raise_key(key.data());
    compute(result.data(), key.data());
    skip_one();
    return result;
  }

  // Advance the counter by one step.
  void skip_one() {
    if ((++counter_[kIndex0] == 0) && (++counter_[kIndex1] == 0) && (++counter_[kIndex2] == 0)) {
      ++counter_[kIndex3];
    }
  }

  // Skip the given number of 4-uint32 sample blocks.
  void skip(uint64_t count) {
    const uint32_t lo = static_cast<uint32_t>(count);
    uint32_t hi = static_cast<uint32_t>(count >> kShift32);
    counter_[kIndex0] += lo;
    if (counter_[kIndex0] < lo) {
      ++hi;
    }
    counter_[kIndex1] += hi;
    if (counter_[kIndex1] < hi && (++counter_[kIndex2] == 0)) {
      ++counter_[kIndex3];
    }
  }
};
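A quick illustration of the counter-based property the unit test at the bottom of this commit exercises: discard(n) lands the stream exactly where n sequential draws would, but in O(1) via the 128-bit counter. A sketch, assuming the new header from this commit is on the include path:

// Sketch: counter-based skip-ahead makes two generators agree.
#include <cassert>
#include "include/common/random.h"

int main() {
  mindspore::random::Philox a{1234};
  mindspore::random::Philox b{1234};
  for (int i = 0; i < 1000; ++i) {
    (void)a();       // draw 1000 numbers one by one
  }
  b.discard(1000);   // jump in O(1) via the counter
  assert(a() == b());  // both streams are now at position 1000
}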
//
// Uniform distribution.
//
template <typename T>
class UniformDistribution {
 public:
  UniformDistribution(T a, T b) : a_(a), b_(b) {}
  ~UniformDistribution() = default;

  template <typename Generator>
  T operator()(Generator &&g) {
    const auto min_num = g.min();
    const auto max_num = g.max();
    const long double range = static_cast<long double>(max_num) - static_cast<long double>(min_num) + 1.0L;
    T s = T(g() - min_num) / range;
    if (__builtin_expect(s >= T(1), 0)) {
      s = std::nextafter(T(1), T(0));
    }
    return (b_ - a_) * s + a_;
  }

 private:
  T a_;
  T b_;
};
//
// Normal distribution.
//
template <typename T>
class NormalDistribution {
 public:
  NormalDistribution(T mean, T sigma) : mean_(mean), sigma_(sigma) {}
  ~NormalDistribution() = default;

  template <typename Generator>
  T operator()(Generator &&g) {
    if (has_next_) {
      has_next_ = false;
      return next_;
    }
    // Box-Muller transform algorithm:
    // z1 = sqrt(-2 * ln(u1)) * cos(2 * pi * u2)
    // z2 = sqrt(-2 * ln(u1)) * sin(2 * pi * u2)
    constexpr T pi = 3.1415926f;
    constexpr T threshold = 1.0e-7f;
    const T u1 = std::max(to_float(g()), threshold);
    const T u2 = std::max(to_float(g()), threshold);
    const T x = std::sqrt(-2.0f * std::log(u1)) * sigma_;
    const T y = 2.0f * pi * u2;
    next_ = mean_ + (x * std::sin(y));
    has_next_ = true;
    return mean_ + (x * std::cos(y));
  }

 private:
  T mean_;
  T sigma_;
  T next_ = 0;
  bool has_next_ = false;

  static T to_float(uint32_t input) {
    constexpr uint32_t mask = 0x7fffffu;
    constexpr uint32_t exp = (127 << 23);
    union {
      uint32_t int_val;
      float float_val;
    } val;
    val.int_val = (input & mask) | exp;
    return T(val.float_val - 1.0f);
  }
};
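The to_float() bit trick is worth spelling out: forcing the IEEE-754 exponent field to 127 while keeping 23 random mantissa bits yields a float uniformly distributed in [1, 2), and subtracting 1 maps it to [0, 1). A standalone check of the same mapping, written with memcpy as the portable spelling of the union read above (values hypothetical):

// Map 23 random mantissa bits to a float in [0, 1).
#include <cstdint>
#include <cstring>
#include <iostream>

float to_float(uint32_t input) {
  const uint32_t bits = (input & 0x7fffffu) | (127u << 23);  // exponent 0 -> value in [1, 2)
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f - 1.0f;
}

int main() {
  std::cout << to_float(0x000000u) << "\n";  // 0 (all-zero mantissa gives 1.0f - 1)
  std::cout << to_float(0x7fffffu) << "\n";  // (2 - 2^-23) - 1, about 0.99999988
}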
//
// Truncated normal distribution.
//
template <typename T>
class TruncatedNormal {
 public:
  TruncatedNormal(T a, T b, T mean, T sigma) : lower_(a), upper_(b), mean_(mean), sigma_(sigma) {
    if (sigma == 0) {
      MS_LOG(EXCEPTION) << "TruncatedNormal: 'sigma' can not be zero.";
    } else {
      alpha_ = (a - mean) / sigma;
      beta_ = (b - mean) / sigma;
    }
  }

  ~TruncatedNormal() = default;

  template <typename Generator>
  T operator()(Generator &&g) {
    // Inverse CDF (Cumulative Distribution Function) method.
    const T u = std_uniform_(g);
    const T cdf_a = cdf(alpha_);
    const T cdf_b = cdf(beta_);
    const T p = cdf_a + u * (cdf_b - cdf_a);
    const T x = quantile(p);
    return mean_ + x * sigma_;
  }

 private:
  UniformDistribution<T> std_uniform_{0.0f, 1.0f};
  T lower_;
  T upper_;
  T mean_;
  T sigma_;
  T alpha_;
  T beta_;

  static constexpr T kRootTwo = 1.4142135f;

  static T cdf(T x) {
    const T diff = x / kRootTwo;
    return std::erfc(-diff) / 2.0f;
  }

  static T quantile(T p) {
    auto z = 2.0f * p;
    const T x = erfc_inv(z);
    return -x * kRootTwo;
  }

  static T erfc_inv(T z) {
    // Keep z in range (0, 2).
    if (__builtin_expect((z <= 0), 0)) {
      z = std::nextafterf(0.0f, 2.0f);
    } else if (__builtin_expect((z >= 2.0f), 0)) {
      z = std::nextafterf(2.0f, 0.0f);
    }
    T p, q, s;
    if (z > 1.0f) {
      q = 2.0f - z;
      p = 1.0f - q;
      s = -1;
    } else {
      p = 1.0f - z;
      q = z;
      s = 1;
    }
    return s * erf_inv_imp(p, q);
  }

  // The algorithm and polynomial constants are borrowed from Boost.
  static T erf_inv_imp(T p, T q) {
    if (p <= 0.5f) {
      constexpr float Y = 0.0891314744949340820313f;
      constexpr T P[] = {T(-0.000508781949658280665617), T(-0.00836874819741736770379), T(0.0334806625409744615033),
                         T(-0.0126926147662974029034), T(-0.0365637971411762664006), T(0.0219878681111168899165),
                         T(0.00822687874676915743155), T(-0.00538772965071242932965)};
      constexpr T Q[] = {T(1.0),
                         T(-0.970005043303290640362),
                         T(-1.56574558234175846809),
                         T(1.56221558398423026363),
                         T(0.662328840472002992063),
                         T(-0.71228902341542847553),
                         T(-0.0527396382340099713954),
                         T(0.0795283687341571680018),
                         T(-0.00233393759374190016776),
                         T(0.000886216390456424707504)};
      T g = p * (p + 10.0f);
      T r = eval_polynomial(P, p) / eval_polynomial(Q, p);
      return g * Y + g * r;
    }
    if (q >= 0.25f) {
      constexpr float Y = 2.249481201171875f;
      constexpr T P[] = {T(-0.202433508355938759655), T(0.105264680699391713268), T(8.37050328343119927838),
                         T(17.6447298408374015486), T(-18.8510648058714251895), T(-44.6382324441786960818),
                         T(17.445385985570866523), T(21.1294655448340526258), T(-3.67192254707729348546)};
      constexpr T Q[] = {T(1.0),
                         T(6.24264124854247537712),
                         T(3.9713437953343869095),
                         T(-28.6608180499800029974),
                         T(-20.1432634680485188801),
                         T(48.5609213108739935468),
                         T(10.8268667355460159008),
                         T(-22.6436933413139721736),
                         T(1.72114765761200282724)};
      T g = std::sqrt(-2.0f * std::log(q));
      T xs = q - 0.25f;
      T r = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
      return g / (Y + r);
    }
    // Avoid static check warning for 'function body too long'.
    return erf_inv_imp2(p, q);
  }

  static T erf_inv_imp2(T p, T q) {
    T x = std::sqrt(-std::log(q));
    if (x < 3.0f) {
      constexpr float Y = 0.807220458984375f;
      constexpr T P[] = {T(-0.131102781679951906451), T(-0.163794047193317060787), T(0.117030156341995252019),
                         T(0.387079738972604337464), T(0.337785538912035898924), T(0.142869534408157156766),
                         T(0.0290157910005329060432), T(0.00214558995388805277169), T(-0.679465575181126350155e-6),
                         T(0.285225331782217055858e-7), T(-0.681149956853776992068e-9)};
      constexpr T Q[] = {T(1.0),
                         T(3.46625407242567245975),
                         T(5.38168345707006855425),
                         T(4.77846592945843778382),
                         T(2.59301921623620271374),
                         T(0.848854343457902036425),
                         T(0.152264338295331783612),
                         T(0.01105924229346489121)};
      T xs = x - 1.125f;
      T R = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
      return Y * x + R * x;
    }
    if (x < 6.0f) {
      constexpr float Y = 0.93995571136474609375f;
      constexpr T P[] = {T(-0.0350353787183177984712), T(-0.00222426529213447927281), T(0.0185573306514231072324),
                         T(0.00950804701325919603619), T(0.00187123492819559223345), T(0.000157544617424960554631),
                         T(0.460469890584317994083e-5), T(-0.230404776911882601748e-9), T(0.266339227425782031962e-11)};
      constexpr T Q[] = {T(1.0),
                         T(1.3653349817554063097),
                         T(0.762059164553623404043),
                         T(0.220091105764131249824),
                         T(0.0341589143670947727934),
                         T(0.00263861676657015992959),
                         T(0.764675292302794483503e-4)};
      T xs = x - 3.0f;
      T R = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
      return Y * x + R * x;
    }
    if (x < 18.0f) {
      constexpr float Y = 0.98362827301025390625f;
      constexpr T P[] = {T(-0.0167431005076633737133), T(-0.00112951438745580278863), T(0.00105628862152492910091),
                         T(0.000209386317487588078668), T(0.149624783758342370182e-4), T(0.449696789927706453732e-6),
                         T(0.462596163522878599135e-8), T(-0.281128735628831791805e-13), T(0.99055709973310326855e-16)};
      constexpr T Q[] = {T(1.0),
                         T(0.591429344886417493481),
                         T(0.138151865749083321638),
                         T(0.0160746087093676504695),
                         T(0.000964011807005165528527),
                         T(0.275335474764726041141e-4),
                         T(0.282243172016108031869e-6)};
      T xs = x - 6.0f;
      T R = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
      return Y * x + R * x;
    }
    if (x < 44.0f) {
      constexpr float Y = 0.99714565277099609375f;
      constexpr T P[] = {T(-0.0024978212791898131227), T(-0.779190719229053954292e-5), T(0.254723037413027451751e-4),
                         T(0.162397777342510920873e-5), T(0.396341011304801168516e-7), T(0.411632831190944208473e-9),
                         T(0.145596286718675035587e-11), T(-0.116765012397184275695e-17)};
      constexpr T Q[] = {T(1.0),
                         T(0.207123112214422517181),
                         T(0.0169410838120975906478),
                         T(0.000690538265622684595676),
                         T(0.145007359818232637924e-4),
                         T(0.144437756628144157666e-6),
                         T(0.509761276599778486139e-9)};
      T xs = x - 18.0f;
      T R = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
      return Y * x + R * x;
    }
    constexpr float Y = 0.99941349029541015625f;
    constexpr T P[] = {T(-0.000539042911019078575891), T(-0.28398759004727721098e-6), T(0.899465114892291446442e-6),
                       T(0.229345859265920864296e-7), T(0.225561444863500149219e-9), T(0.947846627503022684216e-12),
                       T(0.135880130108924861008e-14), T(-0.348890393399948882918e-21)};
    constexpr T Q[] = {T(1.0),
                       T(0.0845746234001899436914),
                       T(0.00282092984726264681981),
                       T(0.468292921940894236786e-4),
                       T(0.399968812193862100054e-6),
                       T(0.161809290887904476097e-8),
                       T(0.231558608310259605225e-11)};
    T xs = x - 44.0f;
    T R = eval_polynomial(P, xs) / eval_polynomial(Q, xs);
    return Y * x + R * x;
  }

  // We use template functions to unroll polynomial evaluations
  // at compile time to improve performance.
  template <size_t N>
  static T eval_polynomial(const T (&arr)[N], T x) {
    T sum = arr[N - 1];
    if constexpr (N > 1) {
      eval_polynomial_loop<N - 2>(arr, x, &sum);
    }
    return sum;
  }

  template <size_t Index>
  static void eval_polynomial_loop(const T *arr, T x, T *sum) {
    *sum *= x;
    *sum += arr[Index];
    if constexpr (Index > 0) {
      eval_polynomial_loop<Index - 1>(arr, x, sum);
    }
  }
};
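For reference, the standard identities the class above implements (not new to this patch). With \alpha = (a - \mu)/\sigma, \beta = (b - \mu)/\sigma, and \Phi the standard normal CDF, inverse-CDF sampling of the truncated normal is

    x = \Phi^{-1}\bigl(\Phi(\alpha) + u\,(\Phi(\beta) - \Phi(\alpha))\bigr), \quad u \sim \mathrm{Uniform}(0, 1), \quad X = \mu + \sigma x,

where \Phi(t) = \tfrac{1}{2}\,\mathrm{erfc}(-t/\sqrt{2}) and \Phi^{-1}(p) = -\sqrt{2}\,\mathrm{erfc}^{-1}(2p); cdf() and quantile() compute exactly these. eval_polynomial() evaluates \sum_{i=0}^{N-1} a_i x^i by Horner's rule, a_0 + x\,(a_1 + x\,(a_2 + \cdots)), with the loop unrolled at compile time through the if constexpr recursion.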
}  // namespace mindspore::random

#endif  // MINDSPORE_CCSRC_INCLUDE_COMMON_RANDOM_H_
@@ -1,89 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_PHILOX_GENERATOR_H_
#define MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_PHILOX_GENERATOR_H_

#include <securec.h>
#include <math.h>
#include <array>
#include "utils/log_adapter.h"
#include "utils/convert_utils_base.h"
#include "include/common/visible.h"

namespace mindspore {
static constexpr int kResultNum = 4;
class COMMON_EXPORT PhiloxGenerator {
 public:
  explicit PhiloxGenerator(uint64_t seed_) {
    key_var_[0] = static_cast<uint32_t>(seed_);
    key_var_[1] = static_cast<uint32_t>(seed_ >> 32);
    counter_[0] = 0;
    counter_[1] = 0;
    counter_[2] = static_cast<uint32_t>(seed_);
    counter_[3] = static_cast<uint32_t>(seed_ >> 32);
  }

  explicit PhiloxGenerator(uint64_t seed_, uint64_t seed2_) {
    key_var_[0] = static_cast<uint32_t>(seed_);
    key_var_[1] = static_cast<uint32_t>(seed_ >> 32);
    counter_[0] = 0;
    counter_[1] = 0;
    counter_[2] = static_cast<uint32_t>(seed2_);
    counter_[3] = static_cast<uint32_t>(seed2_ >> 32);
  }

  ~PhiloxGenerator() = default;

  void Jump();

  void JumpStep(uint64_t step);

  std::array<uint32_t, kResultNum> Compute(const std::array<uint32_t, kResultNum> &counter,
                                           const std::array<uint32_t, 2> &key_var) const;

  std::array<uint32_t, kResultNum> operator()();

 private:
  std::array<uint32_t, kResultNum> counter_;
  std::array<uint32_t, 2> key_var_;
  static constexpr std::array<uint32_t, kResultNum> keyConstant = {0xD2511F53, 0x9E3779B9, 0xCD9E8D57, 0xBB67AE85};
};

template <class T>
bool FillRandoms(PhiloxGenerator generator, float *output, int64_t vet_size, int64_t thread_Id) {
  T distribution;
  errno_t mem_ret;
  generator.JumpStep(LongToSize((vet_size * thread_Id + kResultNum - 1) / kResultNum));
  for (int32_t i = 0; i < vet_size; i += kResultNum) {
    auto outputResult = distribution(&generator);
    size_t max_length = 0;
    if (vet_size - i >= kResultNum) {
      max_length = kResultNum * sizeof(float);
      mem_ret = memcpy_s(&output[i], max_length, &outputResult[0], max_length);
    } else {
      max_length = LongToSize(vet_size - i) * sizeof(float);
      mem_ret = memcpy_s(&output[i], max_length, &outputResult[0], max_length);
    }
    if (mem_ret != EOK) {
      MS_LOG(ERROR) << "FillRandoms memcpy is failed";
      return false;
    }
  }
  return true;
}
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_PHILOX_GENERATOR_H_
@@ -1,5 +1,5 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -13,102 +13,36 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ps/random_normal/random_normal.h"
#include <iostream>
#include <thread>
#include <memory>
#include <algorithm>
#include "utils/convert_utils_base.h"
#include "pybind_api/random_normal/random_cpu_kernel.h"

namespace mindspore {
namespace ps {
static const uint32_t kMaxThreadNum = 16;
static const uint32_t kCPUCoreNum = std::thread::hardware_concurrency();

namespace {
// Update standard deviation to parameter: stddev
void UpdateStandardDeviation(float stddev, size_t total_count, float *output) {
  MS_EXCEPTION_IF_NULL(output);

  auto update_stddev_task = [](float stddev, size_t task_len, float *data) {
    if (data == nullptr) {
      MS_LOG(ERROR) << "The pointer data is nullptr";
      return;
    }

    for (size_t i = 0; i < task_len; i++) {
      data[i] *= stddev;
    }
  };

  uint32_t thread_num = std::max(kMaxThreadNum, kCPUCoreNum);
  if (total_count <= thread_num) {
    thread_num = 1;
  }

  std::vector<std::thread> threads(thread_num);
  size_t task_offset = 0;

  for (size_t i = 0; i < thread_num; ++i) {
    size_t task_len = total_count / thread_num + (i < (total_count % thread_num) ? 1 : 0);
    threads[i] = std::thread(update_stddev_task, stddev, task_len, output + task_offset);
    task_offset += task_len;
  }

  for (size_t i = 0; i < thread_num; i++) {
    threads[i].join();
  }
}
}  // namespace
#include <random>
#include "include/common/random.h"
#include "utils/log_adapter.h"

namespace mindspore::ps {
bool InitRandomNormal(float mean, float stddev, std::vector<size_t> out_shape, size_t global_seed, size_t op_seed,
                      float *output_data) {
  MS_ERROR_IF_NULL_W_RET_VAL(output_data, false);
  if (out_shape.size() == 0) {
    std::cout << "output data shape is error" << std::endl;
  }
  int64_t total_count = 1;
  for (uint32_t i = 0; i < out_shape.size(); i++) {
    total_count *= SizeToLong(out_shape[i]);
  }

  uint32_t thread_num = std::max(kMaxThreadNum, kCPUCoreNum);
  if (total_count <= thread_num) {
    thread_num = 1;
  }
  float *start_ptr = output_data;
  if (start_ptr == nullptr) {
    std::cout << "start_ptr is nullptr" << std::endl;
  // Check output data pointer.
  if (output_data == nullptr) {
    MS_LOG(ERROR) << "output data is null.";
    return false;
  }
  // The value of thread_num is >= 1.
  int64_t batchSize = total_count / thread_num;
  std::vector<std::thread> threads(thread_num);
  int64_t seed = SizeToLong(global_seed);
  int64_t seed2 = SizeToLong(op_seed);
  seed = (seed == 0 && seed2 == 0) ? clock() : seed;
  PhiloxGenerator generator = PhiloxGenerator(seed, seed2);
  if (thread_num != 1) {
    float *offset_ptr = nullptr;
    for (uint32_t i = 0; i < thread_num - 1; i++) {
      offset_ptr = start_ptr + batchSize * i;
      threads[i] =
        std::thread(FillRandoms<NormalDistribution<PhiloxGenerator, float>>, generator, offset_ptr, batchSize, i);
    }
    offset_ptr = start_ptr + batchSize * (thread_num - 1);
    threads[thread_num - 1] = std::thread(FillRandoms<NormalDistribution<PhiloxGenerator, float>>, generator,
                                          offset_ptr, total_count - (thread_num - 1) * batchSize, thread_num - 1);
  } else {
    threads[0] =
      std::thread(FillRandoms<NormalDistribution<PhiloxGenerator, float>>, generator, start_ptr, total_count, 0);
  // Check shape.
  if (out_shape.size() == 0) {
    MS_LOG(ERROR) << "output data shape is empty.";
    return false;
  }
  for (uint32_t i = 0; i < thread_num; i++) {
    threads[i].join();
  // Calculate data size from shape.
  size_t data_size = 1;
  for (size_t i = 0; i < out_shape.size(); ++i) {
    data_size *= out_shape[i];
  }

  UpdateStandardDeviation(stddev, total_count, output_data);
  // Generate randoms in parallel.
  constexpr int seed_shift = 32;
  const uint64_t seed = (global_seed << seed_shift) + op_seed;
  using Generator = random::Philox;
  using Distribution = random::NormalDistribution<float>;
  random::GenerateRandomsParallel<float, Generator, Distribution>(seed, output_data, data_size, mean, stddev);
  return true;
}
}  // namespace ps
}  // namespace mindspore
}  // namespace mindspore::ps
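The new code folds the two parameter-server seeds into the single 64-bit Philox seed by shifting; note that when both seeds are zero the packed value is zero, and GenerateRandomsParallel then falls back to std::random_device. A two-line illustration with hypothetical values:

// Sketch of the seed packing above: global seed in the high 32 bits, op seed in the low 32 bits.
#include <cstdint>
#include <cstdio>

int main() {
  const std::uint64_t global_seed = 7, op_seed = 9;
  const std::uint64_t seed = (global_seed << 32) + op_seed;
  std::printf("%#llx\n", static_cast<unsigned long long>(seed));  // prints 0x700000009
}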
@@ -0,0 +1,67 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cstdint>
#include <random>
#include "include/common/random.h"
#include "include/common/pybind_api/api_register.h"
#include "utils/log_adapter.h"

namespace mindspore::initializer {
//
// Generate float random numbers into a Python buffer.
//
template <typename Generator, typename Distribution, typename... Args>
void GenerateFloatRandoms(std::uint64_t seed, const py::buffer &py_buf, Args... args) {
  // Check buffer info.
  py::buffer_info info = py_buf.request();
  if (info.format != py::format_descriptor<float>::format()) {
    MS_LOG(EXCEPTION) << "Unsupported data type '" << info.format << "'.";
  }

  // Get buffer pointer and size.
  const size_t buf_size = info.size;
  float *buf = reinterpret_cast<float *>(info.ptr);
  MS_EXCEPTION_IF_NULL(buf);

  // Generate randoms into the buffer in parallel.
  random::GenerateRandomsParallel<float, Generator, Distribution>(seed, buf, buf_size, args...);
}

void RandomUniform(std::uint64_t seed, const py::buffer &py_buf, float a, float b) {
  using Generator = random::Philox;
  using Distribution = random::UniformDistribution<double>;
  GenerateFloatRandoms<Generator, Distribution>(seed, py_buf, a, b);
}

void RandomNormal(std::uint64_t seed, const py::buffer &py_buf, float mean, float sigma) {
  using Generator = random::Philox;
  using Distribution = random::NormalDistribution<double>;
  GenerateFloatRandoms<Generator, Distribution>(seed, py_buf, mean, sigma);
}

void TruncatedNormal(std::uint64_t seed, const py::buffer &py_buf, float a, float b, float mean, float sigma) {
  using Generator = random::Philox;
  using Distribution = random::TruncatedNormal<double>;
  GenerateFloatRandoms<Generator, Distribution>(seed, py_buf, a, b, mean, sigma);
}

REGISTER_PYBIND_DEFINE(init_random, ([](py::module *const m) {
                         (void)m->def("_random_uniform", RandomUniform);
                         (void)m->def("_random_normal", RandomNormal);
                         (void)m->def("_truncated_normal", TruncatedNormal);
                       }));
}  // namespace mindspore::initializer
@@ -1,68 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "pybind_api/random_normal/random_cpu_kernel.h"

#include <memory>
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "ir/tensor.h"

namespace mindspore {
bool InitRandomNormal(std::vector<int64_t> out_shape, int64_t seed, int64_t seed2, const py::object &output_tensor) {
  if (out_shape.size() == 0) {
    std::cout << "output data shape is error" << std::endl;
  }
  int64_t total_count = 1;
  for (uint32_t i = 0; i < out_shape.size(); i++) {
    total_count *= out_shape[i];
  }
  uint32_t thread_num = 16;
  if (total_count <= thread_num) {
    thread_num = 1;
  }
  auto temp = py::cast<std::shared_ptr<mindspore::tensor::Tensor>>(output_tensor);
  float *start_ptr = static_cast<float *>(temp->data_c());
  if (start_ptr == nullptr) {
    std::cout << "start_ptr is nullptr" << std::endl;
    return false;
  }
  int64_t batchSize = total_count / thread_num;
  std::vector<std::thread> threads(thread_num);
  seed = (seed == 0 && seed2 == 0) ? clock() : seed;
  mindspore::PhiloxGenerator generator = mindspore::PhiloxGenerator(seed, seed2);
  float *offset_ptr = nullptr;
  if (thread_num != 1) {
    for (uint32_t i = 0; i < thread_num - 1; i++) {
      offset_ptr = start_ptr + batchSize * i;
      threads[i] = std::thread(mindspore::FillRandoms<mindspore::NormalDistribution<mindspore::PhiloxGenerator, float>>,
                               generator, offset_ptr, batchSize, i);
    }
    offset_ptr = start_ptr + batchSize * (thread_num - 1);
    threads[thread_num - 1] =
      std::thread(mindspore::FillRandoms<mindspore::NormalDistribution<mindspore::PhiloxGenerator, float>>, generator,
                  offset_ptr, total_count - (thread_num - 1) * batchSize, thread_num - 1);
  } else {
    threads[0] = std::thread(mindspore::FillRandoms<mindspore::NormalDistribution<mindspore::PhiloxGenerator, float>>,
                             generator, start_ptr, total_count, 0);
  }
  for (uint32_t i = 0; i < thread_num; i++) {
    threads[i].join();
  }
  return true;
}

REGISTER_PYBIND_DEFINE(random_normal,
                       ([](py::module *const m) { (void)m->def("random_normal", &InitRandomNormal, "testnormal"); }));
}  // namespace mindspore
@@ -1,68 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef PYBIND_API_API_IR_RANDOM_NORMAL_RANDOM_CPU_KERNEL_H_
#define PYBIND_API_API_IR_RANDOM_NORMAL_RANDOM_CPU_KERNEL_H_
#include <vector>
#include "include/common/utils/philox_generator.h"
#include "include/common/pybind_api/api_register.h"
#include "pybind11/pytypes.h"
#include "utils/log_adapter.h"

namespace py = pybind11;

namespace mindspore {
template <class T, typename vartype>
class NormalDistribution;
template <class T>
class NormalDistribution<T, float> {
 public:
  bool UInt32ToFloat32(uint32_t input, float *output) const {
    const uint32_t temp_value = input & 0x7fffffu;
    const uint32_t exp = static_cast<uint32_t>(127);
    const uint32_t val = (exp << 23) | temp_value;
    errno_t mem_ret = memcpy_s(output, sizeof(float), &val, sizeof(uint32_t));
    if (mem_ret != EOK) {
      MS_LOG(ERROR) << "UInt32ToFloat32 memcpy is failed";
      return false;
    }
    *output = *output - 1.0f;
    return true;
  }

  std::array<float, kResultNum> operator()(T *generator) {
    std::array<uint32_t, 4> generate_value = (*generator)();
    const float PI = 3.14;
    for (uint32_t i = 0; i < kResultNum; i += 2) {
      float temp[2];
      UInt32ToFloat32(generate_value[i], &temp[0]);
      UInt32ToFloat32(generate_value[i + 1], &temp[1]);
      const float threshold = 1.0e-7f;
      temp[0] = temp[0] < threshold ? threshold : temp[0];
      temp[1] = temp[1] < threshold ? threshold : temp[1];
      result_[i] = sqrt(-2.0 * log(temp[0])) * sin(2 * PI * temp[1]);
      result_[i + 1] = sqrt(-2.0 * log(temp[0])) * cos(2 * PI * temp[1]);
    }
    return result_;
  }

 private:
  std::array<float, kResultNum> result_;
};

bool InitRandomNormal(std::vector<int64_t> out_shape, int64_t seed, int64_t seed2, const py::object &output_tensor);
}  // namespace mindspore

#endif  // PYBIND_API_API_IR_RANDOM_NORMAL_RANDOM_CPU_KERNEL_H_
@@ -1,73 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "include/common/utils/philox_generator.h"

namespace mindspore {
static constexpr uint64_t kShiftNum = 32;
static constexpr uint64_t kGenerateNum = 10;
void PhiloxGenerator::Jump() {
  if ((++counter_[0] == 0) && (++counter_[1] == 0) && (++counter_[2] == 0)) {
    ++counter_[3];
  }
}

void PhiloxGenerator::JumpStep(uint64_t step) {
  uint64_t min_counter, max_counter;
  min_counter = static_cast<uint64_t>(counter_[1]);
  min_counter = min_counter << kShiftNum;
  min_counter += counter_[0];

  max_counter = static_cast<uint64_t>(counter_[3]);
  max_counter = max_counter << kShiftNum;
  max_counter += counter_[2];
  min_counter += step;
  if (min_counter < step) {
    max_counter++;
  }
  counter_[0] = static_cast<uint32_t>(min_counter);
  counter_[1] = static_cast<uint32_t>(min_counter >> kShiftNum);
  counter_[2] = static_cast<uint32_t>(max_counter);
  counter_[3] = static_cast<uint32_t>(max_counter >> kShiftNum);
}

std::array<uint32_t, kResultNum> PhiloxGenerator::Compute(const std::array<uint32_t, kResultNum> &counter,
                                                          const std::array<uint32_t, 2> &key_var) const {
  std::array<uint32_t, kResultNum> min_value;
  std::array<uint32_t, kResultNum> max_value;
  constexpr auto step = 2;
  for (size_t i = 0; i < kResultNum; i += step) {
    uint64_t temp = static_cast<uint64_t>(keyConstant[i]) * counter[i];
    min_value[i] = static_cast<uint32_t>(temp);
    max_value[i] = static_cast<uint32_t>(temp >> kShiftNum);
  }
  std::array<uint32_t, kResultNum> result;
  result[0] = (max_value[2] ^ counter[1] ^ key_var[0]);
  result[1] = min_value[2];
  result[2] = (max_value[0] ^ counter[3] ^ key_var[0]);
  result[3] = min_value[0];
  return result;
}

std::array<uint32_t, kResultNum> PhiloxGenerator::operator()() {
  for (size_t i = 0; i < kGenerateNum; i++) {
    counter_ = Compute(counter_, key_var_);
    key_var_[0] += keyConstant[1];
    key_var_[1] += keyConstant[3];
  }
  Jump();
  return counter_;
}
}  // namespace mindspore
@@ -1,4 +1,4 @@
# Copyright 2020-2021 Huawei Technologies Co., Ltd
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -20,11 +20,10 @@ import math

from functools import reduce
import numpy as np
from scipy.stats import truncnorm
from mindspore.common.seed import get_seed, _get_graph_seed
from mindspore.common import dtype as mstype
from mindspore.common.tensor import Tensor
from mindspore._c_expression import random_normal
from mindspore._c_expression import _random_normal, _random_uniform, _truncated_normal

_INITIALIZER_ALIAS = dict()


@@ -36,6 +35,7 @@ class Initializer:
    Args:
        kwargs (dict): Keyword arguments for Initializer.
    """

    def __init__(self, **kwargs):
        self._kwargs = kwargs
        self._seed = None

@@ -58,6 +58,7 @@ class Initializer:
    def __call__(self, arr):
        return self._initialize(arr)


def _register(*aliases):
    """Return the alias register."""
    def alias_reg(cls):

@@ -89,6 +90,29 @@ def _assignment(arr, num):
    return arr


def _numpy_seed():
    # This will produce the same value after calling numpy.random.seed with the same seed.
    return np.random.randint(low=1, high=(1 << 63))


def _init_random_normal(mean, sigma, shape):
    data = np.ndarray(shape=shape, dtype=np.float32)
    _random_normal(_numpy_seed(), data, mean, sigma)
    return data


def _init_random_uniform(a, b, shape):
    data = np.ndarray(shape=shape, dtype=np.float32)
    _random_uniform(_numpy_seed(), data, a, b)
    return data


def _init_truncated_normal(a, b, mean, sigma, shape):
    data = np.ndarray(shape=shape, dtype=np.float32)
    _truncated_normal(_numpy_seed(), data, a, b, mean, sigma)
    return data


@_register('zeros')
class Zero(Initializer):
    """

@@ -100,8 +124,9 @@ class Zero(Initializer):
        >>> tensor1 = initializer(Zero(), [1, 2, 3], mindspore.float32)
        >>> tensor2 = initializer('zeros', [1, 2, 3], mindspore.float32)
    """

    def _initialize(self, arr):
        _assignment(arr, 0)
        arr.fill(0)


@_register('ones')

@@ -115,8 +140,9 @@ class One(Initializer):
        >>> tensor1 = initializer(One(), [1, 2, 3], mindspore.float32)
        >>> tensor2 = initializer('ones', [1, 2, 3], mindspore.float32)
    """

    def _initialize(self, arr):
        _assignment(arr, 1)
        arr.fill(1)


def _calculate_fan_in_and_fan_out(shape):

@@ -253,16 +279,15 @@ class XavierUniform(Initializer):
        >>> tensor1 = initializer(XavierUniform(), [1, 2, 3], mindspore.float32)
        >>> tensor2 = initializer('xavier_uniform', [1, 2, 3], mindspore.float32)
    """

    def __init__(self, gain=1):
        super(XavierUniform, self).__init__(gain=gain)
        self.gain = gain

    def _initialize(self, arr):
        n_in, n_out = _calculate_fan_in_and_fan_out(arr.shape)

        boundary = self.gain * math.sqrt(6.0 / (n_in + n_out))
        data = np.random.uniform(-boundary, boundary, arr.shape)

        data = _init_random_uniform(-boundary, boundary, arr.shape)
        _assignment(arr, data)


@@ -297,6 +322,7 @@ class HeUniform(Initializer):
        >>> tensor1 = initializer(HeUniform(), [1, 2, 3], mindspore.float32)
        >>> tensor2 = initializer('he_uniform', [1, 2, 3], mindspore.float32)
    """

    def __init__(self, negative_slope=0, mode='fan_in', nonlinearity='leaky_relu'):
        super(HeUniform, self).__init__(negative_slope=negative_slope, mode=mode, nonlinearity=nonlinearity)
        self.negative_slope = negative_slope

@@ -308,8 +334,7 @@ class HeUniform(Initializer):
        gain = _calculate_gain(self.nonlinearity, self.negative_slope)
        std = gain / math.sqrt(fan)
        boundary = math.sqrt(3.0) * std
        data = np.random.uniform(-boundary, boundary, arr.shape)

        data = _init_random_uniform(-boundary, boundary, arr.shape)
        _assignment(arr, data)


@@ -343,6 +368,7 @@ class HeNormal(Initializer):
        >>> tensor1 = initializer(HeNormal(), [1, 2, 3], mindspore.float32)
        >>> tensor2 = initializer('he_normal', [1, 2, 3], mindspore.float32)
    """

    def __init__(self, negative_slope=0, mode='fan_in', nonlinearity='leaky_relu'):
        super(HeNormal, self).__init__(negative_slope=negative_slope, mode=mode, nonlinearity=nonlinearity)
        self.negative_slope = negative_slope

@@ -353,8 +379,7 @@ class HeNormal(Initializer):
        fan = _calculate_correct_fan(arr.shape, self.mode)
        gain = _calculate_gain(self.nonlinearity, self.negative_slope)
        std = gain / math.sqrt(fan)
        data = np.random.normal(0, std, arr.shape)

        data = _init_random_normal(0, std, arr.shape)
        _assignment(arr, data)


@@ -372,12 +397,13 @@ class Constant(Initializer):
        >>> tensor1 = initializer(0, [1, 2, 3], mindspore.float32)
        >>> tensor2 = initializer(5, [1, 2, 3], mindspore.float32)
    """

    def __init__(self, value):
        super(Constant, self).__init__(value=value)
        self.value = value

    def _initialize(self, arr):
        _assignment(arr, self.value)
        arr.fill(self.value)


@_register()

@@ -394,6 +420,7 @@ class Identity(Initializer):
        >>> tensor1 = initializer(Identity(), [2, 3], mindspore.float32)
        >>> tensor2 = initializer('identity', [2, 3], mindspore.float32)
    """

    def _initialize(self, arr):
        if len(arr.shape) != 2:
            raise ValueError('For Identity initializer, the dimension of the initialized tensor should be 2, '

@@ -420,6 +447,7 @@ class Sparse(Initializer):
        >>> from mindspore.common.initializer import initializer, Sparse
        >>> tensor1 = initializer(Sparse(sparsity=0.1, sigma=0.01), [5, 8], mindspore.float32)
    """

    def __init__(self, sparsity, sigma=0.01):
        super(Sparse, self).__init__()
        self.sparsity = sparsity

@@ -431,7 +459,7 @@ class Sparse(Initializer):
                             'but got {}.'.format(len(arr.shape)))
        rows, cols = arr.shape
        zero_num = int(np.ceil(self.sparsity * rows))
        data = np.random.normal(0, self.sigma, arr.shape)
        data = _init_random_normal(0, self.sigma, arr.shape)
        for col_idx in range(cols):
            row_idx = np.random.permutation(list(range(rows)))[: zero_num]
            data[row_idx, col_idx] = 0.

@@ -509,6 +537,7 @@ class Orthogonal(Initializer):
        >>> tensor1 = initializer(Orthogonal(gain=2.), [2, 3, 4], mindspore.float32)
        >>> tensor2 = initializer('orthogonal', [2, 3, 4], mindspore.float32)
    """

    def __init__(self, gain=1.):
        super(Orthogonal, self).__init__(gain=gain)
        self.gain = gain

@@ -520,7 +549,7 @@ class Orthogonal(Initializer):
        rows = arr.shape[0]

        cols = np.prod(arr.shape) // rows
        data = np.random.normal(0, 1, size=(rows, cols))
        data = _init_random_normal(0, 1, (rows, cols))

        if rows < cols:
            data = data.T

@@ -565,6 +594,7 @@ class VarianceScaling(Initializer):
        ...                          distribution='untruncated_normal'), [2, 3], mindspore.float32)
        >>> tensor2 = initializer('varianceScaling', [2, 3], mindspore.float32)
    """

    def __init__(self, scale=1.0, mode='fan_in', distribution='truncated_normal'):
        super(VarianceScaling, self).__init__(scale=scale, mode=mode, distribution=distribution)
        if scale <= 0.:

@@ -595,13 +625,13 @@ class VarianceScaling(Initializer):

        if self.distribution == 'truncated_norm':
            stddev = np.sqrt(scale) / 0.87962566103423978
            data = truncnorm.rvs(-2, 2, loc=0, scale=stddev, size=arr.shape, random_state=None)
            data = _init_truncated_normal(-2, 2, 0, stddev, arr.shape)
        elif self.distribution == 'untruncated_normal':
            stddev = np.sqrt(scale)
            data = np.random.normal(0, stddev, arr.shape)
            data = _init_random_normal(0, stddev, arr.shape)
        else:
            limit = np.sqrt(3.0 * scale)
            data = np.random.uniform(-limit, limit, arr.shape)
            data = _init_random_uniform(-limit, limit, arr.shape)
        _assignment(arr, data)


@@ -621,12 +651,13 @@ class Uniform(Initializer):
        >>> tensor1 = initializer(Uniform(), [1, 2, 3], mindspore.float32)
        >>> tensor2 = initializer('uniform', [1, 2, 3], mindspore.float32)
    """

    def __init__(self, scale=0.07):
        super(Uniform, self).__init__(scale=scale)
        self.scale = scale

    def _initialize(self, arr):
        tmp = np.random.uniform(-self.scale, self.scale, arr.shape)
        tmp = _init_random_uniform(-self.scale, self.scale, arr.shape)
        _assignment(arr, tmp)


@@ -649,18 +680,16 @@ class Normal(Initializer):
        >>> tensor1 = initializer(Normal(), [1, 2, 3], mindspore.float32)
        >>> tensor2 = initializer('normal', [1, 2, 3], mindspore.float32)
    """

    def __init__(self, sigma=0.01, mean=0.0):
        super(Normal, self).__init__(sigma=sigma, mean=mean)
        self.sigma = sigma
        self.mean = mean

    def _initialize(self, arr):
        seed, seed2 = self.seed
        output_tensor = Tensor(np.zeros(arr.shape, dtype=np.float32))
        random_normal(arr.shape, seed, seed2, output_tensor)
        output_data = output_tensor.asnumpy()
        output_data = output_data * self.sigma + self.mean
        _assignment(arr, output_data)
        data = _init_random_normal(self.mean, self.sigma, arr.shape)
        _assignment(arr, data)


@_register()
class TruncatedNormal(Initializer):

@@ -677,12 +706,13 @@ class TruncatedNormal(Initializer):
        >>> tensor1 = initializer(TruncatedNormal(), [1, 2, 3], mindspore.float32)
        >>> tensor2 = initializer('truncatedNormal', [1, 2, 3], mindspore.float32)
    """

    def __init__(self, sigma=0.01):
        super(TruncatedNormal, self).__init__(sigma=sigma)
        self.sigma = sigma

    def _initialize(self, arr):
        tmp = truncnorm.rvs(-2, 2, loc=0, scale=self.sigma, size=arr.shape, random_state=None)
        tmp = _init_truncated_normal(-2, 2, 0, self.sigma, arr.shape)
        _assignment(arr, tmp)


@@ -759,6 +789,7 @@ def initializer(init, shape=None, dtype=mstype.float32):
    init_obj = Tensor(dtype=dtype, shape=shape, init=init)
    return init_obj


__all__ = [
    'Initializer',
    'initializer',
@@ -378,6 +378,6 @@ def test_train_feed(num_classes=65536):
    model = Model(net, loss_fn=loss, optimizer=opt)
    model.train(3, dataset, dataset_sink_mode=False, callbacks=parallel_callback)
    loss_value = np.array(parallel_callback.loss_list)
    expect_out = [11.087254, 10.876551, 10.057782]
    expect_out = [11.344119, 10.747661, 11.134097]
    print(loss_value)
    assert np.allclose(loss_value, expect_out, 0.0001, 0.0001)

@@ -232,10 +232,7 @@ def test_train():
    # For graph mode
    set_seed(0)
    graph_loss = get_train_loss(numeric_columns, sparse_columns, data_list, context.GRAPH_MODE)
    expect_loss = np.array([16.476381, 2425.9783, 8769.053], dtype=graph_loss[0].dtype)
    assert np.allclose(graph_loss, expect_loss, 0.01, 0.01)
    # For PyNative mode
    set_seed(0)
    pynative_loss = get_train_loss(numeric_columns, sparse_columns, data_list, context.PYNATIVE_MODE)
    expect_loss = np.array([16.476381, 2425.9783, 8769.053], dtype=pynative_loss[0].dtype)
    assert np.allclose(pynative_loss, expect_loss, 0.01, 0.01)
    assert np.allclose(pynative_loss, graph_loss)

@@ -1817,5 +1817,5 @@ def test_train():
    )

    train_loss = callback.loss
    expect_loss = 113.286
    expect_loss = 114.664
    assert np.allclose(train_loss, expect_loss, 0.001, 0.001)

@@ -237,7 +237,7 @@ def test_bert_thor_8p():

    print("End training...")
    assert mean_cost < 96
    assert mean_loss < 8.125
    assert mean_loss < 8.15


if __name__ == '__main__':
@ -312,18 +312,22 @@ class TestSummary:
|
|||
@pytest.mark.env_onecard
|
||||
@security_off_wrap
|
||||
def test_summary_collector_landscape(self):
|
||||
"""Test summary collector with landscape."""
|
||||
"""
|
||||
Feature: Summary collector with landscape.
|
||||
Description: Test summary collector with landscape.
|
||||
Expectation: Landscape data collected with expected value.
|
||||
"""
|
||||
set_seed(1)
|
||||
interval_1 = [1, 2, 3]
|
||||
num_samples = 6
|
||||
summary_dir = self._train_network(epoch=3, num_samples=num_samples,
|
||||
collect_specified_data={'collect_landscape':
|
||||
{'landscape_size': 4,
|
||||
'unit': 'epoch',
|
||||
'create_landscape': {'train': True,
|
||||
'result': True},
|
||||
'num_samples': num_samples,
|
||||
'intervals': [interval_1]}})
|
||||
{'landscape_size': 4,
|
||||
'unit': 'epoch',
|
||||
'create_landscape': {'train': True,
|
||||
'result': True},
|
||||
'num_samples': num_samples,
|
||||
'intervals': [interval_1]}})
|
||||
|
||||
tag_list = self._list_summary_collect_landscape_tags(summary_dir)
|
||||
expected_tags = {'epoch_group', 'model_params_file_map', 'step_per_epoch', 'unit', 'num_samples',
|
||||
|
@@ -332,23 +336,9 @@ class TestSummary:
         device_id = int(os.getenv('DEVICE_ID')) if os.getenv('DEVICE_ID') else 0
         summary_landscape = SummaryLandscape(summary_dir)
         summary_landscape.gen_landscapes_with_multi_process(callback_fn, device_ids=[device_id])
-        expected_pca_value = np.array([2.2795451, 2.2795504, 2.2795559, 2.2795612, 2.2795450, 2.2795503, 2.2795557,
-                                       2.2795612, 2.2795449, 2.2795503, 2.2795557, 2.2795610, 2.2795449, 2.2795502,
-                                       2.2795555, 2.2795610])
-        expe_pca_value_asc = np.array([2.2795452, 2.2795503, 2.2795557, 2.2795612, 2.2795450, 2.2795503, 2.2795557,
-                                       2.2795612, 2.2795449, 2.2795502, 2.2795555, 2.2795609, 2.2795449, 2.2795502,
-                                       2.2795554, 2.2795610])
-        expected_random_value = np.array([2.2729474, 2.2777648, 2.2829195, 2.2884243, 2.2724223, 2.2771732, 2.2822458,
-                                          2.2875971, 2.2725493, 2.2771329, 2.2819973, 2.2875895, 2.2730918, 2.2774068,
-                                          2.2822349, 2.2881028])
-        expe_random_value_asc = np.array([2.2729466, 2.2777647, 2.2829201, 2.2884242, 2.2724224, 2.2771732, 2.2822458,
-                                          2.2875975, 2.2725484, 2.2771326, 2.2819972, 2.2875896, 2.2730910, 2.2774070,
-                                          2.2822352, 2.2881035])
         tag_list_landscape = self._list_landscape_tags(summary_dir)
-        assert np.all(abs(expected_pca_value - tag_list_landscape[0]) < 1.e-6) or \
-            np.all(abs(expe_pca_value_asc - tag_list_landscape[0]) < 1.e-6)
-        assert np.all(abs(expected_random_value - tag_list_landscape[1]) < 1.e-6) or \
-            np.all(abs(expe_random_value_asc - tag_list_landscape[1]) < 1.e-6)
+        assert np.allclose(tag_list_landscape[0], 2.28, atol=0.03)
+        assert np.allclose(tag_list_landscape[1], 2.28, atol=0.03)

     @pytest.mark.level0
     @pytest.mark.platform_x86_ascend_training
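Swapping the per-element golden arrays for a scalar bound works because np.allclose broadcasts its second argument over the array; a minimal illustration:

    import numpy as np

    values = np.full(16, 2.2795)                 # stand-in for tag_list_landscape[0]
    assert np.allclose(values, 2.28, atol=0.03)  # scalar 2.28 broadcasts over all 16 entries

The looser check trades exactness for robustness: the landscape values now depend on the new initializer's random stream, so pinning sixteen digits per platform would be brittle.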
@@ -0,0 +1,172 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <random>
#include <vector>
#include <iostream>
#include "common/common_test.h"
#include "include/common/random.h"

namespace mindspore {
class TestRandom : public UT::Common {};

/// Feature: Philox random number generator.
/// Description: Test Philox random number generator.
/// Expectation: Random number generator works as expected.
TEST_F(TestRandom, test_philox_generator) {
  const uint64_t seed = 1234;
  auto rng = random::Philox(seed);
  std::vector<uint32_t> numbers;
  for (size_t i = 0; i < 20; ++i) {
    numbers.push_back(rng());
  }

  // Discard.
  rng = random::Philox(seed);
  rng.discard(0);
  EXPECT_EQ(rng(), numbers[0]);

  rng = random::Philox(seed);
  rng.discard(8);
  EXPECT_EQ(rng(), numbers[8]);

  rng = random::Philox(seed);
  rng.discard(9);
  EXPECT_EQ(rng(), numbers[9]);

  rng = random::Philox(seed);
  rng.discard(10);
  EXPECT_EQ(rng(), numbers[10]);

  rng = random::Philox(seed);
  rng.discard(11);
  EXPECT_EQ(rng(), numbers[11]);

  rng = random::Philox(seed);
  rng.discard(12);
  EXPECT_EQ(rng(), numbers[12]);

  rng = random::Philox(seed);
  rng.discard(13);
  EXPECT_EQ(rng(), numbers[13]);

  // Discard after generate.
  rng = random::Philox(seed);
  rng();
  rng.discard(8 - 1);
  EXPECT_EQ(rng(), numbers[8]);
  rng.discard(1);
  EXPECT_EQ(rng(), numbers[10]);

  rng = random::Philox(seed);
  rng();
  rng.discard(9 - 1);
  EXPECT_EQ(rng(), numbers[9]);
  rng.discard(2);
  EXPECT_EQ(rng(), numbers[12]);

  rng = random::Philox(seed);
  rng();
  rng.discard(10 - 1);
  EXPECT_EQ(rng(), numbers[10]);
  rng.discard(3);
  EXPECT_EQ(rng(), numbers[14]);

  rng = random::Philox(seed);
  rng();
  rng.discard(11 - 1);
  EXPECT_EQ(rng(), numbers[11]);
  rng.discard(4);
  EXPECT_EQ(rng(), numbers[16]);

  rng = random::Philox(seed);
  rng();
  rng.discard(12 - 1);
  EXPECT_EQ(rng(), numbers[12]);
  rng.discard(5);
  EXPECT_EQ(rng(), numbers[18]);

  rng = random::Philox(seed);
  rng();
  rng.discard(13 - 1);
  EXPECT_EQ(rng(), numbers[13]);
}
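The discard cases cluster around indices 8 through 13 and interleave generation with skipping, presumably because a Philox-4x32 engine emits four 32-bit outputs per counter block: discard(n) has to translate a global output index into a whole-block jump plus an in-block offset, and off-by-one bugs hide exactly at those block boundaries. A minimal Python sketch of that bookkeeping (an illustrative stand-in, not the real Philox rounds):

    import hashlib

    class BlockedRng:
        """Counter-based engine yielding BLOCK outputs per block (illustrative)."""
        BLOCK = 4

        def __init__(self, seed):
            self._seed = seed
            self._index = 0  # global index of the next output

        def _block(self, block_index):
            # Illustrative replacement for the real Philox round function.
            digest = hashlib.blake2b(f"{self._seed}:{block_index}".encode(), digest_size=16).digest()
            return [int.from_bytes(digest[i:i + 4], "little") for i in range(0, 16, 4)]

        def __call__(self):
            block, offset = divmod(self._index, self.BLOCK)
            self._index += 1
            return self._block(block)[offset]

        def discard(self, n):
            self._index += n  # O(1) skip: intermediate outputs are never computed

    rng = BlockedRng(1234)
    numbers = [rng() for _ in range(20)]
    rng = BlockedRng(1234)
    rng()                        # consume output 0
    rng.discard(9 - 1)           # skip outputs 1..8
    assert rng() == numbers[9]   # mirrors the unit test above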

/// Feature: Random distributions.
/// Description: Test random distributions.
/// Expectation: Distributions work as expected.
TEST_F(TestRandom, test_distributions) {
  using Rng = random::Philox;
  using Uniform = random::UniformDistribution<float>;
  using Normal = random::NormalDistribution<float>;
  using TruncNormal = random::TruncatedNormal<float>;

  const uint64_t seed = 4321;
  const size_t length = 10000;
  const size_t half_len = length / 2;
  std::vector<float> randoms1(length);
  std::vector<float> randoms2(length);

  random::GenerateRandoms<float, Rng, Uniform>(seed, 0, randoms1.data(), length, 0.0f, 1.0f);
  random::GenerateRandoms<float, Rng, Uniform>(seed, 0, randoms2.data(), half_len, 0.0f, 1.0f);
  random::GenerateRandoms<float, Rng, Uniform>(seed, half_len, randoms2.data() + half_len, half_len, 0.0f, 1.0f);
  EXPECT_EQ(randoms1, randoms2);

  random::GenerateRandoms<float, Rng, Normal>(seed, 0, randoms1.data(), length, 0.0f, 1.0f);
  random::GenerateRandoms<float, Rng, Normal>(seed, 0, randoms2.data(), half_len, 0.0f, 1.0f);
  random::GenerateRandoms<float, Rng, Normal>(seed, half_len, randoms2.data() + half_len, half_len, 0.0f, 1.0f);
  EXPECT_EQ(randoms1, randoms2);

  random::GenerateRandoms<float, Rng, TruncNormal>(seed, 0, randoms1.data(), length, -2.0f, 2.0f, 0.0f, 1.0f);
  random::GenerateRandoms<float, Rng, TruncNormal>(seed, 0, randoms2.data(), half_len, -2.0f, 2.0f, 0.0f, 1.0f);
  random::GenerateRandoms<float, Rng, TruncNormal>(seed, half_len, randoms2.data() + half_len, half_len, -2.0f, 2.0f,
                                                   0.0f, 1.0f);
  EXPECT_EQ(randoms1, randoms2);
}
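Each block checks the skip-ahead contract that makes deterministic parallel generation possible: producing the second half with skip = half_len must reproduce the tail of one uninterrupted run. That only holds if every variate consumes a fixed number of engine outputs (no rejection loops), which these distribution implementations evidently guarantee. The same property expressed with NumPy (an analogy only; the engine differs):

    import numpy as np

    seed, length, half = 4321, 10000, 5000
    full = np.random.default_rng(seed).random(length)  # one uninterrupted run

    rng = np.random.default_rng(seed)
    rng.random(half)                  # consume, i.e. "skip", the first half
    tail = rng.random(half)           # the stream continues where it left off
    assert np.array_equal(full[half:], tail)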

/// Feature: Parallel task size compute.
/// Description: Test parallel task size compute.
/// Expectation: Result parallel task size is correct.
TEST_F(TestRandom, test_parallel_task_size) {
  auto result = random::ComputeTaskNumSize(1000, 10);
  EXPECT_EQ(result.first, 1);
  EXPECT_EQ(result.second, 1000);

  result = random::ComputeTaskNumSize(1024, 10);
  EXPECT_EQ(result.first, 1);
  EXPECT_EQ(result.second, 1024);

  result = random::ComputeTaskNumSize(1025, 10);
  EXPECT_EQ(result.first, 10);
  EXPECT_EQ(result.second, 104);

  result = random::ComputeTaskNumSize(2020, 10);
  EXPECT_EQ(result.first, 10);
  EXPECT_EQ(result.second, 204);

  result = random::ComputeTaskNumSize(2021, 10);
  EXPECT_EQ(result.first, 10);
  EXPECT_EQ(result.second, 204);

  result = random::ComputeTaskNumSize(2040, 10);
  EXPECT_EQ(result.first, 10);
  EXPECT_EQ(result.second, 204);

  result = random::ComputeTaskNumSize(2041, 10);
  EXPECT_EQ(result.first, 10);
  EXPECT_EQ(result.second, 208);
}
}  // namespace mindspore
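Taken together, the expectations pin down the splitting policy: totals up to 1024 stay on a single task, and larger workloads fan out to thread_num tasks whose per-task size is rounded up to a multiple of four, plausibly so that every task starts on a Philox block boundary. A Python re-derivation of that rule, inferred purely from the EXPECT_EQ cases above (names illustrative):

    def compute_task_num_size(total, thread_num, block=4, serial_threshold=1024):
        """Mirror of the behavior the unit test above encodes (inferred)."""
        if total <= serial_threshold:
            return 1, total
        per_task = -(-total // thread_num)        # ceiling division
        per_task = -(-per_task // block) * block  # round up to a block multiple
        return thread_num, per_task

    assert compute_task_num_size(1000, 10) == (1, 1000)
    assert compute_task_num_size(1025, 10) == (10, 104)
    assert compute_task_num_size(2041, 10) == (10, 208)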
@@ -14,6 +14,7 @@
 # ============================================================================
 """ test_initializer """
 import math
+import unittest
 from functools import reduce
 import numpy as np
 import pytest as py
@@ -27,6 +28,7 @@ from mindspore.common.parameter import Parameter
 from mindspore.common.tensor import Tensor
 from mindspore.nn import Conv2d
 from mindspore.ops import operations as P
+from mindspore._c_expression import _random_normal, _random_uniform, _truncated_normal
 from ..ut_filter import non_graph_engine

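The _c_expression names are the new pybind entry points into the C++ generator. Judging from their use later in this file, each takes a seed and fills an existing float32 NumPy array in place; the signatures below are inferred from the tests, not from documentation:

    import numpy as np

    # Inferred in-place fill API (illustrative):
    #   _random_normal(seed, out, mean, sigma)
    #   _random_uniform(seed, out, low, high)
    #   _truncated_normal(seed, out, lower, upper, mean, sigma)
    out = np.ndarray(shape=(11, 512), dtype=np.float32)  # uninitialized buffer to be filled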
@@ -279,7 +281,12 @@ def test_init_variancescaling():


 def test_conv2d_abnormal_kernel_negative():
-    kernel = np.random.randn(64, 3, 7, 7).astype(np.float32)
+    """
+    Feature: Random initializers implemented in C++.
+    Description: Test random initializers implemented in C++.
+    Expectation: Data is initialized successfully.
+    """
+    kernel = init.initializer(init.Normal(sigma=1.0), [64, 3, 7, 7], ms.float32).init_data()
     with py.raises(ValueError):
         ms.Model(
             Conv2d(in_channels=3, out_channels=64, kernel_size=-7, stride=3,
@@ -288,17 +295,27 @@ def test_conv2d_abnormal_kernel_negative():

 @non_graph_engine
 def test_conv2d_abnormal_kernel_normal():
-    kernel = np.random.randn(64, 3, 7, 7).astype(np.float32)
-    input_data = np.random.randn(32, 3, 224, 112).astype(np.float32)
+    """
+    Feature: Random initializers implemented in C++.
+    Description: Test random initializers implemented in C++.
+    Expectation: Data is initialized successfully.
+    """
+    kernel = init.initializer(init.Normal(sigma=1.0), [64, 3, 7, 7], ms.float32).init_data()
+    input_data = init.initializer(init.Normal(sigma=1.0), [32, 3, 224, 112], ms.float32).init_data()
     context.set_context(mode=context.GRAPH_MODE)
     model = ms.Model(
         Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=3,
-               padding=0, weight_init=ms.Tensor(kernel)))
-    model.predict(ms.Tensor(input_data))
+               padding=0, weight_init=kernel))
+    model.predict(input_data)


 @non_graph_engine
 def test_conv2d_abnormal_kernel_truncated_normal():
+    """
+    Feature: Random initializers implemented in C++.
+    Description: Test random initializers implemented in C++.
+    Expectation: Data is initialized successfully.
+    """
     input_data = init.initializer(init.TruncatedNormal(), [64, 3, 7, 7], ms.float32).init_data()
     context.set_context(mode=context.GRAPH_MODE)
     model = ms.Model(
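The pattern the diff converges on: build a lazy initializer, then materialize it with init_data(), which now routes through the C++ random kernels. A minimal usage sketch mirroring the lines above:

    import mindspore as ms
    import mindspore.common.initializer as init

    # Describe the weight lazily, then materialize its values.
    weight = init.initializer(init.Normal(sigma=1.0), [64, 3, 7, 7], ms.float32)
    tensor = weight.init_data()  # fills the buffer via the (now C++) random path
    print(tensor.shape)          # (64, 3, 7, 7)

Since init_data() already returns a Tensor, the extra ms.Tensor(...) wrapping that the old NumPy arrays needed also goes away.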
@@ -327,3 +344,92 @@ def test_weight_shape():
     net = Net()
     out = net(t)
     print(out)
+
+
+def test_init_with_same_numpy_seed():
+    """
+    Feature: Random initializers that depend on the numpy random seed.
+    Description: Test random initializers with the same numpy random seed.
+    Expectation: Initialized data is identical for the same numpy random seed.
+    """
+    shape = [12, 34]
+    np.random.seed(1234)
+    uniform1 = init.initializer('uniform', shape, ms.float32).init_data()
+    normal1 = init.initializer('normal', shape, ms.float32).init_data()
+    truncnorm1 = init.initializer('truncatednormal', shape, ms.float32).init_data()
+
+    np.random.seed(1234)
+    uniform2 = init.initializer('uniform', shape, ms.float32).init_data()
+    normal2 = init.initializer('normal', shape, ms.float32).init_data()
+    truncnorm2 = init.initializer('truncatednormal', shape, ms.float32).init_data()
+
+    assert np.allclose(uniform1.asnumpy(), uniform2.asnumpy())
+    assert np.allclose(normal1.asnumpy(), normal2.asnumpy())
+    assert np.allclose(truncnorm1.asnumpy(), truncnorm2.asnumpy())
+
+    # Reset numpy random seed after test.
+    np.random.seed()
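For this test to pass, the Python layer must derive the seed it hands to the C++ generator from NumPy's global RNG state, which is also exactly what the K-S section below does by hand. A hedged sketch of the idea (illustrative, not the actual implementation):

    import numpy as np

    def _derive_seed():
        # Drawing from the global NumPy RNG makes np.random.seed(s) reproducible
        # end to end: same global seed -> same derived seed -> same C++ stream.
        return np.random.randint(low=1, high=(1 << 63))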
+
+
+def test_cpp_random_initializer():
+    """
+    Feature: Random initializers implemented in C++.
+    Description: Test random initializers implemented in C++.
+    Expectation: Data is initialized successfully.
+    """
+    ut = unittest.TestCase()
+    shape = (11, 512)
+
+    # Random normal.
+    data = np.ndarray(shape=shape, dtype=np.float32)
+    _random_normal(0, data, 0.0, 1.0)
+    ut.assertAlmostEqual(np.mean(data), 0.0, delta=0.1)
+    ut.assertAlmostEqual(np.std(data), 1.0, delta=0.1)
+
+    # Random uniform.
+    data = np.ndarray(shape=shape, dtype=np.float32)
+    _random_uniform(0, data, -1.0, 1.0)
+    ut.assertAlmostEqual(np.mean(data), 0.0, delta=0.1)
+    ut.assertGreater(np.std(data), 0.0)
+
+    # Truncated normal.
+    data = np.ndarray(shape=shape, dtype=np.float32)
+    _truncated_normal(0, data, -2.0, 2.0, 0.0, 1.0)
+    ut.assertAlmostEqual(np.mean(data), 0.0, delta=0.1)
+    ut.assertGreaterEqual(np.min(data), -2.0)
+    ut.assertLessEqual(np.max(data), 2.0)
+
+    # Same seeds, same results.
+    data1 = np.ndarray(shape=shape, dtype=np.float32)
+    _random_normal(12345678, data1, 0.0, 1.0)
+    data2 = np.ndarray(shape=shape, dtype=np.float32)
+    _random_normal(12345678, data2, 0.0, 1.0)
+    assert np.allclose(data1, data2)
+
+    # Different seeds, different results.
+    data3 = np.ndarray(shape=shape, dtype=np.float32)
+    _random_normal(12345679, data3, 0.0, 1.0)
+    assert not np.allclose(data1, data3)
+
+    # Check distributions by K-S test.
+    np.random.seed(42)
+    seed = np.random.randint(low=1, high=(1 << 63))
+    count = 10000
+    data = np.ndarray(shape=(count), dtype=np.float32)
+    _random_uniform(seed, data, 0.0, 1.0)
+    data2 = np.random.uniform(0.0, 1.0, size=count)
+    _, p = stats.kstest(data, data2, N=count)
+    assert p > 0.05
+
+    _random_normal(seed, data, 0.0, 1.0)
+    data2 = np.random.normal(0.0, 1.0, size=count)
+    _, p = stats.kstest(data, data2, N=count)
+    assert p > 0.05
+
+    _truncated_normal(seed, data, -2, 2, 0.0, 1.0)
+    data2 = stats.truncnorm.rvs(-2, 2, loc=0.0, scale=1.0, size=count, random_state=None)
+    _, p = stats.kstest(data, data2, N=count)
+    assert p > 0.05
+
+    # Reset numpy random seed after test.
+    np.random.seed()
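A note on the fixed np.random.seed(42): under the null hypothesis the K-S p-value is itself uniformly distributed, so a bare p > 0.05 check would spuriously fail about 5% of runs with an unpinned seed; fixing the seed makes the assertion deterministic. The two-sample form used above, in isolation (requires scipy, which this module is assumed to import as stats):

    import numpy as np
    from scipy import stats

    a = np.random.default_rng(0).normal(size=10000)
    b = np.random.default_rng(1).normal(size=10000)
    _, p = stats.kstest(a, b)  # two-sample K-S test when both arguments are samples
    print(p)                   # expected to be large: both samples are standard normal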