Fix burn-tch's random implementation for standard dist (#469)

Dilshod Tadjibaev 2023-07-06 07:50:50 -05:00 committed by GitHub
parent 7d7bd1f135
commit e62ee1269b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 99 additions and 74 deletions

View File

@@ -59,9 +59,9 @@ mod tests {
     #[test]
     fn grad_same_shape_as_forward_tensor() {
         let x: Tensor<TestADBackend, 2> =
-            Tensor::random([2, 1], Distribution::Standard).require_grad();
+            Tensor::random([2, 1], Distribution::Default).require_grad();
         let y: Tensor<TestADBackend, 2> =
-            Tensor::random([2, 3], Distribution::Standard).require_grad();
+            Tensor::random([2, 3], Distribution::Default).require_grad();
         let z = x.clone().add(y);
         let grads = z.backward();
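The hunk cuts off before the test's assertions. For orientation, a minimal sketch of how the check presumably continues, assuming burn's usual `grad(&grads)` accessor (the exact assertions are not shown in this diff):

```rust
// Sketch only: assumed continuation, not part of this commit.
let grad_x = x.grad(&grads).unwrap();
// The gradient has the forward tensor's [2, 1] shape, even though
// the addition broadcast it against a [2, 3] tensor.
assert_eq!(grad_x.shape(), x.shape());
```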

View File

@@ -318,7 +318,7 @@ mod tests {
         let mha = MultiHeadAttentionConfig::new(d_model, n_heads).init::<TestBackend>();
         let input = MhaInput::self_attn(Tensor::random(
             [batch_size, seq_length, d_model],
-            Distribution::Standard,
+            Distribution::Default,
         ));
         let output = mha.forward(input);
@@ -340,9 +340,9 @@ mod tests {
         let [batch_size, seq_length_1, seq_length_2, d_model, n_heads] = [7, 13, 15, 32, 4];
         let mha = MultiHeadAttentionConfig::new(d_model, n_heads).init::<TestBackend>();
         let input = MhaInput::new(
-            Tensor::random([batch_size, seq_length_1, d_model], Distribution::Standard),
-            Tensor::random([batch_size, seq_length_2, d_model], Distribution::Standard),
-            Tensor::random([batch_size, seq_length_2, d_model], Distribution::Standard),
+            Tensor::random([batch_size, seq_length_1, d_model], Distribution::Default),
+            Tensor::random([batch_size, seq_length_2, d_model], Distribution::Default),
+            Tensor::random([batch_size, seq_length_2, d_model], Distribution::Default),
         );
         let output = mha.forward(input);
@@ -374,7 +374,7 @@ mod tests {
         let tensor_1 = Tensor::<TestBackend, 3>::random(
             [batch_size, seq_length, d_model],
-            Distribution::Standard,
+            Distribution::Default,
         );
         // Change the end of the tensor
         let tensor_2 = tensor_1.clone().slice_assign(
@@ -383,7 +383,7 @@ mod tests {
                 seq_length - num_padded..seq_length,
                 0..d_model,
             ],
-            Tensor::random([batch_size, num_padded, d_model], Distribution::Standard),
+            Tensor::random([batch_size, num_padded, d_model], Distribution::Default),
         );
         let input_1 = MhaInput::self_attn(tensor_1).mask_pad(mask_pad.clone());
@@ -413,7 +413,7 @@ mod tests {
         let tensor = Tensor::<TestBackend, 3>::random(
             [batch_size, seq_length, d_model],
-            Distribution::Standard,
+            Distribution::Default,
         );
         let mask_attn = generate_autoregressive_mask(batch_size, seq_length, &tensor.device());
         let input = MhaInput::self_attn(tensor.clone()).mask_attn(mask_attn);

View File

@@ -403,11 +403,11 @@ mod tests {
         let memory = Tensor::<TestBackend, 3>::random(
             [batch_size, seq_length, d_model],
-            Distribution::Standard,
+            Distribution::Default,
         );
         let target = Tensor::<TestBackend, 3>::random(
             [batch_size, seq_length, d_model],
-            Distribution::Standard,
+            Distribution::Default,
         );
         let mask_attn = generate_autoregressive_mask(batch_size, seq_length, &target.device());
         let input = TransformerDecoderInput::new(target.clone(), memory.clone())

View File

@@ -349,7 +349,7 @@ mod tests {
         let tensor = Tensor::<TestBackend, 3>::random(
             [batch_size, seq_length, d_model],
-            Distribution::Standard,
+            Distribution::Default,
         );
         let mask_attn = generate_autoregressive_mask(batch_size, seq_length, &tensor.device());
         let input = TransformerEncoderInput::new(tensor.clone()).mask_attn(mask_attn);

View File

@@ -202,7 +202,7 @@ mod tests {
     #[test]
     fn test_adam_optimizer_save_load_state() {
         let linear = nn::LinearConfig::new(6, 6).init();
-        let x = Tensor::<TestADBackend, 2>::random([2, 6], Distribution::Standard);
+        let x = Tensor::<TestADBackend, 2>::random([2, 6], Distribution::Default);
         let mut optimizer = create_adam();
         let grads = linear.forward(x).backward();
         let grads = GradientsParams::from_grads(grads, &linear);

View File

@@ -115,6 +115,6 @@ mod tests {
     }

     fn random_tensor() -> Tensor<TestADBackend, 2> {
-        Tensor::<TestADBackend, 2>::random([2, 20], Distribution::Standard)
+        Tensor::<TestADBackend, 2>::random([2, 20], Distribution::Default)
     }
 }

View File

@@ -117,6 +117,6 @@ mod tests {
     }

     fn random_tensor() -> Tensor<TestADBackend, 2> {
-        Tensor::<TestADBackend, 2>::random([2, 20], Distribution::Standard)
+        Tensor::<TestADBackend, 2>::random([2, 20], Distribution::Default)
     }
 }

View File

@@ -159,7 +159,7 @@ mod tests {
     }

     fn random_tensor() -> Tensor<TestADBackend, 2> {
-        Tensor::<TestADBackend, 2>::random(Shape::new([2, 20]), Distribution::Standard)
+        Tensor::<TestADBackend, 2>::random(Shape::new([2, 20]), Distribution::Default)
     }

     fn layer() -> Linear<TestADBackend> {

View File

@@ -14,7 +14,7 @@ pub struct ModuleBasic<B: Backend> {

 impl<B: Backend> ModuleBasic<B> {
     fn new() -> Self {
-        let weight_basic = Tensor::random(Shape::new([20, 20]), Distribution::Standard);
+        let weight_basic = Tensor::random(Shape::new([20, 20]), Distribution::Default);
         Self {
             weight_basic: Param::from(weight_basic),
         }
@@ -29,7 +29,7 @@ pub struct ModuleComposed<B: Backend> {

 impl<B: Backend> ModuleComposed<B> {
     fn new() -> Self {
-        let weight = Tensor::random(Shape::new([20, 20]), Distribution::Standard);
+        let weight = Tensor::random(Shape::new([20, 20]), Distribution::Default);
         Self {
             weight: Param::from(weight),
             basic: ModuleBasic::new(),

View File

@@ -123,7 +123,7 @@ mod tests {
     fn should_support_into_and_from_data_1d() {
         let data_expected = Data::<f32, 1>::random(
             Shape::new([3]),
-            Distribution::Standard,
+            Distribution::Default,
             &mut get_seeded_rng(),
         );
         let tensor = NdArrayTensor::from_data(data_expected.clone());
@@ -137,7 +137,7 @@ mod tests {
     fn should_support_into_and_from_data_2d() {
         let data_expected = Data::<f32, 2>::random(
             Shape::new([2, 3]),
-            Distribution::Standard,
+            Distribution::Default,
             &mut get_seeded_rng(),
         );
         let tensor = NdArrayTensor::from_data(data_expected.clone());
@@ -151,7 +151,7 @@ mod tests {
     fn should_support_into_and_from_data_3d() {
         let data_expected = Data::<f32, 3>::random(
             Shape::new([2, 3, 4]),
-            Distribution::Standard,
+            Distribution::Default,
             &mut get_seeded_rng(),
         );
         let tensor = NdArrayTensor::from_data(data_expected.clone());
@@ -165,7 +165,7 @@ mod tests {
     fn should_support_into_and_from_data_4d() {
         let data_expected = Data::<f32, 4>::random(
             Shape::new([2, 3, 4, 2]),
-            Distribution::Standard,
+            Distribution::Default,
             &mut get_seeded_rng(),
         );
         let tensor = NdArrayTensor::from_data(data_expected.clone());

View File

@@ -3,7 +3,7 @@
 use burn_no_std_tests::mlp::*;
 use burn_no_std_tests::model::*;

-use burn::tensor::{backend::Backend, Distribution::Standard, Tensor};
+use burn::tensor::{backend::Backend, Distribution::Default, Tensor};
 use burn_ndarray::NdArrayBackend;

 #[test]
@@ -20,7 +20,7 @@ fn test_mnist_model_with_random_input() {
     // Some random input
     let input_shape = [1, 28, 28];
-    let input = Tensor::<Backend, 3>::random(input_shape, Standard);
+    let input = Tensor::<Backend, 3>::random(input_shape, Default);

     // Run through the model
     let output = mnist_model.forward(input);
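A side note on the new import: `use burn::tensor::Distribution::Default` brings the enum variant into scope under the bare name `Default`, which shadows the prelude's `Default` trait inside this test file. A hypothetical alternative that avoids the shadowing is to import the enum and qualify the variant:

```rust
use burn::tensor::{backend::Backend, Distribution, Tensor};

// Hypothetical rewrite of the input construction above: spelling out
// `Distribution::Default` leaves the standard `Default` trait untouched.
fn random_input<B: Backend>() -> Tensor<B, 3> {
    Tensor::<B, 3>::random([1, 28, 28], Distribution::Default)
}
```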

View File

@@ -14,9 +14,11 @@ impl<E: TchElement> TensorOps<TchBackend<E>> for TchBackend<E> {
         device: &TchDevice,
     ) -> TchTensor<E, D> {
         match distribution {
-            Distribution::Standard => {
+            Distribution::Default => {
                 let mut tensor = TchTensor::<E, D>::empty(shape, *device);
-                tensor.mut_ops(|tensor| tensor.normal_(0.0, 1.0)).unwrap()
+                tensor
+                    .mut_ops(|tensor| tensor.rand_like_out(tensor))
+                    .unwrap()
             }
             Distribution::Bernoulli(prob) => {
                 let mut tensor = TchTensor::<E, D>::empty(shape, *device);
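This hunk is the behavioral fix itself: the old code filled the tensor with `normal_(0.0, 1.0)` (a standard normal), while the renamed `Distribution::Default` now uses tch's `rand_like_out`, i.e. a uniform sample over [0, 1), matching the other backends. A minimal check of the new semantics, mirroring the test added later in this commit (the spelled-out backend type is an assumption):

```rust
use burn_tch::TchBackend;
use burn_tensor::{Distribution, Tensor};

fn main() {
    // After the fix, "default" random values on the tch backend are
    // uniform in [0, 1) rather than samples from N(0, 1).
    let tensor = Tensor::<TchBackend<f32>, 1>::random([20], Distribution::Default);
    tensor.into_data().assert_within_range(0.0..1.0);
}
```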

View File

@@ -233,7 +233,7 @@ mod tests {
     fn should_support_into_and_from_data_1d() {
         let data_expected = Data::<f32, 1>::random(
             Shape::new([3]),
-            Distribution::Standard,
+            Distribution::Default,
             &mut StdRng::from_entropy(),
         );
         let tensor = TchTensor::from_data(data_expected.clone(), tch::Device::Cpu);
@@ -247,7 +247,7 @@ mod tests {
     fn should_support_into_and_from_data_2d() {
         let data_expected = Data::<f32, 2>::random(
             Shape::new([2, 3]),
-            Distribution::Standard,
+            Distribution::Default,
             &mut StdRng::from_entropy(),
         );
         let tensor = TchTensor::from_data(data_expected.clone(), tch::Device::Cpu);

View File

@@ -28,8 +28,8 @@ pub struct Data<E, const D: usize> {
 /// Distribution for random value of a tensor.
 #[derive(Clone, Copy)]
 pub enum Distribution<E> {
-    /// Standard distribution.
-    Standard,
+    /// Uniform distribution from 0 (inclusive) to 1 (exclusive).
+    Default,

     /// Bernoulli distribution with the given probability.
     Bernoulli(f64),
@@ -112,7 +112,7 @@ where
     /// The distribution sampler.
     pub fn sampler<R: RngCore>(self, rng: &'_ mut R) -> DistributionSampler<'_, E, R> {
         let kind = match self {
-            Distribution::Standard => {
+            Distribution::Default => {
                 DistributionSamplerKind::Standard(rand::distributions::Standard {})
             }
             Distribution::Uniform(low, high) => {
@@ -141,7 +141,7 @@ where
     /// The converted distribution.
     pub fn convert<EOther: Element>(self) -> Distribution<EOther> {
         match self {
-            Distribution::Standard => Distribution::Standard,
+            Distribution::Default => Distribution::Default,
             Distribution::Uniform(a, b) => {
                 Distribution::Uniform(EOther::from_elem(a), EOther::from_elem(b))
             }
@@ -163,14 +163,22 @@ impl<const D: usize, E: Element> Data<E, D> {
     }

     /// Asserts each value is within a given range.
-    /// Bounds are inclusive.
+    ///
+    /// # Arguments
+    ///
+    /// * `range` - The range.
+    ///
+    /// # Panics
+    ///
+    /// If any value is not within the half-open range bounded inclusively below
+    /// and exclusively above (`start..end`).
     pub fn assert_within_range<EOther: Element>(&self, range: core::ops::Range<EOther>) {
         let start = range.start.elem::<f32>();
         let end = range.end.elem::<f32>();

         for elem in self.value.iter() {
             let elem = elem.elem::<f32>();
-            if elem < start || elem > end {
+            if elem < start || elem >= end {
                 panic!("Element ({elem:?}) is not within range {range:?}");
             }
         }
@@ -449,7 +457,7 @@ mod tests {
         let shape = Shape::new([3, 5, 6]);
         let num_elements = shape.num_elements();
         let data =
-            Data::<f32, 3>::random(shape, Distribution::Standard, &mut StdRng::from_entropy());
+            Data::<f32, 3>::random(shape, Distribution::Default, &mut StdRng::from_entropy());

         assert_eq!(num_elements, data.value.len());
     }
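`assert_within_range` is corrected in the same pass: the doc previously claimed inclusive bounds and the check accepted `end` itself (`elem > end`); both now agree on a half-open range, so `end` is rejected (`elem >= end`). A small sketch of the behavioral difference, assuming `Data`'s usual `From` impl for arrays:

```rust
use burn_tensor::Data;

fn main() {
    let inside = Data::<f32, 1>::from([0.0, 0.5, 0.999]);
    inside.assert_within_range(0.0..1.0); // ok: every value is in [0, 1)

    let edge = Data::<f32, 1>::from([1.0]);
    // Panics after this commit: 1.0 is excluded by the half-open range,
    // whereas the old `elem > end` check silently accepted it.
    edge.assert_within_range(0.0..1.0);
}
```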

View File

@@ -25,34 +25,35 @@ macro_rules! testgen_all {
             // test ops
             burn_tensor::testgen_add!();
-            burn_tensor::testgen_arange!();
-            burn_tensor::testgen_cat!();
             burn_tensor::testgen_aggregation!();
+            burn_tensor::testgen_arange!();
             burn_tensor::testgen_arg!();
-            burn_tensor::testgen_maxmin!();
+            burn_tensor::testgen_cat!();
             burn_tensor::testgen_cos!();
             burn_tensor::testgen_div!();
             burn_tensor::testgen_erf!();
             burn_tensor::testgen_exp!();
-            burn_tensor::testgen_log!();
-            burn_tensor::testgen_sqrt!();
-            burn_tensor::testgen_log1p!();
-            burn_tensor::testgen_slice!();
+            burn_tensor::testgen_flatten!();
             burn_tensor::testgen_gather_scatter!();
-            burn_tensor::testgen_select!();
+            burn_tensor::testgen_log!();
+            burn_tensor::testgen_log1p!();
             burn_tensor::testgen_map_comparison!();
             burn_tensor::testgen_mask!();
             burn_tensor::testgen_matmul!();
+            burn_tensor::testgen_maxmin!();
             burn_tensor::testgen_mul!();
             burn_tensor::testgen_neg!();
             burn_tensor::testgen_powf!();
+            burn_tensor::testgen_random!();
             burn_tensor::testgen_repeat!();
             burn_tensor::testgen_reshape!();
-            burn_tensor::testgen_flatten!();
+            burn_tensor::testgen_select!();
             burn_tensor::testgen_sin!();
+            burn_tensor::testgen_slice!();
+            burn_tensor::testgen_sqrt!();
             burn_tensor::testgen_squeeze!();
-            burn_tensor::testgen_tanh!();
             burn_tensor::testgen_sub!();
+            burn_tensor::testgen_tanh!();
             burn_tensor::testgen_transpose!();

             // test stats

View File

@@ -18,6 +18,7 @@ mod maxmin;
 mod mul;
 mod neg;
 mod powf;
+mod random;
 mod repeat;
 mod reshape;
 mod select;

View File

@@ -0,0 +1,13 @@
+#[burn_tensor_testgen::testgen(random)]
+mod tests {
+    use super::*;
+    use burn_tensor::{Distribution, Tensor};
+
+    #[test]
+    fn rand_standard() {
+        let tensor = Tensor::<TestBackend, 1>::random([20], Distribution::Default);
+
+        // check that the tensor is within the range of [0..1) (1 is exclusive)
+        tensor.into_data().assert_within_range(0.0..1.0);
+    }
+}
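The range assertion above already distinguishes uniform [0, 1) from the old normal sampling, since a N(0, 1) draw of 20 values almost surely falls outside the range. A hypothetical complementary check (not in this commit) pins the distribution down further via the sample mean:

```rust
#[test]
fn rand_default_has_uniform_mean() {
    // Hypothetical extra check: the mean of 10k uniform [0, 1) samples
    // concentrates near 0.5, while a N(0, 1) sample's mean stays near 0.0.
    let tensor = Tensor::<TestBackend, 1>::random([10_000], Distribution::Default);
    let mean = tensor.mean().into_data().value[0];
    assert!((mean - 0.5).abs() < 0.05);
}
```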

View File

@@ -45,8 +45,8 @@ impl<const D: usize, G: GraphicsApi> Benchmark<G> for BinaryBenchmark<D> {
     }

     fn prepare(&self, device: &WgpuDevice) -> Self::Args {
-        let lhs = Tensor::random(self.shape.clone(), Distribution::Standard).to_device(device);
-        let rhs = Tensor::random(self.shape.clone(), Distribution::Standard).to_device(device);
+        let lhs = Tensor::random(self.shape.clone(), Distribution::Default).to_device(device);
+        let rhs = Tensor::random(self.shape.clone(), Distribution::Default).to_device(device);

         (lhs, rhs)
     }

View File

@@ -44,8 +44,8 @@ where
     }

     fn prepare(&self, device: &WgpuDevice) -> Self::Args {
-        let lhs = Tensor::random(self.shape_lhs.clone(), Distribution::Standard).to_device(device);
-        let rhs = Tensor::random(self.shape_rhs.clone(), Distribution::Standard).to_device(device);
+        let lhs = Tensor::random(self.shape_lhs.clone(), Distribution::Default).to_device(device);
+        let rhs = Tensor::random(self.shape_rhs.clone(), Distribution::Default).to_device(device);

         (lhs, rhs)
     }

View File

@@ -36,7 +36,7 @@ impl<const D: usize, G: GraphicsApi> Benchmark<G> for UnaryBenchmark<D> {
     }

     fn prepare(&self, device: &WgpuDevice) -> Self::Args {
-        Tensor::random(self.shape.clone(), Distribution::Standard).to_device(device)
+        Tensor::random(self.shape.clone(), Distribution::Default).to_device(device)
     }
 }

View File

@@ -146,8 +146,8 @@ mod tests {
     #[test]
     fn binary_should_work_with_multiple_invocations() {
-        let lhs = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
-        let rhs = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
+        let lhs = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
+        let rhs = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
         let lhs_ref = Tensor::<ReferenceBackend, 2>::from_data(lhs.to_data());
         let rhs_ref = Tensor::<ReferenceBackend, 2>::from_data(rhs.to_data());
@@ -163,8 +163,8 @@ mod tests {
     #[test]
     fn binary_inplace_should_work_with_multiple_invocations() {
-        let lhs = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
-        let rhs = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
+        let lhs = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
+        let rhs = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
         let lhs_ref = Tensor::<ReferenceBackend, 2>::from_data(lhs.to_data());
         let rhs_ref = Tensor::<ReferenceBackend, 2>::from_data(rhs.to_data());

View File

@@ -60,8 +60,8 @@ mod tests {
     }

     fn test_same_as_reference(shape: [usize; 2]) {
-        let tensor1 = Tensor::<TestBackend, 2>::random(shape, Distribution::Standard);
-        let tensor2 = Tensor::<TestBackend, 2>::random(shape, Distribution::Standard);
+        let tensor1 = Tensor::<TestBackend, 2>::random(shape, Distribution::Default);
+        let tensor2 = Tensor::<TestBackend, 2>::random(shape, Distribution::Default);
         let tensor1_ref = Tensor::<ReferenceBackend, 2>::from_data(tensor1.to_data());
         let tensor2_ref = Tensor::<ReferenceBackend, 2>::from_data(tensor2.to_data());

View File

@@ -55,7 +55,7 @@ mod tests {
     #[test]
     fn gather_should_work_with_multiple_workgroups() {
         TestBackend::seed(0);
-        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
+        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
         let indices = Tensor::<TestBackend, 1, Int>::from_data(
             Tensor::<TestBackend, 1>::random([6 * 256], Distribution::Uniform(0., 256.))
                 .into_data()

View File

@@ -96,8 +96,8 @@ mod tests {
     fn same_as_reference<const D: usize>(dim: usize, shape: [usize; D]) {
         TestBackend::seed(0);
-        let tensor = Tensor::<TestBackend, D>::random(shape, Distribution::Standard);
-        let value = Tensor::<TestBackend, D>::random(shape, Distribution::Standard);
+        let tensor = Tensor::<TestBackend, D>::random(shape, Distribution::Default);
+        let value = Tensor::<TestBackend, D>::random(shape, Distribution::Default);
         let indices = Tensor::<TestBackend, 1, Int>::from_data(
             Tensor::<TestBackend, 1>::random(
                 [shape.iter().product()],

View File

@@ -114,7 +114,7 @@ mod tests {
     #[test]
     fn select_should_work_with_multiple_workgroups() {
-        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
+        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
         let indices = Tensor::<TestBackend, 1, Int>::arange(0..100);
         let tensor_ref = Tensor::<ReferenceBackend, 2>::from_data(tensor.to_data());
         let indices_ref =
@@ -156,8 +156,8 @@ mod tests {
     fn select_assign_same_as_ref<const D: usize>(dim: usize, shape: [usize; D]) {
         TestBackend::seed(0);
-        let tensor = Tensor::<TestBackend, D>::random(shape, Distribution::Standard);
-        let value = Tensor::<TestBackend, D>::random(shape, Distribution::Standard);
+        let tensor = Tensor::<TestBackend, D>::random(shape, Distribution::Default);
+        let value = Tensor::<TestBackend, D>::random(shape, Distribution::Default);
         let indices = Tensor::<TestBackend, 1, Int>::from_data(
             Tensor::<TestBackend, 1>::random(
                 [shape[dim]],

View File

@@ -103,7 +103,7 @@ mod tests {
     #[test]
     fn slice_should_work_with_multiple_workgroups() {
-        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
+        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
         let indices = [3..5, 45..256];
         let tensor_ref = Tensor::<ReferenceBackend, 2>::from_data(tensor.to_data());
@@ -118,8 +118,8 @@ mod tests {
     #[test]
     fn slice_assign_should_work_with_multiple_workgroups() {
-        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
-        let value = Tensor::<TestBackend, 2>::random([2, 211], Distribution::Standard);
+        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
+        let value = Tensor::<TestBackend, 2>::random([2, 211], Distribution::Default);
         let indices = [3..5, 45..256];
         let tensor_ref = Tensor::<ReferenceBackend, 2>::from_data(tensor.to_data());
         let value_ref = Tensor::<ReferenceBackend, 2>::from_data(value.to_data());

View File

@@ -237,7 +237,7 @@ mod tests {
         Tensor<ReferenceBackend, 3>,
         Tensor<ReferenceBackend, 3, Bool>,
     ) {
-        let tensor = Tensor::<TestBackend, 3>::random([2, 6, 256], Distribution::Standard);
+        let tensor = Tensor::<TestBackend, 3>::random([2, 6, 256], Distribution::Default);
         let mask = Tensor::<TestBackend, 3>::random([2, 6, 256], Distribution::Uniform(0., 1.))
             .lower_equal_elem(0.5);
         let tensor_ref = Tensor::<ReferenceBackend, 3>::from_data(tensor.to_data());
@@ -256,8 +256,8 @@ mod tests {
         Tensor<ReferenceBackend, 3, Bool>,
     ) {
         TestBackend::seed(0);
-        let tensor = Tensor::<TestBackend, 3>::random([2, 6, 256], Distribution::Standard);
-        let value = Tensor::<TestBackend, 3>::random([2, 6, 256], Distribution::Standard);
+        let tensor = Tensor::<TestBackend, 3>::random([2, 6, 256], Distribution::Default);
+        let value = Tensor::<TestBackend, 3>::random([2, 6, 256], Distribution::Default);
         let mask = Tensor::<TestBackend, 3>::random([2, 6, 256], Distribution::Uniform(0., 1.))
             .lower_equal_elem(0.5);
         let tensor_ref = Tensor::<ReferenceBackend, 3>::from_data(tensor.to_data());

View File

@@ -163,7 +163,7 @@ mod tests {
     #[test]
     fn unary_should_work_with_multiple_invocations() {
-        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
+        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
         let tensor_ref = Tensor::<ReferenceBackend, 2>::from_data(tensor.to_data());
         let actual = unary::<TestKernel, _, 2, 16>(tensor.into_primitive());
@@ -177,7 +177,7 @@ mod tests {
     #[test]
     fn unary_inplace_should_work_with_multiple_invocations() {
-        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
+        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
         let tensor_ref = Tensor::<ReferenceBackend, 2>::from_data(tensor.to_data());
         let actual = unary_inplace::<TestKernelInplace, _, 2, 16>(tensor.into_primitive());

View File

@@ -181,7 +181,7 @@ mod tests {
     #[test]
     fn unary_scalar_should_work_with_multiple_invocations() {
-        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
+        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
         let tensor_ref = Tensor::<ReferenceBackend, 2>::from_data(tensor.to_data());
         let actual = unary_scalar::<TestKernel, _, 2, 16>(tensor.into_primitive(), 5.0);
@@ -195,7 +195,7 @@ mod tests {
     #[test]
     fn unary_scalar_inplace_should_work_with_multiple_invocations() {
-        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Standard);
+        let tensor = Tensor::<TestBackend, 2>::random([6, 256], Distribution::Default);
         let tensor_ref = Tensor::<ReferenceBackend, 2>::from_data(tensor.to_data());
         let actual =

View File

@@ -12,12 +12,12 @@ pub fn run<B: Backend>() {
     let weights = NamedTensor::<B, (Batch, DModel, DModel)>::random(
         [1, d_model, d_model],
-        Distribution::Standard,
+        Distribution::Default,
     );

     let input = NamedTensor::<B, (Batch, SeqLength, DModel)>::random(
         [batch_size, seq_length, d_model],
-        Distribution::Standard,
+        Distribution::Default,
     );

     // Doesn't compile