burn/backend-comparison/benches/custom_gelu.rs

use backend_comparison::persistence::save;
use burn::backend::Autodiff;
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
use burn_common::benchmark::{run_benchmark, Benchmark};
use core::f64::consts::SQRT_2;
use derive_new::new;

#[derive(Debug)]
enum GeluKind {
    Reference,
    WithReferenceErf,
    WithCustomErf,
}

/// Benchmark how well a backend executes a custom activation function with a lot of basic tensor
/// operations.
#[derive(new)]
struct CustomGeluBenchmark<B: Backend, const D: usize> {
    shape: Shape,
    device: B::Device,
    kind: GeluKind,
    autodiff: bool,
}

impl<B: Backend, const D: usize> Benchmark for CustomGeluBenchmark<B, D> {
    type Args = Tensor<B, D>;

    fn name(&self) -> String {
        match self.autodiff {
            true => format!("gelu_autodiff_{:?}", self.kind),
            false => format!("gelu_{:?}", self.kind),
        }
    }

    fn options(&self) -> Option<String> {
        Some(format!("{:?}", self.kind))
    }

    fn shapes(&self) -> Vec<Vec<usize>> {
        vec![self.shape.dims.clone()]
    }

    fn execute(&self, tensor: Self::Args) {
        match self.autodiff {
            true => {
                let tensor: Tensor<Autodiff<B>, D> = Tensor::from_inner(tensor).require_grad();
                let output = match self.kind {
                    GeluKind::Reference => burn::tensor::activation::gelu(tensor.clone()),
                    GeluKind::WithReferenceErf => gelu_custom(tensor.clone(), Tensor::erf),
                    GeluKind::WithCustomErf => gelu_custom(tensor.clone(), erf_custom),
                };
                let mut gradients = output.sum().backward();
                let _tmp = tensor.grad_remove(&mut gradients).unwrap();
            }
            false => {
                match self.kind {
                    GeluKind::Reference => burn::tensor::activation::gelu(tensor),
                    GeluKind::WithReferenceErf => gelu_custom(tensor, Tensor::erf),
                    GeluKind::WithCustomErf => gelu_custom(tensor, erf_custom),
                };
            }
        };
    }

    fn prepare(&self) -> Self::Args {
        Tensor::random(self.shape.clone(), Distribution::Default, &self.device)
    }

    fn sync(&self) {
        B::sync(&self.device)
    }

    fn num_samples(&self) -> usize {
        10
    }
}
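
/// GELU written out with basic tensor operations: GELU(x) = x * (1 + erf(x / sqrt(2))) / 2.
/// The erf implementation is passed in as a closure so the reference and custom erf variants
/// share the same code path.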
fn gelu_custom<B, const D: usize, Erf>(x: Tensor<B, D>, erf: Erf) -> Tensor<B, D>
where
    B: Backend,
    Erf: Fn(Tensor<B, D>) -> Tensor<B, D>,
{
    let x = x.clone() * (erf(x / SQRT_2) + 1);
    x / 2
}
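
/// erf over the full real line, obtained from the positive-x approximation below via the odd
/// symmetry of the error function, erf(x) = -erf(-x); the mask picks the branch per element.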
fn erf_custom<B: Backend, const D: usize>(x: Tensor<B, D>) -> Tensor<B, D> {
    let x1 = -erf_positive(-x.clone());
    let x2 = erf_positive(x.clone());
    let mask = x.greater_elem(0);
    x1.mask_where(mask, x2)
}

/// An approximation of the error function: https://en.wikipedia.org/wiki/Error_function#Numerical_approximations
///
/// > (maximum error: 1.5×10⁻⁷)
/// > All of these approximations are valid for x ≥ 0. To use these approximations for negative x,
/// > use the fact that erf(x) is an odd function, so erf(x) = -erf(-x).
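///
/// The constants below match the Abramowitz & Stegun approximation 7.1.26:
/// erf(x) ≈ 1 - (a1*t + a2*t^2 + a3*t^3 + a4*t^4 + a5*t^5) * exp(-x^2), where t = 1 / (1 + p*x).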
fn erf_positive<B: Backend, const D: usize>(x: Tensor<B, D>) -> Tensor<B, D> {
    let p = 0.3275911;
    let a1 = 0.254829592;
    let a2 = -0.284496736;
    let a3 = 1.421413741;
    let a4 = -1.453152027;
    let a5 = 1.061405429;
    let x1 = x.clone().abs() * p + 1;
    let t = x1.recip();
    let tmp = (((((t.clone() * a5) + a4) * t.clone()) + a3) * t.clone() + a2) * t.clone() + a1;
    -(tmp * t * (-x.clone() * x).exp()) + 1.0
}

#[allow(dead_code)]
fn bench<B: Backend>(
    device: &B::Device,
    feature_name: &str,
    url: Option<&str>,
    token: Option<&str>,
) {
    const D: usize = 3;
    let shape: Shape = [32, 512, 2048].into();

    let run = |autodiff: bool| {
        let reference_gelu = CustomGeluBenchmark::<B, D>::new(
            shape.clone(),
            device.clone(),
            GeluKind::Reference,
            autodiff,
        );
        let reference_erf_gelu = CustomGeluBenchmark::<B, D>::new(
            shape.clone(),
            device.clone(),
            GeluKind::WithReferenceErf,
            autodiff,
        );
        let custom_erf_gelu = CustomGeluBenchmark::<B, D>::new(
            shape.clone(),
            device.clone(),
            GeluKind::WithCustomErf,
            autodiff,
        );
        save::<B>(
            vec![
                run_benchmark(reference_gelu),
                run_benchmark(reference_erf_gelu),
                run_benchmark(custom_erf_gelu),
            ],
            device,
            feature_name,
            url,
            token,
        )
        .unwrap();
    };

    run(false);
    run(true);
}

fn main() {
    backend_comparison::bench_on_backend!();
}
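
// A minimal sanity-check sketch (not part of the original benchmark): it compares `erf_custom`
// against the backend's built-in erf on random inputs covering both signs. It assumes burn's
// `ndarray` feature is enabled so that `burn::backend::NdArray` is available; any other backend
// could be substituted the same way.
#[cfg(test)]
mod tests {
    use super::*;
    use burn::backend::NdArray;

    #[test]
    fn custom_erf_is_close_to_reference() {
        let device = Default::default();
        // Random inputs in [-3, 3] exercise both the negative and positive branches of `erf_custom`.
        let x = Tensor::<NdArray, 1>::random([256usize], Distribution::Uniform(-3.0, 3.0), &device);

        let reference = x.clone().erf();
        let custom = erf_custom(x);

        // The approximation has a maximum error of about 1.5e-7, so a loose tolerance suffices.
        assert!(reference.all_close(custom, Some(1e-4), Some(1e-4)));
    }
}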