From 2c151a5570deeb93f85bfc9e9fde0d6f584875dd Mon Sep 17 00:00:00 2001
From: Nathaniel Simard
Date: Sun, 2 Apr 2023 17:37:01 -0400
Subject: [PATCH] Update module doc + add sponsors section (#267)

---
 README.md                           | 25 +++++++++++-----
 burn-core/src/nn/conv/conv1d.rs     |  5 +---
 burn-core/src/nn/conv/conv2d.rs     |  5 +---
 burn-core/src/nn/dropout.rs         |  6 ++--
 burn-core/src/nn/gelu.rs            |  6 ++--
 burn-core/src/nn/pool/max_pool2d.rs |  7 ++---
 burn-core/src/nn/relu.rs            |  6 ++--
 burn-derive/src/lib.rs              |  2 --
 burn-derive/src/module/base.rs      | 44 +++++++++++++++++++++++++++++
 9 files changed, 73 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index c5cdde6a9..658dca506 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ __Sections__
 * [Config](#config)
 * [Learner](#learner)
 * [no_std support](#no_std-support)
+* [Sponsors](#sponsors)
 * [License](#license)
 
 ## Features
@@ -123,18 +124,17 @@ fn main() {
 #### Module
 
 The `Module` derive allows you to create your own neural network modules, similar to PyTorch.
-Note that the `Module` derive generates all the necessary methods to make your type essentially a parameter container.
-It makes no assumptions about how the forward function is declared.
+The derive only generates the methods necessary for your type to act essentially as a parameter container; it makes no assumptions about how the forward pass is declared.
 
 ```rust
 use burn::nn;
-use burn::module::{Param, Module};
+use burn::module::Module;
 use burn::tensor::backend::Backend;
 
 #[derive(Module, Debug)]
 pub struct PositionWiseFeedForward<B: Backend> {
-    linear_inner: Param<nn::Linear<B>>,
-    linear_outer: Param<nn::Linear<B>>,
+    linear_inner: Linear<B>,
+    linear_outer: Linear<B>,
     dropout: Dropout,
     gelu: GELU,
 }
@@ -150,7 +150,8 @@ impl<B: Backend> PositionWiseFeedForward<B> {
     }
 }
 ```
 
-Note that only the fields wrapped inside `Param` are updated during training, and the other fields should implement the `Clone` trait.
+Note that all fields declared in the struct must also implement the `Module` trait.
+The `Tensor` struct doesn't implement `Module`, but `Param<Tensor<B, D>>` does.
 
 #### Config
@@ -189,6 +190,7 @@ In order to create a learner, you must use the `LearnerBuilder`.
 ```rust
 use burn::train::LearnerBuilder;
 use burn::train::metric::{AccuracyMetric, LossMetric};
+use burn::record::DefaultRecordSettings;
 
 fn main() {
     let dataloader_train = ...;
@@ -202,7 +204,7 @@ fn main() {
         .metric_valid_plot(AccuracyMetric::new())
         .metric_train(LossMetric::new())
         .metric_valid(LossMetric::new())
-        .with_file_checkpointer::<f32>(2)
+        .with_file_checkpointer::<DefaultRecordSettings>(2)
         .num_epochs(10)
         .build(model, optim);
 
@@ -222,6 +224,15 @@ Additionally `burn-core` and `burn-tensor` crates support `no_std` with `alloc`
 Note, under the `no_std` mode, a random seed is generated during the build time if the seed is not initialized by `Backend::seed` method.
 Additionally, [spin::mutex::Mutex](https://docs.rs/spin/latest/spin/mutex/struct.Mutex.html) is used in place of [std::sync::Mutex](https://doc.rust-lang.org/std/sync/struct.Mutex.html) under the `no_std` mode.
 
+## Sponsors
+
+You can sponsor the founder of Burn via his [GitHub Sponsors profile](https://github.com/sponsors/nathanielsimard).
+The Burn-rs organization doesn't yet have a fiscal entity, but other sponsorship options might become available as the project grows.
+
+Thanks to all current sponsors 🙏.
+
+nathanielsimard
+
 ## License
 
 Burn is distributed under the terms of both the MIT license and the Apache License (Version 2.0).
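The README hunk above says every field of a derived module must itself implement `Module`, and that a raw `Tensor` becomes learnable only when wrapped in `Param`. The following is a minimal sketch of that rule in user code; it is not part of this patch, the `ScaledLinear` name and its fields are hypothetical, and it assumes the `Linear`, `Param`, and `Tensor` APIs shown in the README example.

```rust
use burn::module::{Module, Param};
use burn::nn::Linear;
use burn::tensor::backend::Backend;
use burn::tensor::Tensor;

// Hypothetical module mixing a sub-module with a learnable tensor.
// `hidden` implements `Module` on its own, while `scale` is a plain
// tensor and therefore must be wrapped in `Param` to satisfy the derive
// and to be updated during training.
#[derive(Module, Debug)]
pub struct ScaledLinear<B: Backend> {
    hidden: Linear<B>,
    scale: Param<Tensor<B, 1>>,
}
```

How (or whether) a `forward` method is declared for such a type is entirely up to the implementer, as the README note points out.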
diff --git a/burn-core/src/nn/conv/conv1d.rs b/burn-core/src/nn/conv/conv1d.rs
index f3370d058..c9000c40e 100644
--- a/burn-core/src/nn/conv/conv1d.rs
+++ b/burn-core/src/nn/conv/conv1d.rs
@@ -3,7 +3,6 @@ use alloc::vec::Vec;
 use crate as burn;
 
 use crate::config::Config;
-use crate::constant;
 use crate::module::Module;
 use crate::module::Param;
 use crate::nn::Initializer;
@@ -34,7 +33,7 @@ pub struct Conv1dConfig {
 }
 
 /// Padding configuration for 1D convolution [config](Conv1dConfig).
-#[derive(Config, Debug)]
+#[derive(Module, Config, Debug)]
 pub enum Conv1dPaddingConfig {
     /// Dynamicaly calculate the amount of padding necessary to ensure that the output size will be
     /// the same as the input.
@@ -43,8 +42,6 @@ pub enum Conv1dPaddingConfig {
     Explicit(usize),
 }
 
-constant!(Conv1dPaddingConfig);
-
 /// Applies a 1D convolution over input tensors.
 ///
 /// # Params
diff --git a/burn-core/src/nn/conv/conv2d.rs b/burn-core/src/nn/conv/conv2d.rs
index 7035e2835..519f0a93b 100644
--- a/burn-core/src/nn/conv/conv2d.rs
+++ b/burn-core/src/nn/conv/conv2d.rs
@@ -3,7 +3,6 @@ use alloc::vec::Vec;
 use crate as burn;
 
 use crate::config::Config;
-use crate::constant;
 use crate::module::Module;
 use crate::module::Param;
 use crate::nn::Initializer;
@@ -33,7 +32,7 @@ pub struct Conv2dConfig {
 }
 
 /// Padding configuration for 2D convolution [config](Conv2dConfig).
-#[derive(Config, Debug)]
+#[derive(Module, Config, Debug)]
 pub enum Conv2dPaddingConfig {
     /// Dynamicaly calculate the amount of padding necessary to ensure that the output size will be
     /// the same as the input.
@@ -44,8 +43,6 @@ pub enum Conv2dPaddingConfig {
     Explicit(usize, usize),
 }
 
-constant!(Conv2dPaddingConfig);
-
 /// Applies a 2D convolution over input tensors.
 ///
 /// # Params
diff --git a/burn-core/src/nn/dropout.rs b/burn-core/src/nn/dropout.rs
index f161752b8..c1c5aaaef 100644
--- a/burn-core/src/nn/dropout.rs
+++ b/burn-core/src/nn/dropout.rs
@@ -1,7 +1,7 @@
 use crate as burn;
 
 use crate::config::Config;
-use crate::constant;
+use crate::module::Module;
 use crate::tensor::backend::Backend;
 use crate::tensor::{Distribution, Tensor};
 
@@ -18,13 +18,11 @@ pub struct DropoutConfig {
 /// [Improving neural networks by preventing co-adaptation of feature detectors](https://arxiv.org/abs/1207.0580).
 ///
 /// The input is also scaled during training to `1 / (1 - prob_keep)`.
-#[derive(Clone, Debug)]
+#[derive(Module, Clone, Debug)]
 pub struct Dropout {
     prob: f64,
 }
 
-constant!(Dropout);
-
 impl DropoutConfig {
     /// Initialize a new [dropout](Dropout) module.
     pub fn init(&self) -> Dropout {
diff --git a/burn-core/src/nn/gelu.rs b/burn-core/src/nn/gelu.rs
index 3e542149d..020b6e5ee 100644
--- a/burn-core/src/nn/gelu.rs
+++ b/burn-core/src/nn/gelu.rs
@@ -1,15 +1,13 @@
 use crate as burn;
 
-use crate::constant;
+use crate::module::Module;
 use crate::tensor::backend::Backend;
 use crate::tensor::Tensor;
 
 /// Applies the Gaussian Error Linear Units function element-wise.
-#[derive(Clone, Debug, Default)]
+#[derive(Module, Clone, Debug, Default)]
 pub struct GELU {}
 
-constant!(GELU);
-
 impl GELU {
     /// Create the module.
     pub fn new() -> Self {
diff --git a/burn-core/src/nn/pool/max_pool2d.rs b/burn-core/src/nn/pool/max_pool2d.rs
index 3957ea797..fa41a194d 100644
--- a/burn-core/src/nn/pool/max_pool2d.rs
+++ b/burn-core/src/nn/pool/max_pool2d.rs
@@ -1,6 +1,7 @@
-use crate::{self as burn, constant};
+use crate as burn;
 
 use crate::config::Config;
+use crate::module::Module;
 use crate::nn::conv::Conv2dPaddingConfig;
 use crate::tensor::backend::Backend;
 use crate::tensor::Tensor;
@@ -25,15 +26,13 @@ pub struct MaxPool2dConfig {
 pub type MaxPool2dPaddingConfig = Conv2dPaddingConfig;
 
 /// Applies a 2D max pooling over input tensors.
-#[derive(Debug, Clone)]
+#[derive(Module, Debug, Clone)]
 pub struct MaxPool2d {
     stride: [usize; 2],
     kernel_size: [usize; 2],
     padding: MaxPool2dPaddingConfig,
 }
 
-constant!(MaxPool2d);
-
 impl MaxPool2dConfig {
     /// Initialize a new [max pool 2d](MaxPool2d) module.
     pub fn init(&self) -> MaxPool2d {
diff --git a/burn-core/src/nn/relu.rs b/burn-core/src/nn/relu.rs
index 2681f3b54..92e260c7e 100644
--- a/burn-core/src/nn/relu.rs
+++ b/burn-core/src/nn/relu.rs
@@ -1,17 +1,15 @@
 use crate as burn;
 
-use crate::constant;
+use crate::module::Module;
 use crate::tensor::backend::Backend;
 use crate::tensor::Tensor;
 
 /// Applies the rectified linear unit function element-wise:
 ///
 /// `y = max(0, x)`
-#[derive(Clone, Debug, Default)]
+#[derive(Module, Clone, Debug, Default)]
 pub struct ReLU {}
 
-constant!(ReLU);
-
 impl ReLU {
     /// Create the module.
     pub fn new() -> Self {
diff --git a/burn-derive/src/lib.rs b/burn-derive/src/lib.rs
index 193c21c74..0145e0e8f 100644
--- a/burn-derive/src/lib.rs
+++ b/burn-derive/src/lib.rs
@@ -10,8 +10,6 @@ use module::module_derive_impl;
 #[proc_macro_derive(Module)]
 pub fn module_derive(input: TokenStream) -> TokenStream {
     let input = syn::parse(input).unwrap();
-
-    // panic!("{}", gen);
     module_derive_impl(&input)
 }
 
diff --git a/burn-derive/src/module/base.rs b/burn-derive/src/module/base.rs
index 7e4d169ed..c7a3a4141 100644
--- a/burn-derive/src/module/base.rs
+++ b/burn-derive/src/module/base.rs
@@ -2,9 +2,53 @@ use super::{fn_generator::FnGenerator, record::RecordGenerator};
 use crate::module::display;
 use proc_macro::TokenStream;
 use quote::quote;
+use syn::parse_quote;
+
+pub(crate) fn constant_derive_impl(ast: &syn::DeriveInput) -> TokenStream {
+    let name = &ast.ident;
+    let (_, generics_ty, generics_where) = ast.generics.split_for_impl();
+
+    let backend: syn::Generics = parse_quote! { <B: burn::tensor::backend::Backend> };
+    let backend_ad: syn::Generics = parse_quote! { <B: burn::tensor::backend::ADBackend> };
+
+    let mut generics_module = ast.generics.clone();
+    let mut generics_module_ad = ast.generics.clone();
+
+    for param in backend.params.into_iter() {
+        generics_module.params.push(param);
+    }
+    for param in backend_ad.params.into_iter() {
+        generics_module_ad.params.push(param);
+    }
+    let (generics_module, _, _) = generics_module.split_for_impl();
+    let (generics_module_ad, _, _) = generics_module_ad.split_for_impl();
+
+    let gen = quote! {
+        impl #generics_module burn::module::Module<B> for #name #generics_ty #generics_where {
+            burn::constant!(module);
+        }
+
+        impl #generics_module_ad burn::module::ADModule<B> for #name #generics_ty #generics_where {
+            burn::constant!(ad_module, #name #generics_ty);
+        }
+    };
+
+    gen.into()
+}
 
 pub(crate) fn module_derive_impl(ast: &syn::DeriveInput) -> TokenStream {
     let name = &ast.ident;
+    let has_backend = ast
+        .generics
+        .type_params()
+        .map(|param| param.ident == "B")
+        .reduce(|accum, is_backend| is_backend || accum)
+        .unwrap_or(false);
+
+    if !has_backend {
+        return constant_derive_impl(ast);
+    }
+
     let (generics, generics_ty, generics_where) = ast.generics.split_for_impl();
 
     let display_fn = display::display_fn(name);
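The `has_backend` check above is what lets the nn modules earlier in this patch (`Dropout`, `GELU`, `ReLU`, `MaxPool2d`, and the padding configs) drop their explicit `constant!` invocations: a type with no `B` type parameter now takes the constant derive path and receives stateless `Module`/`ADModule` impls. The following is a minimal sketch of a downstream constant module after this change; it is not part of the patch, the `Scale` type is hypothetical, and it assumes the tensor `mul_scalar` API.

```rust
use burn::module::Module;
use burn::tensor::backend::Backend;
use burn::tensor::Tensor;

// Hypothetical constant module: it has no `B: Backend` type parameter, so
// the derive falls back to `constant_derive_impl` and generates
// Module/ADModule impls that carry no trainable state or record.
#[derive(Module, Clone, Debug)]
pub struct Scale {
    factor: f64,
}

impl Scale {
    // The forward pass can still be generic over the backend even though
    // the module itself is not.
    pub fn forward<B: Backend, const D: usize>(&self, input: Tensor<B, D>) -> Tensor<B, D> {
        input.mul_scalar(self.factor)
    }
}
```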