From 2c151a5570deeb93f85bfc9e9fde0d6f584875dd Mon Sep 17 00:00:00 2001
From: Nathaniel Simard
Date: Sun, 2 Apr 2023 17:37:01 -0400
Subject: [PATCH] Update module doc + add sponsors section (#267)

---
 README.md                           | 25 +++++++++++-----
 burn-core/src/nn/conv/conv1d.rs     |  5 +---
 burn-core/src/nn/conv/conv2d.rs     |  5 +---
 burn-core/src/nn/dropout.rs         |  6 ++--
 burn-core/src/nn/gelu.rs            |  6 ++--
 burn-core/src/nn/pool/max_pool2d.rs |  7 ++---
 burn-core/src/nn/relu.rs            |  6 ++--
 burn-derive/src/lib.rs              |  2 --
 burn-derive/src/module/base.rs      | 44 +++++++++++++++++++++++++++++
 9 files changed, 73 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index c5cdde6a9..658dca506 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ __Sections__
 * [Config](#config)
 * [Learner](#learner)
 * [no_std support](#no_std-support)
+* [Sponsors](#sponsors)
 * [License](#license)
 
 ## Features
@@ -123,18 +124,17 @@ fn main() {
 #### Module
 
 The `Module` derive allows you to create your own neural network modules, similar to PyTorch.
-Note that the `Module` derive generates all the necessary methods to make your type essentially a parameter container.
-It makes no assumptions about how the forward function is declared.
+The derive only generates the methods necessary for your type to act essentially as a parameter container; it makes no assumptions about how the forward pass is declared.
 
 ```rust
 use burn::nn;
-use burn::module::{Param, Module};
+use burn::module::Module;
 use burn::tensor::backend::Backend;
 
 #[derive(Module, Debug)]
 pub struct PositionWiseFeedForward<B: Backend> {
-    linear_inner: Param<nn::Linear<B>>,
-    linear_outer: Param<nn::Linear<B>>,
+    linear_inner: Linear<B>,
+    linear_outer: Linear<B>,
     dropout: Dropout,
     gelu: GELU,
 }
@@ -150,7 +150,8 @@ impl<B: Backend> PositionWiseFeedForward<B> {
     }
 }
 ```
 
-Note that only the fields wrapped inside `Param` are updated during training, and the other fields should implement the `Clone` trait.
+Note that all fields declared in the struct must also implement the `Module` trait.
+The `Tensor` struct doesn't implement `Module`, but `Param<Tensor<B, D>>` does.
 
 #### Config
@@ -189,6 +190,7 @@ In order to create a learner, you must use the `LearnerBuilder`.
 ```rust
 use burn::train::LearnerBuilder;
 use burn::train::metric::{AccuracyMetric, LossMetric};
+use burn::record::DefaultRecordSettings;
 
 fn main() {
     let dataloader_train = ...;
@@ -202,7 +204,7 @@ fn main() {
         .metric_valid_plot(AccuracyMetric::new())
         .metric_train(LossMetric::new())
         .metric_valid(LossMetric::new())
-        .with_file_checkpointer::<f32>(2)
+        .with_file_checkpointer::<DefaultRecordSettings>(2)
         .num_epochs(10)
         .build(model, optim);
 
@@ -222,6 +224,15 @@ Additionally `burn-core` and `burn-tensor` crates support `no_std` with `alloc`
 Note, under the `no_std` mode, a random seed is generated during the build time if the seed is not initialized by `Backend::seed` method.
 Additionally, [spin::mutex::Mutex](https://docs.rs/spin/latest/spin/mutex/struct.Mutex.html) is used in place of [std::sync::Mutex](https://doc.rust-lang.org/std/sync/struct.Mutex.html) under the `no_std` mode.
 
+## Sponsors
+
+You can sponsor the founder of Burn via his [GitHub Sponsors profile](https://github.com/sponsors/nathanielsimard).
+The Burn-rs organization doesn't yet have a fiscal entity, but other sponsorship options might become available as the project grows.
+
+Thanks to all current sponsors 🙏.
+
+nathanielsimard
+
 ## License
 
 Burn is distributed under the terms of both the MIT license and the Apache License (Version 2.0).
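The README hunk above says every field of a derived module must itself implement `Module`, and that a raw `Tensor` becomes learnable only when wrapped in `Param`. The following is a minimal sketch of that rule in user code; it is not part of this patch, the `ScaledLinear` name and its fields are hypothetical, and it assumes the `Linear`, `Param`, and `Tensor` APIs shown in the README example.

```rust
use burn::module::{Module, Param};
use burn::nn::Linear;
use burn::tensor::backend::Backend;
use burn::tensor::Tensor;

// Hypothetical module mixing a sub-module with a learnable tensor.
// `hidden` implements `Module` on its own, while `scale` is a plain
// tensor and therefore must be wrapped in `Param` to satisfy the derive
// and to be updated during training.
#[derive(Module, Debug)]
pub struct ScaledLinear<B: Backend> {
    hidden: Linear<B>,
    scale: Param<Tensor<B, 1>>,
}
```

How (or whether) a `forward` method is declared for such a type is entirely up to the implementer, as the README note points out.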
diff --git a/burn-core/src/nn/conv/conv1d.rs b/burn-core/src/nn/conv/conv1d.rs
index f3370d058..c9000c40e 100644
--- a/burn-core/src/nn/conv/conv1d.rs
+++ b/burn-core/src/nn/conv/conv1d.rs
@@ -3,7 +3,6 @@ use alloc::vec::Vec;
 use crate as burn;
 
 use crate::config::Config;
-use crate::constant;
 use crate::module::Module;
 use crate::module::Param;
 use crate::nn::Initializer;
@@ -34,7 +33,7 @@ pub struct Conv1dConfig {
 }
 
 /// Padding configuration for 1D convolution [config](Conv1dConfig).
-#[derive(Config, Debug)]
+#[derive(Module, Config, Debug)]
 pub enum Conv1dPaddingConfig {
     /// Dynamicaly calculate the amount of padding necessary to ensure that the output size will be
     /// the same as the input.
@@ -43,8 +42,6 @@ pub enum Conv1dPaddingConfig {
     Explicit(usize),
 }
 
-constant!(Conv1dPaddingConfig);
-
 /// Applies a 1D convolution over input tensors.
 ///
 /// # Params
diff --git a/burn-core/src/nn/conv/conv2d.rs b/burn-core/src/nn/conv/conv2d.rs
index 7035e2835..519f0a93b 100644
--- a/burn-core/src/nn/conv/conv2d.rs
+++ b/burn-core/src/nn/conv/conv2d.rs
@@ -3,7 +3,6 @@ use alloc::vec::Vec;
 use crate as burn;
 
 use crate::config::Config;
-use crate::constant;
 use crate::module::Module;
 use crate::module::Param;
 use crate::nn::Initializer;
@@ -33,7 +32,7 @@ pub struct Conv2dConfig {
 }
 
 /// Padding configuration for 2D convolution [config](Conv2dConfig).
-#[derive(Config, Debug)]
+#[derive(Module, Config, Debug)]
 pub enum Conv2dPaddingConfig {
     /// Dynamicaly calculate the amount of padding necessary to ensure that the output size will be
     /// the same as the input.
@@ -44,8 +43,6 @@ pub enum Conv2dPaddingConfig {
     Explicit(usize, usize),
 }
 
-constant!(Conv2dPaddingConfig);
-
 /// Applies a 2D convolution over input tensors.
 ///
 /// # Params
diff --git a/burn-core/src/nn/dropout.rs b/burn-core/src/nn/dropout.rs
index f161752b8..c1c5aaaef 100644
--- a/burn-core/src/nn/dropout.rs
+++ b/burn-core/src/nn/dropout.rs
@@ -1,7 +1,7 @@
 use crate as burn;
 
 use crate::config::Config;
-use crate::constant;
+use crate::module::Module;
 use crate::tensor::backend::Backend;
 use crate::tensor::{Distribution, Tensor};
 
@@ -18,13 +18,11 @@ pub struct DropoutConfig {
 /// [Improving neural networks by preventing co-adaptation of feature detectors](https://arxiv.org/abs/1207.0580).
 ///
 /// The input is also scaled during training to `1 / (1 - prob_keep)`.
-#[derive(Clone, Debug)]
+#[derive(Module, Clone, Debug)]
 pub struct Dropout {
     prob: f64,
 }
 
-constant!(Dropout);
-
 impl DropoutConfig {
     /// Initialize a new [dropout](Dropout) module.
     pub fn init(&self) -> Dropout {
diff --git a/burn-core/src/nn/gelu.rs b/burn-core/src/nn/gelu.rs
index 3e542149d..020b6e5ee 100644
--- a/burn-core/src/nn/gelu.rs
+++ b/burn-core/src/nn/gelu.rs
@@ -1,15 +1,13 @@
 use crate as burn;
 
-use crate::constant;
+use crate::module::Module;
 use crate::tensor::backend::Backend;
 use crate::tensor::Tensor;
 
 /// Applies the Gaussian Error Linear Units function element-wise.
-#[derive(Clone, Debug, Default)]
+#[derive(Module, Clone, Debug, Default)]
 pub struct GELU {}
 
-constant!(GELU);
-
 impl GELU {
     /// Create the module.
     pub fn new() -> Self {
diff --git a/burn-core/src/nn/pool/max_pool2d.rs b/burn-core/src/nn/pool/max_pool2d.rs
index 3957ea797..fa41a194d 100644
--- a/burn-core/src/nn/pool/max_pool2d.rs
+++ b/burn-core/src/nn/pool/max_pool2d.rs
@@ -1,6 +1,7 @@
-use crate::{self as burn, constant};
+use crate as burn;
 
 use crate::config::Config;
+use crate::module::Module;
 use crate::nn::conv::Conv2dPaddingConfig;
 use crate::tensor::backend::Backend;
 use crate::tensor::Tensor;
@@ -25,15 +26,13 @@ pub struct MaxPool2dConfig {
 pub type MaxPool2dPaddingConfig = Conv2dPaddingConfig;
 
 /// Applies a 2D max pooling over input tensors.
-#[derive(Debug, Clone)]
+#[derive(Module, Debug, Clone)]
 pub struct MaxPool2d {
     stride: [usize; 2],
     kernel_size: [usize; 2],
     padding: MaxPool2dPaddingConfig,
 }
 
-constant!(MaxPool2d);
-
 impl MaxPool2dConfig {
     /// Initialize a new [max pool 2d](MaxPool2d) module.
     pub fn init(&self) -> MaxPool2d {
diff --git a/burn-core/src/nn/relu.rs b/burn-core/src/nn/relu.rs
index 2681f3b54..92e260c7e 100644
--- a/burn-core/src/nn/relu.rs
+++ b/burn-core/src/nn/relu.rs
@@ -1,17 +1,15 @@
 use crate as burn;
 
-use crate::constant;
+use crate::module::Module;
 use crate::tensor::backend::Backend;
 use crate::tensor::Tensor;
 
 /// Applies the rectified linear unit function element-wise:
 ///
 /// `y = max(0, x)`
-#[derive(Clone, Debug, Default)]
+#[derive(Module, Clone, Debug, Default)]
 pub struct ReLU {}
 
-constant!(ReLU);
-
 impl ReLU {
     /// Create the module.
     pub fn new() -> Self {
diff --git a/burn-derive/src/lib.rs b/burn-derive/src/lib.rs
index 193c21c74..0145e0e8f 100644
--- a/burn-derive/src/lib.rs
+++ b/burn-derive/src/lib.rs
@@ -10,8 +10,6 @@ use module::module_derive_impl;
 #[proc_macro_derive(Module)]
 pub fn module_derive(input: TokenStream) -> TokenStream {
     let input = syn::parse(input).unwrap();
-
-    // panic!("{}", gen);
     module_derive_impl(&input)
 }
 
diff --git a/burn-derive/src/module/base.rs b/burn-derive/src/module/base.rs
index 7e4d169ed..c7a3a4141 100644
--- a/burn-derive/src/module/base.rs
+++ b/burn-derive/src/module/base.rs
@@ -2,9 +2,53 @@ use super::{fn_generator::FnGenerator, record::RecordGenerator};
 use crate::module::display;
 use proc_macro::TokenStream;
 use quote::quote;
+use syn::parse_quote;
+
+pub(crate) fn constant_derive_impl(ast: &syn::DeriveInput) -> TokenStream {
+    let name = &ast.ident;
+    let (_, generics_ty, generics_where) = ast.generics.split_for_impl();
+
+    let backend: syn::Generics = parse_quote! { <B: burn::tensor::backend::Backend> };
+    let backend_ad: syn::Generics = parse_quote! { <B: burn::tensor::backend::ADBackend> };
+
+    let mut generics_module = ast.generics.clone();
+    let mut generics_module_ad = ast.generics.clone();
+
+    for param in backend.params.into_iter() {
+        generics_module.params.push(param);
+    }
+    for param in backend_ad.params.into_iter() {
+        generics_module_ad.params.push(param);
+    }
+    let (generics_module, _, _) = generics_module.split_for_impl();
+    let (generics_module_ad, _, _) = generics_module_ad.split_for_impl();
+
+    let gen = quote! {
+        impl #generics_module burn::module::Module<B> for #name #generics_ty #generics_where {
+            burn::constant!(module);
+        }
+
+        impl #generics_module_ad burn::module::ADModule<B> for #name #generics_ty #generics_where {
+            burn::constant!(ad_module, #name #generics_ty);
+        }
+    };
+
+    gen.into()
+}
 
 pub(crate) fn module_derive_impl(ast: &syn::DeriveInput) -> TokenStream {
     let name = &ast.ident;
+    let has_backend = ast
+        .generics
+        .type_params()
+        .map(|param| param.ident == "B")
+        .reduce(|accum, is_backend| is_backend || accum)
+        .unwrap_or(false);
+
+    if !has_backend {
+        return constant_derive_impl(ast);
+    }
+
     let (generics, generics_ty, generics_where) = ast.generics.split_for_impl();
 
     let display_fn = display::display_fn(name);
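The `has_backend` check above is what lets the nn modules earlier in this patch (`Dropout`, `GELU`, `ReLU`, `MaxPool2d`, and the padding configs) drop their explicit `constant!` invocations: a type with no `B` type parameter now takes the constant derive path and receives stateless `Module`/`ADModule` impls. The following is a minimal sketch of a downstream constant module after this change; it is not part of the patch, the `Scale` type is hypothetical, and it assumes the tensor `mul_scalar` API.

```rust
use burn::module::Module;
use burn::tensor::backend::Backend;
use burn::tensor::Tensor;

// Hypothetical constant module: it has no `B: Backend` type parameter, so
// the derive falls back to `constant_derive_impl` and generates
// Module/ADModule impls that carry no trainable state or record.
#[derive(Module, Clone, Debug)]
pub struct Scale {
    factor: f64,
}

impl Scale {
    // The forward pass can still be generic over the backend even though
    // the module itself is not.
    pub fn forward<B: Backend, const D: usize>(&self, input: Tensor<B, D>) -> Tensor<B, D> {
        input.mul_scalar(self.factor)
    }
}
```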