diff --git a/burn-book/src/basic-workflow/backend.md b/burn-book/src/basic-workflow/backend.md
index 92eda0349..73d30619d 100644
--- a/burn-book/src/basic-workflow/backend.md
+++ b/burn-book/src/basic-workflow/backend.md
@@ -5,19 +5,27 @@ explicitly designated the backend to be used at any point. This will be defined
 entrypoint of our program, namely the `main` function defined in `src/main.rs`.

 ```rust , ignore
-use burn::optim::AdamConfig;
-use burn::backend::{Autodiff, Wgpu, wgpu::AutoGraphicsApi};
-use crate::model::ModelConfig;
+# mod data;
+# mod model;
+# mod training;
+#
+use crate::{model::ModelConfig, training::TrainingConfig};
+use burn::{
+    backend::{wgpu::AutoGraphicsApi, Autodiff, Wgpu},
+#     data::dataset::Dataset,
+    optim::AdamConfig,
+};

 fn main() {
     type MyBackend = Wgpu<AutoGraphicsApi, f32, i32>;
     type MyAutodiffBackend = Autodiff<MyBackend>;

     let device = burn::backend::wgpu::WgpuDevice::default();
+    let artifact_dir = "/tmp/guide";
     crate::training::train::<MyAutodiffBackend>(
-        "/tmp/guide",
-        crate::training::TrainingConfig::new(ModelConfig::new(10, 512), AdamConfig::new()),
-        device,
+        artifact_dir,
+        TrainingConfig::new(ModelConfig::new(10, 512), AdamConfig::new()),
+        device.clone(),
     );
 }
 ```
diff --git a/burn-book/src/basic-workflow/data.md b/burn-book/src/basic-workflow/data.md
index 53446d2a2..3faed9f35 100644
--- a/burn-book/src/basic-workflow/data.md
+++ b/burn-book/src/basic-workflow/data.md
@@ -42,6 +42,22 @@ not all backends expose the same devices. As an example, the Libtorch-based back
 Next, we need to actually implement the batching logic.

 ```rust , ignore
+# use burn::{
+#     data::{dataloader::batcher::Batcher, dataset::vision::MnistItem},
+#     prelude::*,
+# };
+#
+# #[derive(Clone)]
+# pub struct MnistBatcher<B: Backend> {
+#     device: B::Device,
+# }
+#
+# impl<B: Backend> MnistBatcher<B> {
+#     pub fn new(device: B::Device) -> Self {
+#         Self { device }
+#     }
+# }
+#
 #[derive(Clone, Debug)]
 pub struct MnistBatch<B: Backend> {
     pub images: Tensor<B, 3>,
diff --git a/burn-book/src/basic-workflow/inference.md b/burn-book/src/basic-workflow/inference.md
index 4c170414f..277421af1 100644
--- a/burn-book/src/basic-workflow/inference.md
+++ b/burn-book/src/basic-workflow/inference.md
@@ -10,6 +10,16 @@ cost. Let's create a simple `infer` method in a new file `src/inference.rs` whic
 load our trained model.

 ```rust , ignore
+# use burn::{
+#     config::Config,
+#     data::{dataloader::batcher::Batcher, dataset::vision::MnistItem},
+#     module::Module,
+#     record::{CompactRecorder, Recorder},
+#     tensor::backend::Backend,
+# };
+#
+# use crate::{data::MnistBatcher, training::TrainingConfig};
+#
 pub fn infer<B: Backend>(artifact_dir: &str, device: B::Device, item: MnistItem) {
     let config = TrainingConfig::load(format!("{artifact_dir}/config.json"))
         .expect("Config should exist for the model");
@@ -39,6 +49,29 @@ By running the infer function, you should see the predictions of your model!
 Add the call to `infer` to the `main.rs` file after the `train` function call:

 ```rust , ignore
+# mod data;
+# mod inference;
+# mod model;
+# mod training;
+#
+# use crate::{model::ModelConfig, training::TrainingConfig};
+# use burn::{
+#     backend::{wgpu::AutoGraphicsApi, Autodiff, Wgpu},
+#     data::dataset::Dataset,
+#     optim::AdamConfig,
+# };
+#
+# fn main() {
+#     type MyBackend = Wgpu<AutoGraphicsApi, f32, i32>;
+#     type MyAutodiffBackend = Autodiff<MyBackend>;
+#
+#     let device = burn::backend::wgpu::WgpuDevice::default();
+#     let artifact_dir = "/tmp/guide";
+#     crate::training::train::<MyAutodiffBackend>(
+#         artifact_dir,
+#         TrainingConfig::new(ModelConfig::new(10, 512), AdamConfig::new()),
+#         device.clone(),
+#     );
     crate::inference::infer::<MyBackend>(
         artifact_dir,
         device,
@@ -46,6 +79,7 @@ Add the call to `infer` to the `main.rs` file after the `train` function call:
             .get(42)
             .unwrap(),
     );
+# }
 ```

 The number `42` is the index of the image in the MNIST dataset. You can explore and verify them using
diff --git a/burn-book/src/basic-workflow/model.md b/burn-book/src/basic-workflow/model.md
index 07775a082..9868bc814 100644
--- a/burn-book/src/basic-workflow/model.md
+++ b/burn-book/src/basic-workflow/model.md
@@ -165,11 +165,34 @@ at the top of the main file:

 ```rust , ignore
 mod model;
+#
+# fn main() {
+# }
 ```

 Next, we need to instantiate the model for training.

 ```rust , ignore
+# use burn::{
+#     nn::{
+#         conv::{Conv2d, Conv2dConfig},
+#         pool::{AdaptiveAvgPool2d, AdaptiveAvgPool2dConfig},
+#         Dropout, DropoutConfig, Linear, LinearConfig, Relu,
+#     },
+#     prelude::*,
+# };
+#
+# #[derive(Module, Debug)]
+# pub struct Model<B: Backend> {
+#     conv1: Conv2d<B>,
+#     conv2: Conv2d<B>,
+#     pool: AdaptiveAvgPool2d,
+#     dropout: Dropout,
+#     linear1: Linear<B>,
+#     linear2: Linear<B>,
+#     activation: Relu,
+# }
+#
 #[derive(Config, Debug)]
 pub struct ModelConfig {
     num_classes: usize,
@@ -253,6 +276,49 @@ which we will flatten in the forward pass to have a 1024 (16 _ 8 _ 8) resulting
 Now let's see how the forward pass is defined.

 ```rust , ignore
+# use burn::{
+#     nn::{
+#         conv::{Conv2d, Conv2dConfig},
+#         pool::{AdaptiveAvgPool2d, AdaptiveAvgPool2dConfig},
+#         Dropout, DropoutConfig, Linear, LinearConfig, Relu,
+#     },
+#     prelude::*,
+# };
+#
+# #[derive(Module, Debug)]
+# pub struct Model<B: Backend> {
+#     conv1: Conv2d<B>,
+#     conv2: Conv2d<B>,
+#     pool: AdaptiveAvgPool2d,
+#     dropout: Dropout,
+#     linear1: Linear<B>,
+#     linear2: Linear<B>,
+#     activation: Relu,
+# }
+#
+# #[derive(Config, Debug)]
+# pub struct ModelConfig {
+#     num_classes: usize,
+#     hidden_size: usize,
+#     #[config(default = "0.5")]
+#     dropout: f64,
+# }
+#
+# impl ModelConfig {
+#     /// Returns the initialized model.
+#     pub fn init<B: Backend>(&self, device: &B::Device) -> Model<B> {
+#         Model {
+#             conv1: Conv2dConfig::new([1, 8], [3, 3]).init(device),
+#             conv2: Conv2dConfig::new([8, 16], [3, 3]).init(device),
+#             pool: AdaptiveAvgPool2dConfig::new([8, 8]).init(),
+#             activation: Relu::new(),
+#             linear1: LinearConfig::new(16 * 8 * 8, self.hidden_size).init(device),
+#             linear2: LinearConfig::new(self.hidden_size, self.num_classes).init(device),
+#             dropout: DropoutConfig::new(self.dropout).init(),
+#         }
+#     }
+# }
+#
 impl<B: Backend> Model<B> {
     /// # Shapes
     ///   - Images [batch_size, height, width]
diff --git a/burn-book/src/basic-workflow/training.md b/burn-book/src/basic-workflow/training.md
index 88abb3689..f3665ea6c 100644
--- a/burn-book/src/basic-workflow/training.md
+++ b/burn-book/src/basic-workflow/training.md
@@ -15,6 +15,23 @@ beyond the scope of this guide.
 Since the MNIST task is a classification problem, we will use the `ClassificationOutput` type.

 ```rust , ignore
+# use crate::{
+#     data::{MnistBatch, MnistBatcher},
+#     model::{Model, ModelConfig},
+# };
+# use burn::{
+#     data::{dataloader::DataLoaderBuilder, dataset::vision::MnistDataset},
+#     nn::loss::CrossEntropyLossConfig,
+#     optim::AdamConfig,
+#     prelude::*,
+#     record::CompactRecorder,
+#     tensor::backend::AutodiffBackend,
+#     train::{
+#         metric::{AccuracyMetric, LossMetric},
+#         ClassificationOutput, LearnerBuilder, TrainOutput, TrainStep, ValidStep,
+#     },
+# };
+#
 impl<B: Backend> Model<B> {
     pub fn forward_classification(
         &self,
@@ -43,6 +60,42 @@ Moving forward, we will proceed with the implementation of both the training and
 for our model.

 ```rust , ignore
+# use burn::{
+#     config::Config,
+#     data::{dataloader::DataLoaderBuilder, dataset::vision::MnistDataset},
+#     module::Module,
+#     nn::loss::CrossEntropyLoss,
+#     optim::AdamConfig,
+#     record::CompactRecorder,
+#     tensor::{
+#         backend::{AutodiffBackend, Backend},
+#         Int, Tensor,
+#     },
+#     train::{
+#         metric::{AccuracyMetric, LossMetric},
+#         ClassificationOutput, LearnerBuilder, TrainOutput, TrainStep, ValidStep,
+#     },
+# };
+#
+# use crate::{
+#     data::{MnistBatch, MnistBatcher},
+#     model::{Model, ModelConfig},
+# };
+#
+# impl<B: Backend> Model<B> {
+#     pub fn forward_classification(
+#         &self,
+#         images: Tensor<B, 3>,
+#         targets: Tensor<B, 1, Int>,
+#     ) -> ClassificationOutput<B> {
+#         let output = self.forward(images);
+#         let loss =
+#             CrossEntropyLoss::new(None, &output.device()).forward(output.clone(), targets.clone());
+#
+#         ClassificationOutput::new(loss, output, targets)
+#     }
+# }
+#
 impl<B: AutodiffBackend> TrainStep<MnistBatch<B>, ClassificationOutput<B>> for Model<B> {
     fn step(&self, batch: MnistBatch<B>) -> TrainOutput<ClassificationOutput<B>> {
         let item = self.forward_classification(batch.images, batch.targets);
@@ -94,6 +147,56 @@ Book.
 Let us move on to establishing the practical training configuration.

 ```rust , ignore
+# use burn::{
+#     config::Config,
+#     data::{dataloader::DataLoaderBuilder, dataset::vision::MnistDataset},
+#     module::Module,
+#     nn::loss::CrossEntropyLoss,
+#     optim::AdamConfig,
+#     record::CompactRecorder,
+#     tensor::{
+#         backend::{AutodiffBackend, Backend},
+#         Int, Tensor,
+#     },
+#     train::{
+#         metric::{AccuracyMetric, LossMetric},
+#         ClassificationOutput, LearnerBuilder, TrainOutput, TrainStep, ValidStep,
+#     },
+# };
+#
+# use crate::{
+#     data::{MnistBatch, MnistBatcher},
+#     model::{Model, ModelConfig},
+# };
+#
+# impl<B: Backend> Model<B> {
+#     pub fn forward_classification(
+#         &self,
+#         images: Tensor<B, 3>,
+#         targets: Tensor<B, 1, Int>,
+#     ) -> ClassificationOutput<B> {
+#         let output = self.forward(images);
+#         let loss =
+#             CrossEntropyLoss::new(None, &output.device()).forward(output.clone(), targets.clone());
+#
+#         ClassificationOutput::new(loss, output, targets)
+#     }
+# }
+#
+# impl<B: AutodiffBackend> TrainStep<MnistBatch<B>, ClassificationOutput<B>> for Model<B> {
+#     fn step(&self, batch: MnistBatch<B>) -> TrainOutput<ClassificationOutput<B>> {
+#         let item = self.forward_classification(batch.images, batch.targets);
+#
+#         TrainOutput::new(self, item.loss.backward(), item)
+#     }
+# }
+#
+# impl<B: Backend> ValidStep<MnistBatch<B>, ClassificationOutput<B>> for Model<B> {
+#     fn step(&self, batch: MnistBatch<B>) -> ClassificationOutput<B> {
+#         self.forward_classification(batch.images, batch.targets)
+#     }
+# }
+#
 #[derive(Config)]
 pub struct TrainingConfig {
     pub model: ModelConfig,
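
The `#`-prefixed lines added throughout this patch rely on mdBook's hidden-line feature for Rust code blocks: lines beginning with `#` stay in the Markdown source but are hidden from the rendered page, so each snippet can carry its full imports, wrapper `fn main`, and supporting items while the reader only sees the part under discussion. A minimal sketch of the convention, using a made-up `hidden_helper` function rather than anything from the guide:

```rust , ignore
# // Lines prefixed with "# " are kept in the source but hidden when the book is rendered.
# fn hidden_helper(x: i32) -> i32 {
#     x * 2
# }
#
# fn main() {
    // Only these visible lines appear on the rendered page.
    let doubled = hidden_helper(21);
    println!("{doubled}");
# }
```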