255 lines
18 KiB
HTML
255 lines
18 KiB
HTML
<!DOCTYPE HTML>
|
|
<html lang="en" class="light" dir="ltr">
|
|
<head>
|
|
<!-- Book generated using mdBook -->
|
|
<meta charset="UTF-8">
|
|
<title>Error management - Candle Documentation</title>
|
|
|
|
|
|
<!-- Custom HTML head -->
|
|
|
|
<meta name="description" content="">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta name="theme-color" content="#ffffff">
|
|
|
|
<link rel="icon" href="favicon.svg">
|
|
<link rel="shortcut icon" href="favicon.png">
|
|
<link rel="stylesheet" href="css/variables.css">
|
|
<link rel="stylesheet" href="css/general.css">
|
|
<link rel="stylesheet" href="css/chrome.css">
|
|
<link rel="stylesheet" href="css/print.css" media="print">
|
|
|
|
<!-- Fonts -->
|
|
<link rel="stylesheet" href="FontAwesome/css/font-awesome.css">
|
|
<link rel="stylesheet" href="fonts/fonts.css">
|
|
|
|
<!-- Highlight.js Stylesheets -->
|
|
<link rel="stylesheet" href="highlight.css">
|
|
<link rel="stylesheet" href="tomorrow-night.css">
|
|
<link rel="stylesheet" href="ayu-highlight.css">
|
|
|
|
<!-- Custom theme stylesheets -->
|
|
|
|
</head>
|
|
<body class="sidebar-visible no-js">
|
|
<div id="body-container">
|
|
<!-- Provide site root to javascript -->
|
|
<script>
|
|
var path_to_root = "";
|
|
var default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? "navy" : "light";
|
|
</script>
|
|
|
|
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
|
<script>
|
|
try {
|
|
var theme = localStorage.getItem('mdbook-theme');
|
|
var sidebar = localStorage.getItem('mdbook-sidebar');
|
|
|
|
if (theme.startsWith('"') && theme.endsWith('"')) {
|
|
localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
|
|
}
|
|
|
|
if (sidebar.startsWith('"') && sidebar.endsWith('"')) {
|
|
localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
|
|
}
|
|
} catch (e) { }
|
|
</script>
|
|
|
|
<!-- Set the theme before any content is loaded, prevents flash -->
|
|
<script>
|
|
var theme;
|
|
try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { }
|
|
if (theme === null || theme === undefined) { theme = default_theme; }
|
|
var html = document.querySelector('html');
|
|
html.classList.remove('light')
|
|
html.classList.add(theme);
|
|
var body = document.querySelector('body');
|
|
body.classList.remove('no-js')
|
|
body.classList.add('js');
|
|
</script>
|
|
|
|
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
|
|
|
|
<!-- Hide / unhide sidebar before it is displayed -->
|
|
<script>
|
|
var body = document.querySelector('body');
|
|
var sidebar = null;
|
|
var sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
|
|
if (document.body.clientWidth >= 1080) {
|
|
try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { }
|
|
sidebar = sidebar || 'visible';
|
|
} else {
|
|
sidebar = 'hidden';
|
|
}
|
|
sidebar_toggle.checked = sidebar === 'visible';
|
|
body.classList.remove('sidebar-visible');
|
|
body.classList.add("sidebar-" + sidebar);
|
|
</script>
|
|
|
|
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
|
|
<div class="sidebar-scrollbox">
|
|
<ol class="chapter"><li class="chapter-item expanded affix "><a href="index.html">Introduction</a></li><li class="chapter-item expanded affix "><li class="part-title">User Guide</li><li class="chapter-item expanded "><a href="guide/installation.html"><strong aria-hidden="true">1.</strong> Installation</a></li><li class="chapter-item expanded "><a href="guide/hello_world.html"><strong aria-hidden="true">2.</strong> Hello World - MNIST</a></li><li class="chapter-item expanded "><a href="guide/cheatsheet.html"><strong aria-hidden="true">3.</strong> PyTorch cheatsheet</a></li><li class="chapter-item expanded affix "><li class="part-title">Reference Guide</li><li class="chapter-item expanded "><a href="inference/inference.html"><strong aria-hidden="true">4.</strong> Running a model</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="inference/hub.html"><strong aria-hidden="true">4.1.</strong> Using the hub</a></li></ol></li><li class="chapter-item expanded "><a href="error_manage.html" class="active"><strong aria-hidden="true">5.</strong> Error management</a></li><li class="chapter-item expanded "><a href="training/training.html"><strong aria-hidden="true">6.</strong> Training</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="training/simplified.html"><strong aria-hidden="true">6.1.</strong> Simplified</a></li><li class="chapter-item expanded "><a href="training/mnist.html"><strong aria-hidden="true">6.2.</strong> MNIST</a></li><li class="chapter-item expanded "><div><strong aria-hidden="true">6.3.</strong> Fine-tuning</div></li><li class="chapter-item expanded "><div><strong aria-hidden="true">6.4.</strong> Serialization</div></li></ol></li><li class="chapter-item expanded "><div><strong aria-hidden="true">7.</strong> Advanced Cuda usage</div></li><li><ol class="section"><li class="chapter-item expanded "><div><strong aria-hidden="true">7.1.</strong> Writing a custom kernel</div></li><li class="chapter-item expanded "><div><strong aria-hidden="true">7.2.</strong> Porting a custom kernel</div></li></ol></li><li class="chapter-item expanded "><div><strong aria-hidden="true">8.</strong> Using MKL</div></li><li class="chapter-item expanded "><div><strong aria-hidden="true">9.</strong> Creating apps</div></li><li><ol class="section"><li class="chapter-item expanded "><div><strong aria-hidden="true">9.1.</strong> Creating a WASM app</div></li><li class="chapter-item expanded "><div><strong aria-hidden="true">9.2.</strong> Creating a REST api webserver</div></li><li class="chapter-item expanded "><div><strong aria-hidden="true">9.3.</strong> Creating a desktop Tauri app</div></li></ol></li></ol>
|
|
</div>
|
|
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
|
|
<div class="sidebar-resize-indicator"></div>
|
|
</div>
|
|
</nav>
|
|
|
|
<!-- Track and set sidebar scroll position -->
|
|
<script>
|
|
var sidebarScrollbox = document.querySelector('#sidebar .sidebar-scrollbox');
|
|
sidebarScrollbox.addEventListener('click', function(e) {
|
|
if (e.target.tagName === 'A') {
|
|
sessionStorage.setItem('sidebar-scroll', sidebarScrollbox.scrollTop);
|
|
}
|
|
}, { passive: true });
|
|
var sidebarScrollTop = sessionStorage.getItem('sidebar-scroll');
|
|
sessionStorage.removeItem('sidebar-scroll');
|
|
if (sidebarScrollTop) {
|
|
// preserve sidebar scroll position when navigating via links within sidebar
|
|
sidebarScrollbox.scrollTop = sidebarScrollTop;
|
|
} else {
|
|
// scroll sidebar to current active section when navigating via "next/previous chapter" buttons
|
|
var activeSection = document.querySelector('#sidebar .active');
|
|
if (activeSection) {
|
|
activeSection.scrollIntoView({ block: 'center' });
|
|
}
|
|
}
|
|
</script>
|
|
|
|
<div id="page-wrapper" class="page-wrapper">
|
|
|
|
<div class="page">
|
|
<div id="menu-bar-hover-placeholder"></div>
|
|
<div id="menu-bar" class="menu-bar sticky">
|
|
<div class="left-buttons">
|
|
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
|
<i class="fa fa-bars"></i>
|
|
</label>
|
|
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
|
<i class="fa fa-paint-brush"></i>
|
|
</button>
|
|
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
|
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
|
</ul>
|
|
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
|
|
<i class="fa fa-search"></i>
|
|
</button>
|
|
</div>
|
|
|
|
<h1 class="menu-title">Candle Documentation</h1>
|
|
|
|
<div class="right-buttons">
|
|
<a href="print.html" title="Print this book" aria-label="Print this book">
|
|
<i id="print-button" class="fa fa-print"></i>
|
|
</a>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
<div id="search-wrapper" class="hidden">
|
|
<form id="searchbar-outer" class="searchbar-outer">
|
|
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
|
|
</form>
|
|
<div id="searchresults-outer" class="searchresults-outer hidden">
|
|
<div id="searchresults-header" class="searchresults-header"></div>
|
|
<ul id="searchresults">
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
|
<script>
|
|
document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar === 'visible');
|
|
document.getElementById('sidebar').setAttribute('aria-hidden', sidebar !== 'visible');
|
|
Array.from(document.querySelectorAll('#sidebar a')).forEach(function(link) {
|
|
link.setAttribute('tabIndex', sidebar === 'visible' ? 0 : -1);
|
|
});
|
|
</script>
|
|
|
|
<div id="content" class="content">
|
|
<main>
|
|
<h1 id="error-management"><a class="header" href="#error-management">Error management</a></h1>
|
|
<p>You might have seen in the code base a lot of <code>.unwrap()</code> or <code>?</code>.
|
|
If you're unfamiliar with Rust check out the <a href="https://doc.rust-lang.org/book/ch09-02-recoverable-errors-with-result.html">Rust book</a>
|
|
for more information.</p>
|
|
<p>What's important to know though, is that if you want to know <em>where</em> a particular operation failed
|
|
You can simply use <code>RUST_BACKTRACE=1</code> to get the location of where the model actually failed.</p>
|
|
<p>Let's see on failing code:</p>
|
|
<pre><code class="language-rust ignore">let x = Tensor::zeros((1, 784), DType::F32, &device)?;
|
|
let y = Tensor::zeros((1, 784), DType::F32, &device)?;
|
|
let z = x.matmul(&y)?;</code></pre>
|
|
<p>Will print at runtime:</p>
|
|
<pre><code class="language-bash">Error: ShapeMismatchBinaryOp { lhs: [1, 784], rhs: [1, 784], op: "matmul" }
|
|
</code></pre>
|
|
<p>After adding <code>RUST_BACKTRACE=1</code>:</p>
|
|
<pre><code class="language-bash">Error: WithBacktrace { inner: ShapeMismatchBinaryOp { lhs: [1, 784], rhs: [1, 784], op: "matmul" }, backtrace: Backtrace [{ fn: "candle::error::Error::bt", file: "/home/nicolas/.cargo/git/checkouts/candle-5bb8ef7e0626d693/f291065/candle-core/src/error.rs", line: 200 }, { fn: "candle::tensor::Tensor::matmul", file: "/home/nicolas/.cargo/git/checkouts/candle-5bb8ef7e0626d693/f291065/candle-core/src/tensor.rs", line: 816 }, { fn: "myapp::main", file: "./src/main.rs", line: 29 }, { fn: "core::ops::function::FnOnce::call_once", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/ops/function.rs", line: 250 }, { fn: "std::sys_common::backtrace::__rust_begin_short_backtrace", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/sys_common/backtrace.rs", line: 135 }, { fn: "std::rt::lang_start::{{closure}}", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 166 }, { fn: "core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &F>::call_once", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/ops/function.rs", line: 284 }, { fn: "std::panicking::try::do_call", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 500 }, { fn: "std::panicking::try", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 464 }, { fn: "std::panic::catch_unwind", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panic.rs", line: 142 }, { fn: "std::rt::lang_start_internal::{{closure}}", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 148 }, { fn: "std::panicking::try::do_call", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 500 }, { fn: "std::panicking::try", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 464 }, { fn: "std::panic::catch_unwind", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panic.rs", line: 142 }, { fn: "std::rt::lang_start_internal", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 148 }, { fn: "std::rt::lang_start", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 165 }, { fn: "main" }, { fn: "__libc_start_main" }, { fn: "_start" }] }
|
|
</code></pre>
|
|
<p>Not super pretty at the moment, but we can see error occurred on <code>{ fn: "myapp::main", file: "./src/main.rs", line: 29 }</code></p>
|
|
<p>Another thing to note, is that since Rust is compiled it is not necessarily as easy to recover proper stacktraces
|
|
especially in release builds. We're using <a href="https://docs.rs/anyhow/latest/anyhow/"><code>anyhow</code></a> for that.
|
|
The library is still young, please <a href="https://github.com/LaurentMazare/candle/issues">report</a> any issues detecting where an error is coming from.</p>
|
|
<h2 id="cuda-error-management"><a class="header" href="#cuda-error-management">Cuda error management</a></h2>
|
|
<p>When running a model on Cuda, you might get a stacktrace not really representing the error.
|
|
The reason is that CUDA is async by nature, and therefore the error might be caught while you were sending totally different kernels.</p>
|
|
<p>One way to avoid this is to use <code>CUDA_LAUNCH_BLOCKING=1</code> as an environment variable. This will force every kernel to be launched sequentially.
|
|
You might still however see the error happening on other kernels as the faulty kernel might exit without an error but spoiling some pointer for which the error will happen when dropping the <code>CudaSlice</code> only.</p>
|
|
<p>If this occurs, you can use <a href="https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html"><code>compute-sanitizer</code></a>
|
|
This tool is like <code>valgrind</code> but for cuda. It will help locate the errors in the kernels.</p>
|
|
|
|
</main>
|
|
|
|
<nav class="nav-wrapper" aria-label="Page navigation">
|
|
<!-- Mobile navigation buttons -->
|
|
<a rel="prev" href="inference/hub.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next prefetch" href="training/training.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
|
|
<div style="clear: both"></div>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
|
|
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
|
<a rel="prev" href="inference/hub.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
|
|
<i class="fa fa-angle-left"></i>
|
|
</a>
|
|
|
|
<a rel="next prefetch" href="training/training.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
|
|
<i class="fa fa-angle-right"></i>
|
|
</a>
|
|
</nav>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<script>
|
|
window.playground_copyable = true;
|
|
</script>
|
|
|
|
|
|
<script src="elasticlunr.min.js"></script>
|
|
<script src="mark.min.js"></script>
|
|
<script src="searcher.js"></script>
|
|
|
|
<script src="clipboard.min.js"></script>
|
|
<script src="highlight.js"></script>
|
|
<script src="book.js"></script>
|
|
|
|
<!-- Custom JS scripts -->
|
|
|
|
|
|
</div>
|
|
</body>
|
|
</html>
|