Introduce CPU support and auto-enable CUDA for NVIDIA cards

This commit is contained in:
Héctor Ramón Jiménez 2024-07-11 10:01:46 +02:00
parent c191107d4b
commit ad3c35eaae
No known key found for this signature in database
GPG Key ID: 7CC46565708259A7
5 changed files with 157 additions and 23 deletions

64
Cargo.lock generated
View File

@ -686,6 +686,25 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.20"
@ -1747,6 +1766,7 @@ dependencies = [
"iced_runtime",
"log",
"rustc-hash",
"sysinfo",
"thiserror",
"tracing",
"wasm-bindgen-futures",
@ -2173,6 +2193,15 @@ dependencies = [
"memoffset",
]
[[package]]
name = "ntapi"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4"
dependencies = [
"winapi",
]
[[package]]
name = "num-traits"
version = "0.2.19"
@ -2886,6 +2915,26 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539"
[[package]]
name = "rayon"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "rctree"
version = "0.5.0"
@ -3548,6 +3597,21 @@ dependencies = [
"libc",
]
[[package]]
name = "sysinfo"
version = "0.30.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3"
dependencies = [
"cfg-if",
"core-foundation-sys",
"libc",
"ntapi",
"once_cell",
"rayon",
"windows",
]
[[package]]
name = "system-configuration"
version = "0.5.1"

View File

@ -8,7 +8,7 @@ publish = false
[dependencies]
iced.git = "https://github.com/iced-rs/iced.git"
iced.rev = "c63a81f68396ccd53ab757f1bdaeee6ca998d168"
iced.features = ["tokio", "svg", "debug"]
iced.features = ["tokio", "svg", "system", "debug"]
tokio.version = "1.38"
tokio.features = ["fs", "io-util", "process", "time"]

View File

@ -14,13 +14,21 @@ pub struct Assistant {
_container: Arc<Container>,
}
/// Selects how `Assistant::boot` runs the llama.cpp server container.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Backend {
/// Boot the plain server image, without passing any GPU to the container.
CPU,
/// Boot the CUDA server image with `--gpus all` and `--gpu-layers 40`.
CUDA,
}
impl Assistant {
const LLAMA_CPP_CONTAINER: &'static str = "ghcr.io/ggerganov/llama.cpp:server-cuda--b1-a59f8fd";
const LLAMA_CPP_CONTAINER_CPU: &'static str = "ghcr.io/ggerganov/llama.cpp:server--b1-a59f8fd";
const LLAMA_CPP_CONTAINER_CUDA: &'static str =
"ghcr.io/ggerganov/llama.cpp:server-cuda--b1-a59f8fd";
const MODELS_DIR: &'static str = "./models";
const HOST_PORT: u64 = 8080;
pub fn boot(file: File) -> impl Stream<Item = Result<BootEvent, Error>> {
pub fn boot(file: File, backend: Backend) -> impl Stream<Item = Result<BootEvent, Error>> {
iced::stream::try_channel(1, move |mut sender| async move {
let _ = fs::create_dir_all(Self::MODELS_DIR).await?;
@ -90,20 +98,37 @@ impl Assistant {
)))
.await;
let mut docker = process::Command::new("docker")
.args(
let command = match backend {
Backend::CPU => {
format!(
"create --rm -p {port}:80 -v {volume}:/models \
{container} --model models/{filename} --conversation \
--port 80 --host 0.0.0.0",
filename = file.name,
container = Self::LLAMA_CPP_CONTAINER_CPU,
port = Self::HOST_PORT,
volume = Self::MODELS_DIR,
)
}
Backend::CUDA => {
format!(
"create --rm --gpus all -p {port}:80 -v {volume}:/models \
{container} --model models/{filename} --conversation \
--port 80 --host 0.0.0.0 --gpu-layers 40",
filename = file.name,
container = Self::LLAMA_CPP_CONTAINER,
container = Self::LLAMA_CPP_CONTAINER_CUDA,
port = Self::HOST_PORT,
volume = Self::MODELS_DIR,
)
.split(' ')
.map(str::trim)
.filter(|arg| !arg.is_empty()),
}
};
let mut docker = process::Command::new("docker")
.args(
command
.split(' ')
.map(str::trim)
.filter(|arg| !arg.is_empty()),
)
.kill_on_drop(true)
.stdout(std::process::Stdio::piped())

View File

@ -7,6 +7,7 @@ use crate::screen::conversation;
use crate::screen::search;
use crate::screen::Screen;
use iced::system;
use iced::{Element, Subscription, Task, Theme};
pub fn main() -> iced::Result {
@ -19,6 +20,7 @@ pub fn main() -> iced::Result {
/// Application state: the current screen plus the fetched system information.
struct Chat {
/// The screen the application is currently showing.
screen: Screen,
/// System information; starts as `None` and is set once
/// `Message::SystemFetched` is handled. Passed to the boot screen on creation.
system: Option<system::Information>,
}
#[derive(Debug, Clone)]
@ -26,6 +28,7 @@ enum Message {
Search(search::Message),
Boot(boot::Message),
Conversation(conversation::Message),
SystemFetched(system::Information),
}
impl Chat {
@ -35,8 +38,12 @@ impl Chat {
(
Self {
screen: Screen::Search(search),
system: None,
},
task.map(Message::Search),
Task::batch([
system::fetch_information().map(Message::SystemFetched),
task.map(Message::Search),
]),
)
}
@ -57,7 +64,8 @@ impl Chat {
match event {
search::Event::None => {}
search::Event::ModelSelected(model) => {
self.screen = Screen::Boot(screen::Boot::new(model));
self.screen =
Screen::Boot(screen::Boot::new(model, self.system.as_ref()));
}
};
@ -102,6 +110,11 @@ impl Chat {
Task::none()
}
}
Message::SystemFetched(system) => {
self.system = Some(system);
Task::none()
}
}
}

View File

@ -1,16 +1,18 @@
use crate::assistant::{Assistant, BootEvent, Error, File, Model};
use crate::assistant::{Assistant, Backend, BootEvent, Error, File, Model};
use iced::alignment::{self, Alignment};
use iced::system;
use iced::task::{self, Task};
use iced::time::{self, Duration, Instant};
use iced::widget::{
button, center, column, container, progress_bar, row, scrollable, stack, text, value,
button, center, column, container, progress_bar, row, scrollable, stack, text, toggler, value,
};
use iced::{Border, Element, Font, Length, Padding, Subscription, Theme};
pub struct Boot {
model: Model,
state: State,
use_cuda: bool,
}
enum State {
@ -31,6 +33,7 @@ pub enum Message {
Tick(Instant),
Cancel,
Abort,
UseCUDAToggled(bool),
}
pub enum Event {
@ -40,10 +43,15 @@ pub enum Event {
}
impl Boot {
pub fn new(model: Model) -> Self {
/// Creates a boot screen for `model`, starting in the idle state.
///
/// CUDA starts enabled only when `system` is available and its graphics
/// adapter name contains `"NVIDIA"`; in every other case it starts disabled.
pub fn new(model: Model, system: Option<&system::Information>) -> Self {
    // No system information means the adapter cannot be inspected, so CUDA stays off.
    let use_cuda = system.is_some_and(|system| system.graphics_adapter.contains("NVIDIA"));

    Self {
        // `model` is taken by value, so it is moved here rather than cloned.
        model,
        state: State::Idle,
        use_cuda,
    }
}
@ -60,7 +68,18 @@ impl Boot {
pub fn update(&mut self, message: Message) -> (Task<Message>, Event) {
match message {
Message::Boot(file) => {
let (task, handle) = Task::run(Assistant::boot(file), Message::Booting).abortable();
let (task, handle) = Task::run(
Assistant::boot(
file,
if self.use_cuda {
Backend::CUDA
} else {
Backend::CPU
},
),
Message::Booting,
)
.abortable();
self.state = State::Booting {
logs: Vec::new(),
@ -113,6 +132,11 @@ impl Boot {
(Task::none(), Event::None)
}
Message::Abort => (Task::none(), Event::Aborted),
Message::UseCUDAToggled(use_cuda) => {
self.use_cuda = use_cuda;
(Task::none(), Event::None)
}
}
}
@ -125,17 +149,25 @@ impl Boot {
let state: Element<_> = match &self.state {
State::Idle => {
let abort = container(
button("Abort")
.style(button::danger)
.on_press(Message::Abort),
let use_cuda = toggler(
Some("Use CUDA".to_owned()),
self.use_cuda,
Message::UseCUDAToggled,
)
.width(Length::Fill)
.align_x(alignment::Horizontal::Right);
.width(Length::Shrink);
let abort = button("Abort")
.style(button::danger)
.on_press(Message::Abort);
column![
row![text("Select a file to boot:").width(Length::Fill), abort]
.align_items(Alignment::Center),
row![
text("Select a file to boot:").width(Length::Fill),
use_cuda,
abort
]
.spacing(10)
.align_items(Alignment::Center),
scrollable(
column(self.model.files.iter().map(|file| {
button(text(&file.name).font(Font::MONOSPACE))