Introduce CPU support and auto-enable CUDA for NVIDIA cards
This commit is contained in:
parent
c191107d4b
commit
ad3c35eaae
|
@ -686,6 +686,25 @@ dependencies = [
|
|||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
|
||||
dependencies = [
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.20"
|
||||
|
@ -1747,6 +1766,7 @@ dependencies = [
|
|||
"iced_runtime",
|
||||
"log",
|
||||
"rustc-hash",
|
||||
"sysinfo",
|
||||
"thiserror",
|
||||
"tracing",
|
||||
"wasm-bindgen-futures",
|
||||
|
@ -2173,6 +2193,15 @@ dependencies = [
|
|||
"memoffset",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ntapi"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
|
@ -2886,6 +2915,26 @@ version = "0.6.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539"
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
|
||||
dependencies = [
|
||||
"either",
|
||||
"rayon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon-core"
|
||||
version = "1.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
|
||||
dependencies = [
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rctree"
|
||||
version = "0.5.0"
|
||||
|
@ -3548,6 +3597,21 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sysinfo"
|
||||
version = "0.30.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
"ntapi",
|
||||
"once_cell",
|
||||
"rayon",
|
||||
"windows",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration"
|
||||
version = "0.5.1"
|
||||
|
|
|
@ -8,7 +8,7 @@ publish = false
|
|||
[dependencies]
|
||||
iced.git = "https://github.com/iced-rs/iced.git"
|
||||
iced.rev = "c63a81f68396ccd53ab757f1bdaeee6ca998d168"
|
||||
iced.features = ["tokio", "svg", "debug"]
|
||||
iced.features = ["tokio", "svg", "system", "debug"]
|
||||
|
||||
tokio.version = "1.38"
|
||||
tokio.features = ["fs", "io-util", "process", "time"]
|
||||
|
|
|
@ -14,13 +14,21 @@ pub struct Assistant {
|
|||
_container: Arc<Container>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Backend {
|
||||
CPU,
|
||||
CUDA,
|
||||
}
|
||||
|
||||
impl Assistant {
|
||||
const LLAMA_CPP_CONTAINER: &'static str = "ghcr.io/ggerganov/llama.cpp:server-cuda--b1-a59f8fd";
|
||||
const LLAMA_CPP_CONTAINER_CPU: &'static str = "ghcr.io/ggerganov/llama.cpp:server--b1-a59f8fd";
|
||||
const LLAMA_CPP_CONTAINER_CUDA: &'static str =
|
||||
"ghcr.io/ggerganov/llama.cpp:server-cuda--b1-a59f8fd";
|
||||
|
||||
const MODELS_DIR: &'static str = "./models";
|
||||
const HOST_PORT: u64 = 8080;
|
||||
|
||||
pub fn boot(file: File) -> impl Stream<Item = Result<BootEvent, Error>> {
|
||||
pub fn boot(file: File, backend: Backend) -> impl Stream<Item = Result<BootEvent, Error>> {
|
||||
iced::stream::try_channel(1, move |mut sender| async move {
|
||||
let _ = fs::create_dir_all(Self::MODELS_DIR).await?;
|
||||
|
||||
|
@ -90,20 +98,37 @@ impl Assistant {
|
|||
)))
|
||||
.await;
|
||||
|
||||
let mut docker = process::Command::new("docker")
|
||||
.args(
|
||||
let command = match backend {
|
||||
Backend::CPU => {
|
||||
format!(
|
||||
"create --rm -p {port}:80 -v {volume}:/models \
|
||||
{container} --model models/{filename} --conversation \
|
||||
--port 80 --host 0.0.0.0",
|
||||
filename = file.name,
|
||||
container = Self::LLAMA_CPP_CONTAINER_CPU,
|
||||
port = Self::HOST_PORT,
|
||||
volume = Self::MODELS_DIR,
|
||||
)
|
||||
}
|
||||
Backend::CUDA => {
|
||||
format!(
|
||||
"create --rm --gpus all -p {port}:80 -v {volume}:/models \
|
||||
{container} --model models/{filename} --conversation \
|
||||
--port 80 --host 0.0.0.0 --gpu-layers 40",
|
||||
filename = file.name,
|
||||
container = Self::LLAMA_CPP_CONTAINER,
|
||||
container = Self::LLAMA_CPP_CONTAINER_CUDA,
|
||||
port = Self::HOST_PORT,
|
||||
volume = Self::MODELS_DIR,
|
||||
)
|
||||
.split(' ')
|
||||
.map(str::trim)
|
||||
.filter(|arg| !arg.is_empty()),
|
||||
}
|
||||
};
|
||||
|
||||
let mut docker = process::Command::new("docker")
|
||||
.args(
|
||||
command
|
||||
.split(' ')
|
||||
.map(str::trim)
|
||||
.filter(|arg| !arg.is_empty()),
|
||||
)
|
||||
.kill_on_drop(true)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
|
|
17
src/main.rs
17
src/main.rs
|
@ -7,6 +7,7 @@ use crate::screen::conversation;
|
|||
use crate::screen::search;
|
||||
use crate::screen::Screen;
|
||||
|
||||
use iced::system;
|
||||
use iced::{Element, Subscription, Task, Theme};
|
||||
|
||||
pub fn main() -> iced::Result {
|
||||
|
@ -19,6 +20,7 @@ pub fn main() -> iced::Result {
|
|||
|
||||
struct Chat {
|
||||
screen: Screen,
|
||||
system: Option<system::Information>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
@ -26,6 +28,7 @@ enum Message {
|
|||
Search(search::Message),
|
||||
Boot(boot::Message),
|
||||
Conversation(conversation::Message),
|
||||
SystemFetched(system::Information),
|
||||
}
|
||||
|
||||
impl Chat {
|
||||
|
@ -35,8 +38,12 @@ impl Chat {
|
|||
(
|
||||
Self {
|
||||
screen: Screen::Search(search),
|
||||
system: None,
|
||||
},
|
||||
task.map(Message::Search),
|
||||
Task::batch([
|
||||
system::fetch_information().map(Message::SystemFetched),
|
||||
task.map(Message::Search),
|
||||
]),
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -57,7 +64,8 @@ impl Chat {
|
|||
match event {
|
||||
search::Event::None => {}
|
||||
search::Event::ModelSelected(model) => {
|
||||
self.screen = Screen::Boot(screen::Boot::new(model));
|
||||
self.screen =
|
||||
Screen::Boot(screen::Boot::new(model, self.system.as_ref()));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -102,6 +110,11 @@ impl Chat {
|
|||
Task::none()
|
||||
}
|
||||
}
|
||||
Message::SystemFetched(system) => {
|
||||
self.system = Some(system);
|
||||
|
||||
Task::none()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,16 +1,18 @@
|
|||
use crate::assistant::{Assistant, BootEvent, Error, File, Model};
|
||||
use crate::assistant::{Assistant, Backend, BootEvent, Error, File, Model};
|
||||
|
||||
use iced::alignment::{self, Alignment};
|
||||
use iced::system;
|
||||
use iced::task::{self, Task};
|
||||
use iced::time::{self, Duration, Instant};
|
||||
use iced::widget::{
|
||||
button, center, column, container, progress_bar, row, scrollable, stack, text, value,
|
||||
button, center, column, container, progress_bar, row, scrollable, stack, text, toggler, value,
|
||||
};
|
||||
use iced::{Border, Element, Font, Length, Padding, Subscription, Theme};
|
||||
|
||||
pub struct Boot {
|
||||
model: Model,
|
||||
state: State,
|
||||
use_cuda: bool,
|
||||
}
|
||||
|
||||
enum State {
|
||||
|
@ -31,6 +33,7 @@ pub enum Message {
|
|||
Tick(Instant),
|
||||
Cancel,
|
||||
Abort,
|
||||
UseCUDAToggled(bool),
|
||||
}
|
||||
|
||||
pub enum Event {
|
||||
|
@ -40,10 +43,15 @@ pub enum Event {
|
|||
}
|
||||
|
||||
impl Boot {
|
||||
pub fn new(model: Model) -> Self {
|
||||
pub fn new(model: Model, system: Option<&system::Information>) -> Self {
|
||||
let use_cuda = system
|
||||
.map(|system| system.graphics_adapter.contains("NVIDIA"))
|
||||
.unwrap_or_default();
|
||||
|
||||
Self {
|
||||
model: model.clone(),
|
||||
state: State::Idle,
|
||||
use_cuda,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -60,7 +68,18 @@ impl Boot {
|
|||
pub fn update(&mut self, message: Message) -> (Task<Message>, Event) {
|
||||
match message {
|
||||
Message::Boot(file) => {
|
||||
let (task, handle) = Task::run(Assistant::boot(file), Message::Booting).abortable();
|
||||
let (task, handle) = Task::run(
|
||||
Assistant::boot(
|
||||
file,
|
||||
if self.use_cuda {
|
||||
Backend::CUDA
|
||||
} else {
|
||||
Backend::CPU
|
||||
},
|
||||
),
|
||||
Message::Booting,
|
||||
)
|
||||
.abortable();
|
||||
|
||||
self.state = State::Booting {
|
||||
logs: Vec::new(),
|
||||
|
@ -113,6 +132,11 @@ impl Boot {
|
|||
(Task::none(), Event::None)
|
||||
}
|
||||
Message::Abort => (Task::none(), Event::Aborted),
|
||||
Message::UseCUDAToggled(use_cuda) => {
|
||||
self.use_cuda = use_cuda;
|
||||
|
||||
(Task::none(), Event::None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -125,17 +149,25 @@ impl Boot {
|
|||
|
||||
let state: Element<_> = match &self.state {
|
||||
State::Idle => {
|
||||
let abort = container(
|
||||
button("Abort")
|
||||
.style(button::danger)
|
||||
.on_press(Message::Abort),
|
||||
let use_cuda = toggler(
|
||||
Some("Use CUDA".to_owned()),
|
||||
self.use_cuda,
|
||||
Message::UseCUDAToggled,
|
||||
)
|
||||
.width(Length::Fill)
|
||||
.align_x(alignment::Horizontal::Right);
|
||||
.width(Length::Shrink);
|
||||
|
||||
let abort = button("Abort")
|
||||
.style(button::danger)
|
||||
.on_press(Message::Abort);
|
||||
|
||||
column![
|
||||
row![text("Select a file to boot:").width(Length::Fill), abort]
|
||||
.align_items(Alignment::Center),
|
||||
row![
|
||||
text("Select a file to boot:").width(Length::Fill),
|
||||
use_cuda,
|
||||
abort
|
||||
]
|
||||
.spacing(10)
|
||||
.align_items(Alignment::Center),
|
||||
scrollable(
|
||||
column(self.model.files.iter().map(|file| {
|
||||
button(text(&file.name).font(Font::MONOSPACE))
|
||||
|
|
Loading…
Reference in New Issue