#[cfg(feature = "accelerate")]
extern crate accelerate_src;
#[cfg(feature = "mkl")]
extern crate intel_mkl_src;
use anyhow::Result;

use candle_core::{Device, Module, Tensor};

use candle_core::quantized::{QMatMul, QTensor};
fn main() -> Result<()> {
|
|
|
|
let device = Device::new_cuda(0)?;
|
2024-02-26 15:42:44 +08:00
|
|
|
let q = Tensor::randn(0f32, 1.0, (72, 256), &device)?;
|
2024-02-26 01:11:47 +08:00
|
|
|
let q_cpu = q.to_device(&Device::Cpu)?;
|
2024-02-26 15:42:44 +08:00
|
|
|
let q = QTensor::quantize(&q, candle_core::quantized::GgmlDType::Q8K)?;
|
2024-02-26 01:11:47 +08:00
|
|
|
let q = QMatMul::from_qtensor(q)?;
|
2024-02-26 15:42:44 +08:00
|
|
|
let x = Tensor::randn(0f32, 1.0, (5, 256), &device)?;
|
2024-02-26 01:11:47 +08:00
|
|
|
let res_q_cuda = q.forward(&x)?;
|
|
|
|
println!("{res_q_cuda}");
|
2023-08-24 17:16:37 +08:00
|
|
|
|
2024-02-26 15:42:44 +08:00
|
|
|
let q_cpu = QTensor::quantize(&q_cpu, candle_core::quantized::GgmlDType::Q8K)?;
|
2024-02-26 01:11:47 +08:00
|
|
|
let q_cpu_tensor = q_cpu.dequantize(&Device::Cpu)?;
|
|
|
|
let q_cpu = QMatMul::from_qtensor(q_cpu)?;
|
|
|
|
let x_cpu = x.to_device(&Device::Cpu)?;
|
|
|
|
let res_q_cpu = q_cpu.forward(&x_cpu)?;
|
|
|
|
println!("{res_q_cpu}");
|
|
|
|
|
|
|
|
let res_mm = x_cpu.matmul(&q_cpu_tensor.t()?)?;
|
|
|
|
let diff = (res_mm - res_q_cuda.to_device(&Device::Cpu))?
|
|
|
|
.abs()?
|
|
|
|
.flatten_all()?
|
|
|
|
.max(0)?;
|
|
|
|
println!("{diff}");
|
2023-07-08 19:43:56 +08:00
|
|
|
Ok(())
|
|
|
|
}
|