diff --git a/candle-examples/examples/moondream/main.rs b/candle-examples/examples/moondream/main.rs
index dfd83037..c7500ed9 100644
--- a/candle-examples/examples/moondream/main.rs
+++ b/candle-examples/examples/moondream/main.rs
@@ -194,6 +194,10 @@ struct Args {
     #[arg(long)]
     quantized: bool,
 
+    /// Use f16 precision for all the computations rather than f32.
+    #[arg(long)]
+    f16: bool,
+
     #[arg(long)]
     model_file: Option<String>,
 
@@ -283,7 +287,12 @@ async fn main() -> anyhow::Result<()> {
     let start = std::time::Instant::now();
     let device = candle_examples::device(args.cpu)?;
     let config = moondream::Config::v2();
-    let dtype = if device.is_cuda() && !args.quantized {
+    let dtype = if args.quantized {
+        if args.f16 {
+            anyhow::bail!("Quantized model does not support f16");
+        }
+        DType::F32
+    } else if device.is_cuda() || args.f16 {
         DType::F16
     } else {
         DType::F32