StableLM-2 models were updated to use GPT-2 tokenization. (#1847)

2024-03-14 22:01:36 +02:00 · 2024-03-14 22:01:36 +02:00 · e1f9c3776d
parent 3318fe30fb
commit e1f9c3776d
2 changed files with 1 additions and 13 deletions
--- a/candle-examples/examples/stable-lm/README.md
+++ b/candle-examples/examples/stable-lm/README.md
@ -10,11 +10,6 @@ order to be able to use it.

 Other available models are Stable-Code-3B, StableLM-2 and Zephyr variants.

-StableLM-2 uses a Tiktoken based GPT-3.5/GPT-4 tokenizer not supported by
-Candle, so to run it you can download a somewhat compatible
-[tokenizer.json](https://huggingface.co/Xenova/gpt-4/resolve/main/tokenizer.json?download=true)
-and pass it via the --tokenizer-file argument.
-
 ## Running some example

 ```bash
--- a/candle-examples/examples/stable-lm/main.rs
+++ b/candle-examples/examples/stable-lm/main.rs
@ -239,14 +239,7 @@ fn main() -> Result<()> {
    ));
    let tokenizer_filename = match args.tokenizer_file {
        Some(file) => std::path::PathBuf::from(file),
-        None => match args.which {
-            Which::V1Orig | Which::V1 | Which::V1Zephyr | Which::Code => {
-                repo.get("tokenizer.json")?
-            }
-            Which::V2 | Which::V2Zephyr => api
-                .model("lmz/candle-stablelm".to_string())
-                .get("tokenizer-gpt4.json")?,
-        },
+        None => repo.get("tokenizer.json")?,
    };
    let filenames = match args.weight_files {
        Some(files) => files