Add the import script for the T5 tokenizer. (#2399)

This commit is contained in:
Laurent Mazare 2024-08-05 20:03:31 +01:00 committed by GitHub
parent dfdce2b602
commit 59bbc0d287
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 6 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hugging Face Hub identifier of the T5 checkpoint whose tokenizer is exported.
BASE_MODEL = "google/t5-v1_1-xxl"

# Directory the tokenizer is written to; the file of interest is expected at
# /tmp/tokenizer/tokenizer.json.
OUTPUT_DIR = "/tmp/tokenizer/"

# Load the tokenizer for BASE_MODEL, then save it into OUTPUT_DIR.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.save_pretrained(OUTPUT_DIR)