Update tokenizers version

This commit is contained in:
nathaniel 2023-08-14 14:37:23 -04:00 committed by Dilshod Tadjibaev
parent 8e55f60676
commit 8430d187ba
4 changed files with 6 additions and 14 deletions

View File

@@ -25,7 +25,7 @@ burn-tch = { path = "../../burn-tch", optional = true }
 burn-wgpu = { path = "../../burn-wgpu", optional = true }

 # Tokenizer
-tokenizers = { version = "0.13.3", default-features = false, features = [
+tokenizers = { version = "0.13.4", default-features = false, features = [
     "onig",
     "http",
 ] }

View File

@@ -51,12 +51,8 @@ impl Tokenizer for BertCasedTokenizer {
     /// Converts a sequence of tokens back into a text string.
     fn decode(&self, tokens: &[usize]) -> String {
-        self.tokenizer
-            .decode(
-                tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>(),
-                false,
-            )
-            .unwrap()
+        let tokens = tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>();
+        self.tokenizer.decode(&tokens, false).unwrap()
     }

     /// Gets the size of the BERT cased tokenizer's vocabulary.

View File

@@ -17,7 +17,7 @@ burn-autodiff = {path = "../../burn-autodiff"}
 burn-tch = {path = "../../burn-tch"}

 # Tokenizer
-tokenizers = {version = "0.13.3", default-features = false, features = [
+tokenizers = {version = "0.13.4", default-features = false, features = [
     "onig",
     "http",
 ]}

View File

@@ -44,12 +44,8 @@ impl Tokenizer for Gpt2Tokenizer {
     }

     fn decode(&self, tokens: &[usize]) -> String {
-        self.tokenizer
-            .decode(
-                tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>(),
-                false,
-            )
-            .unwrap()
+        let tokens = tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>();
+        self.tokenizer.decode(&tokens, false).unwrap()
     }

     fn vocab_size(&self) -> usize {