Update tokenizers version

This commit is contained in:
nathaniel 2023-08-14 14:37:23 -04:00 committed by Dilshod Tadjibaev
parent 8e55f60676
commit 8430d187ba
4 changed files with 6 additions and 14 deletions

View File

@ -25,7 +25,7 @@ burn-tch = { path = "../../burn-tch", optional = true }
burn-wgpu = { path = "../../burn-wgpu", optional = true }
# Tokenizer
-tokenizers = { version = "0.13.3", default-features = false, features = [
+tokenizers = { version = "0.13.4", default-features = false, features = [
"onig",
"http",
] }

View File

@ -51,12 +51,8 @@ impl Tokenizer for BertCasedTokenizer {
/// Converts a sequence of tokens back into a text string.
fn decode(&self, tokens: &[usize]) -> String {
-        self.tokenizer
-            .decode(
-                tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>(),
-                false,
-            )
-            .unwrap()
+        let tokens = tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>();
+        self.tokenizer.decode(&tokens, false).unwrap()
}
/// Gets the size of the BERT cased tokenizer's vocabulary.

View File

@ -17,7 +17,7 @@ burn-autodiff = {path = "../../burn-autodiff"}
burn-tch = {path = "../../burn-tch"}
# Tokenizer
-tokenizers = {version = "0.13.3", default-features = false, features = [
+tokenizers = {version = "0.13.4", default-features = false, features = [
"onig",
"http",
]}

View File

@ -44,12 +44,8 @@ impl Tokenizer for Gpt2Tokenizer {
}
fn decode(&self, tokens: &[usize]) -> String {
-        self.tokenizer
-            .decode(
-                tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>(),
-                false,
-            )
-            .unwrap()
+        let tokens = tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>();
+        self.tokenizer.decode(&tokens, false).unwrap()
}
fn vocab_size(&self) -> usize {