mirror of https://github.com/tracel-ai/burn.git
Update tokenizers version
This commit is contained in:
parent 8e55f60676
commit 8430d187ba

@@ -25,7 +25,7 @@ burn-tch = { path = "../../burn-tch", optional = true }
 burn-wgpu = { path = "../../burn-wgpu", optional = true }
 
 # Tokenizer
-tokenizers = { version = "0.13.3", default-features = false, features = [
+tokenizers = { version = "0.13.4", default-features = false, features = [
     "onig",
     "http",
 ] }
@@ -51,12 +51,8 @@ impl Tokenizer for BertCasedTokenizer {
 
     /// Converts a sequence of tokens back into a text string.
     fn decode(&self, tokens: &[usize]) -> String {
-        self.tokenizer
-            .decode(
-                tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>(),
-                false,
-            )
-            .unwrap()
+        let tokens = tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>();
+        self.tokenizer.decode(&tokens, false).unwrap()
     }
 
     /// Gets the size of the BERT cased tokenizer's vocabulary.
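
Note on the decode change: as the before/after call sites imply, tokenizers 0.13.4 takes the ids for Tokenizer::decode as a slice (&[u32]) rather than an owned Vec<u32>, so each implementation now collects the ids once and passes them by reference. A minimal standalone sketch of the new call shape (the model id is illustrative only, and from_pretrained is gated behind the crate's "http" feature enabled in the manifests above):

use tokenizers::Tokenizer;

fn main() {
    // Illustrative model id; a Tokenizer loaded from a local file works the same.
    let tokenizer = Tokenizer::from_pretrained("bert-base-cased", None).unwrap();

    // Burn's Tokenizer trait hands ids over as &[usize]; convert once to u32.
    let tokens: &[usize] = &[101, 7592, 102];
    let ids = tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>();

    // As of 0.13.4, decode takes the ids as &[u32], so the Vec is passed by reference.
    let text = tokenizer.decode(&ids, false).unwrap();
    println!("{text}");
}

The same two-line pattern is applied unchanged to Gpt2Tokenizer below.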
@@ -17,7 +17,7 @@ burn-autodiff = {path = "../../burn-autodiff"}
 burn-tch = {path = "../../burn-tch"}
 
 # Tokenizer
-tokenizers = {version = "0.13.3", default-features = false, features = [
+tokenizers = {version = "0.13.4", default-features = false, features = [
     "onig",
     "http",
 ]}
@@ -44,12 +44,8 @@ impl Tokenizer for Gpt2Tokenizer {
     }
 
     fn decode(&self, tokens: &[usize]) -> String {
-        self.tokenizer
-            .decode(
-                tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>(),
-                false,
-            )
-            .unwrap()
+        let tokens = tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>();
+        self.tokenizer.decode(&tokens, false).unwrap()
     }
 
     fn vocab_size(&self) -> usize {