mirror of https://github.com/tracel-ai/burn.git
Update tokenizers version
This commit is contained in:
parent
8e55f60676
commit
8430d187ba
|
@@ -25,7 +25,7 @@ burn-tch = { path = "../../burn-tch", optional = true }
|
||||||
burn-wgpu = { path = "../../burn-wgpu", optional = true }
|
burn-wgpu = { path = "../../burn-wgpu", optional = true }
|
||||||
|
|
||||||
# Tokenizer
|
# Tokenizer
|
||||||
tokenizers = { version = "0.13.3", default-features = false, features = [
|
tokenizers = { version = "0.13.4", default-features = false, features = [
|
||||||
"onig",
|
"onig",
|
||||||
"http",
|
"http",
|
||||||
] }
|
] }
|
||||||
|
|
|
@@ -51,12 +51,8 @@ impl Tokenizer for BertCasedTokenizer {
|
||||||
|
|
||||||
/// Converts a sequence of tokens back into a text string.
|
/// Converts a sequence of tokens back into a text string.
|
||||||
fn decode(&self, tokens: &[usize]) -> String {
|
fn decode(&self, tokens: &[usize]) -> String {
|
||||||
self.tokenizer
|
let tokens = tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>();
|
||||||
.decode(
|
self.tokenizer.decode(&tokens, false).unwrap()
|
||||||
tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>(),
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
.unwrap()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Gets the size of the BERT cased tokenizer's vocabulary.
|
/// Gets the size of the BERT cased tokenizer's vocabulary.
|
||||||
|
|
|
@@ -17,7 +17,7 @@ burn-autodiff = {path = "../../burn-autodiff"}
|
||||||
burn-tch = {path = "../../burn-tch"}
|
burn-tch = {path = "../../burn-tch"}
|
||||||
|
|
||||||
# Tokenizer
|
# Tokenizer
|
||||||
tokenizers = {version = "0.13.3", default-features = false, features = [
|
tokenizers = {version = "0.13.4", default-features = false, features = [
|
||||||
"onig",
|
"onig",
|
||||||
"http",
|
"http",
|
||||||
]}
|
]}
|
||||||
|
|
|
@@ -44,12 +44,8 @@ impl Tokenizer for Gpt2Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn decode(&self, tokens: &[usize]) -> String {
|
fn decode(&self, tokens: &[usize]) -> String {
|
||||||
self.tokenizer
|
let tokens = tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>();
|
||||||
.decode(
|
self.tokenizer.decode(&tokens, false).unwrap()
|
||||||
tokens.iter().map(|t| *t as u32).collect::<Vec<u32>>(),
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
.unwrap()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn vocab_size(&self) -> usize {
|
fn vocab_size(&self) -> usize {
|
||||||
|
|
Loading…
Reference in New Issue