We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ec54228 commit c5eb93f (Copy full SHA for c5eb93f)
tokenizers/src/models/bpe/trainer.rs
@@ -499,12 +499,9 @@ impl BpeTrainer {
499
part_b = rest;
500
}
501
502
- let new_token = format!("{part_a}{part_b}");
503
- // implement sentencepiece-like merge.
504
- // if this code were to be merged, integrate a way in the python bindings to communicate this variable
505
- // default should be 0/None to maintain previous behavior. 16 is the spm default.
506
507
// Insert new token if it does not already exist
+ let new_token = format!("{part_a}{part_b}");
508
let new_token_id = word_to_id
509
.get(&CompactString::from(&new_token))
510
.copied()
0 commit comments