Skip to content

Commit d48895b

Browse files
Naman Goyal, facebook-github-bot
authored and committed
fixed word level extract features for roberta-xlmr
Summary: Pull Request resolved: fairinternal/fairseq-py#933 Differential Revision: D18783780 fbshipit-source-id: fa0a27fab886a5fa5be8d5f49151d1d9dd9775f1
1 parent 1c56594 commit d48895b

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

fairseq/models/roberta/alignment_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ def align_bpe_to_words(roberta, bpe_tokens: torch.LongTensor, other_tokens: List
2222
List[str]: mapping from *other_tokens* to corresponding *bpe_tokens*.
2323
"""
2424
assert bpe_tokens.dim() == 1
25+
assert bpe_tokens[0] == 0
2526

2627
def clean(text):
2728
return text.strip()
@@ -32,7 +33,6 @@ def clean(text):
3233
other_tokens = [clean(str(o)) for o in other_tokens]
3334

3435
# strip leading <s>
35-
assert bpe_tokens[0] == '<s>'
3636
bpe_tokens = bpe_tokens[1:]
3737
assert ''.join(bpe_tokens) == ''.join(other_tokens)
3838

0 commit comments

Comments
 (0)