Rename the <|nocaptions|> special token (and its no_captions accessor) to <|nospeech|> / no_speech to match the paper figure

This commit is contained in:
Jong Wook Kim
2022-09-23 15:45:32 +09:00
parent 61989529b7
commit 15ab548263
3 changed files with 27 additions and 39 deletions
+3 -3
View File
@@ -178,8 +178,8 @@ class Tokenizer:
@property
@lru_cache()
def no_captions(self) -> int:
return self._get_single_token_id("<|nocaptions|>")
def no_speech(self) -> int:
return self._get_single_token_id("<|nospeech|>")
@property
@lru_cache()
@@ -283,7 +283,7 @@ def build_tokenizer(name: str = "gpt2"):
"<|transcribe|>",
"<|startoflm|>",
"<|startofprev|>",
"<|nocaptions|>",
"<|nospeech|>",
"<|notimestamps|>",
]