Rename nocaptions -> nospeech (special token and tokenizer property) to match the paper figure

2022-09-23 15:45:32 +09:00
parent 61989529b7
commit 15ab548263
3 changed files with 27 additions and 39 deletions
@@ -178,8 +178,8 @@ class Tokenizer:

    @property
    @lru_cache()
-    def no_captions(self) -> int:
-        return self._get_single_token_id("<|nocaptions|>")
+    def no_speech(self) -> int:
+        return self._get_single_token_id("<|nospeech|>")

    @property
    @lru_cache()
@@ -283,7 +283,7 @@ def build_tokenizer(name: str = "gpt2"):
        "<|transcribe|>",
        "<|startoflm|>",
        "<|startofprev|>",
-        "<|nocaptions|>",
+        "<|nospeech|>",
        "<|notimestamps|>",
    ]