Decoding improvements (#1033)
* Suppress the task tokens (transcribe/translate) during decoding.
* Do not ignore the last segment when it ends with a single timestamp.
This commit is contained in:
@@ -160,6 +160,14 @@ class Tokenizer:
|
||||
def eot(self) -> int:
    """Return the end-of-text token id.

    The value is read from the ``eos_token_id`` attribute of the wrapped
    ``self.tokenizer`` object.
    """
    wrapped = self.tokenizer
    return wrapped.eos_token_id
|
||||
|
||||
@cached_property
def transcribe(self) -> int:
    """Return the id of the ``<|transcribe|>`` special token.

    The id is resolved via ``self._get_single_token_id`` and memoized by the
    ``cached_property`` decorator.
    """
    task_marker = "<|transcribe|>"
    return self._get_single_token_id(task_marker)
|
||||
|
||||
@cached_property
def translate(self) -> int:
    """Return the id of the ``<|translate|>`` special token.

    The id is resolved via ``self._get_single_token_id`` and memoized by the
    ``cached_property`` decorator.
    """
    task_marker = "<|translate|>"
    return self._get_single_token_id(task_marker)
|
||||
|
||||
@cached_property
def sot(self) -> int:
    """Return the id of the ``<|startoftranscript|>`` special token.

    The id is resolved via ``self._get_single_token_id`` and memoized by the
    ``cached_property`` decorator.
    """
    start_marker = "<|startoftranscript|>"
    return self._get_single_token_id(start_marker)
|
||||
|
||||
Reference in New Issue
Block a user