Support using Vosk instead of Whisper for transcripts

For Esperanto, Vosk is used automatically, since Whisper doesn't
support Esperanto.
This commit is contained in:
Benno Lang 2025-01-23 15:50:05 +10:30
parent 62f4b7bedd
commit ff23de60bb
3 changed files with 21 additions and 10 deletions

View file

@ -11,6 +11,7 @@ class Options:
filter = None
episodes = 'new'
numEpisodes = 1
tts = 'whisper'
def outputHelp():
print("Usage: ./process [args] [filter [age] [episodes]]")
@ -26,6 +27,8 @@ def outputHelp():
print(" Skip generating transcripts and translations of podcast episodes")
print("--verbose")
print(" Output a detailed report of actions as they are being taken")
print("--vosk")
print(" Use Vosk instead of Whisper to generate transcripts")
print("")
print("filter:")
print(" If specified, only configured podcasts which match the filter will be processed")
@ -61,6 +64,8 @@ def read():
opts.generate = False
elif arg == "--verbose":
opts.verbose = True
elif arg == '--vosk':
opts.tts = 'vosk'
elif arg == 'new':
opts.episodes = 'new'
elif arg == 'old':

View file

@ -109,7 +109,7 @@ def hasTranslation(audioFile):
translationFile = getTranslationFilename(audioFile)
return os.path.isfile(translationFile)
def generateFromAudio(audioFile, task):
def generateFromAudio(audioFile, task, options):
if task == 'transcribe':
newExt = 'transcript.txt'
else:
@ -119,14 +119,18 @@ def generateFromAudio(audioFile, task):
langCode = getLangCode(audioFile)
fileParts = audioFile.split('/')
fileName = fileParts.pop()
newFileName = replaceExtension(fileName, newExt)
dir = '/'.join(fileParts)
os.chdir(dir)
cmd = f"whisper {fileName} --model medium --language {langCode} --task {task} --output_format vtt --fp16 False"
os.system(cmd)
if options.tts == 'vosk' or langCode == 'eo':
cmd = f"vosk-transcriber -i {fileName} -l {langCode} -t srt -o {newFileName}"
os.system(cmd)
else:
cmd = f"whisper {fileName} --model medium --language {langCode} --task {task} --output_format srt --fp16 False"
os.system(cmd)
# rename transcript/translation file generated
generatedFile = replaceExtension(fileName, 'vtt')
newFileName = replaceExtension(fileName, newExt)
os.rename(generatedFile, newFileName)
# rename transcript/translation file generated
generatedFile = replaceExtension(fileName, 'vtt')
os.rename(generatedFile, newFileName)
os.chdir('../../..')

View file

@ -30,11 +30,13 @@ for audioFile in audioFiles:
if not files.hasTranscript(audioFile):
skipped = False
print(f"Transcribing audio: {audioFile} ({audioLength})", flush=True)
files.generateFromAudio(audioFile, 'transcribe')
if language in config['translate'] and not files.hasTranslation(audioFile):
files.generateFromAudio(audioFile, 'transcribe', options)
if (language in config['translate'] and not files.hasTranslation(audioFile)
and language != 'eo' and options.tts != 'vosk'
):
skipped = False
print(f"Translating audio: {audioFile} ({audioLength})", flush=True)
files.generateFromAudio(audioFile, 'translate')
files.generateFromAudio(audioFile, 'translate', options)
if skipped and options.verbose:
print(f"Skipped audio: {audioFile} ({audioLength})")