Support using Vosk instead of Whisper for transcripts
For Esperanto, Vosk is used automatically, since Whisper doesn't support Esperanto
This commit is contained in:
parent
62f4b7bedd
commit
ff23de60bb
|
@ -11,6 +11,7 @@ class Options:
|
|||
filter = None
|
||||
episodes = 'new'
|
||||
numEpisodes = 1
|
||||
tts = 'whisper'
|
||||
|
||||
def outputHelp():
|
||||
print("Usage: ./process [args] [filter [age] [episodes]]")
|
||||
|
@ -26,6 +27,8 @@ def outputHelp():
|
|||
print(" Skip generating transcripts and translations of podcast episodes")
|
||||
print("--verbose")
|
||||
print(" Output a detailed report of actions as they are being taken")
|
||||
print("--vosk")
|
||||
print(" Use Vosk instead of Whisper to generate transcripts")
|
||||
print("")
|
||||
print("filter:")
|
||||
print(" If specified, only configured podcasts which match the filter will be processed")
|
||||
|
@ -61,6 +64,8 @@ def read():
|
|||
opts.generate = False
|
||||
elif arg == "--verbose":
|
||||
opts.verbose = True
|
||||
elif arg == '--vosk':
|
||||
opts.tts = 'vosk'
|
||||
elif arg == 'new':
|
||||
opts.episodes = 'new'
|
||||
elif arg == 'old':
|
||||
|
|
18
lib/files.py
18
lib/files.py
|
@ -109,7 +109,7 @@ def hasTranslation(audioFile):
|
|||
translationFile = getTranslationFilename(audioFile)
|
||||
return os.path.isfile(translationFile)
|
||||
|
||||
def generateFromAudio(audioFile, task):
|
||||
def generateFromAudio(audioFile, task, options):
|
||||
if task == 'transcribe':
|
||||
newExt = 'transcript.txt'
|
||||
else:
|
||||
|
@ -119,14 +119,18 @@ def generateFromAudio(audioFile, task):
|
|||
langCode = getLangCode(audioFile)
|
||||
fileParts = audioFile.split('/')
|
||||
fileName = fileParts.pop()
|
||||
newFileName = replaceExtension(fileName, newExt)
|
||||
dir = '/'.join(fileParts)
|
||||
os.chdir(dir)
|
||||
|
||||
cmd = f"whisper {fileName} --model medium --language {langCode} --task {task} --output_format vtt --fp16 False"
|
||||
os.system(cmd)
|
||||
if options.tts == 'vosk' or langCode == 'eo':
|
||||
cmd = f"vosk-transcriber -i {fileName} -l {langCode} -t srt -o {newFileName}"
|
||||
os.system(cmd)
|
||||
else:
|
||||
cmd = f"whisper {fileName} --model medium --language {langCode} --task {task} --output_format srt --fp16 False"
|
||||
os.system(cmd)
|
||||
|
||||
# rename transcript/translation file generated
|
||||
generatedFile = replaceExtension(fileName, 'vtt')
|
||||
newFileName = replaceExtension(fileName, newExt)
|
||||
os.rename(generatedFile, newFileName)
|
||||
# rename transcript/translation file generated
|
||||
# whisper is invoked with --output_format srt, so the file it writes ends in .srt (not .vtt)
generatedFile = replaceExtension(fileName, 'srt')
|
||||
os.rename(generatedFile, newFileName)
|
||||
os.chdir('../../..')
|
||||
|
|
8
process
8
process
|
@ -30,11 +30,13 @@ for audioFile in audioFiles:
|
|||
if not files.hasTranscript(audioFile):
|
||||
skipped = False
|
||||
print(f"Transcribing audio: {audioFile} ({audioLength})", flush=True)
|
||||
files.generateFromAudio(audioFile, 'transcribe')
|
||||
if language in config['translate'] and not files.hasTranslation(audioFile):
|
||||
files.generateFromAudio(audioFile, 'transcribe', options)
|
||||
if (language in config['translate'] and not files.hasTranslation(audioFile)
|
||||
and language != 'eo' and options.tts != 'vosk'
|
||||
):
|
||||
skipped = False
|
||||
print(f"Translating audio: {audioFile} ({audioLength})", flush=True)
|
||||
files.generateFromAudio(audioFile, 'translate')
|
||||
files.generateFromAudio(audioFile, 'translate', options)
|
||||
if skipped and options.verbose:
|
||||
print(f"Skipped audio: {audioFile} ({audioLength})")
|
||||
|
||||
|
|
Loading…
Reference in a new issue