Support using Vosk instead of Whisper for transcripts (new --vosk option)
For Esperanto, Vosk is used automatically, since Whisper doesn't support Esperanto. Audio translation is skipped for Esperanto and whenever --vosk is given.
This commit is contained in:
parent
62f4b7bedd
commit
ff23de60bb
|
@ -11,6 +11,7 @@ class Options:
|
||||||
filter = None
|
filter = None
|
||||||
episodes = 'new'
|
episodes = 'new'
|
||||||
numEpisodes = 1
|
numEpisodes = 1
|
||||||
|
tts = 'whisper'
|
||||||
|
|
||||||
def outputHelp():
|
def outputHelp():
|
||||||
print("Usage: ./process [args] [filter [age] [episodes]]")
|
print("Usage: ./process [args] [filter [age] [episodes]]")
|
||||||
|
@ -26,6 +27,8 @@ def outputHelp():
|
||||||
print(" Skip generating transcripts and translations of podcast episodes")
|
print(" Skip generating transcripts and translations of podcast episodes")
|
||||||
print("--verbose")
|
print("--verbose")
|
||||||
print(" Output a detailed report of actions as they are being taken")
|
print(" Output a detailed report of actions as they are being taken")
|
||||||
|
print("--vosk")
|
||||||
|
print(" Use Vosk instead of Whisper to generate transcripts")
|
||||||
print("")
|
print("")
|
||||||
print("filter:")
|
print("filter:")
|
||||||
print(" If specified, only configured podcasts which match the filter will be processed")
|
print(" If specified, only configured podcasts which match the filter will be processed")
|
||||||
|
@ -61,6 +64,8 @@ def read():
|
||||||
opts.generate = False
|
opts.generate = False
|
||||||
elif arg == "--verbose":
|
elif arg == "--verbose":
|
||||||
opts.verbose = True
|
opts.verbose = True
|
||||||
|
elif arg == '--vosk':
|
||||||
|
opts.tts = 'vosk'
|
||||||
elif arg == 'new':
|
elif arg == 'new':
|
||||||
opts.episodes = 'new'
|
opts.episodes = 'new'
|
||||||
elif arg == 'old':
|
elif arg == 'old':
|
||||||
|
|
18
lib/files.py
18
lib/files.py
|
@ -109,7 +109,7 @@ def hasTranslation(audioFile):
|
||||||
translationFile = getTranslationFilename(audioFile)
|
translationFile = getTranslationFilename(audioFile)
|
||||||
return os.path.isfile(translationFile)
|
return os.path.isfile(translationFile)
|
||||||
|
|
||||||
def generateFromAudio(audioFile, task):
|
def generateFromAudio(audioFile, task, options):
|
||||||
if task == 'transcribe':
|
if task == 'transcribe':
|
||||||
newExt = 'transcript.txt'
|
newExt = 'transcript.txt'
|
||||||
else:
|
else:
|
||||||
|
@ -119,14 +119,18 @@ def generateFromAudio(audioFile, task):
|
||||||
langCode = getLangCode(audioFile)
|
langCode = getLangCode(audioFile)
|
||||||
fileParts = audioFile.split('/')
|
fileParts = audioFile.split('/')
|
||||||
fileName = fileParts.pop()
|
fileName = fileParts.pop()
|
||||||
|
newFileName = replaceExtension(fileName, newExt)
|
||||||
dir = '/'.join(fileParts)
|
dir = '/'.join(fileParts)
|
||||||
os.chdir(dir)
|
os.chdir(dir)
|
||||||
|
|
||||||
cmd = f"whisper {fileName} --model medium --language {langCode} --task {task} --output_format vtt --fp16 False"
|
if options.tts == 'vosk' or langCode == 'eo':
|
||||||
os.system(cmd)
|
cmd = f"vosk-transcriber -i {fileName} -l {langCode} -t srt -o {newFileName}"
|
||||||
|
os.system(cmd)
|
||||||
|
else:
|
||||||
|
cmd = f"whisper {fileName} --model medium --language {langCode} --task {task} --output_format srt --fp16 False"
|
||||||
|
os.system(cmd)
|
||||||
|
|
||||||
# rename transcript/translation file generated
|
# rename transcript/translation file generated
|
||||||
generatedFile = replaceExtension(fileName, 'vtt')
|
generatedFile = replaceExtension(fileName, 'vtt')
|
||||||
newFileName = replaceExtension(fileName, newExt)
|
os.rename(generatedFile, newFileName)
|
||||||
os.rename(generatedFile, newFileName)
|
|
||||||
os.chdir('../../..')
|
os.chdir('../../..')
|
||||||
|
|
8
process
8
process
|
@ -30,11 +30,13 @@ for audioFile in audioFiles:
|
||||||
if not files.hasTranscript(audioFile):
|
if not files.hasTranscript(audioFile):
|
||||||
skipped = False
|
skipped = False
|
||||||
print(f"Transcribing audio: {audioFile} ({audioLength})", flush=True)
|
print(f"Transcribing audio: {audioFile} ({audioLength})", flush=True)
|
||||||
files.generateFromAudio(audioFile, 'transcribe')
|
files.generateFromAudio(audioFile, 'transcribe', options)
|
||||||
if language in config['translate'] and not files.hasTranslation(audioFile):
|
if (language in config['translate'] and not files.hasTranslation(audioFile)
|
||||||
|
and language != 'eo' and options.tts != 'vosk'
|
||||||
|
):
|
||||||
skipped = False
|
skipped = False
|
||||||
print(f"Translating audio: {audioFile} ({audioLength})", flush=True)
|
print(f"Translating audio: {audioFile} ({audioLength})", flush=True)
|
||||||
files.generateFromAudio(audioFile, 'translate')
|
files.generateFromAudio(audioFile, 'translate', options)
|
||||||
if skipped and options.verbose:
|
if skipped and options.verbose:
|
||||||
print(f"Skipped audio: {audioFile} ({audioLength})")
|
print(f"Skipped audio: {audioFile} ({audioLength})")
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue