Add a --vosk option that selects Vosk instead of Whisper for transcripts.

Review notes:
 * Whisper is now invoked with "--output_format srt", so the rename step in
   generateFromAudio has to look for the generated file with the 'srt'
   extension. The previous revision of this patch still looked for 'vtt',
   which Whisper no longer writes, so os.rename would fail.
 * Vosk is also used whenever the language code is 'eo', and translation is
   skipped for 'eo' or when --vosk is given.
 * Line breaks and leading indentation were reconstructed from a
   whitespace-collapsed copy of this patch. If context lines fail to match,
   apply with a whitespace-tolerant option.
 * The blob ids on the "index" line for lib/files.py predate the 'srt'
   correction.

diff --git a/lib/args.py b/lib/args.py
index 9c25f52..760717a 100644
--- a/lib/args.py
+++ b/lib/args.py
@@ -11,6 +11,7 @@ class Options:
     filter = None
     episodes = 'new'
     numEpisodes = 1
+    tts = 'whisper'
 
 def outputHelp():
     print("Usage: ./process [args] [filter [age] [episodes]]")
@@ -26,6 +27,8 @@ def outputHelp():
     print(" Skip generating transcripts and translations of podcast episodes")
     print("--verbose")
     print(" Output a detailed report of actions as they are being taken")
+    print("--vosk")
+    print(" Use Vosk instead of Whisper to generate transcripts")
     print("")
     print("filter:")
     print(" If specified, only configured podcasts which match the filter will be processed")
@@ -61,6 +64,8 @@ def read():
             opts.generate = False
         elif arg == "--verbose":
             opts.verbose = True
+        elif arg == '--vosk':
+            opts.tts = 'vosk'
         elif arg == 'new':
             opts.episodes = 'new'
         elif arg == 'old':
diff --git a/lib/files.py b/lib/files.py
index 788f7b5..c8f6090 100644
--- a/lib/files.py
+++ b/lib/files.py
@@ -109,7 +109,7 @@ def hasTranslation(audioFile):
     translationFile = getTranslationFilename(audioFile)
     return os.path.isfile(translationFile)
 
-def generateFromAudio(audioFile, task):
+def generateFromAudio(audioFile, task, options):
     if task == 'transcribe':
         newExt = 'transcript.txt'
     else:
@@ -119,14 +119,18 @@ def generateFromAudio(audioFile, task):
     langCode = getLangCode(audioFile)
     fileParts = audioFile.split('/')
     fileName = fileParts.pop()
+    newFileName = replaceExtension(fileName, newExt)
     dir = '/'.join(fileParts)
     os.chdir(dir)
 
-    cmd = f"whisper {fileName} --model medium --language {langCode} --task {task} --output_format vtt --fp16 False"
-    os.system(cmd)
+    if options.tts == 'vosk' or langCode == 'eo':
+        cmd = f"vosk-transcriber -i {fileName} -l {langCode} -t srt -o {newFileName}"
+        os.system(cmd)
+    else:
+        cmd = f"whisper {fileName} --model medium --language {langCode} --task {task} --output_format srt --fp16 False"
+        os.system(cmd)
 
-    # rename transcript/translation file generated
-    generatedFile = replaceExtension(fileName, 'vtt')
-    newFileName = replaceExtension(fileName, newExt)
-    os.rename(generatedFile, newFileName)
+        # rename transcript/translation file generated
+        generatedFile = replaceExtension(fileName, 'srt')
+        os.rename(generatedFile, newFileName)
     os.chdir('../../..')
diff --git a/process b/process
index 16a2bca..26f01ad 100755
--- a/process
+++ b/process
@@ -30,11 +30,13 @@ for audioFile in audioFiles:
     if not files.hasTranscript(audioFile):
         skipped = False
         print(f"Transcribing audio: {audioFile} ({audioLength})", flush=True)
-        files.generateFromAudio(audioFile, 'transcribe')
-    if language in config['translate'] and not files.hasTranslation(audioFile):
+        files.generateFromAudio(audioFile, 'transcribe', options)
+    if (language in config['translate'] and not files.hasTranslation(audioFile)
+        and language != 'eo' and options.tts != 'vosk'
+    ):
         skipped = False
         print(f"Translating audio: {audioFile} ({audioLength})", flush=True)
-        files.generateFromAudio(audioFile, 'translate')
+        files.generateFromAudio(audioFile, 'translate', options)
 
     if skipped and options.verbose:
         print(f"Skipped audio: {audioFile} ({audioLength})")