diff --git a/Audio-Transcription-Chrome/options.js b/Audio-Transcription-Chrome/options.js
index 9717856a..7e8ae5b2 100644
--- a/Audio-Transcription-Chrome/options.js
+++ b/Audio-Transcription-Chrome/options.js
@@ -103,7 +103,7 @@ async function startRecord(option) {
         multilingual: option.multilingual,
         language: option.language,
         task: option.task,
-        model_size: option.modelSize
+        model: option.modelSize
       })
     );
   };
diff --git a/Audio-Transcription-Firefox/content.js b/Audio-Transcription-Firefox/content.js
index 682b142a..82b045b8 100644
--- a/Audio-Transcription-Firefox/content.js
+++ b/Audio-Transcription-Firefox/content.js
@@ -78,7 +78,7 @@ function startRecording(data) {
         multilingual: data.useMultilingual,
         language: data.language,
         task: data.task,
-        model_size: data.modelSize
+        model: data.modelSize
       })
     );
   };
diff --git a/TensorRT_whisper.md b/TensorRT_whisper.md
index 2935363f..6ff28681 100644
--- a/TensorRT_whisper.md
+++ b/TensorRT_whisper.md
@@ -37,10 +37,10 @@ python -c "import torch; import tensorrt; import tensorrt_llm"
 - We build `small.en` and `small` multilingual TensorRT engine. The script logs the path of the directory with Whisper TensorRT engine. We need the model_path to run the server.
 ```bash
 # convert small.en
-bash build_whisper_tensorrt /root/TensorRT-LLM-examples small.en
+bash scripts/build_whisper_tensorrt.sh /root/TensorRT-LLM-examples small.en
 
 # convert small multilingual model
-bash build_whisper_tensorrt /root/TensorRT-LLM-examples small
+bash scripts/build_whisper_tensorrt.sh /root/TensorRT-LLM-examples small
 ```
 
 ## Run WhisperLive Server with TensorRT Backend
@@ -48,6 +48,7 @@ bash build_whisper_tensorrt /root/TensorRT-LLM-examples small
 cd /home/WhisperLive
 
 # Install requirements
+bash scripts/setup.sh
 pip install -r requirements/server.txt
 
 # Required to create mel spectogram
diff --git a/requirements/server.txt b/requirements/server.txt
index 77e53788..98752ae7 100644
--- a/requirements/server.txt
+++ b/requirements/server.txt
@@ -2,4 +2,8 @@ faster-whisper==0.10.0
 torch
 websockets
 onnxruntime==1.16.0
-numba
\ No newline at end of file
+numba
+openai-whisper
+kaldialign
+soundfile
+ffmpeg-python
diff --git a/whisper_live/server.py b/whisper_live/server.py
index 1d22f501..cf40d53d 100644
--- a/whisper_live/server.py
+++ b/whisper_live/server.py
@@ -414,10 +414,10 @@ def __init__(
 
     def warmup(self, warmup_steps=10):
         logging.info("[INFO:] Warming up TensorRT engine..")
-        mel, duration = self.transcriber.log_mel_spectrogram("tests/jfk.flac")
+        mel, _ = self.transcriber.log_mel_spectrogram("tests/jfk.flac")
         for i in range(warmup_steps):
-            last_segment = self.transcriber.transcribe(mel)
-
+            self.transcriber.transcribe(mel)
+
     def set_eos(self, eos):
         self.lock.acquire()
         self.eos = eos
diff --git a/whisper_live/transcriber_tensorrt.py b/whisper_live/transcriber_tensorrt.py
index 8634a8f5..a36bb725 100644
--- a/whisper_live/transcriber_tensorrt.py
+++ b/whisper_live/transcriber_tensorrt.py
@@ -11,7 +11,7 @@
 from whisper.tokenizer import get_tokenizer
 from whisper_live.tensorrt_utils import (mel_filters, store_transcripts,
                                          write_error_stats, load_audio_wav_format,
-                                         pad_or_trim)
+                                         pad_or_trim, load_audio)
 
 import tensorrt_llm
 import tensorrt_llm.logger as logger
@@ -337,4 +337,4 @@ def decode_wav_file(
     if normalizer:
         prediction = normalizer(prediction)
 
-    return prediction.strip()
\ No newline at end of file
+    return prediction.strip()
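
A quick way to sanity-check the `model_size` → `model` rename outside the browser extensions is to open a raw WebSocket connection and send the new payload by hand. This is a minimal sketch, not code from the repo: the `ws://localhost:9090` endpoint and the literal option values are assumptions for illustration; only the key names (`multilingual`, `language`, `task`, `model`) come from the diff above.

```javascript
// Minimal handshake sketch; endpoint and option values are assumed.
const socket = new WebSocket("ws://localhost:9090");

socket.onopen = () => {
  socket.send(
    JSON.stringify({
      multilingual: false,
      language: "en",
      task: "transcribe",
      model: "small.en" // previously sent under the `model_size` key
    })
  );
};
```

Note that a server still reading `model_size` would simply not see the renamed key, so client and server versions need to move together on this change.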