Skip to content

Commit 5e6c26c

Browse files
authored
Merge pull request collabora#158 from makaveli10/cpu_usage
fix: cpu usage issue.
2 parents ec13493 + 18b6168 commit 5e6c26c

File tree

2 files changed

+14
-4
lines changed

2 files changed

+14
-4
lines changed

whisper_live/server.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,11 @@ def add_frames(self, frame_np):
408408
if self.frames_np is not None and self.frames_np.shape[0] > 45*self.RATE:
409409
self.frames_offset += 30.0
410410
self.frames_np = self.frames_np[int(30*self.RATE):]
411+
# check timestamp offset (should be >= self.frames_offset)
412+
# this basically means that there is no speech, as the timestamp offset hasn't been updated
413+
# and is less than frames_offset
414+
if self.timestamp_offset < self.frames_offset:
415+
self.timestamp_offset = self.frames_offset
411416
if self.frames_np is None:
412417
self.frames_np = frame_np.copy()
413418
else:
@@ -796,7 +801,8 @@ def transcribe_audio(self, input_sample):
796801
task=self.task,
797802
vad_filter=self.use_vad,
798803
vad_parameters=self.vad_parameters if self.use_vad else None)
799-
if self.language is None:
804+
805+
if self.language is None and info is not None:
800806
self.set_language(info)
801807
return result
802808

@@ -881,7 +887,9 @@ def speech_to_text(self):
881887
input_sample = input_bytes.copy()
882888
result = self.transcribe_audio(input_sample)
883889

884-
if self.language is None:
890+
if result is None or self.language is None:
891+
self.timestamp_offset += duration
892+
time.sleep(0.25) # wait for voice activity, result is None when no voice activity
885893
continue
886894
self.handle_transcription_output(result, duration)
887895

@@ -932,7 +940,6 @@ def update_segments(self, segments, duration):
932940
"""
933941
offset = None
934942
self.current_out = ''
935-
last_segment = None
936943
# process complete segments
937944
if len(segments) > 1:
938945
for i, s in enumerate(segments[:-1]):

whisper_live/transcriber.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def _get_feature_kwargs(self, model_path) -> dict:
180180

181181
return config
182182

183-
def transcribe(
183+
def transcribe( # noqa: C901
184184
self,
185185
audio: Union[str, BinaryIO, np.ndarray],
186186
language: Optional[str] = None,
@@ -315,6 +315,9 @@ def transcribe(
315315
else:
316316
speech_chunks = None
317317

318+
if audio.shape[0] == 0:
319+
return None, None
320+
318321
features = self.feature_extractor(audio)
319322

320323
encoder_output = None

0 commit comments

Comments
 (0)