Proper microphone recognizer with the queue

skeras · Jan 9, 2021 · 6f2d6d0 · 6f2d6d0
1 parent a8ae602
commit 6f2d6d0
Showing 1 changed file with 79 additions and 18 deletions.
diff --git a/python/example/test_microphone.py b/python/example/test_microphone.py
@@ -1,28 +1,89 @@
 #!/usr/bin/env python3
 
-from vosk import Model, KaldiRecognizer
+import argparse
 import os
+import queue
+import sounddevice as sd
+import vosk
+import sys
 
-if not os.path.exists("model"):
-    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
-    exit (1)
+q = queue.Queue()
 
-import pyaudio
+def int_or_str(text):
+    """Argparse type helper: return int(text) if it parses as an integer, else text unchanged."""
+    try:
+        return int(text)
+    except ValueError:
+        return text
 
-model = Model("model")
-rec = KaldiRecognizer(model, 16000)
+def callback(indata, frames, time, status):
+    """Called (from a separate thread) for each audio block; queues a bytes copy of it."""
+    if status:
+        print(status, file=sys.stderr)
+    q.put(bytes(indata))  # copy: the backend owns the raw buffer and may reuse it after we return
 
-p = pyaudio.PyAudio()
-stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
-stream.start_stream()
+parser = argparse.ArgumentParser(add_help=False)  # stage 1: knows only -l; -h is left to stage 2
+parser.add_argument(
+    '-l', '--list-devices', action='store_true',
+    help='show list of audio devices and exit')
+args, remaining = parser.parse_known_args()  # unrecognised options are kept for stage 2
+if args.list_devices:
+    print(sd.query_devices())
+    parser.exit(0)
+parser = argparse.ArgumentParser(  # stage 2: full option set, inherits -l through parents
+    description=__doc__,  # NOTE(review): no module docstring is visible in this diff, so likely None
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    parents=[parser])
+parser.add_argument(
+    '-f', '--filename', type=str, metavar='FILENAME',
+    help='audio file to store recording to')
+parser.add_argument(
+    '-m', '--model', type=str, metavar='MODEL_PATH',
+    help='Path to the model')
+parser.add_argument(
+    '-d', '--device', type=int_or_str,
+    help='input device (numeric ID or substring)')
+parser.add_argument(
+    '-r', '--samplerate', type=int, help='sampling rate')
+args = parser.parse_args(remaining)  # parse only what stage 1 did not consume
 
-while True:
-    data = stream.read(4000)
-    if len(data) == 0:
-        break
-    if rec.AcceptWaveform(data):
-        print(rec.Result())
+try:
+    if args.model is None:
+        args.model = "model"
+    if not os.path.exists(args.model):
+        print ("Please download a model for your language from https://alphacephei.com/vosk/models")
+        print ("and unpack as 'model' in the current folder.")
+        parser.exit(1)  # a missing model is a failure: exit non-zero, as the replaced code did
+    if args.samplerate is None:
+        device_info = sd.query_devices(args.device, 'input')
+        # the stream and recognizer below are given an int rate; sounddevice provides a float:
+        args.samplerate = int(device_info['default_samplerate'])
+
+    model = vosk.Model(args.model)
+
+    if args.filename:
+        dump_fn = open(args.filename, "wb")  # raw audio dump; NOTE(review): never closed explicitly
     else:
-        print(rec.PartialResult())
+        dump_fn = None
+
+    with sd.RawInputStream(samplerate=args.samplerate, blocksize = 8000, device=args.device, dtype='int16',
+                            channels=1, callback=callback):
+            print('#' * 80)
+            print('Press Ctrl+C to stop the recording')
+            print('#' * 80)
+
+            rec = vosk.KaldiRecognizer(model, args.samplerate)
+            while True:
+                data = q.get()  # blocks until the audio callback has queued the next block
+                if rec.AcceptWaveform(data):
+                    print(rec.Result())
+                else:
+                    print(rec.PartialResult())
+                if dump_fn is not None:
+                    dump_fn.write(data)
 
-print(rec.FinalResult())
+except KeyboardInterrupt:
+    print('\nDone')
+    parser.exit(0)
+except Exception as e:
+    parser.exit(type(e).__name__ + ': ' + str(e))  # str status: sys.exit prints it and exits with 1