Skip to content

Commit

Permalink
Implement decoding on iOS with a microphone
Browse files Browse the repository at this point in the history
  • Loading branch information
nshmyrev committed Jan 24, 2021
1 parent fe91b5a commit de83de8
Show file tree
Hide file tree
Showing 6 changed files with 215 additions and 54 deletions.
12 changes: 8 additions & 4 deletions ios/VoskApiTest.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
92375244240C6DAF00DD6076 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 92375243240C6DAF00DD6076 /* Accelerate.framework */; };
92375246240C6DC900DD6076 /* libstdc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 92375245240C6DC900DD6076 /* libstdc++.tbd */; };
92375274240C6F1E00DD6076 /* 10001-90210-01803.wav in Resources */ = {isa = PBXBuildFile; fileRef = 92375256240C6E3D00DD6076 /* 10001-90210-01803.wav */; };
92D86BD5253F823F0040D53F /* vosk-model-small-en-us-0.4 in Resources */ = {isa = PBXBuildFile; fileRef = 92D86BD3253F823E0040D53F /* vosk-model-small-en-us-0.4 */; };
92BACED125BE125A00B5CC93 /* vosk-model-small-en-us-0.15 in Resources */ = {isa = PBXBuildFile; fileRef = 928CC50C25BE124400490481 /* vosk-model-small-en-us-0.15 */; };
92D6B8D325BDFEAC007FF08D /* VoskModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 92D6B8D225BDFEAC007FF08D /* VoskModel.swift */; };
92D86BD6253F823F0040D53F /* vosk-model-spk-0.4 in Resources */ = {isa = PBXBuildFile; fileRef = 92D86BD4253F823F0040D53F /* vosk-model-spk-0.4 */; };
/* End PBXBuildFile section */

Expand All @@ -34,9 +35,10 @@
92375243240C6DAF00DD6076 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
92375245240C6DC900DD6076 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; };
92375256240C6E3D00DD6076 /* 10001-90210-01803.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = "10001-90210-01803.wav"; sourceTree = "<group>"; };
928CC50C25BE124400490481 /* vosk-model-small-en-us-0.15 */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "vosk-model-small-en-us-0.15"; sourceTree = "<group>"; };
92AA22AD244CDD1200DA464B /* vosk_api.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vosk_api.h; sourceTree = "<group>"; };
92AA22AE244CDD5200DA464B /* bridging.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bridging.h; sourceTree = "<group>"; };
92D86BD3253F823E0040D53F /* vosk-model-small-en-us-0.4 */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "vosk-model-small-en-us-0.4"; sourceTree = "<group>"; };
92D6B8D225BDFEAC007FF08D /* VoskModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VoskModel.swift; sourceTree = "<group>"; };
92D86BD4253F823F0040D53F /* vosk-model-spk-0.4 */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "vosk-model-spk-0.4"; sourceTree = "<group>"; };
/* End PBXFileReference section */

Expand Down Expand Up @@ -83,14 +85,15 @@
9237522A240C550B00DD6076 /* LaunchScreen.storyboard */,
9237522D240C550B00DD6076 /* Info.plist */,
92375233240C558900DD6076 /* Vosk.swift */,
92D6B8D225BDFEAC007FF08D /* VoskModel.swift */,
);
path = VoskApiTest;
sourceTree = "<group>";
};
92375239240C642000DD6076 /* Vosk */ = {
isa = PBXGroup;
children = (
92D86BD3253F823E0040D53F /* vosk-model-small-en-us-0.4 */,
928CC50C25BE124400490481 /* vosk-model-small-en-us-0.15 */,
92D86BD4253F823F0040D53F /* vosk-model-spk-0.4 */,
92375256240C6E3D00DD6076 /* 10001-90210-01803.wav */,
92AA22AD244CDD1200DA464B /* vosk_api.h */,
Expand Down Expand Up @@ -169,12 +172,12 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
92BACED125BE125A00B5CC93 /* vosk-model-small-en-us-0.15 in Resources */,
92375274240C6F1E00DD6076 /* 10001-90210-01803.wav in Resources */,
9237522C240C550B00DD6076 /* LaunchScreen.storyboard in Resources */,
92375229240C550B00DD6076 /* Assets.xcassets in Resources */,
92D86BD6253F823F0040D53F /* vosk-model-spk-0.4 in Resources */,
92375227240C550B00DD6076 /* Main.storyboard in Resources */,
92D86BD5253F823F0040D53F /* vosk-model-small-en-us-0.4 in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand All @@ -187,6 +190,7 @@
files = (
92375224240C550B00DD6076 /* ViewController.swift in Sources */,
92375222240C550B00DD6076 /* AppDelegate.swift in Sources */,
92D6B8D325BDFEAC007FF08D /* VoskModel.swift in Sources */,
92375234240C558900DD6076 /* Vosk.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

55 changes: 42 additions & 13 deletions ios/VoskApiTest/Base.lproj/Main.storyboard
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13771" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<device id="retina4_7" orientation="portrait">
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13771" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" colorMatched="YES" initialViewController="bdW-KL-Y8Z">
<device id="retina5_5" orientation="portrait">
<adaptation id="fullscreen"/>
</device>
<dependencies>
Expand All @@ -10,23 +10,52 @@
</dependencies>
<scenes>
<!--View Controller-->
<scene sceneID="tne-QT-ifu">
<scene sceneID="nEc-89-Iqu">
<objects>
<viewController id="BYZ-38-t0r" customClass="ViewController" customModule="VoskApiTest" customModuleProvider="target" sceneMemberID="viewController">
<textView key="view" clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" editable="NO" textAlignment="natural" id="CtX-mx-X98">
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
<viewController id="bdW-KL-Y8Z" customClass="ViewController" customModule="VoskApiTest" customModuleProvider="target" sceneMemberID="viewController">
<layoutGuides>
<viewControllerLayoutGuide type="top" id="Hyr-Dz-4mU"/>
<viewControllerLayoutGuide type="bottom" id="w4A-5X-uBu"/>
</layoutGuides>
<view key="view" contentMode="scaleToFill" id="m5v-US-bvR">
<rect key="frame" x="0.0" y="0.0" width="414" height="736"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews>
<button opaque="NO" contentMode="scaleToFill" fixedFrame="YES" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="IaT-no-U3i" userLabel="Microphone">
<rect key="frame" x="124" y="34" width="157" height="30"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
<state key="normal" title="Recognize Microphone"/>
<connections>
<action selector="runRecognizeMicrohpone:" destination="bdW-KL-Y8Z" eventType="touchUpInside" id="hGB-lz-N2B"/>
</connections>
</button>
<button opaque="NO" contentMode="scaleToFill" fixedFrame="YES" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="GC5-nT-FQR" userLabel="File">
<rect key="frame" x="90" y="84" width="221" height="41"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
<state key="normal" title="Recognize File"/>
<connections>
<action selector="runRecognizeFile:" destination="bdW-KL-Y8Z" eventType="touchUpInside" id="xp5-Yi-rnN"/>
</connections>
</button>
<textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" fixedFrame="YES" text="Results here" textAlignment="natural" translatesAutoresizingMaskIntoConstraints="NO" id="w4X-cu-USq">
<rect key="frame" x="11" y="112" width="383" height="569"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="calibratedWhite"/>
<fontDescription key="fontDescription" type="system" pointSize="14"/>
<textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
</textView>
</subviews>
<color key="backgroundColor" white="1" alpha="1" colorSpace="calibratedWhite"/>
<fontDescription key="fontDescription" type="system" pointSize="14"/>
<textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
</textView>
</view>
<connections>
<outlet property="mainText" destination="CtX-mx-X98" id="oJy-5J-NKp"/>
<outlet property="mainText" destination="w4X-cu-USq" id="rZS-nz-Wql"/>
<outlet property="recognizeFile" destination="GC5-nT-FQR" id="dRe-tc-IA0"/>
<outlet property="recognizeMicrophone" destination="IaT-no-U3i" id="IuM-aa-pAP"/>
</connections>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
<placeholder placeholderIdentifier="IBFirstResponder" id="nWA-4D-pA6" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="32.799999999999997" y="32.833583208395808"/>
<point key="canvasLocation" x="-17.39130434782609" y="-285.32608695652175"/>
</scene>
</scenes>
</document>
107 changes: 95 additions & 12 deletions ios/VoskApiTest/ViewController.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,113 @@
// VoskApiTest
//
// Created by Niсkolay Shmyrev on 01.03.20.
// Copyright © 2020 Alpha Cephei. All rights reserved.
// Copyright © 2020-2021 Alpha Cephei. All rights reserved.
//

import UIKit
import AVFoundation

class ViewController: UIViewController {

@IBOutlet var mainText: UITextView!
/// What the recognition screen is currently doing.
///
/// - stopped: nothing is running.
/// - microphone: live microphone recognition is running.
/// - file: the bundled audio file is being recognized.
enum WorkMode {
    case stopped, microphone, file
}

override func viewDidLoad() {
super.viewDidLoad()
class ViewController: UIViewController {

DispatchQueue.global(qos: .userInitiated).async {
DispatchQueue.main.async {
self.mainText.text = "Processing file..."
var mode: WorkMode!

@IBOutlet weak var recognizeFile: UIButton!
@IBOutlet weak var mainText: UITextView!
@IBOutlet weak var recognizeMicrophone: UIButton!

var audioEngine : AVAudioEngine!
var processingQueue: DispatchQueue!
var model : VoskModel!

/// Switches the UI into `mode` and records it in the `mode` property.
///
/// - Parameter mode: The state to present. `.stopped` enables both buttons,
///   `.microphone` leaves only the microphone button enabled (retitled as a
///   stop button) and clears the text view, `.file` disables both buttons
///   and shows a progress message.
func setMode(mode: WorkMode) {
    // Button availability depends only on the target mode.
    recognizeFile.isEnabled = (mode == .stopped)
    recognizeMicrophone.isEnabled = (mode != .file)

    // Titles and text differ per mode; `.file` leaves the title untouched.
    switch mode {
    case .stopped:
        recognizeMicrophone.setTitle("Recognize Microphone", for: .normal)
    case .microphone:
        recognizeMicrophone.setTitle("Stop Microphone", for: .normal)
        mainText.text = ""
    case .file:
        mainText.text = "Processing file..."
    }

    self.mode = mode
}

/// Creates a fresh `AVAudioEngine`, installs a tap on its input node and
/// streams every captured buffer to a new `Vosk` recognizer.
///
/// Recognition runs on `processingQueue`; each result is prepended to
/// `mainText` on the main queue. A failure to start the engine is only
/// logged with `print`.
func startAudioEngine() {
    do {
        // Create a new audio engine.
        audioEngine = AVAudioEngine()

        let inputNode = audioEngine.inputNode
        let formatInput = inputNode.inputFormat(forBus: 0)
        // Ask the tap for mono 16-bit integer PCM at the input's own sample rate,
        // which is the layout recognizeData(buffer:) reads from the buffer.
        let formatPcm = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                      sampleRate: formatInput.sampleRate,
                                      channels: 1,
                                      interleaved: true)

        let recognizer = Vosk(model: model, sampleRate: Float(formatInput.sampleRate))

        // Request sampleRate / 10 frames per callback, i.e. a tenth of a second of audio.
        // `self` is captured weakly: the tap closure belongs to the engine that `self`
        // owns, so a strong capture would keep the view controller alive.
        inputNode.installTap(onBus: 0,
                             bufferSize: UInt32(formatInput.sampleRate / 10),
                             format: formatPcm) { [weak self] buffer, time in
            guard let queue = self?.processingQueue else { return }
            queue.async {
                let res = recognizer.recognizeData(buffer: buffer)
                DispatchQueue.main.async {
                    guard let self = self else { return }
                    // Newest result goes on top.
                    self.mainText.text = res + "\n" + self.mainText.text
                }
            }
        }

        // Start the stream of audio data.
        audioEngine.prepare()
        try audioEngine.start()
    } catch {
        print("Unable to start AVAudioEngine: \(error.localizedDescription)")
    }
}

/// Stops microphone capture started by `startAudioEngine()`.
///
/// Does nothing when no engine has been created yet.
func stopAudioEngine() {
    guard let engine = audioEngine else { return }
    // Remove the tap installed by startAudioEngine() so its closure, and the
    // recognizer that closure captured, are released once capture ends.
    engine.inputNode.removeTap(onBus: 0)
    engine.stop()
}

/// Button action that toggles microphone recognition.
///
/// When the current mode is `.stopped`, switches to `.microphone` and starts
/// the audio engine; in any other mode, stops the engine and returns to
/// `.stopped`.
@IBAction func runRecognizeMicrohpone(_ sender: Any) {
    guard mode == .stopped else {
        // Anything other than "stopped" is treated as a request to stop.
        stopAudioEngine()
        setMode(mode: .stopped)
        return
    }

    setMode(mode: .microphone)
    startAudioEngine()
}

/// Button action that recognizes the bundled audio file.
///
/// Enters `.file` mode, runs recognition on `processingQueue` with a
/// 16 kHz recognizer, then shows the result and returns to `.stopped` on
/// the main queue.
@IBAction func runRecognizeFile(_ sender: Any) {
    setMode(mode: .file)

    processingQueue.async {
        let fileRecognizer = Vosk(model: self.model, sampleRate: 16000.0)
        let transcript = fileRecognizer.recognizeFile()

        // UI updates go back to the main queue.
        DispatchQueue.main.async {
            self.mainText.text = transcript
            self.setMode(mode: .stopped)
        }
    }
}


/// Puts the UI into the `.stopped` state and creates the shared model and
/// the serial queue used for recognition work.
override func viewDidLoad() {
    super.viewDidLoad()

    setMode(mode: .stopped)

    // The model and the queue are independent of each other.
    model = VoskModel()
    processingQueue = DispatchQueue(label: "recognizerQueue")
}

// Forwards the memory warning to the superclass; no extra cleanup is done here.
override func didReceiveMemoryWarning() {
super.didReceiveMemoryWarning()
}
Expand Down
54 changes: 33 additions & 21 deletions ios/VoskApiTest/Vosk.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,52 @@
// VoskApiTest
//
// Created by Niсkolay Shmyrev on 01.03.20.
// Copyright © 2020 Alpha Cephei. All rights reserved.
// Copyright © 2020-2021 Alpha Cephei. All rights reserved.
//

import Foundation
import AVFoundation

/// Swift wrapper around a single Vosk recognizer handle.
///
/// The recognizer is created from the model and speaker model held by a
/// `VoskModel` and is freed when this object is deallocated.
public final class Vosk {

    /// Handle returned by `vosk_recognizer_new_spk`; freed in `deinit`.
    var recognizer : OpaquePointer!

    /// Creates a recognizer for audio at `sampleRate`.
    ///
    /// - Parameters:
    ///   - model: Provides the model and speaker model handles.
    ///   - sampleRate: Sample rate passed through to `vosk_recognizer_new_spk`.
    init(model: VoskModel, sampleRate: Float) {
        recognizer = vosk_recognizer_new_spk(model.model, model.spkModel, sampleRate)
    }

    deinit {
        vosk_recognizer_free(recognizer)
    }

    /// Recognizes the bundled file `10001-90210-01803.wav` in one pass.
    ///
    /// - Returns: The recognizer's final result string, or `""` when the
    ///   bundle resource path or the file cannot be read, or when the
    ///   result is not valid UTF-8.
    func recognizeFile() -> String {
        guard let resourcePath = Bundle.main.resourcePath else { return "" }

        let audioFile = URL(fileURLWithPath: resourcePath + "/10001-90210-01803.wav")
        guard let data = try? Data(contentsOf: audioFile) else { return "" }

        // The file's bytes are handed to the recognizer as-is, exactly once, using
        // the recognizer owned by this instance (it is freed in deinit, not here).
        let _ = data.withUnsafeBytes {
            vosk_recognizer_accept_waveform(recognizer, $0, Int32(data.count))
        }

        let sres = Vosk.string(fromCString: vosk_recognizer_final_result(recognizer))
        print(sres)
        return sres
    }

    /// Feeds one captured buffer to the recognizer.
    ///
    /// - Parameter buffer: Audio buffer; only the first entry of its
    ///   `int16ChannelData` is read, as `frameLength` 16-bit samples.
    /// - Returns: The full result string when `vosk_recognizer_accept_waveform`
    ///   returns 1, otherwise the partial result string. `""` when the buffer
    ///   has no 16-bit integer data or the result is not valid UTF-8.
    func recognizeData(buffer : AVAudioPCMBuffer) -> String {
        guard let channels = buffer.int16ChannelData else { return "" }

        // Length in bytes: two bytes per 16-bit sample.
        let dataLen = Int(buffer.frameLength) * MemoryLayout<Int16>.size
        let endOfSpeech = channels[0].withMemoryRebound(to: Int8.self, capacity: dataLen) {
            vosk_recognizer_accept_waveform(recognizer, $0, Int32(dataLen))
        }

        let res = endOfSpeech == 1
            ? vosk_recognizer_result(recognizer)
            : vosk_recognizer_partial_result(recognizer)
        return Vosk.string(fromCString: res)
    }

    /// Converts a C string returned by the recognizer to a Swift `String`,
    /// yielding `""` for a null pointer or invalid UTF-8 instead of crashing.
    private static func string(fromCString cString: UnsafePointer<CChar>?) -> String {
        guard let cString = cString, let text = String(validatingUTF8: cString) else {
            return ""
        }
        return text
    }
}
36 changes: 36 additions & 0 deletions ios/VoskApiTest/VoskModel.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//
// VoskModel.swift
// VoskApiTest
//
// Created by Niсkolay Shmyrev on 01.03.20.
// Copyright © 2020-2021 Alpha Cephei. All rights reserved.
//

import Foundation

/// Owns the Vosk model and speaker-model handles loaded from the app bundle.
///
/// Both handles are created in `init()` and freed in `deinit`.
public final class VoskModel {

    /// Handle returned by `vosk_model_new`.
    var model : OpaquePointer!
    /// Handle returned by `vosk_spk_model_new`.
    var spkModel : OpaquePointer!

    /// Sets the Vosk log level and loads both models from the bundle's
    /// resource directory. When the bundle has no resource path, both
    /// handles stay nil.
    init() {
        // Set to -1 to disable logs
        vosk_set_log_level(0)

        guard let bundleRoot = Bundle.main.resourcePath else { return }

        model = vosk_model_new(bundleRoot + "/vosk-model-small-en-us-0.15")
        spkModel = vosk_spk_model_new(bundleRoot + "/vosk-model-spk-0.4")
    }

    deinit {
        vosk_model_free(model)
        vosk_spk_model_free(spkModel)
    }
}

0 comments on commit de83de8

Please sign in to comment.