// yx_speech_to_text_flutter/ios/Classes/YxAsrPlugin.swift

import Flutter
import UIKit
import Speech
import AVFoundation
/// Flutter plugin bridging iOS `SFSpeechRecognizer` speech-to-text to Dart.
///
/// Exposes the "yx_asr" method channel for commands and three event channels
/// ("yx_asr/results", "yx_asr/errors", "yx_asr/status") for streaming
/// recognition results, errors, and listening-status changes.
public class YxAsrPlugin: NSObject, FlutterPlugin {

    // MARK: - Flutter channels & sinks

    private var channel: FlutterMethodChannel?
    private var resultEventChannel: FlutterEventChannel?
    private var errorEventChannel: FlutterEventChannel?
    private var statusEventChannel: FlutterEventChannel?
    private var resultEventSink: FlutterEventSink?
    private var errorEventSink: FlutterEventSink?
    private var statusEventSink: FlutterEventSink?

    // MARK: - Recognition state

    private var speechRecognizer: SFSpeechRecognizer?
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private var audioEngine: AVAudioEngine?
    // True from a successful startListening until cleanup(); mirrored to Dart
    // via the status event channel.
    private var isListening = false

    // MARK: - Registration

    public static func register(with registrar: FlutterPluginRegistrar) {
        let instance = YxAsrPlugin()

        let channel = FlutterMethodChannel(name: "yx_asr", binaryMessenger: registrar.messenger())
        instance.channel = channel
        registrar.addMethodCallDelegate(instance, channel: channel)

        let resultEventChannel = FlutterEventChannel(name: "yx_asr/results", binaryMessenger: registrar.messenger())
        instance.resultEventChannel = resultEventChannel
        resultEventChannel.setStreamHandler(ResultStreamHandler(plugin: instance))

        let errorEventChannel = FlutterEventChannel(name: "yx_asr/errors", binaryMessenger: registrar.messenger())
        instance.errorEventChannel = errorEventChannel
        errorEventChannel.setStreamHandler(ErrorStreamHandler(plugin: instance))

        let statusEventChannel = FlutterEventChannel(name: "yx_asr/status", binaryMessenger: registrar.messenger())
        instance.statusEventChannel = statusEventChannel
        statusEventChannel.setStreamHandler(StatusStreamHandler(plugin: instance))
    }

    // MARK: - Method-channel dispatch

    public func handle(_ call: FlutterMethodCall, result: @escaping FlutterResult) {
        switch call.method {
        case "isAvailable":
            // Available when the OS reports at least one recognizable locale.
            result(!SFSpeechRecognizer.supportedLocales().isEmpty)
        case "hasPermission":
            result(hasPermission())
        case "requestPermission":
            requestPermission(result: result)
        case "startListening":
            let arguments = call.arguments as? [String: Any] ?? [:]
            let localeId = arguments["localeId"] as? String ?? "en-US"
            let partialResults = arguments["partialResults"] as? Bool ?? true
            let onDevice = arguments["onDevice"] as? Bool ?? false
            startListening(localeId: localeId, partialResults: partialResults, onDevice: onDevice, result: result)
        case "stopListening":
            stopListening(result: result)
        case "cancel":
            cancel(result: result)
        case "isListening":
            result(isListening)
        default:
            result(FlutterMethodNotImplemented)
        }
    }

    // MARK: - Permissions

    /// True only when BOTH speech-recognition and microphone permission are granted.
    private func hasPermission() -> Bool {
        let speechStatus = SFSpeechRecognizer.authorizationStatus()
        let microphoneStatus = AVAudioSession.sharedInstance().recordPermission
        return speechStatus == .authorized && microphoneStatus == .granted
    }

    /// Requests speech + microphone permission and answers `result` with the
    /// combined outcome once both system prompts have completed.
    private func requestPermission(result: @escaping FlutterResult) {
        if hasPermission() {
            result(true)
            return
        }
        var speechPermissionGranted = false
        var microphonePermissionGranted = false
        let group = DispatchGroup()

        // Request speech-recognition permission.
        group.enter()
        SFSpeechRecognizer.requestAuthorization { status in
            speechPermissionGranted = status == .authorized
            group.leave()
        }

        // Request microphone permission.
        group.enter()
        AVAudioSession.sharedInstance().requestRecordPermission { granted in
            microphonePermissionGranted = granted
            group.leave()
        }

        // Answer on the main queue: FlutterResult must run on the platform thread.
        group.notify(queue: .main) {
            result(speechPermissionGranted && microphonePermissionGranted)
        }
    }

    // MARK: - Start / stop / cancel

    private func startListening(localeId: String, partialResults: Bool, onDevice: Bool, result: @escaping FlutterResult) {
        guard hasPermission() else {
            sendError(errorType: "permissionDenied", errorMsg: "Speech recognition permission not granted", errorCode: nil)
            result(FlutterError(code: "PERMISSION_DENIED", message: "Speech recognition permission not granted", details: nil))
            return
        }
        if isListening {
            // Already running; treat as a no-op success.
            result(nil)
            return
        }
        do {
            try startRecognition(localeId: localeId, partialResults: partialResults, onDevice: onDevice)
            isListening = true
            emitStatus(true)
            result(nil)
        } catch {
            // FIX: release any partially-initialized audio state (activated session,
            // installed tap, running engine) left behind when startRecognition()
            // throws midway through.
            cleanup()
            sendError(errorType: "service", errorMsg: "Failed to start speech recognition: \(error.localizedDescription)", errorCode: nil)
            result(FlutterError(code: "START_FAILED", message: "Failed to start speech recognition", details: error.localizedDescription))
        }
    }

    /// Configures the audio session, wires the microphone into a buffer request,
    /// and starts a recognition task for `localeId`.
    /// - Throws: when the recognizer is unavailable, the request cannot be built,
    ///   or the audio session / audio engine fails to start.
    private func startRecognition(localeId: String, partialResults: Bool, onDevice: Bool) throws {
        // Cancel any task left over from a previous session.
        recognitionTask?.cancel()
        recognitionTask = nil

        // Route the shared audio session for recording.
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)

        // Create the speech recognizer for the requested locale.
        speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: localeId))
        guard let speechRecognizer = speechRecognizer, speechRecognizer.isAvailable else {
            throw NSError(domain: "YxAsrPlugin", code: 1, userInfo: [NSLocalizedDescriptionKey: "Speech recognizer not available"])
        }

        // Create the recognition request fed from microphone buffers.
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        guard let recognitionRequest = recognitionRequest else {
            throw NSError(domain: "YxAsrPlugin", code: 2, userInfo: [NSLocalizedDescriptionKey: "Unable to create recognition request"])
        }
        recognitionRequest.shouldReportPartialResults = partialResults
        if #available(iOS 13.0, *) {
            recognitionRequest.requiresOnDeviceRecognition = onDevice
        }

        // Create the audio engine and tap the input node.
        audioEngine = AVAudioEngine()
        guard let audioEngine = audioEngine else {
            throw NSError(domain: "YxAsrPlugin", code: 3, userInfo: [NSLocalizedDescriptionKey: "Unable to create audio engine"])
        }
        let inputNode = audioEngine.inputNode
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
            recognitionRequest.append(buffer)
        }
        audioEngine.prepare()
        try audioEngine.start()

        // Start the recognition task. NOTE: the result handler may be invoked on
        // a background queue; all sink emission below is marshalled to main.
        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { [weak self] result, error in
            guard let self = self else { return }
            if let result = result {
                let recognizedText = result.bestTranscription.formattedString
                // Surface the first segment's confidence (0.0 when absent).
                let confidence = result.bestTranscription.segments.first?.confidence ?? 0.0
                let alternatives = result.transcriptions.dropFirst().map { $0.formattedString }
                self.sendResult(
                    recognizedWords: recognizedText,
                    finalResult: result.isFinal,
                    confidence: Double(confidence),
                    alternatives: Array(alternatives)
                )
                if result.isFinal {
                    self.cleanup()
                }
            }
            if let error = error {
                // FIX: after cancel()/cleanup() the task fires one last callback
                // with a cancellation error; recognitionTask is already nil then,
                // so don't surface it to Dart or run cleanup() a second time.
                guard self.recognitionTask != nil else { return }
                self.handleRecognitionError(error)
            }
        }
    }

    private func stopListening(result: @escaping FlutterResult) {
        // Mark the audio stream finished; the recognizer then delivers the final
        // result, whose isFinal branch performs the full cleanup().
        recognitionRequest?.endAudio()
        // FIX: stop capturing immediately so the microphone is released now,
        // not only when the final result eventually arrives.
        audioEngine?.stop()
        result(nil)
    }

    private func cancel(result: @escaping FlutterResult) {
        cleanup()
        result(nil)
    }

    /// Tears down the engine, request and task, resets state and notifies Dart.
    /// Safe to call repeatedly: the status event fires only on a true -> false
    /// transition, so a final-result cleanup followed by a late error callback
    /// does not emit `false` twice.
    private func cleanup() {
        recognitionTask?.cancel()
        recognitionTask = nil
        recognitionRequest = nil
        if let audioEngine = audioEngine {
            audioEngine.stop()
            audioEngine.inputNode.removeTap(onBus: 0)
        }
        audioEngine = nil
        let wasListening = isListening
        isListening = false
        if wasListening {
            emitStatus(false)
        }
        do {
            try AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
        } catch {
            print("Error deactivating audio session: \(error)")
        }
    }

    // MARK: - Error mapping

    /// Maps recognition NSError codes to the plugin's error vocabulary, emits an
    /// error event, and tears the session down.
    /// NOTE(review): codes are matched without checking nsError.domain — these are
    /// assumed to be kAFAssistantErrorDomain values; confirm before extending.
    private func handleRecognitionError(_ error: Error) {
        let nsError = error as NSError
        let errorType: String
        let errorMsg: String
        switch nsError.code {
        case 1700: // kLSRErrorCodeNoSpeechDetected
            errorType = "noSpeech"
            errorMsg = "No speech detected"
        case 1101: // kLSRErrorCodeAudioReadFailed
            errorType = "audio"
            errorMsg = "Audio read failed"
        case 1110: // kLSRErrorCodeUndefinedTemplateClassName
            errorType = "service"
            errorMsg = "Speech recognition service error"
        default:
            errorType = "unknown"
            errorMsg = error.localizedDescription
        }
        sendError(errorType: errorType, errorMsg: errorMsg, errorCode: String(nsError.code))
        cleanup()
    }

    // MARK: - Event emission (always on the main/platform thread)

    /// FIX: Flutter platform-channel sinks must be invoked on the main thread,
    /// but SFSpeechRecognizer callbacks can arrive on a background queue —
    /// marshal every sink call through here.
    private func emitOnMain(_ work: @escaping () -> Void) {
        if Thread.isMainThread {
            work()
        } else {
            DispatchQueue.main.async(execute: work)
        }
    }

    private func emitStatus(_ listening: Bool) {
        emitOnMain { [weak self] in self?.statusEventSink?(listening) }
    }

    private func sendResult(recognizedWords: String, finalResult: Bool, confidence: Double, alternatives: [String]) {
        let payload: [String: Any] = [
            "recognizedWords": recognizedWords,
            "finalResult": finalResult,
            "confidence": confidence,
            "alternatives": alternatives
        ]
        emitOnMain { [weak self] in self?.resultEventSink?(payload) }
    }

    private func sendError(errorType: String, errorMsg: String, errorCode: String?) {
        // FIX: build [String: Any] and omit a nil code entirely instead of a
        // [String: Any?] literal, which puts a nested Optional into the channel
        // codec. A Dart map lookup yields null for a missing key either way.
        var payload: [String: Any] = [
            "errorType": errorType,
            "errorMsg": errorMsg
        ]
        if let errorCode = errorCode {
            payload["errorCode"] = errorCode
        }
        emitOnMain { [weak self] in self?.errorEventSink?(payload) }
    }

    // MARK: - Sink wiring (called by the stream handlers)

    func setResultEventSink(_ eventSink: FlutterEventSink?) {
        resultEventSink = eventSink
    }

    func setErrorEventSink(_ eventSink: FlutterEventSink?) {
        errorEventSink = eventSink
    }

    func setStatusEventSink(_ eventSink: FlutterEventSink?) {
        statusEventSink = eventSink
    }
}
/// Stream handler for the "yx_asr/results" event channel: forwards the Dart
/// subscription's sink into the plugin, and clears it on cancel.
class ResultStreamHandler: NSObject, FlutterStreamHandler {
    // Weak back-reference: the plugin owns the channel that owns this handler.
    private weak var owner: YxAsrPlugin?

    init(plugin: YxAsrPlugin) {
        owner = plugin
        super.init()
    }

    func onListen(withArguments arguments: Any?, eventSink events: @escaping FlutterEventSink) -> FlutterError? {
        owner?.setResultEventSink(events)
        return nil
    }

    func onCancel(withArguments arguments: Any?) -> FlutterError? {
        owner?.setResultEventSink(nil)
        return nil
    }
}
/// Stream handler for the "yx_asr/errors" event channel: forwards the Dart
/// subscription's sink into the plugin, and clears it on cancel.
class ErrorStreamHandler: NSObject, FlutterStreamHandler {
    // Weak back-reference: the plugin owns the channel that owns this handler.
    private weak var owner: YxAsrPlugin?

    init(plugin: YxAsrPlugin) {
        owner = plugin
        super.init()
    }

    func onListen(withArguments arguments: Any?, eventSink events: @escaping FlutterEventSink) -> FlutterError? {
        owner?.setErrorEventSink(events)
        return nil
    }

    func onCancel(withArguments arguments: Any?) -> FlutterError? {
        owner?.setErrorEventSink(nil)
        return nil
    }
}
/// Stream handler for the "yx_asr/status" event channel: forwards the Dart
/// subscription's sink into the plugin, and clears it on cancel.
class StatusStreamHandler: NSObject, FlutterStreamHandler {
    // Weak back-reference: the plugin owns the channel that owns this handler.
    private weak var owner: YxAsrPlugin?

    init(plugin: YxAsrPlugin) {
        owner = plugin
        super.init()
    }

    func onListen(withArguments arguments: Any?, eventSink events: @escaping FlutterEventSink) -> FlutterError? {
        owner?.setStatusEventSink(events)
        return nil
    }

    func onCancel(withArguments arguments: Any?) -> FlutterError? {
        owner?.setStatusEventSink(nil)
        return nil
    }
}