我正在开发一个使用 SFSpeechRecognizer 的 iOS 项目
,它在开始时运行良好。我说一些话,它会回应。但是一两分钟后,它就失败了。它不提供任何已识别结果的反馈。我想知道这是否与缓冲区有关,但我不知道如何修复它。
我基本上使用 SpeechRecognizer 的演示来构建项目。不同之处在于我将识别的结果逐字存储在一个数组中。程序分析数组并响应某些单词,例如“播放”或之前设置的其他一些命令。程序响应命令后,删除数组的这个元素。
废话不多说,代码如下:
下面是识别器的代码,您可以看到
supportedCommands
过滤某些特定单词以供程序响应的数组。其他部分与https://developer.apple.com/library/content/samplecode/SpeakToMe/Listings/SpeakToMe_ViewController_swift.html#//apple_ref/doc/uid/TP40017110-SpeakToMe_ViewController_swift-DontLinkElementID_6的演示类似class SpeechRecognizer: NSObject, SFSpeechRecognizerDelegate { private var speechRecognizer: SFSpeechRecognizer! private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest! private var recognitionTask: SFSpeechRecognitionTask! private let audioEngine = AVAudioEngine() private let locale = Locale(identifier: "en-US") private var lastSavedString: String = "" private let supportedCommands = ["more", "play"] var speechInputQueue: [String] = [String]() func load() { print("load") prepareRecognizer(locale: locale) authorize() } func start() { print("start") if !audioEngine.isRunning { try! startRecording() } } func stop() { if audioEngine.isRunning { audioEngine.stop() recognitionRequest?.endAudio() } } private func authorize() { SFSpeechRecognizer.requestAuthorization { authStatus in OperationQueue.main.addOperation { switch authStatus { case .authorized: print("Authorized!") case .denied: print("Unauthorized!") case .restricted: print("Unauthorized!") case .notDetermined: print("Unauthorized!") } } } } private func prepareRecognizer(locale: Locale) { speechRecognizer = SFSpeechRecognizer(locale: locale)! speechRecognizer.delegate = self } private func startRecording() throws { // Cancel the previous task if it's running. 
if let recognitionTask = recognitionTask { recognitionTask.cancel() self.recognitionTask = nil } let audioSession = AVAudioSession.sharedInstance() try audioSession.setCategory(AVAudioSessionCategoryPlayAndRecord, with: .defaultToSpeaker) try audioSession.setMode(AVAudioSessionModeDefault) try audioSession.setActive(true, with: .notifyOthersOnDeactivation) recognitionRequest = SFSpeechAudioBufferRecognitionRequest() let inputNode = audioEngine.inputNode guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") } // Configure request so that results are returned before audio recording is finished recognitionRequest.shouldReportPartialResults = true // A recognition task represents a speech recognition session. // We keep a reference to the task so that it can be cancelled. recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in var isFinal = false if let result = result { let temp = result.bestTranscription.formattedString.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines).lowercased() //print("temp", temp) if temp != self.lastSavedString && temp.count > self.lastSavedString.count { var tempSplit = temp.split(separator: " ") var lastSplit = self.lastSavedString.split(separator: " ") while lastSplit.count > 0 { if String(tempSplit[0]) == String(lastSplit[0]) { tempSplit.remove(at: 0) lastSplit.remove(at: 0) } else { break } } for command in tempSplit { if self.supportedCommands.contains(String(command)) { self.speechInputQueue.append(String(command)) } } self.lastSavedString = temp } isFinal = result.isFinal } if error != nil || isFinal { self.audioEngine.stop() inputNode.removeTap(onBus: 0) self.recognitionRequest = nil self.recognitionTask = nil } } let recordingFormat = inputNode.outputFormat(forBus: 0) inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in 
self.recognitionRequest?.append(buffer) } audioEngine.prepare() try audioEngine.start() } }
我们如何使用它:
if self.speechRecognizer.speechInputQueue.count > 0 { if self.speechRecognizer.speechInputQueue[0] == "more" { print("temp", temp) print("content", content) // isSpeakingContent = true self.textToSpeech(text: content) } else if self.speechRecognizer.speechInputQueue[0] == "play" { print("try to play") let soundURL = URL(fileURLWithPath: Bundle.main.path(forResource: "cascade", ofType: "wav")!) do { audioPlayer = try AVAudioPlayer(contentsOf: soundURL) } catch { print(error) } audioPlayer.prepareToPlay() audioPlayer.play() } else { self.textToSpeech(text: "unrecognized command") } self.speechRecognizer.speechInputQueue.remove(at: 0) print("after :", self.speechRecognizer.speechInputQueue) }
它响应某些命令并播放一些音频。
缓冲区有问题吗?也许经过一两分钟的识别,缓冲区已满?识别器只是随着时间的推移而失败。