
I'm currently recording video with audio using CameraX and it works well, but now I also need to run SpeechRecognizer while recording and get a transcription of what the user says.
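
For reference, the recording itself is started roughly like the sketch below, with the `Recorder`/`VideoCapture` classes from androidx.camera.video and audio enabled; `videoCapture` (an already bound `VideoCapture<Recorder>` use case) and `outputFile` are placeholder names for this example, not from my actual code:

    // Simplified sketch of starting a CameraX recording with audio.
    // `videoCapture` and `outputFile` are placeholders for this example.
    // Requires the RECORD_AUDIO permission for withAudioEnabled().
    private fun startVideoRecording(): Recording {
        val outputOptions = FileOutputOptions.Builder(outputFile).build()
        return videoCapture.output
            .prepareRecording(requireContext(), outputOptions)
            .withAudioEnabled() // record the mic together with the video
            .start(ContextCompat.getMainExecutor(requireContext())) { event ->
                when (event) {
                    is VideoRecordEvent.Start ->
                        Log.i("CameraX", "recording started")
                    is VideoRecordEvent.Finalize ->
                        Log.i("CameraX", "saved to ${event.outputResults.outputUri}")
                    else -> { /* status / pause / resume events */ }
                }
            }
    }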

Update:

I tried extracting the audio from the video and then passing it to the intent as an extra, but it didn't work. Then I considered getting the audio directly from the SpeechRecognizer intent, but that is a dead end because nowhere in the documentation is a uri or temporary file mentioned. I'm now trying to start a MediaRecorder when SpeechRecognizer starts listening, but that only works for the first listen.
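
To show what I mean by listening more than once, the sketch below restarts `startListening` from the recognizer callbacks; this is only an illustration (not my actual code) and reuses the `speechRecognizer` / `speechRecognizerIntent` objects created in `startSpeechToText()` further down. It doesn't fix the MediaRecorder part:

    // Illustration only: keep transcribing across utterances by calling
    // startListening again whenever a result or error comes back.
    override fun onResults(bundle: Bundle) {
        bundle.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
            ?.firstOrNull()
            ?.let { Log.i("STT", "you said: $it") }
        speechRecognizer.startListening(speechRecognizerIntent) // listen again
    }

    override fun onError(i: Int) {
        Log.w("STT", "error $i, restarting listener")
        speechRecognizer.startListening(speechRecognizerIntent)
    }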

I have basically tried everything under the sun with zero progress. Does anyone have any new ideas?

    private fun doWhileRecording(state: UiState) {
        when (state) {
            UiState.IDLE -> {
            }
            UiState.RECORDING -> {
                // stop any ongoing TTS playback before starting to listen
                tts!!.stop()
                startSpeechToText()
            }
            UiState.FINALIZED -> {
            }
        }
    }
    
    private fun startSpeechToText() {
        val speechRecognizer = SpeechRecognizer.createSpeechRecognizer(this.requireContext())
        val speechRecognizerIntent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH)
        speechRecognizerIntent.putExtra(
            RecognizerIntent.EXTRA_LANGUAGE_MODEL,
            RecognizerIntent.LANGUAGE_MODEL_FREE_FORM
        )
        speechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault())

        speechRecognizer.setRecognitionListener(object : RecognitionListener {
            override fun onReadyForSpeech(bundle: Bundle?) {
                Log.i("STT", "ready")
            }

            override fun onBeginningOfSpeech() {
                Log.i("STT", "begin")
                // try to capture the same speech with a MediaRecorder
                startRecordingAudio()
            }

            override fun onRmsChanged(v: Float) {}
            override fun onBufferReceived(bytes: ByteArray?) {}
            override fun onEndOfSpeech() {}
            override fun onError(i: Int) {}

            override fun onResults(bundle: Bundle) {
                val result = bundle.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
                if (result != null) {
                    // result[0] is the most likely transcription
                    Log.i("STT", "you said: ${result[0]}")
                    stopAudioRecording()
                } else {
                    Log.i("STT", "results null boo")
                }
            }

            override fun onPartialResults(bundle: Bundle) {}
            override fun onEvent(i: Int, bundle: Bundle?) {}
        })
        // starts listening ...
        speechRecognizer.startListening(speechRecognizerIntent)
    }

    private fun startRecordingAudio() {
        // initialize the media recorder
        mRecorder = MediaRecorder()

        // use the VOICE_RECOGNITION mic source so the capture matches
        // what the speech recognizer hears
        mRecorder.setAudioSource(MediaRecorder.AudioSource.VOICE_RECOGNITION)
        mRecorder.setOutputFormat(MediaRecorder.OutputFormat.THREE_GPP)
        mRecorder.setAudioEncoder(MediaRecorder.AudioEncoder.AMR_NB)
        mRecorder.setOutputFile(audioFileRecorded.path)

        try {
            mRecorder.prepare()
        } catch (e: IOException) {
            Log.e("TAG", "prepare() failed")
        }

        // start the audio recording
        mRecorder.start()
    }
    private fun stopAudioRecording() {
        mRecorder.stop()

        // release the media recorder
        mRecorder.release()
    }