最近因为一个工作项目,我一直在研究语音转文本模型,特别是自定义语音转文本模型。通过混合搭配一些示例,我设法让一个测试应用程序能够与普通的 Bing 语音转文本 API 通信。但是当我尝试将它用于自定义语音实例时,只有 HTTPS URL 有效。当我使用任何可用的长格式 WebSocket URL 时,就会发生如下错误:An unhandled exception of type 'System.NullReferenceException' occurred in SpeechClient.dll
这有点问题,因为 HTTPS 端点仅支持 2 分钟的转录,而 WebSocket 端点最多支持 10 分钟。
我参考的是这个页面:https://docs.microsoft.com/en-us/azure/cognitive-services/custom-speech-service/customspeech-how-to-topics/cognitive-services-custom-speech-use-endpoint 。它说我应该在创建服务时使用 WebSocket URL,但这会导致上面的错误。
这是我用于尝试的测试床代码:
using System;
using Microsoft.CognitiveServices.SpeechRecognition;
using System.IO;
namespace ConsoleApp1
{
/// <summary>
/// Console test bed that streams an audio file to a speech recognition
/// endpoint through a <c>DataRecognitionClient</c> and writes the recognized
/// phrases to the console.
/// </summary>
class Program
{
    /// <summary>Client used to send audio; created in <see cref="Run"/>.</summary>
    DataRecognitionClient dataClient;

    /// <summary>Entry point: creates a <see cref="Program"/> and runs it.</summary>
    /// <param name="args">Command-line arguments, passed through to <see cref="Run"/>.</param>
    static void Main(string[] args)
    {
        Program p = new Program();
        p.Run(args);
    }

    /// <summary>
    /// Creates the recognition client, wires up the event handlers, sends the
    /// audio file and then blocks until the process is terminated.
    /// Any exception raised on this thread is written to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (currently unused).</param>
    void Run(string[] args)
    {
        try
        {
            // Works
            //this.dataClient = SpeechRecognitionServiceFactory.CreateDataClient(SpeechRecognitionMode.LongDictation, "en-US", "Key");
            // Works
            //this.dataClient = SpeechRecognitionServiceFactory.CreateDataClient(SpeechRecognitionMode.LongDictation, "en-US",
            //                                                                    "Key", "Key",
            //                                                                    "https://Id.api.cris.ai/ws/cris/speech/recognize/continuous");
            // Doesn't work
            this.dataClient = SpeechRecognitionServiceFactory.CreateDataClient(
                SpeechRecognitionMode.LongDictation,
                "en-US",
                "Key",
                "Key",
                "wss://Id.api.cris.ai/ws/cris/speech/recognize/continuous");
            this.dataClient.AuthenticationUri = "https://westus.api.cognitive.microsoft.com/sts/v1.0/issueToken";

            this.dataClient.OnResponseReceived += this.ResponseHandler;
            this.dataClient.OnConversationError += this.ErrorHandler;
            this.dataClient.OnPartialResponseReceived += this.PartialHandler;

            Console.WriteLine("Starting Transcription");
            this.SendAudioHelper("Audio file path");

            // Results arrive on event callbacks, so keep this thread parked.
            // The handle is never signaled; ResponseHandler ends the process
            // with Environment.Exit once dictation is over.
            (new System.Threading.ManualResetEvent(false)).WaitOne();
        }
        catch (Exception e)
        {
            Console.WriteLine(e);
        }
        finally
        {
            // Only reached when setup or sending failed (the wait above never
            // returns): release the client instead of leaking it.
            if (this.dataClient != null)
            {
                this.dataClient.Dispose();
                this.dataClient = null;
            }
        }
    }

    /// <summary>
    /// Reads the given file in 1 KB chunks and forwards each chunk to the
    /// recognition client, then signals the end of the audio.
    /// </summary>
    /// <param name="wavFileName">Path of the audio file to send.</param>
    private void SendAudioHelper(string wavFileName)
    {
        using (FileStream fileStream = new FileStream(wavFileName, FileMode.Open, FileAccess.Read))
        {
            // Note for wave files, we can just send data from the file right to the server.
            // In the case you are not an audio file in wave format, and instead you have just
            // raw data (for example audio coming over bluetooth), then before sending up any
            // audio data, you must first send up an SpeechAudioFormat descriptor to describe
            // the layout and format of your raw audio data via DataRecognitionClient's sendAudioFormat() method.
            byte[] buffer = new byte[1024];
            try
            {
                int bytesRead;

                // Stream.Read returns 0 at end of file; stop there so that no
                // zero-length chunk is pushed to the service.
                while ((bytesRead = fileStream.Read(buffer, 0, buffer.Length)) > 0)
                {
                    this.dataClient.SendAudio(buffer, bytesRead);
                }
            }
            finally
            {
                // We are done sending audio. Final recognition results will arrive in OnResponseReceived event call.
                this.dataClient.EndAudio();
            }
        }
    }

    /// <summary>Writes the text of a conversation error to the console.</summary>
    void ErrorHandler(object sender, SpeechErrorEventArgs e)
    {
        Console.WriteLine(e.SpeechErrorText);
    }

    /// <summary>
    /// Handles a final recognition response. When the status reports the end
    /// of dictation the program waits for a key press and exits; otherwise the
    /// display text of every result is written on one console line.
    /// </summary>
    void ResponseHandler(object sender, SpeechResponseEventArgs e)
    {
        var phrase = e.PhraseResponse;
        if (phrase == null)
        {
            // Nothing to report; this runs on an SDK callback, so avoid
            // throwing a NullReferenceException outside Run's try/catch.
            return;
        }

        bool dictationFinished =
            phrase.RecognitionStatus == RecognitionStatus.EndOfDictation ||
            phrase.RecognitionStatus == RecognitionStatus.DictationEndSilenceTimeout;
        if (dictationFinished)
        {
            Console.WriteLine("Trnascription Over");
            Console.ReadKey();
            Environment.Exit(0);
        }

        var results = phrase.Results;
        if (results != null)
        {
            foreach (var result in results)
            {
                Console.Write(result.DisplayText);
            }
        }

        Console.WriteLine();
    }

    /// <summary>Partial (intermediate) results are intentionally ignored.</summary>
    void PartialHandler(object sender, PartialSpeechResponseEventArgs e)
    {
    }
}
}
提前感谢您的帮助。