Microsoft Speech 系统有一个很好的示例代码,但是在添加环回以记录它正在播放的内容而不是通过麦克风播放的内容时我遇到了问题。例如,在未在扬声器上播放视频时给出视频的文本描述。似乎这是执行此操作的库,但我收到类型错误,将其推送到识别器的音频流:
using System;
using System.Speech.Recognition;
using NAudio.Wave;
using NAudio.CoreAudioApi.Interfaces;
using NAudio.CoreAudioApi;
using System.IO;
using System.Speech.AudioFormat;
namespace SpeechRecognitionApp
{
class Program
{
static void Main(string[] args)
{
// Create an in-process speech recognizer for the en-US locale.
using (
SpeechRecognitionEngine recognizer =
new SpeechRecognitionEngine(
new System.Globalization.CultureInfo("en-US")))
{
// Create and load a dictation grammar.
recognizer.LoadGrammar(new DictationGrammar());
// Add a handler for the speech recognized event.
recognizer.SpeechRecognized +=
new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);
// Configure input to the speech recognizer.
//recognizer.SetInputToDefaultAudioDevice();
WasapiLoopbackCapture capture = new WasapiLoopbackCapture();
Stream captureStream = new System.IO.MemoryStream();
capture.DataAvailable += (s, a) =>
{
captureStream.Write(a.Buffer, 0, a.BytesRecorded);
captureStream.Flush();
};
capture.StartRecording();
Console.WriteLine(capture.WaveFormat.AverageBytesPerSecond);
Console.WriteLine(capture.WaveFormat.BitsPerSample);
recognizer.SetInputToAudioStream(captureStream, new SpeechAudioFormatInfo(
capture.WaveFormat.AverageBytesPerSecond, AudioBitsPerSample.Sixteen, AudioChannel.Stereo));
// Start asynchronous, continuous speech recognition.
recognizer.RecognizeAsync(RecognizeMode.Multiple);
// Keep the console window open.
while (true)
{
Console.ReadLine();
}
}
}
// Handle the SpeechRecognized event.
static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
Console.WriteLine("Recognized text: " + e.Result.Text);
}
}
}
更新正如您在修改后的代码中看到的,至少现在正在编译,但不能识别任何内部或外部语音。实际上它输出:
384000
32
因此,由于 AudioBitsPerSample 上没有“三十二”,也许我什至不能使用 NAudio 类来获取系统音频?
更新这似乎有点工作,基于另一个答案,但它并没有太多,我认为它可能会发送缓慢或快速的音频?
using System;
using System.Speech.Recognition;
using NAudio.Wave;
using NAudio.CoreAudioApi.Interfaces;
using NAudio.CoreAudioApi;
using System.IO;
using System.Speech.AudioFormat;
namespace SpeechRecognitionApp
{
class FakeStreamer : Stream
{
public bool bExit = false;
Stream stream;
Stream client;
public FakeStreamer(Stream client)
{
this.client = client;
this.stream = client;
}
public override bool CanRead
{
get { return stream.CanRead; }
}
public override bool CanSeek
{
get { return false; }
}
public override bool CanWrite
{
get { return stream.CanWrite; }
}
public override long Length
{
get { return -1L; }
}
public override long Position
{
get { return 0L; }
set { }
}
public override long Seek(long offset, SeekOrigin origin)
{
return 0L;
}
public override void SetLength(long value)
{
stream.SetLength(value);
}
public override int Read(byte[] buffer, int offset, int count)
{
int len = 0, c = count;
while (c > 0 && !bExit)
{
try
{
len = stream.Read(buffer, offset, c);
}
catch (Exception e)
{
Console.WriteLine("ouch");
}
/*if (!client.Connected || len == 0)
{
//Exit read loop
return 0;
}*/
offset += len;
c -= len;
}
return count;
}
public override void Write(byte[] buffer, int offset, int count)
{
stream.Write(buffer, offset, count);
}
public override void Close()
{
stream.Close();
base.Close();
}
public override void Flush()
{
stream.Flush();
}
}
class Program
{
static void Main(string[] args)
{
// Create an in-process speech recognizer for the en-US locale.
using (
SpeechRecognitionEngine recognizer =
new SpeechRecognitionEngine(
new System.Globalization.CultureInfo("en-US")))
{
// Create and load a dictation grammar.
recognizer.LoadGrammar(new DictationGrammar());
// Add a handler for the speech recognized event.
recognizer.SpeechRecognized +=
new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);
// Configure input to the speech recognizer.
//recognizer.SetInputToDefaultAudioDevice();
WasapiLoopbackCapture capture = new WasapiLoopbackCapture();
Stream captureStream = new System.IO.MemoryStream();
Stream buffStream = new FakeStreamer(captureStream);
capture.DataAvailable += (s, a) =>
{
captureStream.Write(a.Buffer, 0, a.BytesRecorded);
};
capture.StartRecording();
Console.WriteLine(capture.WaveFormat.AverageBytesPerSecond);
Console.WriteLine(capture.WaveFormat.BitsPerSample);
//recognizer.SetInputToDefaultAudioDevice();
recognizer.SetInputToAudioStream(buffStream, new SpeechAudioFormatInfo(
capture.WaveFormat.AverageBytesPerSecond/4, AudioBitsPerSample.Eight, AudioChannel.Stereo));
// Start asynchronous, continuous speech recognition.
recognizer.RecognizeAsync(RecognizeMode.Multiple);
// Keep the console window open.
while (true)
{
Console.ReadLine();
}
}
}
// Handle the SpeechRecognized event.
static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
Console.WriteLine("Recognized text: " + e.Result.Text);
}
}
}
更新 3 - 尝试将音频流重新编码为语音识别将执行的操作:不幸的是,它无法捕获重新编码的音频,如您所见......
using System;
using System.Speech.Recognition;
using NAudio.Wave;
using NAudio.CoreAudioApi.Interfaces;
using NAudio.CoreAudioApi;
using System.IO;
using System.Speech.AudioFormat;
namespace SpeechRecognitionApp
{
class FakeStreamer : Stream
{
public bool bExit = false;
Stream stream;
Stream client;
public FakeStreamer(Stream client)
{
this.client = client;
this.stream = client;
}
public override bool CanRead
{
get { return stream.CanRead; }
}
public override bool CanSeek
{
get { return false; }
}
public override bool CanWrite
{
get { return stream.CanWrite; }
}
public override long Length
{
get { return -1L; }
}
public override long Position
{
get { return 0L; }
set { }
}
public override long Seek(long offset, SeekOrigin origin)
{
return 0L;
}
public override void SetLength(long value)
{
stream.SetLength(value);
}
public override int Read(byte[] buffer, int offset, int count)
{
int len = 0, c = count;
while (c > 0 && !bExit)
{
try
{
len = stream.Read(buffer, offset, c);
}
catch (Exception e)
{
Console.WriteLine("ouch");
}
/*if (!client.Connected || len == 0)
{
//Exit read loop
return 0;
}*/
offset += len;
c -= len;
}
return count;
}
public override void Write(byte[] buffer, int offset, int count)
{
stream.Write(buffer, offset, count);
}
public override void Close()
{
stream.Close();
base.Close();
}
public override void Flush()
{
stream.Flush();
}
}
class Program
{
static void Main(string[] args)
{
// Create an in-process speech recognizer for the en-US locale.
using (
SpeechRecognitionEngine recognizer =
new SpeechRecognitionEngine(
new System.Globalization.CultureInfo("en-US")))
{
// Create and load a dictation grammar.
recognizer.LoadGrammar(new DictationGrammar());
// Add a handler for the speech recognized event.
recognizer.SpeechRecognized +=
new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);
// Configure input to the speech recognizer.
//recognizer.SetInputToDefaultAudioDevice();
WasapiLoopbackCapture capture = new WasapiLoopbackCapture();
Stream captureStream = new System.IO.MemoryStream();
//Stream buffStream = new FakeStreamer(captureStream);
capture.DataAvailable += (s, a) =>
{
captureStream.Write(a.Buffer, 0, a.BytesRecorded);
};
Console.WriteLine(capture.WaveFormat.AverageBytesPerSecond);
Console.WriteLine(capture.WaveFormat.BitsPerSample);
var newFormat = new WaveFormat(8000, 16, 1);
//using (var conversionStream = new WaveFormatConversionStream(newFormat, capture)
var resampler = new MediaFoundationResampler(new NAudio.Wave.RawSourceWaveStream(captureStream,capture.WaveFormat), newFormat);
Stream captureConvertStream = new System.IO.MemoryStream();
resampler.ResamplerQuality = 60;
//WaveFileWriter.WriteWavFileToStream(captureConvertStream, resampler);
//recognizer.SetInputToDefaultAudioDevice();
Stream buffStream = new FakeStreamer(captureConvertStream);
recognizer.SetInputToAudioStream(buffStream, new SpeechAudioFormatInfo(
8000, AudioBitsPerSample.Sixteen, AudioChannel.Mono));
// Start asynchronous, continuous speech recognition.
recognizer.RecognizeAsync(RecognizeMode.Multiple);
capture.StartRecording();
var arr = new byte[128];
while (resampler.Read(arr, 0, arr.Length) > 0)
{
captureConvertStream.Write(arr, 0, arr.Length);
Console.WriteLine("Never getting here");
}
// Keep the console window open.
while (true)
{
Console.ReadLine();
}
}
}
// Handle the SpeechRecognized event.
static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
Console.WriteLine("Recognized text: " + e.Result.Text);
}
}
}