使用 Microsoft 的语音 API,我想让它识别一个特定的短语,然后返回给我在音频中检测到该短语的时间。
例如:
短语:“我喜欢苹果”
音频:5分钟的音频
假设在 3 分 30 秒检测到该短语,我想将该信息与该短语存在于音频中的事实一起存储。这可能吗?
使用 Microsoft 的语音 API,我想让它识别一个特定的短语,然后返回给我在音频中检测到该短语的时间。
例如:
短语:“我喜欢苹果”
音频:5分钟的音频
假设在 3 分 30 秒检测到该短语,我想将该信息与该短语存在于音频中的事实一起存储。这可能吗?
我找到了一个使用“AudioPosition”的示例,似乎正是我要找的:
using System;
using System.Collections.Generic;
using Microsoft.Speech.Recognition;
namespace SampleRecognition
{
    /// <summary>
    /// Console sample that listens on the default audio device for the phrase
    /// "I want to fly from &lt;city&gt; to &lt;city&gt;" and prints details of the
    /// recognition result, including where in the input audio the phrase was found.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the flight-booking grammar, starts one asynchronous recognition
        /// operation and keeps the console open until the user presses Enter.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Initialize a SpeechRecognitionEngine object.
            using (SpeechRecognitionEngine recognizer =
                new SpeechRecognitionEngine(new System.Globalization.CultureInfo("en-US")))
            {
                // Create SemanticResultValue objects that contain cities and airport codes.
                SemanticResultValue chicago = new SemanticResultValue("Chicago", "ORD");
                SemanticResultValue boston = new SemanticResultValue("Boston", "BOS");
                SemanticResultValue miami = new SemanticResultValue("Miami", "MIA");
                SemanticResultValue dallas = new SemanticResultValue("Dallas", "DFW");

                // Create a Choices object and add the SemanticResultValue objects.
                Choices cities = new Choices();
                cities.Add(new Choices(new GrammarBuilder[] { chicago, boston, miami, dallas }));

                // Build the phrase and add SemanticResultKeys.
                GrammarBuilder chooseCities = new GrammarBuilder();
                chooseCities.Append("I want to fly from");
                chooseCities.Append(new SemanticResultKey("origin", cities));
                chooseCities.Append("to");
                chooseCities.Append(new SemanticResultKey("destination", cities));

                // Build a Grammar object from the GrammarBuilder.
                Grammar bookFlight = new Grammar(chooseCities);
                bookFlight.Name = "Book Flight";

                // Add a handler for the SpeechRecognized event.
                recognizer.SpeechRecognized += recognizer_SpeechRecognized;

                // Load the grammar synchronously so that it is guaranteed to be
                // in place before recognition is started below.
                recognizer.LoadGrammar(bookFlight);

                // Set the input to the recognizer.
                recognizer.SetInputToDefaultAudioDevice();

                // Start recognition.
                recognizer.RecognizeAsync();
                Console.WriteLine("Starting asynchronous recognition...");

                // Keep the console window open.
                Console.ReadLine();
            }
        }

        /// <summary>
        /// Handles the SpeechRecognized event by printing a summary of the result:
        /// text, confidence, grammar, semantic values, words, audio position,
        /// alternates and normalization replacements.
        /// </summary>
        /// <param name="sender">The object that raised the event.</param>
        /// <param name="e">Event data carrying the recognition result.</param>
        static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            Console.WriteLine("Recognition result summary:");
            Console.WriteLine(
                " Recognized phrase: {0}\n" +
                " Confidence score {1}\n" +
                " Grammar used: {2}\n",
                e.Result.Text, e.Result.Confidence, e.Result.Grammar.Name);

            WriteSemantics(e.Result);
            WriteWords(e.Result);
            WriteAudio(e.Result);
            WriteAlternates(e.Result);
            WriteReplacements(e.Result);
        }

        // Prints each semantic key together with the value attached to it.
        private static void WriteSemantics(RecognitionResult result)
        {
            Console.WriteLine(" Semantic results:");
            foreach (KeyValuePair<String, SemanticValue> child in result.Semantics)
            {
                Console.WriteLine(" The {0} city is {1}",
                    child.Key, child.Value.Value ?? "null");
            }
            Console.WriteLine();
        }

        // Prints lexical form, pronunciation and display attributes of every word.
        private static void WriteWords(RecognitionResult result)
        {
            Console.WriteLine(" Word summary: ");
            foreach (RecognizedWordUnit word in result.Words)
            {
                Console.WriteLine(
                    " Lexical form ({1})" +
                    " Pronunciation ({0})" +
                    " Display form ({2})",
                    word.Pronunciation, word.LexicalForm, word.DisplayAttributes);
            }
        }

        // Prints where in the input audio the phrase was found and how long it lasted.
        private static void WriteAudio(RecognitionResult result)
        {
            // NOTE(review): Audio is expected to be null when the result did not come
            // from real audio (e.g. emulated input) - confirm against the API docs.
            RecognizedAudio audio = result.Audio;
            if (audio == null)
            {
                Console.WriteLine(" Input audio summary: no audio is associated with this result\n");
                return;
            }

            // AudioPosition and Duration are TimeSpan values; convert them explicitly
            // so the numbers printed really are milliseconds, as the labels claim.
            Console.WriteLine(" Input audio summary:\n" +
                " Candidate Phrase at: {0} mSec\n" +
                " Phrase Length: {1} mSec\n" +
                " Input State Time: {2}\n" +
                " Input Format: {3}\n",
                audio.AudioPosition.TotalMilliseconds,
                audio.Duration.TotalMilliseconds,
                audio.StartTime,
                audio.Format.EncodingFormat);
        }

        // Prints every alternate phrase together with its confidence score.
        private static void WriteAlternates(RecognitionResult result)
        {
            Console.WriteLine(" Alternate phrase collection:");
            foreach (RecognizedPhrase phrase in result.Alternates)
            {
                Console.WriteLine(" Phrase: " + phrase.Text);
                Console.WriteLine(" Confidence score: " + phrase.Confidence);
            }
        }

        // Prints the text that was replaced during normalization, if any.
        private static void WriteReplacements(RecognitionResult result)
        {
            if (result.ReplacementWordUnits.Count == 0)
            {
                Console.WriteLine();
                Console.WriteLine("No text was replaced");
                return;
            }

            Console.WriteLine(" Replacement text:\n");
            foreach (ReplacementText rep in result.ReplacementWordUnits)
            {
                Console.WriteLine(" At index {0} for {1} words. Text: {2}\n",
                    rep.FirstWordIndex, rep.CountOfWords, rep.Text);
            }
        }
    }
}