0

使用 Microsoft 的语音 API,我想让它识别一个特定的短语,然后返回给我在音频中检测到该短语的时间。

例如:

短语:“我喜欢苹果”

音频:5分钟的音频

假设在 3 分 30 秒检测到该短语,我想将该信息与该短语存在于音频中的事实一起存储。这可能吗?

4

1 回答 1

0

找到了一个使用“AudioPosition”的例子,似乎是我正在寻找的:

来源:http://msdn.microsoft.com/en-us/library/microsoft.speech.recognition.speechrecognizedeventargs(v=office.14).aspx

using System;
using System.Collections.Generic;
using Microsoft.Speech.Recognition;

namespace SampleRecognition
{
  /// <summary>
  /// Console sample that recognizes "I want to fly from X to Y" phrases from the
  /// default audio device and prints a detailed summary of the recognition result,
  /// including where in the input audio the phrase was found.
  /// </summary>
  class Program
  {
    /// <summary>
    /// Builds the flight-booking grammar, wires up the recognition handler and
    /// starts one asynchronous recognition operation.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
      // Initialize a SpeechRecognitionEngine object.
      using (SpeechRecognitionEngine recognizer =
        new SpeechRecognitionEngine(new System.Globalization.CultureInfo("en-US")))
      {

        // Create SemanticResultValue objects that contain cities and airport codes.
        SemanticResultValue chicago = new SemanticResultValue("Chicago", "ORD");
        SemanticResultValue boston = new SemanticResultValue("Boston", "BOS");
        SemanticResultValue miami = new SemanticResultValue("Miami", "MIA");
        SemanticResultValue dallas = new SemanticResultValue("Dallas", "DFW");

        // Create a Choices object and add the SemanticResultValue objects.
        Choices cities = new Choices();
        cities.Add(new Choices(new GrammarBuilder[] { chicago, boston, miami, dallas }));

        // Build the phrase and add SemanticResultKeys.
        GrammarBuilder chooseCities = new GrammarBuilder();
        chooseCities.Append("I want to fly from");
        chooseCities.Append(new SemanticResultKey("origin", cities));
        chooseCities.Append("to");
        chooseCities.Append(new SemanticResultKey("destination", cities));

        // Build a Grammar object from the GrammarBuilder.
        Grammar bookFlight = new Grammar(chooseCities);
        bookFlight.Name = "Book Flight";

        // Add a handler for the SpeechRecognized event.
        recognizer.SpeechRecognized +=
          new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);

        // Load the grammar synchronously: nothing here listens for
        // LoadGrammarCompleted, so an asynchronous load failure would go
        // unnoticed. A synchronous load reports problems as an exception
        // before recognition is started.
        recognizer.LoadGrammar(bookFlight);

        // Set the input to the recognizer.
        recognizer.SetInputToDefaultAudioDevice();

        // Start recognition.
        recognizer.RecognizeAsync();
        Console.WriteLine("Starting asynchronous recognition...");

        // Keep the console window open.
        Console.ReadLine();
      }
    }

    /// <summary>
    /// Handles the SpeechRecognized event by writing the recognized text, its
    /// semantics, per-word details, audio timing, alternates and any replaced
    /// text to the console.
    /// </summary>
    /// <param name="sender">The object that raised the event (unused).</param>
    /// <param name="e">Event data carrying the recognition result.</param>
    static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
      Console.WriteLine("Recognition result summary:");
      Console.WriteLine(
        "  Recognized phrase: {0}\n" +
        "  Confidence score {1}\n" +
        "  Grammar used: {2}\n",
        e.Result.Text, e.Result.Confidence, e.Result.Grammar.Name);

      // Display the semantic values in the recognition result.
      Console.WriteLine("  Semantic results:");
      foreach (KeyValuePair<String, SemanticValue> child in e.Result.Semantics)
      {
        Console.WriteLine("    The {0} city is {1}",
          child.Key, child.Value.Value ?? "null");
      }
      Console.WriteLine();

      // Display information about the words in the recognition result.
      Console.WriteLine("  Word summary: ");
      foreach (RecognizedWordUnit word in e.Result.Words)
      {
        Console.WriteLine(
          "    Lexical form ({1})" +
          " Pronunciation ({0})" +
          " Display form ({2})",
          word.Pronunciation, word.LexicalForm, word.DisplayAttributes);
      }

      // Display information about the audio in the recognition result.
      // Audio is documented as null for emulated recognition, so guard
      // before dereferencing it.
      RecognizedAudio audio = e.Result.Audio;
      if (audio != null)
      {
        // AudioPosition and Duration are TimeSpan values; convert them to
        // milliseconds so the numbers match the "mSec" labels.
        Console.WriteLine("  Input audio summary:\n" +
              "    Candidate Phrase at:       {0} mSec\n" +
              "    Phrase Length:             {1} mSec\n" +
              "    Input State Time:          {2}\n" +
              "    Input Format:              {3}\n",
              audio.AudioPosition.TotalMilliseconds,
              audio.Duration.TotalMilliseconds,
              audio.StartTime,
              audio.Format.EncodingFormat);
      }
      else
      {
        Console.WriteLine("  Input audio summary: no audio available\n");
      }

      // Display information about the alternate recognitions in the recognition result.
      Console.WriteLine("  Alternate phrase collection:");
      foreach (RecognizedPhrase phrase in e.Result.Alternates)
      {
        Console.WriteLine("    Phrase: " + phrase.Text);
        Console.WriteLine("    Confidence score: " + phrase.Confidence);
      }

      // Display information about text that was replaced during normalization.
      if (e.Result.ReplacementWordUnits.Count != 0)
      {
        Console.WriteLine("  Replacement text:\n");
        foreach (ReplacementText rep in e.Result.ReplacementWordUnits)
        {
          Console.WriteLine("      At index {0} for {1} words. Text: {2}\n",
            rep.FirstWordIndex, rep.CountOfWords, rep.Text);
        }
      }
      else
      {
        Console.WriteLine();
        Console.WriteLine("No text was replaced");
      }
    }
  }
}
于 2013-05-13T21:07:33.190 回答