6

我的要求与这个问题类似,因为这个问题现在已经 3 年了它发短信。

对于语音识别,我决定使用 sphinx4,我正在尝试增强 sphinx 提供的转录器演示。它很好,但这仅适用于特定的语法(用 .gram 和 .gxml 文件编写)。

编辑 为了能够与英语一起使用?我正在尝试使用 VoxForge_en_0.4 对其进行配置。我的 config.XML 文件如下所示:-

<?xml version="1.0" encoding="UTF-8"?>

<!--
   Sphinx-4 Configuration file
-->

<!-- ******************************************************** -->
<!--  biship  configuration file                              -->
<!-- ******************************************************** -->

<config>        

    <!-- ******************************************************** -->
    <!-- frequently tuned properties                              -->
    <!-- ******************************************************** --> 
    <property name="absoluteBeamWidth"  value="500"/>
    <property name="relativeBeamWidth"  value="1E-80"/>
    <property name="absoluteWordBeamWidth" value="20"/>
    <property name="relativeWordBeamWidth" value="1E-60"/>
    <property name="wordInsertionProbability" value="1E-16"/>
    <property name="languageWeight" value="7.0"/>
    <property name="silenceInsertionProbability" value=".1"/>
    <property name="frontend" value="epFrontEnd"/>
    <property name="recognizer" value="recognizer"/>
    <property name="showCreations" value="false"/>


    <!-- ******************************************************** -->
    <!-- word recognizer configuration                            -->
    <!-- ******************************************************** --> 

    <component name="recognizer" 
                          type="edu.cmu.sphinx.recognizer.Recognizer">
        <property name="decoder" value="decoder"/>
        <propertylist name="monitors">
            <item>accuracyTracker </item>
            <item>speedTracker </item>
            <item>memoryTracker </item>
            <item>recognizerMonitor </item>
        </propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- The Decoder   configuration                              -->
    <!-- ******************************************************** --> 

    <component name="decoder" type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="wordPruningSearchManager"/>
        <property name="featureBlockSize" value="50"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Search Manager                                       -->
    <!-- ******************************************************** --> 

    <component name="wordPruningSearchManager" 
    type="edu.cmu.sphinx.decoder.search.WordPruningBreadthFirstSearchManager">
        <property name="logMath" value="logMath"/>
        <property name="linguist" value="lexTreeLinguist"/>
        <property name="pruner" value="trivialPruner"/>
        <property name="scorer" value="threadedScorer"/>
        <property name="activeListManager" value="activeListManager"/>
        <property name="growSkipInterval" value="0"/>
        <property name="checkStateOrder" value="false"/>
        <property name="buildWordLattice" value="true"/>
        <property name="acousticLookaheadFrames" value="1.7"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>


    <!-- ******************************************************** -->
    <!-- The Active Lists                                         -->
    <!-- ******************************************************** --> 

    <component name="activeListManager" 
             type="edu.cmu.sphinx.decoder.search.SimpleActiveListManager">
        <propertylist name="activeListFactories">
            <item>standardActiveListFactory</item>
            <item>wordActiveListFactory</item>
            <item>wordActiveListFactory</item>
            <item>standardActiveListFactory</item>
            <item>standardActiveListFactory</item>
            <item>standardActiveListFactory</item>
        </propertylist>
    </component>

    <component name="standardActiveListFactory" 
             type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <component name="wordActiveListFactory" 
             type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteWordBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeWordBeamWidth}"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Pruner                                               -->
    <!-- ******************************************************** --> 
    <component name="trivialPruner" 
                type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

    <!-- ******************************************************** -->
    <!-- TheScorer                                                -->
    <!-- ******************************************************** --> 
    <component name="threadedScorer" 
                type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="${frontend}"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The linguist  configuration                              -->
    <!-- ******************************************************** -->

    <component name="lexTreeLinguist" 
                type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
        <property name="logMath" value="logMath"/>
        <property name="acousticModel" value="wsj"/>
        <property name="languageModel" value="trigramModel"/>
        <property name="dictionary" value="dictionary"/>
        <property name="addFillerWords" value="false"/>
        <property name="fillerInsertionProbability" value="1E-10"/>
        <property name="generateUnitStates" value="false"/>
        <property name="wantUnigramSmear" value="true"/>
        <property name="unigramSmearWeight" value="1"/>
        <property name="wordInsertionProbability" 
                value="${wordInsertionProbability}"/>
        <property name="silenceInsertionProbability" 
                value="${silenceInsertionProbability}"/>
        <property name="languageWeight" value="${languageWeight}"/>
        <property name="unitManager" value="unitManager"/>
    </component>    


    <!-- ******************************************************** -->
    <!-- The Dictionary configuration                            -->
    <!-- ******************************************************** -->
    <component name="dictionary" 
        type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
        <property name="dictionaryPath"
                  value="file:src/voxforge-en-0.4/etc/cmudict.0.7a"/>
        <property name="fillerPath" 
              value="file:src/voxforge-en-0.4/model_parameters/voxforge_en_sphinx.cd_cont_5000/noisedict"/>
        <property name="addSilEndingPronunciation" value="false"/>
        <property name="wordReplacement" value="&lt;sil&gt;"/>
        <property name="unitManager" value="unitManager"/>
    </component>


    <!-- ******************************************************** -->
    <!-- The Language Model configuration                         -->
    <!-- ******************************************************** -->
    <component name="trigramModel" 
          type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
        <property name="unigramWeight" value=".5"/>
        <property name="maxDepth" value="3"/>
        <property name="logMath" value="logMath"/>
        <property name="dictionary" value="dictionary"/>
        <property name="location" value="file:src/voxforge-en-0.4/wsj5k.DMP"/>
<!--      <property name="location"   value="file:src/voxforge-Language/language_model.arpaformat.DMP"/>-->
    </component>


    <!-- ******************************************************** -->
    <!-- The acoustic model configuration                         -->
    <!-- ******************************************************** -->
    <component name="wsj"
               type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
        <property name="loader" value="wsjLoader"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <component name="wsjLoader" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
        <property name="logMath" value="logMath"/>
        <property name="unitManager" value="unitManager"/>
<!--        <property name="location" value="file:src/hub4opensrc.cd_continuous_8gau"/>-->
        <property name="location" value="file:src/voxforge-en-0.4/model_parameters/voxforge_en_sphinx.cd_cont_5000" />
        <property name="dataLocation" value=""/>
    </component>

    <!-- ******************************************************** -->
    <!-- The unit manager configuration                           -->
    <!-- ******************************************************** -->

    <component name="unitManager" 
        type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>


    <!-- ******************************************************** -->
    <!-- The frontend configuration                               -->
    <!-- ******************************************************** -->

    <component name="epFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <item>audioFileDataSource </item>
            <item>dataBlocker </item>
            <item>speechClassifier </item>
            <item>speechMarker </item>
            <item>nonSpeechDataFilter </item>
            <item>preemphasizer </item>
            <item>windower </item>
            <item>fft </item>
            <item>melFilterBank </item>
            <item>dct </item>
            <item>liveCMN </item>
            <item>featureExtraction </item>
        </propertylist>
    </component>

    <component name="audioFileDataSource" type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>


    <component name="microphone" 
                type="edu.cmu.sphinx.frontend.util.Microphone">
        <property name="closeBetweenUtterances" value="false"/>
    </component>

    <component name="dataBlocker" type="edu.cmu.sphinx.frontend.DataBlocker"/>

    <component name="speechClassifier"
                type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier">
        <property name="threshold" value="13"/>
    </component>

    <component name="nonSpeechDataFilter" 
                type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>

    <component name="speechMarker" 
                type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker">
        <property name="speechTrailer" value="50"/>
    </component>

    <component name="preemphasizer"
        type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

    <component name="windower" 
    type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>

    <component name="fft" 
        type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>

    <component name="melFilterBank" 
        type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>

    <component name="dct" 
            type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

    <component name="liveCMN" 
                type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>

    <component name="featureExtraction" 
        type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>

    <!-- ******************************************************* -->
    <!--  monitors                                               -->
    <!-- ******************************************************* -->

    <component name="accuracyTracker" 
                type="edu.cmu.sphinx.instrumentation.BestPathAccuracyTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="showRawResults" value="false"/>
        <property name="showAlignedResults" value="false"/>
    </component>

    <component name="memoryTracker" 
                type="edu.cmu.sphinx.instrumentation.MemoryTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="showDetails" value="false"/>
        <property name="showSummary" value="false"/>
    </component>

    <component name="speedTracker" 
                type="edu.cmu.sphinx.instrumentation.SpeedTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="frontend" value="${frontend}"/>
        <property name="showDetails" value="false"/>
    </component>

    <component name="recognizerMonitor" 
                type="edu.cmu.sphinx.instrumentation.RecognizerMonitor">
        <property name="recognizer" value="${recognizer}"/>
        <propertylist name="allocatedMonitors">
            <item>configMonitor </item>
        </propertylist>
    </component>

    <component name="configMonitor" 
                type="edu.cmu.sphinx.instrumentation.ConfigMonitor">
        <property name="showConfig" value="false"/>
    </component>


    <!-- ******************************************************* -->
    <!--  Miscellaneous components                               -->
    <!-- ******************************************************* -->

    <component name="logMath" type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase" value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>
</config>

他们对我的配置有什么问题吗?请帮忙....

4

1 回答 1

2

对于语音识别,我决定使用 sphinx4,我正在尝试运行 sphinx 提供的演示 Transcriber.jar。当我提供其他文件作为输入时,这不起作用。

如果您更好地描述您的问题,您可以在这个问题上获得更明确的帮助

我该如何进行?

阅读教程

http://cmusphinx.sourceforge.net/wiki/tutorial

我在哪里可以获得可以与 Sphinx4 一起使用的美国英语语言模型?

您可以从 CMUSphinx 网站和其他地方下载它们。您也可以自己构建它们。可能的位置之一是

http://www.keithv.com/software/csr/

任何博客/教程都会有所帮助,

看上面

但是本教程中使用的文件具有不同的扩展名,我被困在这个阶段我无法决定应该在配置中给出哪个路径。请帮助,

请参阅 sphinx4/tests/performance/voxforge_en/voxforge.config.xml 中的示例配置。它具有所有必需的路径

于 2012-01-04T22:51:12.087 回答