javascript - 具有较长文本的 Chrome 语音合成

Question

尝试在 Chrome 33 中使用语音合成 API 时遇到问题。它适用于较短的文本，但如果我尝试较长的文本，它只会停在中间。在它像这样停止一次之后，语音合成在 Chrome 中的任何地方都无法工作，直到重新启动浏览器。

function speak(text) {
    var msg = new SpeechSynthesisUtterance();
    var voices = speechSynthesis.getVoices();
    msg.voice = voices[10];
    msg.voiceURI = 'native';
    msg.volume = 1;
    msg.rate = 1;
    msg.pitch = 2;
    msg.text = text;
    msg.lang = 'en-US';

    speechSynthesis.speak(msg);
}

speak('Short text');
speak('Collaboratively administrate empowered markets via plug-and-play networks. Dynamically procrastinate B2C users after installed base benefits. Dramatically visualize customer directed convergence without revolutionary ROI. Efficiently unleash cross-media information without cross-media value. Quickly maximize timely deliverables for real-time schemas. Dramatically maintain clicks-and-mortar solutions without functional solutions.');
speak('Another short text');

它在第二个文本中间停止说话，之后我无法让任何其他页面说话。

是浏览器错误还是某种安全限制？

score 64 · Accepted Answer

我在使用 Google Chrome 语音合成时遇到这个问题已经有一段时间了。经过一番调查，我发现了以下内容：

只有当声音不是母语时才会发生断句，
剪切通常发生在200-300 个字符之间，
当它确实破裂时，您可以通过这样做来解冻它speechSynthesis.cancel();
' onend ' 事件有时决定不触发。一个古怪的解决方法是在说出它之前 console.log() 输出话语对象。此外，我发现将 speak 调用包装在 setTimeout 回调中有助于解决这些问题。

针对这些问题，我编写了一个克服字符限制的函数，将文本分成更小的话语，然后一个接一个地播放。显然，有时您会听到一些奇怪的声音，因为句子可能会被分成两个单独的话语，每个话语之间有一个小的时间延迟，但是代码会尝试在标点符号处分割这些点，以使声音的中断不那么明显。

更新

我已经在https://gist.github.com/woollsta/2d146f13878a301b36d7#file-chunkify-js公开了这个解决方法。非常感谢Brett Zamir的贡献。

功能：

var speechUtteranceChunker = function (utt, settings, callback) {
    settings = settings || {};
    var newUtt;
    var txt = (settings && settings.offset !== undefined ? utt.text.substring(settings.offset) : utt.text);
    if (utt.voice && utt.voice.voiceURI === 'native') { // Not part of the spec
        newUtt = utt;
        newUtt.text = txt;
        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
            }
            if (callback !== undefined) {
                callback();
            }
        });
    }
    else {
        var chunkLength = (settings && settings.chunkLength) || 160;
        var pattRegex = new RegExp('^[\\s\\S]{' + Math.floor(chunkLength / 2) + ',' + chunkLength + '}[.!?,]{1}|^[\\s\\S]{1,' + chunkLength + '}$|^[\\s\\S]{1,' + chunkLength + '} ');
        var chunkArr = txt.match(pattRegex);

        if (chunkArr[0] === undefined || chunkArr[0].length <= 2) {
            //call once all text has been spoken...
            if (callback !== undefined) {
                callback();
            }
            return;
        }
        var chunk = chunkArr[0];
        newUtt = new SpeechSynthesisUtterance(chunk);
        var x;
        for (x in utt) {
            if (utt.hasOwnProperty(x) && x !== 'text') {
                newUtt[x] = utt[x];
            }
        }
        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
                return;
            }
            settings.offset = settings.offset || 0;
            settings.offset += chunk.length - 1;
            speechUtteranceChunker(utt, settings, callback);
        });
    }

    if (settings.modifier) {
        settings.modifier(newUtt);
    }
    console.log(newUtt); //IMPORTANT!! Do not remove: Logging the object out fixes some onend firing issues.
    //placing the speak invocation inside a callback fixes ordering and onend issues.
    setTimeout(function () {
        speechSynthesis.speak(newUtt);
    }, 0);
};

如何使用它...

//create an utterance as you normally would...
var myLongText = "This is some long text, oh my goodness look how long I'm getting, wooooohooo!";

var utterance = new SpeechSynthesisUtterance(myLongText);

//modify it as you normally would
var voiceArr = speechSynthesis.getVoices();
utterance.voice = voiceArr[2];

//pass it into the chunking function to have it played out.
//you can set the max number of characters by changing the chunkLength property below.
//a callback function can also be added that will fire once the entire text has been spoken.
speechUtteranceChunker(utterance, {
    chunkLength: 120
}, function () {
    //some code to execute when done
    console.log('done');
});

希望人们觉得这很有用。

score 18 · Accepted Answer

我已经解决了这个问题，同时拥有一个调用 pause() 和 resume() 函数并再次调用定时器的定时器函数。在 onend 事件中，我清除计时器。

    var myTimeout;
    function myTimer() {
        window.speechSynthesis.pause();
        window.speechSynthesis.resume();
        myTimeout = setTimeout(myTimer, 10000);
    }
    ...
        window.speechSynthesis.cancel();
        myTimeout = setTimeout(myTimer, 10000);
        var toSpeak = "some text";
        var utt = new SpeechSynthesisUtterance(toSpeak);
        ...
        utt.onend =  function() { clearTimeout(myTimeout); }
        window.speechSynthesis.speak(utt);
    ...

这似乎运作良好。

score 11 · Accepted Answer

一个简单有效的解决方案是定期恢复。

function resumeInfinity() {
    window.speechSynthesis.resume();
    timeoutResumeInfinity = setTimeout(resumeInfinity, 1000);
}

您可以将其与 onend 和 onstart 事件相关联，因此您只会在必要时调用简历。就像是：

var utterance = new SpeechSynthesisUtterance();

utterance.onstart = function(event) {
    resumeInfinity();
};

utterance.onend = function(event) {
    clearTimeout(timeoutResumeInfinity);
};

我偶然发现了这个！

希望这有帮助！

score 8 · Accepted Answer

彼得的答案的问题是，当您设置语音合成队列时它不起作用。该脚本会将新块放在队列的末尾，因此是无序的。示例：https ://jsfiddle.net/1gzkja90/

<script type='text/javascript' src='http://code.jquery.com/jquery-2.1.0.js'></script>
<script type='text/javascript'>    
    u = new SpeechSynthesisUtterance();
    $(document).ready(function () {
        $('.t').each(function () {
            u = new SpeechSynthesisUtterance($(this).text());

            speechUtteranceChunker(u, {
                chunkLength: 120
            }, function () {
                console.log('end');
            });
        });
    });
     /**
     * Chunkify
     * Google Chrome Speech Synthesis Chunking Pattern
     * Fixes inconsistencies with speaking long texts in speechUtterance objects 
     * Licensed under the MIT License
     *
     * Peter Woolley and Brett Zamir
     */
    var speechUtteranceChunker = function (utt, settings, callback) {
        settings = settings || {};
        var newUtt;
        var txt = (settings && settings.offset !== undefined ? utt.text.substring(settings.offset) : utt.text);
        if (utt.voice && utt.voice.voiceURI === 'native') { // Not part of the spec
            newUtt = utt;
            newUtt.text = txt;
            newUtt.addEventListener('end', function () {
                if (speechUtteranceChunker.cancel) {
                    speechUtteranceChunker.cancel = false;
                }
                if (callback !== undefined) {
                    callback();
                }
            });
        }
        else {
            var chunkLength = (settings && settings.chunkLength) || 160;
            var pattRegex = new RegExp('^[\\s\\S]{' + Math.floor(chunkLength / 2) + ',' + chunkLength + '}[.!?,]{1}|^[\\s\\S]{1,' + chunkLength + '}$|^[\\s\\S]{1,' + chunkLength + '} ');
            var chunkArr = txt.match(pattRegex);

            if (chunkArr[0] === undefined || chunkArr[0].length <= 2) {
                //call once all text has been spoken...
                if (callback !== undefined) {
                    callback();
                }
                return;
            }
            var chunk = chunkArr[0];
            newUtt = new SpeechSynthesisUtterance(chunk);
            var x;
            for (x in utt) {
                if (utt.hasOwnProperty(x) && x !== 'text') {
                    newUtt[x] = utt[x];
                }
            }
            newUtt.addEventListener('end', function () {
                if (speechUtteranceChunker.cancel) {
                    speechUtteranceChunker.cancel = false;
                    return;
                }
                settings.offset = settings.offset || 0;
                settings.offset += chunk.length - 1;
                speechUtteranceChunker(utt, settings, callback);
            });
        }

        if (settings.modifier) {
            settings.modifier(newUtt);
        }
        console.log(newUtt); //IMPORTANT!! Do not remove: Logging the object out fixes some onend firing issues.
        //placing the speak invocation inside a callback fixes ordering and onend issues.
        setTimeout(function () {
            speechSynthesis.speak(newUtt);
        }, 0);
    };
</script>
<p class="t">MLA format follows the author-page method of in-text citation. This means that the author's last name and the page number(s) from which the quotation or paraphrase is taken must appear in the text, and a complete reference should appear on your Works Cited page. The author's name may appear either in the sentence itself or in parentheses following the quotation or paraphrase, but the page number(s) should always appear in the parentheses, not in the text of your sentence.</p>
<p class="t">Joe waited for the train.</p>
<p class="t">The train was late.</p>
<p class="t">Mary and Samantha took the bus.</p>

就我而言，答案是在将字符串添加到队列之前将它们“分块”。见这里：http: //jsfiddle.net/vqvyjzq4/

彼得的想法以及正则表达式（我还没有征服）的许多道具。我确信javascript可以被清理，这更像是一个概念证明。

<script type='text/javascript' src='http://code.jquery.com/jquery-2.1.0.js'></script>
<script type='text/javascript'>    
    var chunkLength = 120;
    var pattRegex = new RegExp('^[\\s\\S]{' + Math.floor(chunkLength / 2) + ',' + chunkLength + '}[.!?,]{1}|^[\\s\\S]{1,' + chunkLength + '}$|^[\\s\\S]{1,' + chunkLength + '} ');

    $(document).ready(function () {
        var element = this;
        var arr = [];
        var txt = replaceBlank($(element).text());
        while (txt.length > 0) {
            arr.push(txt.match(pattRegex)[0]);
            txt = txt.substring(arr[arr.length - 1].length);
        }
        $.each(arr, function () {
            var u = new SpeechSynthesisUtterance(this.trim());
            window.speechSynthesis.speak(u);
        });
    });
</script>
<p class="t">MLA format follows the author-page method of in-text citation. This means that the author's last name and the page number(s) from which the quotation or paraphrase is taken must appear in the text, and a complete reference should appear on your Works Cited page. The author's name may appear either in the sentence itself or in parentheses following the quotation or paraphrase, but the page number(s) should always appear in the parentheses, not in the text of your sentence.</p>
<p class="t">Joe waited for the train.</p>
<p class="t">The train was late.</p>
<p class="t">Mary and Samantha took the bus.</p>

score 7 · Accepted Answer

2017 年，这个错误仍然存在。作为屡获殊荣的 Chrome 扩展Read Aloud的开发者，我碰巧很了解这个问题。好吧，开个关于获奖部分的玩笑。

如果超过 15 秒，你的演讲就会卡住。
我发现 Chrome 使用 15 秒空闲计时器来决定何时停用扩展程序的事件/背景页面。我相信这是罪魁祸首。

我使用的解决方法是一个相当复杂的分块算法，它尊重标点符号。对于拉丁语言，我将最大块大小设置为 36 个单词。如果您愿意，代码是开源的：https ://github.com/ken107/read-aloud/blob/315f1e1d5be6b28ba47fe0c309961025521de516/js/speech.js#L212

36 个字的限制在大多数情况下效果很好，保持在 15 秒以内。但是在某些情况下它仍然会卡住。为了从中恢复，我使用了 16 秒计时器。

score 6 · Accepted Answer

这是我最终得到的结果，它只是将我的句子拆分为句点“。”

var voices = window.speechSynthesis.getVoices();

var sayit = function ()
{
    var msg = new SpeechSynthesisUtterance();

    msg.voice = voices[10]; // Note: some voices don't support altering params
    msg.voiceURI = 'native';
    msg.volume = 1; // 0 to 1
    msg.rate = 1; // 0.1 to 10
    msg.pitch = 2; //0 to 2
    msg.lang = 'en-GB';
    msg.onstart = function (event) {

        console.log("started");
    };
    msg.onend = function(event) {
        console.log('Finished in ' + event.elapsedTime + ' seconds.');
    };
    msg.onerror = function(event)
    {

        console.log('Errored ' + event);
    }
    msg.onpause = function (event)
    {
        console.log('paused ' + event);

    }
    msg.onboundary = function (event)
    {
        console.log('onboundary ' + event);
    }

    return msg;
}


var speekResponse = function (text)
{
    speechSynthesis.cancel(); // if it errors, this clears out the error.

    var sentences = text.split(".");
    for (var i=0;i< sentences.length;i++)
    {
        var toSay = sayit();
        toSay.text = sentences[i];
        speechSynthesis.speak(toSay);
    }
}

score 5 · Accepted Answer

我最终将文本分块，并在处理各种标点符号（如句号、逗号等）方面获得了一些情报。例如，如果它是数字的一部分（即 10,000 美元），你不想用逗号将文本分解.

我已经对其进行了测试，它似乎适用于任意大的输入集，而且它似乎不仅适用于桌面，而且适用于 android 手机和 iphone。

在以下位置为合成器设置一个 github 页面：https ://github.com/unk1911/speech

您可以在以下位置看到它：http: //edeliverables.com/tts/

score 1 · Accepted Answer

new Vue({
  el: "#app",
  data: {
    text: `Collaboratively administrate empowered markets via plug-and-play networks. Dynamically procrastinate B2C users after installed base benefits. Dramatically visualize customer directed convergence without revolutionary ROI. Efficiently unleash cross-media information without cross-media value. Quickly maximize timely deliverables for real-time schemas. Dramatically maintain clicks-and-mortar solutions without functional solutions.`
  },

  methods:{
    stop_reading() {
      const synth = window.speechSynthesis;
      synth.cancel();
    },

    talk() {
      const synth = window.speechSynthesis;
      const textInput = this.text;

      const utterThis = new SpeechSynthesisUtterance(textInput);
      utterThis.pitch = 0;
      utterThis.rate = 1;
      synth.speak(utterThis);

      const resumeInfinity = () => {
        window.speechSynthesis.resume();
        const timeoutResumeInfinity = setTimeout(resumeInfinity, 1000);
      }
      
      utterThis.onstart = () => {
        resumeInfinity();
      };
    }
  }
})

<script src="https://cdnjs.cloudflare.com/ajax/libs/vue/2.5.17/vue.js"></script>
<div id="app">
  <button @click="talk">Speak</button>
  <button @click="stop_reading">Stop</button>
</div>

score 0 · Accepted Answer

正如迈克尔建议的那样，彼得的解决方案非常棒，除非您的文本位于不同的行上。Michael 创建了演示以更好地说明问题。- https://jsfiddle.net/1gzkja90/并提出了另一种解决方案。

添加一种可能更简单的方法来解决这个问题是从 Peter 的解决方案中的 textarea 中删除换行符，它工作得很好。

//javascript
var noLineBreaks = document.getElementById('mytextarea').replace(/\n/g,'');

//jquery
var noLineBreaks = $('#mytextarea').val().replace(/\n/g,'');

所以在彼得的解决方案中，它可能看起来如下：

utterance.text = $('#mytextarea').val().replace(/\n/g,'');

但是取消演讲还是有问题的。它只是进入另一个序列并且不会停止。

score 0 · Accepted Answer

其他建议用点做奇怪的事情或说点，不要在句末尊重语音语调。

var CHARACTER_LIMIT = 200;
var lang = "en";

var text = "MLA format follows the author-page method of in-text citation. This means that the author's last name and the page number(s) from which the quotation or paraphrase is taken must appear in the text, and a complete reference should appear on your Works Cited page. The author's name may appear either in the sentence itself or in parentheses following the quotation or paraphrase, but the page number(s) should always appear in the parentheses, not in the text of your sentence. Joe waited for the train. The train was late. Mary and Samantha took the bus.";

    speak(text, lang)

    function speak(text, lang) {

      //Support for multipart text (there is a limit on characters)
      var multipartText = [];

      if (text.length > CHARACTER_LIMIT) {

        var tmptxt = text;

        while (tmptxt.length > CHARACTER_LIMIT) {

          //Split by common phrase delimiters
          var p = tmptxt.search(/[:!?.;]+/);
          var part = '';

          //Coludn't split by priority characters, try commas
          if (p == -1 || p >= CHARACTER_LIMIT) {
            p = tmptxt.search(/[,]+/);
          }

          //Couldn't split by normal characters, then we use spaces
          if (p == -1 || p >= CHARACTER_LIMIT) {

            var words = tmptxt.split(' ');

            for (var i = 0; i < words.length; i++) {

              if (part.length + words[i].length + 1 > CHARACTER_LIMIT)
                break;

              part += (i != 0 ? ' ' : '') + words[i];

            }

          } else {

            part = tmptxt.substr(0, p + 1);

          }

          tmptxt = tmptxt.substr(part.length, tmptxt.length - part.length);

          multipartText.push(part);
          //console.log(part.length + " - " + part);

        }

        //Add the remaining text
        if (tmptxt.length > 0) {
          multipartText.push(tmptxt);
        }

      } else {

        //Small text
        multipartText.push(text);
      }


      //Play multipart text
      for (var i = 0; i < multipartText.length; i++) {

        //Use SpeechSynthesis
        //console.log(multipartText[i]);

        //Create msg object
        var msg = new SpeechSynthesisUtterance();
        //msg.voice = profile.systemvoice;
        //msg.voiceURI = profile.systemvoice.voiceURI;
        msg.volume = 1; // 0 to 1
        msg.rate = 1; // 0.1 to 10
        // msg.rate = usersetting || 1; // 0.1 to 10
        msg.pitch = 1; //0 to 2*/
        msg.text = multipartText[i];
        msg.speak = multipartText;
        msg.lang = lang;
        msg.onend = self.OnFinishedPlaying;
        msg.onerror = function (e) {
          console.log('Error');
          console.log(e);
        };
        /*GC*/
        msg.onstart = function (e) {
          var curenttxt = e.currentTarget.text;
          console.log(curenttxt);
          //highlight(e.currentTarget.text);
          //$('#showtxt').text(curenttxt);
          //console.log(e);
        };
        //console.log(msg);
        speechSynthesis.speak(msg);

      }

    }

https://jsfiddle.net/onigetoc/9r27Ltqz/

score 0 · Accepted Answer

我想说的是，通过 Chrome 扩展程序和应用程序，我通过 using 解决了这个非常烦人的问题chrome.tts，因为chrome.tts它允许您通过浏览器说话，而不是在您关闭窗口时停止说话的窗口。

使用下面的代码，您可以通过大型演讲来解决上述问题：

chrome.tts.speak("Abnormally large string, over 250 characters, etc...");
setInterval(() => { chrome.tts.resume(); }, 100);

我相信这会起作用，但我这样做只是为了安全：

var largeData = "";
var smallChunks = largeData.match(/.{1,250}/g);
for (var chunk of smallChunks) {
  chrome.tts.speak(chunk, {'enqueue': true});
}

希望这对某人有帮助！它有助于使我的应用程序更实用、更出色地工作。

score -1 · Accepted Answer

是的，谷歌综合 api 会在朗读长文本时停止。

我们可以看到 SpeechSynthesisUtterance 的 onend 事件、onpause 和 onerror 事件在突然停止时不会正常触发，speechSynthesis onerror 事件也是如此。

经过几次试验，发现speechSynthesis.paused 可以正常工作，speechSynthesis.resume() 可以帮助恢复说话。

因此，我们只需要一个计时器来检查说话过程中的暂停状态，并调用 SpeechSynthesis.resume() 继续。间隔应该足够小，以防止继续讲话时出现故障。

let timer = null;
let reading = false;

let readText = function(text) {

    if (!reading) {
        speechSynthesis.cancel();
        if (timer) {
            clearInterval(timer);
        }
        let msg = new SpeechSynthesisUtterance();
        let voices = window.speechSynthesis.getVoices();
        msg.voice = voices[82];
        msg.voiceURI = 'native';
        msg.volume = 1; // 0 to 1
        msg.rate = 1.0; // 0.1 to 10
        msg.pitch = 1; //0 to 2
        msg.text = text;
        msg.lang = 'zh-TW';

        msg.onerror = function(e) {
            speechSynthesis.cancel();
            reading = false;
            clearInterval(timer);
        };

        msg.onpause = function(e) {
            console.log('onpause in ' + e.elapsedTime + ' seconds.');
        }            

        msg.onend = function(e) {
            console.log('onend in ' + e.elapsedTime + ' seconds.');
            reading = false;
            clearInterval(timer);
        };

        speechSynthesis.onerror = function(e) {
            console.log('speechSynthesis onerror in ' + e.elapsedTime + ' seconds.');
            speechSynthesis.cancel();
            reading = false;
            clearInterval(timer);
        };

        speechSynthesis.speak(msg);

        timer = setInterval(function(){
            if (speechSynthesis.paused) {
                console.log("#continue")
                speechSynthesis.resume();
            }

        }, 100);

        reading = true;

    }
}

javascript - 具有较长文本的 Chrome 语音合成

12 回答 12

Related

Reference