我用 wav 文件测试过 DeepSpeech,效果很好。但当我尝试使用音频流时,问题就出现了:DeepSpeech 连一个单词都识别不出来。音频流是 PCM、48 kHz、立体声、有符号 16 位小端序。我一直在尝试将流转换为其他格式、采样率(sampleRate)和声道数,但完全没有成功。我在 Node.js 上使用 DeepSpeech:
// Collects an incoming audio stream into memory, converts it with Sox to
// 16-bit mono raw PCM at `desiredSampleRate`, and runs speech-to-text on the
// converted buffer.
//
// NOTE(review): modelStream is assigned here but is not referenced again in
// the visible lines; the transcription below calls englishModel.stt() on the
// whole buffer instead.
modelStream = englishModel.createStream();
// Accumulates every chunk emitted by the source stream.
let chunks = [];
stream.on('data', chunk => {
chunks.push(chunk);
}).on('close', () => {
// The source has closed: join all collected chunks into a single buffer.
const buffer = Buffer.concat(chunks);
// Re-wrap the buffer as a stream so it can be piped into Sox.
// This `stream` is a new local binding that shadows the outer `stream`
// inside this callback.
// Assumes Duplex is Node's stream.Duplex, where push(null) marks the end of
// the readable data — TODO confirm the import.
let stream = new Duplex();
stream.push(buffer);
stream.push(null);
// Destination that buffers the converted audio in memory.
let audioStream = new MemoryStream();
// Only `global` and `output` options are passed to Sox; no `input` options
// are given, so the format of the incoming data is not declared here.
// NOTE(review): the accompanying description says the source is headerless
// PCM (48 kHz, stereo, signed 16-bit little-endian). Sox presumably needs an
// `input` section (type/rate/channels/bits/encoding/endian) to interpret raw
// data correctly — confirm against the sox-stream documentation.
stream.pipe(Sox({
global: {
'no-dither': true,
},
// Target format: raw signed 16-bit little-endian, mono, at desiredSampleRate.
output: {
bits: 16,
rate: desiredSampleRate,
channels: 1,
encoding: 'signed-integer',
endian: 'little',
compression: 0.0,
type: 'raw'
}
})).
pipe(audioStream);
// Runs after the converted audio has been fully written to audioStream.
audioStream.on('finish', () => {
let audioBuffer = audioStream.toBuffer();
// Duration in seconds: byte length / 2 gives the number of 16-bit mono
// samples, and 1 / desiredSampleRate is the time per sample.
const audioLength = (audioBuffer.length / 2) * (1 / desiredSampleRate);
console.log('audio length', audioLength);
// Transcribe the whole converted buffer in one call and log the result.
let result = englishModel.stt(audioBuffer);
console.log('result:', result);
});
// NOTE(review): the 'close' callback opened above is not closed within the
// visible lines; the snippet appears to be truncated here.