我找到了一个可行的解决方案,尽管我的音频检测算法相当粗糙,而且不太准确。声音的幅度(或频率)似乎可以直接从采样值中看出来:数值的位数越多,声音就越大。我通过检测一长串连续的多位数值来判断声音的开始,但只要出现一个不符合该模式的数值,计数循环就会重置。根据前几个数值之间的差异来进行判断会更好。
import wave, struct, logging
# Detect where sound starts and stops in a 16-bit PCM wave file.
#
# Loudness is judged by the number of decimal digits in a sample's magnitude:
# a long run of "many digit" samples marks the start of sound, and a shorter
# run of "few digit" samples marks its end.  A single sample that breaks the
# pattern resets the current run.

LOUD_MIN_DIGITS = 4    # len(str(1234)): samples with at least this many digits are loud
QUIET_MAX_DIGITS = 2   # len(str(12)): samples with at most this many digits are quiet
LOUD_RUN = 100         # consecutive loud samples required to report a start
QUIET_RUN = 10         # consecutive quiet samples required to report an end


def _digit_count(sample):
    """Return the number of decimal digits in the magnitude of *sample*.

    The sign is dropped first so that -500 and 500 are treated alike;
    counting the characters of str(-500) would include the minus sign.
    """
    return len(str(abs(int(sample))))


def find_sound_events(samples, loud_digits=LOUD_MIN_DIGITS,
                      quiet_digits=QUIET_MAX_DIGITS,
                      loud_run=LOUD_RUN, quiet_run=QUIET_RUN):
    """Yield ("start", frame) and ("end", frame) events found in *samples*.

    samples      -- iterable of integer sample values, one per frame.
    loud_digits  -- minimum digit count for a sample to count as loud.
    quiet_digits -- maximum digit count for a sample to count as quiet.
    loud_run     -- length of the loud run that triggers a "start".
    quiet_run    -- length of the quiet run that triggers an "end".

    The frame in each event is the index of the first sample of the run
    that triggered it, so "start" is the first loud frame and "end" is the
    first quiet frame after the sound.  Events alternate, beginning with
    "start"; a trailing "start" without an "end" is possible.
    """
    in_sound = False
    run = 0
    for index, sample in enumerate(samples):
        digits = _digit_count(sample)
        if in_sound:
            fits = digits <= quiet_digits
            needed = quiet_run
        else:
            fits = digits >= loud_digits
            needed = loud_run

        # One sample that does not fit the pattern restarts the run.
        run = run + 1 if fits else 0

        if run == needed:
            # index - needed + 1 is the first frame of the run; using
            # index - needed would point one frame too early (and at -1
            # when the run begins on frame 0).
            yield ("end" if in_sound else "start", index - needed + 1)
            in_sound = not in_sound
            run = 0


def _iter_first_channel(wav, chunk_frames=4096):
    """Yield the first channel's sample for every frame of an open wave reader.

    Frames are read in chunks rather than one at a time.  Only 16-bit
    samples are supported, because they are decoded as little-endian
    signed shorts; any other sample width raises ValueError.
    """
    width = wav.getsampwidth()
    if width != 2:
        raise ValueError(
            "only 16-bit samples are supported, got %d-bit" % (8 * width))
    channels = wav.getnchannels()
    frame_bytes = width * channels
    while True:
        raw = wav.readframes(chunk_frames)
        frames_read = len(raw) // frame_bytes
        if frames_read == 0:
            # End of data (also covers a file shorter than its header claims).
            break
        values = struct.unpack("<%dh" % (frames_read * channels),
                               raw[:frames_read * frame_bytes])
        # Samples are interleaved per frame; keep channel 0 only.
        for sample in values[::channels]:
            yield sample


def main(path='wavefile.WAV'):
    """Print the start, end and duration (in seconds) of each sound in *path*."""
    # open up a wave
    w = wave.open(path, 'rb')
    try:
        logging.basicConfig(filename='example.log', level=logging.DEBUG)
        rate = float(w.getframerate())
        start_frame = 0
        for kind, frame in find_sound_events(_iter_first_channel(w)):
            if kind == "start":
                start_frame = frame
                print("Start " + str(frame / rate))
            else:
                print("End " + str(frame / rate))
                # Duration is reported once per completed start/end pair.
                print("Duration: " + str((frame - start_frame) / rate))
    finally:
        # Always release the file handle, even if decoding fails.
        w.close()


if __name__ == "__main__":
    main()