该代码旨在从 Praat 中提取数据：它应读取用于在 Praat 中进行标注的 wav 文件和 TextGrid 文件，提取其中的数据，然后将提取到的信息输出到 csv 文件中。
运行时它返回了以下错误；不过，经过调试后发现，提取器似乎没有提取到任何特征，因此数组为空。
from praatio import tgio
from praatio import tgio
from os.path import join
from scipy.io import wavfile
import subprocess
import os
import numpy as np
import csv
def extract_features(wavfile, praat_path=r'C:\Intel\Praat.exe', script_path=None):
    """
    Extract features for an audio file by running a Praat script on it.

    Runs ``<praat_path> --run <script_path> <wavfile>`` and parses everything
    the process prints to standard output as whitespace-separated floats.

    :param wavfile: Absolute path to a WAV file. (Inside this function the
        name shadows the ``scipy.io.wavfile`` module; it is kept because
        callers pass it by keyword.)
    :param praat_path: Path to the Praat executable.
    :param script_path: Path to the Praat script. Defaults to
        ``features.praat`` in the current working directory.
    :return: Praat emotion features as a 1-D numpy float array. The array is
        empty (and a ``RuntimeWarning`` is issued) when Praat printed nothing.
    :raises subprocess.CalledProcessError: if Praat exits with a non-zero
        status.
    :raises ValueError: if a printed token cannot be parsed as a float.
    """
    import warnings

    if script_path is None:
        script_path = join(os.getcwd(), 'features.praat')

    raw_output = subprocess.check_output(
        [praat_path, '--run', script_path, wavfile])
    tokens = raw_output.decode("utf-8").split()

    if not tokens:
        # An empty result only surfaces later as an IndexError when the rows
        # are written out, so flag it here where the cause is still visible.
        warnings.warn(
            'Praat produced no output for %r using script %r; check that the '
            'script prints the feature values to standard output.'
            % (wavfile, script_path),
            RuntimeWarning,
            stacklevel=2,
        )

    return np.asarray([float(token) for token in tokens], dtype=float)
def get_snippet_features(wav_file_path, start_time, end_time):
    """
    Extract features for the part of a WAV file between two time points.

    The samples between ``start_time`` and ``end_time`` are written to a
    temporary WAV file (``data/temp/temp.wav`` under the current working
    directory), which is handed to ``extract_features`` and then deleted.

    :param wav_file_path: Path to the source WAV file.
    :param start_time: Start of the snippet in seconds (multiplied by the
        sample rate to obtain the first sample index).
    :param end_time: End of the snippet in seconds (exclusive sample index
        after conversion).
    :return: Whatever ``extract_features`` returns for the snippet.
    """
    fs, data = wavfile.read(wav_file_path)
    start_time_sample = int(start_time * fs)
    end_time_sample = int(end_time * fs)

    # Make sure the scratch directory exists; otherwise the write below fails
    # on a fresh checkout.
    temp_dir = join(os.getcwd(), 'data', 'temp')
    os.makedirs(temp_dir, exist_ok=True)
    temp_file_path = join(temp_dir, 'temp.wav')

    try:
        wavfile.write(temp_file_path, rate=fs,
                      data=data[start_time_sample:end_time_sample])
        return extract_features(wavfile=temp_file_path)
    finally:
        # Always clean up the scratch file, even when extraction raises.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
def get_textgrid_features(filename, normalize=True):
    """
    Extract labels and features for every entry of a TextGrid's
    ``PointOfReference`` tier.

    The TextGrid is read from ``data/<filename>`` under the current working
    directory; the audio is expected next to it with the same base name and a
    ``.wav`` extension. The ``Orthographic`` and ``CodeSwitch`` tiers are only
    printed for inspection; they do not influence the result.

    :param filename: Name of the TextGrid file inside the ``data`` directory.
    :param normalize: If true, standardise each feature column to zero mean
        and unit standard deviation across all entries. Columns with zero
        standard deviation are mapped to 0 instead of NaN/inf.
    :return: ``(labels, features)``. ``labels`` is a list of entry labels.
        ``features`` is a list of per-entry feature arrays when ``normalize``
        is false, and a numpy array when it is true.
    :raises KeyError: if one of the three tiers is missing from the TextGrid.
    """
    file_path = join(os.getcwd(), 'data', filename)
    tg = tgio.openTextgrid(fnFullPath=file_path)
    # Swap only the extension, not any '.TextGrid' substring inside the path.
    wav_file_path = os.path.splitext(file_path)[0] + '.wav'

    print(tg.tierNameList)
    orthographic_tier = tg.tierDict['Orthographic']
    print(orthographic_tier.entryList)
    code_switch_tier = tg.tierDict['CodeSwitch']
    print(code_switch_tier.entryList)
    por_tier = tg.tierDict['PointOfReference']
    print(por_tier.entryList)

    features = []
    labels = []
    for item in por_tier.entryList:
        file_features = get_snippet_features(wav_file_path=wav_file_path,
                                             start_time=item.start,
                                             end_time=item.end)
        labels.append(item.label)
        features.append(file_features)

    # normalization
    if normalize:
        features = np.asarray(features, dtype=float)
        if features.size:
            mean = np.mean(features, axis=0)
            std = np.std(features, axis=0)
            # A constant column has std == 0; divide by 1 so it becomes 0
            # rather than NaN/inf.
            std = np.where(std == 0, 1.0, std)
            features = (features - mean) / std

    return labels, features
def generate_csv(labels, features, output_path):
    """
    Write labels and their first nine feature values to a CSV file.

    The file starts with a fixed header row (``CS_TYPE`` followed by nine
    feature names); each following row is a label and the first nine values
    of the feature row at the same index. An existing file at ``output_path``
    is overwritten, and its parent directory is created if missing.

    All rows are validated before the file is opened, so a bad row never
    leaves a partially written CSV behind.

    :param labels: Sequence of row labels.
    :param features: Sequence indexable in parallel with ``labels``; each
        element must hold at least nine values.
    :param output_path: Destination path of the CSV file.
    :raises IndexError: if ``features`` has fewer rows than ``labels`` or a
        row holds fewer than nine values (e.g. feature extraction returned
        an empty array).
    """
    header = ['CS_TYPE', 'PITCH_AVG', 'PITCH_DIR', 'PITCH_DYN', 'PITCH_JIT',
              'INT_AVG', 'INT_DYN', 'INT_JIT', 'SPC_SLO', 'SPC_JIT']
    feature_count = len(header) - 1

    rows = []
    for index, label in enumerate(labels):
        row = features[index]
        if len(row) < feature_count:
            raise IndexError(
                'feature row %d (label %r) has %d values, expected %d'
                % (index, label, len(row), feature_count))
        rows.append([label, *row[:feature_count]])

    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows. Mode 'w' truncates any
    # existing file, so no explicit removal is needed.
    with open(output_path, 'w', newline='') as csvfile:
        filewriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
        filewriter.writerow(header)
        filewriter.writerows(rows)
# Script entry: extract the un-normalised features for Ian.TextGrid, echo
# them, and save them as data/csv/ian.csv under the current working directory.
csv_output_path = join(os.getcwd(), 'data', 'csv', 'ian.csv')
labels, features = get_textgrid_features('Ian.TextGrid', normalize=False)
print(labels, features)
generate_csv(labels, features, csv_output_path)