-1

这段代码用于提取 Praat 的标注数据:它应读取 wav 文件及其对应的 TextGrid 文件(即在 Praat 中用于标注的文件)中的数据,然后将提取到的信息输出到 csv 文件中。

它返回了以下错误;不过在调试之后发现,提取器似乎没有提取到任何特征,因此特征数组为空。

            from praatio import tgio
            from praatio import tgio
            from os.path import join
            from scipy.io import wavfile
            import subprocess
            import os
            import numpy as np
            import csv


            def extract_features(wavfile):
                """
                Run the Praat feature script on one WAV file and parse its output.

                Launches the Praat executable with ``--run`` on ``features.praat``
                (looked up in the current working directory), passes ``wavfile`` as
                the script argument, and converts every whitespace-separated token
                printed by the script into a float.

                :param wavfile: Absolute path to a WAV file.
                :return: 1-D NumPy array with the numbers printed by the script. The
                    array is empty when the script prints nothing.
                :raises subprocess.CalledProcessError: if Praat exits with a non-zero
                    status.
                :raises ValueError: if a printed token cannot be parsed as a float.
                """
                features_script_path = join(os.getcwd(), 'features.praat')

                # Raw string: '\I' and '\P' are not valid escape sequences, and
                # Python warns about them when they appear in an ordinary literal.
                praat_executable = r'C:\Intel\Praat.exe'

                result = subprocess.check_output([praat_executable,
                                                  '--run',
                                                  features_script_path, wavfile])
                tokens = result.decode("utf-8").split()
                return np.asarray([float(token) for token in tokens])


            def get_snippet_features(wav_file_path, start_time, end_time):
                """
                Extract Praat features for one time interval of a WAV file.

                The samples between ``start_time`` and ``end_time`` are written to
                ``<cwd>/data/temp/temp.wav``, that file is handed to
                ``extract_features``, and it is deleted again afterwards.

                :param wav_file_path: Path to the source WAV file.
                :param start_time: Interval start in seconds.
                :param end_time: Interval end in seconds.
                :return: Whatever ``extract_features`` returns for the snippet.
                """
                fs, data = wavfile.read(wav_file_path)
                # Convert the interval bounds from seconds to sample indices.
                start_time_sample = int(start_time * fs)
                end_time_sample = int(end_time * fs)

                temp_dir = join(os.getcwd(), 'data', 'temp')
                # The WAV writer does not create missing directories.
                os.makedirs(temp_dir, exist_ok=True)
                temp_file_path = join(temp_dir, 'temp.wav')
                wavfile.write(temp_file_path, rate=fs, data=data[start_time_sample:end_time_sample])

                try:
                    features = extract_features(wavfile=temp_file_path)
                finally:
                    # Delete the snippet even when feature extraction fails, so a
                    # stale temp.wav is never left behind.
                    os.remove(temp_file_path)
                return features


            def get_textgrid_features(filename, normalize=True):
                """
                Extract one feature vector per 'PointOfReference' interval of a TextGrid.

                The TextGrid is read from ``<cwd>/data/<filename>`` and the audio from
                the file with the same base name and a ``.wav`` extension. The tier
                names and the entries of the 'Orthographic', 'CodeSwitch' and
                'PointOfReference' tiers are printed for inspection.

                :param filename: Name of the TextGrid file inside the ``data`` directory.
                :param normalize: If true, z-score every feature column
                    (subtract the column mean, divide by the column standard deviation).
                :return: Tuple ``(labels, features)``. ``labels`` is the list of interval
                    labels. ``features`` is a list of per-interval feature arrays, or a
                    2-D NumPy array when normalization was applied.
                :raises KeyError: if one of the three expected tiers is missing.
                """
                file_path = join(os.getcwd(), 'data', filename)
                tg = tgio.openTextgrid(fnFullPath=file_path)

                # Swap only the extension; str.replace would also rewrite a
                # '.TextGrid' that happens to occur earlier in the path.
                wav_file_path = os.path.splitext(file_path)[0] + '.wav'

                print(tg.tierNameList)

                orthographic_tier = tg.tierDict['Orthographic']
                print(orthographic_tier.entryList)

                code_switch_tier = tg.tierDict['CodeSwitch']
                print(code_switch_tier.entryList)

                por_tier = tg.tierDict['PointOfReference']
                print(por_tier.entryList)

                features = []
                labels = []
                for item in por_tier.entryList:
                    file_features = get_snippet_features(wav_file_path=wav_file_path,
                                                         start_time=item.start,
                                                         end_time=item.end)
                    labels.append(item.label)
                    features.append(file_features)

                # Normalization is skipped when there are no intervals: the mean of
                # an empty list is NaN and would only produce a runtime warning.
                if normalize and features:
                    feature_matrix = np.asarray(features)
                    mean = np.mean(feature_matrix, axis=0)
                    std = np.std(feature_matrix, axis=0)
                    # A constant column has std 0; divide by 1 instead so it becomes
                    # all zeros rather than NaN/inf.
                    std = np.where(std == 0, 1.0, std)
                    features = (feature_matrix - mean) / std

                return labels, features


            def generate_csv(labels, features, output_path):
                """
                Write labels and their feature vectors to a CSV file.

                The file starts with a fixed header row (``CS_TYPE`` followed by nine
                feature column names). Each following row holds one label and the
                first nine values of the feature vector at the same index. An existing
                file at ``output_path`` is overwritten.

                :param labels: Sequence of labels, one per row.
                :param features: Sequence of feature vectors, indexable in parallel
                    with ``labels``; each vector needs at least nine values.
                :param output_path: Path of the CSV file to create.
                :raises IndexError: if ``features`` has fewer entries than ``labels`` or
                    a feature vector has fewer than nine values (for example when the
                    extractor produced no output).
                """
                header = ['CS_TYPE', 'PITCH_AVG', 'PITCH_DIR', 'PITCH_DYN', 'PITCH_JIT',
                          'INT_AVG', 'INT_DYN', 'INT_JIT', 'SPC_SLO', 'SPC_JIT']
                feature_count = len(header) - 1

                # Mode 'w' already truncates an existing file. newline='' lets the csv
                # module control line endings, which avoids blank lines between rows
                # on Windows.
                with open(output_path, 'w', newline='') as csvfile:
                    filewriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
                    filewriter.writerow(header)
                    for index, label in enumerate(labels):
                        row = features[index]
                        if len(row) < feature_count:
                            raise IndexError(
                                'feature vector %d for label %r has %d values, expected at least %d'
                                % (index, label, len(row), feature_count))
                        filewriter.writerow([label, *row[:feature_count]])


            # Script entry: extract unnormalized features for Ian.TextGrid, show them,
            # and store them in data/csv/ian.csv under the current working directory.
            output_csv_path = join(os.getcwd(), 'data', 'csv', 'ian.csv')
            labels, features = get_textgrid_features('Ian.TextGrid', normalize=False)
            print(labels, features)
            generate_csv(labels, features, output_csv_path)
4

1 回答 1

0

我也遇到了类似的问题,来这里只是为了寻找答案。

于 2019-05-03T16:25:10.073 回答