sidekit
我在尝试使用 UBM 进行说话人自适应(speaker adaptation)时,遇到了以下错误。
异常:show enroll/something.wav 不在 HDF5 文件中
我在文件夹“feat”下有两个子文件夹“enroll”和“test”,分别包含用于训练和测试的特征(.h5),
而我的 enroll_idmap 中的音频(.wav)仅用于训练。
上面的错误是在执行 enroll_stat.accumulate_stat(…) 的过程中出现的。
谁能告诉我这个错误是什么意思以及如何解决它?
import sidekit
import os
import numpy as np
import h5py
# --- Experiment configuration ---------------------------------------------
# Root directory under which the task, ubm, stat and result files are looked up.
BASE_DIR = "./Database/sidekit_data"
# Value embedded in the model / statistics / score file names.
# (Spelling kept as-is because the rest of the script refers to this name.)
NUM_GUASSIANS = 64
# Forwarded as ``num_thread`` to the sidekit calls further down the script.
nbThread = 4

# File name of the UBM; it encodes NUM_GUASSIANS.
model_name = "ubm_{}.h5".format(NUM_GUASSIANS)

# Enrollment id map, read from the "task" sub-directory.
enroll_idmap = sidekit.IdMap.read(
    os.path.join(BASE_DIR, "task", "enroll_idmap.h5")
)

# Universal background model, read from the "ubm" sub-directory.
ubm = sidekit.Mixture()
ubm.read(os.path.join(BASE_DIR, "ubm", model_name))
# Feature server used to read the stored feature files (one .h5 per show).
# The "{}" placeholder is left in the path for sidekit to fill in.
server_eval = sidekit.FeaturesServer(
    # Built from BASE_DIR so this path cannot drift from the other paths in
    # the script. The previous hard-coded literal contained a stray space
    # ("sidekit_data /feat"), which pointed at a different directory.
    feature_filename_structure=os.path.join(BASE_DIR, "feat", "{}.h5"),
    sources=None,
    dataset_list=["vad", "energy", "cep", "fb"],
    feat_norm="cmvn",
    global_cmvn=None,
    dct_pca=False,
    dct_pca_config=None,
    sdc=False,
    sdc_config=None,
    delta=True,
    double_delta=True,
    delta_filter=None,
    context=None,
    traps_dct_nb=None,
    rasta=True,
    keep_all_features=True,
)
print("Compute the sufficient statistics")
# The statistics container was used without ever being created (NameError).
# Build it from the enrollment id map; passing the UBM lets sidekit size the
# statistics from the model instead of hard-coding dimensions here.
enroll_stat = sidekit.StatServer(enroll_idmap, ubm=ubm)

# NOTE(review): if this call fails with "show ... is not in the HDF5 file",
# the segment names listed in the id map presumably do not match the names
# stored in the feature files (e.g. a trailing ".wav") -- confirm against how
# the features were extracted.
enroll_stat.accumulate_stat(
    ubm=ubm,
    feature_server=server_eval,
    seg_indices=range(enroll_stat.segset.shape[0]),
    num_thread=nbThread,
)

# Persist the accumulated statistics under BASE_DIR/stat.
filename = "enroll_stat_{}.h5".format(NUM_GUASSIANS)
enroll_stat.write(os.path.join(BASE_DIR, "stat", filename))

print("MAP adaptation of the speaker models")
enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm=ubm, r=3)
print("Compute trial scores")
# The trial index was referenced without being defined (NameError).
# NOTE(review): the file location is assumed by analogy with
# "task/enroll_idmap.h5" -- confirm the actual name of the Ndx file.
test_ndx = sidekit.Ndx.read(os.path.join(BASE_DIR, "task", "test_ndx.h5"))

# Score every trial of the index against the adapted speaker models.
scores_gmm_ubm = sidekit.gmm_scoring(
    ubm=ubm,
    enroll=enroll_sv,
    ndx=test_ndx,
    feature_server=server_eval,
    num_thread=nbThread,
)

# Persist the raw score object under BASE_DIR/result.
filename = "ubm_scores_{}.h5".format(NUM_GUASSIANS)
scores_gmm_ubm.write(os.path.join(BASE_DIR, "result", filename))
# Write a human-readable report: for each test segment, the score obtained
# against every enrolled speaker model.
filename = "ubm_scores_explained_{}.txt".format(NUM_GUASSIANS)

modelset = list(scores_gmm_ubm.modelset)
segset = list(scores_gmm_ubm.segset)
# Indexed below as scores[speaker_idx, seg_idx].
scores = np.array(scores_gmm_ubm.scoremat)

# Mode "w" starts from an empty file, replacing the former open("a") +
# truncate(0); the context manager closes the file even if a write fails.
with open(os.path.join(BASE_DIR, "result", filename), "w", encoding="utf-8") as fout:
    for seg_idx, seg in enumerate(segset):
        fout.write("Wav: {}\n".format(seg))
        for speaker_idx, speaker in enumerate(modelset):
            fout.write("\tSpeaker {}:\t{}\n".format(speaker, scores[speaker_idx, seg_idx]))
        # Blank line separates consecutive segments.
        fout.write("\n")