我正在使用 SoundFile Python 库和HuggingFace 的 Timit 数据集,但我遇到了 NameError 的问题:名称“sf 未定义”。
我最初使用“pip install soundfile”安装了 SoundFile。我尝试卸载并重新安装,甚至降级到以前的版本(使用“pip install SoundFile==0.10.2”),但这不起作用。
我正在使用 Jupyter Notebook。
这是我的代码:
import soundfile as sf
def speech_file_to_array_fn(batch):
speech_array, sampling_rate = sf.read(batch["file"])
batch["speech"] = speech_array
batch["sampling_rate"] = sampling_rate
batch["target_text"] = batch["text"]
return batch
timit = timit.map(speech_file_to_array_fn, remove_columns=timit.column_names["train"], num_proc=4)
上面的代码给出了以下错误:
---------------------------------------------------------------------------
RemoteTraceback Traceback (most recent call last)
RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\mikea\anaconda3\lib\site-packages\multiprocess\pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "C:\Users\mikea\anaconda3\lib\site-packages\datasets\arrow_dataset.py", line 185, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
File "C:\Users\mikea\anaconda3\lib\site-packages\datasets\fingerprint.py", line 397, in wrapper
out = func(self, *args, **kwargs)
File "C:\Users\mikea\anaconda3\lib\site-packages\datasets\arrow_dataset.py", line 2001, in _map_single
example = apply_function_on_filtered_inputs(example, i, offset=offset)
File "C:\Users\mikea\anaconda3\lib\site-packages\datasets\arrow_dataset.py", line 1910, in apply_function_on_filtered_inputs
function(*fn_args, effective_indices, **fn_kwargs) if with_indices else function(*fn_args, **fn_kwargs)
File "<ipython-input-15-2022ae9f9b21>", line 4, in speech_file_to_array_fn
NameError: name 'sf' is not defined
"""
The above exception was the direct cause of the following exception:
NameError Traceback (most recent call last)
<ipython-input-15-2022ae9f9b21> in <module>
8 return batch
9
---> 10 timit = timit.map(speech_file_to_array_fn, remove_columns=timit.column_names["train"], num_proc=4)
~\anaconda3\lib\site-packages\datasets\dataset_dict.py in map(self, function, with_indices, input_columns, batched, batch_size, remove_columns, keep_in_memory, load_from_cache_file, cache_file_names, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, desc)
469 cache_file_names = {k: None for k in self}
470 return DatasetDict(
--> 471 {
472 k: dataset.map(
473 function=function,
~\anaconda3\lib\site-packages\datasets\dataset_dict.py in <dictcomp>(.0)
470 return DatasetDict(
471 {
--> 472 k: dataset.map(
473 function=function,
474 with_indices=with_indices,
~\anaconda3\lib\site-packages\datasets\arrow_dataset.py in map(self, function, with_indices, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)
1746 logger.info("Spawning {} processes".format(num_proc))
1747 results = [pool.apply_async(self.__class__._map_single, kwds=kwds) for kwds in kwds_per_shard]
-> 1748 transformed_shards = [r.get() for r in results]
1749 logger.info("Concatenating {} shards from multiprocessing".format(num_proc))
1750 result = concatenate_datasets(transformed_shards)
~\anaconda3\lib\site-packages\datasets\arrow_dataset.py in <listcomp>(.0)
1746 logger.info("Spawning {} processes".format(num_proc))
1747 results = [pool.apply_async(self.__class__._map_single, kwds=kwds) for kwds in kwds_per_shard]
-> 1748 transformed_shards = [r.get() for r in results]
1749 logger.info("Concatenating {} shards from multiprocessing".format(num_proc))
1750 result = concatenate_datasets(transformed_shards)
~\anaconda3\lib\site-packages\multiprocess\pool.py in get(self, timeout)
769 return self._value
770 else:
--> 771 raise self._value
772
773 def _set(self, i, obj):
NameError: name 'sf' is not defined