
I am using the SoundFile Python library with HuggingFace's Timit dataset, but I am running into a NameError: name 'sf' is not defined.

I originally installed SoundFile with "pip install soundfile". I have tried uninstalling and reinstalling it, and even downgrading to an earlier version (with "pip install SoundFile==0.10.2"), but none of that helped.

I am working in a Jupyter Notebook.
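
Note that the import itself appears to succeed in the main notebook process; the NameError is raised from a worker process (see the RemoteTraceback below). A quick sanity check like this runs without complaint:

import soundfile as sf
print(sf.__version__)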

Here is my code:

import soundfile as sf

def speech_file_to_array_fn(batch):
    speech_array, sampling_rate = sf.read(batch["file"])
    batch["speech"] = speech_array
    batch["sampling_rate"] = sampling_rate
    batch["target_text"] = batch["text"]
    return batch

timit = timit.map(speech_file_to_array_fn, remove_columns=timit.column_names["train"], num_proc=4)

The above code produces the following error:

---------------------------------------------------------------------------
RemoteTraceback                           Traceback (most recent call last)
RemoteTraceback: 
"""
Traceback (most recent call last):
  File "C:\Users\mikea\anaconda3\lib\site-packages\multiprocess\pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "C:\Users\mikea\anaconda3\lib\site-packages\datasets\arrow_dataset.py", line 185, in wrapper
    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
  File "C:\Users\mikea\anaconda3\lib\site-packages\datasets\fingerprint.py", line 397, in wrapper
    out = func(self, *args, **kwargs)
  File "C:\Users\mikea\anaconda3\lib\site-packages\datasets\arrow_dataset.py", line 2001, in _map_single
    example = apply_function_on_filtered_inputs(example, i, offset=offset)
  File "C:\Users\mikea\anaconda3\lib\site-packages\datasets\arrow_dataset.py", line 1910, in apply_function_on_filtered_inputs
    function(*fn_args, effective_indices, **fn_kwargs) if with_indices else function(*fn_args, **fn_kwargs)
  File "<ipython-input-15-2022ae9f9b21>", line 4, in speech_file_to_array_fn
NameError: name 'sf' is not defined
"""

The above exception was the direct cause of the following exception:

NameError                                 Traceback (most recent call last)
<ipython-input-15-2022ae9f9b21> in <module>
      8     return batch
      9 
---> 10 timit = timit.map(speech_file_to_array_fn, remove_columns=timit.column_names["train"], num_proc=4)

~\anaconda3\lib\site-packages\datasets\dataset_dict.py in map(self, function, with_indices, input_columns, batched, batch_size, remove_columns, keep_in_memory, load_from_cache_file, cache_file_names, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, desc)
    469             cache_file_names = {k: None for k in self}
    470         return DatasetDict(
--> 471             {
    472                 k: dataset.map(
    473                     function=function,

~\anaconda3\lib\site-packages\datasets\dataset_dict.py in <dictcomp>(.0)
    470         return DatasetDict(
    471             {
--> 472                 k: dataset.map(
    473                     function=function,
    474                     with_indices=with_indices,

~\anaconda3\lib\site-packages\datasets\arrow_dataset.py in map(self, function, with_indices, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)
   1746                 logger.info("Spawning {} processes".format(num_proc))
   1747                 results = [pool.apply_async(self.__class__._map_single, kwds=kwds) for kwds in kwds_per_shard]
-> 1748                 transformed_shards = [r.get() for r in results]
   1749                 logger.info("Concatenating {} shards from multiprocessing".format(num_proc))
   1750                 result = concatenate_datasets(transformed_shards)

~\anaconda3\lib\site-packages\datasets\arrow_dataset.py in <listcomp>(.0)
   1746                 logger.info("Spawning {} processes".format(num_proc))
   1747                 results = [pool.apply_async(self.__class__._map_single, kwds=kwds) for kwds in kwds_per_shard]
-> 1748                 transformed_shards = [r.get() for r in results]
   1749                 logger.info("Concatenating {} shards from multiprocessing".format(num_proc))
   1750                 result = concatenate_datasets(transformed_shards)

~\anaconda3\lib\site-packages\multiprocess\pool.py in get(self, timeout)
    769             return self._value
    770         else:
--> 771             raise self._value
    772 
    773     def _set(self, i, obj):

NameError: name 'sf' is not defined
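
From the RemoteTraceback, it looks like 'sf' is undefined inside the worker processes spawned by num_proc=4, even though the import at the top of the cell ran fine. A workaround I am considering (untested, and assuming the workers simply do not inherit the notebook's top-level imports on Windows) is to move the import inside the mapped function:

def speech_file_to_array_fn(batch):
    # Imported here so each worker process resolves the module itself,
    # instead of relying on the notebook's top-level import.
    import soundfile as sf
    speech_array, sampling_rate = sf.read(batch["file"])
    batch["speech"] = speech_array
    batch["sampling_rate"] = sampling_rate
    batch["target_text"] = batch["text"]
    return batch

timit = timit.map(speech_file_to_array_fn, remove_columns=timit.column_names["train"], num_proc=4)

Dropping num_proc=4 (so the map runs in a single process) would presumably also sidestep the worker issue. Is moving the import the right fix here, or is there a cleaner way to make the workers see the module?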