我正在尝试使用 SimpleTransformers 训练一个文本分类器,所用的预训练语言模型来自 TurkuNLP,训练在我本机的 GPU 上进行。
我的代码:
# Build a SimpleTransformers classifier: 'bert' model type, loaded from the
# 'TurkuNLP/wikibert-base-he-cased' checkpoint, with 11 output labels and
# CUDA (GPU) execution requested.
model = ClassificationModel('bert', 'TurkuNLP/wikibert-base-he-cased', num_labels=11, use_cuda=True,
# Training options: re-process the input data, overwrite the output directory,
# keep fp16 (mixed precision) off.
args={'reprocess_input_data': True, 'overwrite_output_dir': True, 'fp16': False,
# Train for 3 epochs with the sliding-window option on and a train batch size of 6.
# NOTE(review): the traceback reported for this snippet shows many DataLoader
# worker processes exiting inside train_model on a Windows/Jupyter setup.
# No worker/multiprocessing option is set here, so library defaults apply --
# presumably the worker-count settings (e.g. 'dataloader_num_workers',
# 'use_multiprocessing') are worth checking; confirm against the installed version.
"num_train_epochs": 3, 'sliding_window': True, 'train_batch_size': 6})
# You can set class weights by using the optional weight argument
# (no weight argument is passed in this snippet).
# Train the model on train_df (defined outside this snippet).
model.train_model(train_df)
# Evaluate the model on test_df (defined outside this snippet); the call's
# return value is unpacked into three names.
result, model_outputs, wrong_predictions = model.eval_model(test_df)
然后我开始训练分类器。但是,第一个训练轮次(epoch)一结束,我就会收到如下警告:
WARNING py.warnings:99: [JupyterRequire] C:\Users\work\Anaconda3\envs\py35\lib\site-packages\torch\optim\lr_scheduler.py:216: UserWarning: Please also save or load the state of the optimizer when saving or loading the scheduler. warnings.warn(SAVE_STATE_WARNING, UserWarning)
接着,第二个训练轮次(epoch)一开始,我就会收到以下错误:
--------------------------------------------------------------------------- Empty Traceback (most recent call last) ~\Anaconda3\envs\py35\lib\site-packages\torch\utils\data\dataloader.py in _try_get_data(self, timeout) 871 try: --> 872 data = self._data_queue.get(timeout=timeout) 873 return (True, data) ~\Anaconda3\envs\py35\lib\multiprocessing\queues.py in get(self, block, timeout) 104 if not self._poll(timeout): --> 105 raise Empty 106 elif not self._poll(): Empty: The above exception was the direct cause of the following exception: RuntimeError Traceback (most recent call last) <ipython-input-31-6ab367d014c4> in <module> 22 23 # Train the model ---> 24 model.train_model(train_df) 25 26 # Evaluate the model ~\AppData\Roaming\Python\Python36\site-packages\simpletransformers\classification\classification_model.py in train_model(self, train_df, multi_label, output_dir, show_running_loss, args, eval_df, verbose, **kwargs) 409 eval_df=eval_df, 410 verbose=verbose, --> 411 **kwargs, 412 ) 413 ~\AppData\Roaming\Python\Python36\site-packages\simpletransformers\classification\classification_model.py in train(self, train_dataloader, output_dir, multi_label, show_running_loss, eval_df, verbose, **kwargs) 627 mininterval=0, 628 ) --> 629 for step, batch in enumerate(batch_iterator): 630 if steps_trained_in_current_epoch > 0: 631 steps_trained_in_current_epoch -= 1 ~\Anaconda3\envs\py35\lib\site-packages\tqdm\notebook.py in __iter__(self, *args, **kwargs) 232 def __iter__(self, *args, **kwargs): 233 try: --> 234 for obj in super(tqdm_notebook, self).__iter__(*args, **kwargs): 235 # return super(tqdm...) will not catch exception 236 yield obj ~\Anaconda3\envs\py35\lib\site-packages\tqdm\std.py in __iter__(self) 1169 1170 try: -> 1171 for obj in iterable: 1172 yield obj 1173 # Update and possibly print the progressbar. 
~\Anaconda3\envs\py35\lib\site-packages\torch\utils\data\dataloader.py in __next__(self) 433 if self._sampler_iter is None: 434 self._reset() --> 435 data = self._next_data() 436 self._num_yielded += 1 437 if self._dataset_kind == _DatasetKind.Iterable and \ ~\Anaconda3\envs\py35\lib\site-packages\torch\utils\data\dataloader.py in _next_data(self) 1066 1067 assert not self._shutdown and self._tasks_outstanding > 0 -> 1068 idx, data = self._get_data() 1069 self._tasks_outstanding -= 1 1070 if self._dataset_kind == _DatasetKind.Iterable: ~\Anaconda3\envs\py35\lib\site-packages\torch\utils\data\dataloader.py in _get_data(self) 1032 else: 1033 while True: -> 1034 success, data = self._try_get_data() 1035 if success: 1036 return data ~\Anaconda3\envs\py35\lib\site-packages\torch\utils\data\dataloader.py in _try_get_data(self, timeout) 883 if len(failed_workers) > 0: 884 pids_str = ', '.join(str(w.pid) for w in failed_workers) --> 885 raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e 886 if isinstance(e, queue.Empty): 887 return (False, None) RuntimeError: DataLoader worker (pid(s) 15784, 13540, 16356, 15852, 2700, 10548, 21112, 21788, 23504, 23484, 23100, 20344, 22120, 22668, 21924) exited unexpectedly
有任何想法吗?谢谢 :)