这是我的代码,我使用 pytorch-ignite。样本标签的形状是 (batch_size,),我的网络输出 y_pred 是 (batch_size,10),10 是我的班级人数。我 criterion = F.cross_entropy
用作损失函数。一个时代结束后,我有一个错误,如:
Epoch [1/50]: 100%|██████████| 63/63 [00:55<00:00, 1.12it/s]
Current run is terminating due to exception: y and y_pred must have same shape of (batch_size, num_categories, ...) and num_categories > 1.
Engine run is terminating due to exception: y and y_pred must have same shape of (batch_size, num_categories, ...) and num_categories > 1.
Engine run is terminating due to exception: y and y_pred must have same shape of (batch_size, num_categories, ...) and num_categories > 1.
Traceback (most recent call last):
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3418, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-b8f3a45f8e35>", line 1, in <module>
runfile('/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/tools/train_net.py', wdir='/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/tools')
File "/home/omid/OMID/program/pycharm-professional-2020.2.4/pycharm-2020.2.4/plugins/python/helpers/pydev/_pydev_bundle/pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "/home/omid/OMID/program/pycharm-professional-2020.2.4/pycharm-2020.2.4/plugins/python/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/tools/train_net.py", line 73, in <module>
main()
File "/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/tools/train_net.py", line 69, in main
train(cfg)
File "/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/tools/train_net.py", line 48, in train
do_train(
File "/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/engine/trainer.py", line 113, in do_train
trainer.run(train_loader, max_epochs=epochs)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 702, in run
return self._internal_run()
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 775, in _internal_run
self._handle_exception(e)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 469, in _handle_exception
raise e
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 752, in _internal_run
self._fire_event(Events.EPOCH_COMPLETED)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 424, in _fire_event
func(*first, *(event_args + others), **kwargs)
File "/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/engine/trainer.py", line 73, in log_training_results
evaluator.run(train_loader)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 702, in run
return self._internal_run()
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 775, in _internal_run
self._handle_exception(e)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 469, in _handle_exception
raise e
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 745, in _internal_run
time_taken = self._run_once_on_dataset()
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 850, in _run_once_on_dataset
self._handle_exception(e)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 469, in _handle_exception
raise e
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 834, in _run_once_on_dataset
self._fire_event(Events.ITERATION_COMPLETED)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 424, in _fire_event
func(*first, *(event_args + others), **kwargs)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/metrics/metric.py", line 296, in iteration_completed
self.update(output)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/metrics/metric.py", line 575, in wrapper
func(self, *args, **kwargs)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/metrics/accuracy.py", line 149, in update
self._check_shape(output)
File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/metrics/accuracy.py", line 47, in _check_shape
raise ValueError(
ValueError: y and y_pred must have same shape of (batch_size, num_categories, ...) and num_categories > 1.
这是我的调试部分截图:
从你可以看到y的形状是 (16,) 并且y_pred的形状是 (16,10) 并且y_shape和y_pred_shape都是 16
# encoding: utf-8
import logging
import torch
from ignite.contrib.handlers import ProgressBar
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.handlers import ModelCheckpoint, Timer
from ignite.metrics import Accuracy, Loss, RunningAverage
from ignite.metrics.precision import Precision
from ignite.metrics.recall import Recall
from ignite.metrics.metrics_lambda import MetricsLambda
def do_train(
cfg,
model,
train_loader,
val_loader,
optimizer,
scheduler,
loss_fn,
):
log_period = cfg.SOLVER.LOG_PERIOD
checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
output_dir = cfg.OUTPUT_DIR
device = cfg.MODEL.DEVICE
device = torch.device("cuda")
epochs = cfg.SOLVER.MAX_EPOCHS
model = model.to(device)
logger = logging.getLogger("template_model.train")
logger.info("Start training")
precision = Precision(average=True, is_multilabel=True)
recall = Recall(average=True, is_multilabel=True)
F1 = precision * recall * 2 / (precision + recall + 1e-20)
F1 = MetricsLambda(lambda t: torch.mean(t).item(), F1)
trainer = create_supervised_trainer(model, optimizer, loss_fn, device=device)
evaluator = create_supervised_evaluator(model, metrics={'accuracy': Accuracy(is_multilabel=True),
'precision': precision,
'recall': recall,
'f1': F1,
'ce_loss': Loss(loss_fn)}, device=device)
checkpointer = ModelCheckpoint(output_dir, 'music', n_saved=10, require_empty=False)
timer = Timer(average=True)
trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'model': model,
'optimizer': optimizer})
timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)
RunningAverage(output_transform=lambda x: x).attach(trainer, 'avg_loss')
pbar = ProgressBar(persist=True, bar_format="")
pbar.attach(trainer)
@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
iter = (engine.state.iteration - 1) % len(train_loader) + 1
if iter % log_period == 0:
logger.info("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
.format(engine.state.epoch, iter, len(train_loader), engine.state.metrics['avg_loss']))
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
evaluator.run(train_loader)
metrics = evaluator.state.metrics
avg_accuracy = metrics['accuracy']
precision = metrics['precision']
recall = metrics['recall']
f1 = metrics['f1']
avg_loss = metrics['ce_loss']
logger.info(
"Training Results - Epoch: {} Avg accuracy: {:.3f}, precision: {:.3f}, recall: {:.3f}, f1 score: {:.3f}, Avg Loss: {:.3f}"
.format(engine.state.epoch, avg_accuracy, precision, recall, f1, avg_loss))
if val_loader is not None:
@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
evaluator.run(val_loader)
metrics = evaluator.state.metrics
avg_accuracy = metrics['accuracy']
precision = metrics['precision']
recall = metrics['recall']
f1 = metrics['f1']
avg_loss = metrics['ce_loss']
logger.info(
"Validation Results - Epoch: {} Avg accuracy: {:.3f}, precision: {:.3f}, recall: {:.3f}, f1 score: {:.3f}, Avg Loss: {:.3f}"
.format(engine.state.epoch, avg_accuracy, precision, recall, f1, avg_loss)
)
# adding handlers using `trainer.on` decorator API
@trainer.on(Events.EPOCH_COMPLETED)
def print_times(engine):
logger.info('Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
.format(engine.state.epoch, timer.value() * timer.step_count,
train_loader.batch_size / timer.value()))
timer.reset()
trainer.run(train_loader, max_epochs=epochs)