
This is my code; I am using pytorch-ignite. The sample labels have shape (batch_size,), and my network output y_pred has shape (batch_size, 10), where 10 is my number of classes. I use criterion = F.cross_entropy as the loss function. After one epoch finishes, I get an error like:

Epoch [1/50]: 100%|██████████| 63/63 [00:55<00:00,  1.12it/s]
Current run is terminating due to exception: y and y_pred must have same shape of (batch_size, num_categories, ...) and num_categories > 1.
Engine run is terminating due to exception: y and y_pred must have same shape of (batch_size, num_categories, ...) and num_categories > 1.
Engine run is terminating due to exception: y and y_pred must have same shape of (batch_size, num_categories, ...) and num_categories > 1.
Traceback (most recent call last):
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3418, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-b8f3a45f8e35>", line 1, in <module>
    runfile('/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/tools/train_net.py', wdir='/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/tools')
  File "/home/omid/OMID/program/pycharm-professional-2020.2.4/pycharm-2020.2.4/plugins/python/helpers/pydev/_pydev_bundle/pydev_umd.py", line 197, in runfile
    pydev_imports.execfile(filename, global_vars, local_vars)  # execute the script
  File "/home/omid/OMID/program/pycharm-professional-2020.2.4/pycharm-2020.2.4/plugins/python/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
  File "/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/tools/train_net.py", line 73, in <module>
    main()
  File "/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/tools/train_net.py", line 69, in main
    train(cfg)
  File "/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/tools/train_net.py", line 48, in train
    do_train(
  File "/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/engine/trainer.py", line 113, in do_train
    trainer.run(train_loader, max_epochs=epochs)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 702, in run
    return self._internal_run()
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 775, in _internal_run
    self._handle_exception(e)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 469, in _handle_exception
    raise e
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 752, in _internal_run
    self._fire_event(Events.EPOCH_COMPLETED)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 424, in _fire_event
    func(*first, *(event_args + others), **kwargs)
  File "/home/omid/OMID/projects/python/mldl/NeuralMusicClassification/engine/trainer.py", line 73, in log_training_results
    evaluator.run(train_loader)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 702, in run
    return self._internal_run()
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 775, in _internal_run
    self._handle_exception(e)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 469, in _handle_exception
    raise e
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 745, in _internal_run
    time_taken = self._run_once_on_dataset()
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 850, in _run_once_on_dataset
    self._handle_exception(e)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 469, in _handle_exception
    raise e
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 834, in _run_once_on_dataset
    self._fire_event(Events.ITERATION_COMPLETED)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/engine/engine.py", line 424, in _fire_event
    func(*first, *(event_args + others), **kwargs)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/metrics/metric.py", line 296, in iteration_completed
    self.update(output)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/metrics/metric.py", line 575, in wrapper
    func(self, *args, **kwargs)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/metrics/accuracy.py", line 149, in update
    self._check_shape(output)
  File "/home/omid/anaconda3/envs/pytorch/lib/python3.8/site-packages/ignite/metrics/accuracy.py", line 47, in _check_shape
    raise ValueError(
ValueError: y and y_pred must have same shape of (batch_size, num_categories, ...) and num_categories > 1.
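For reference, the shapes described above are exactly what F.cross_entropy expects, so the loss computation itself does not appear to be where the shape check fails (the traceback points at ignite's Accuracy metric instead). A minimal, self-contained sketch, with sizes made up to match the description rather than taken from the project:

# Hypothetical sizes matching the description: 16 samples, 10 classes.
import torch
import torch.nn.functional as F

batch_size, num_classes = 16, 10
y_pred = torch.randn(batch_size, num_classes)       # network output (logits), shape (16, 10)
y = torch.randint(0, num_classes, (batch_size,))    # integer class labels, shape (16,)

loss = F.cross_entropy(y_pred, y)                   # runs without a shape error
print(loss.item())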


Here is a screenshot from my debugging session:

[debugger screenshot: y and y_pred shapes at the point of failure]

As you can see, the shape of y is (16,) and the shape of y_pred is (16, 10), and the batch dimension shown for both (y_shape and y_pred_shape in the debugger) is 16.
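For context, the error in the traceback is raised by ignite's Accuracy metric: constructed with is_multilabel=True, it requires y and y_pred to share the shape (batch_size, num_categories, ...), so single-label targets of shape (16,) fail its shape check. A minimal sketch that reproduces the same failure (sizes assumed from the screenshot; the exact message can vary between ignite versions):

import torch
from ignite.metrics import Accuracy

y_pred = torch.randn(16, 10)              # (batch_size, num_categories) logits
y = torch.randint(0, 10, (16,))           # (batch_size,) integer class indices

metric = Accuracy(is_multilabel=True)     # multilabel mode, as in the evaluator below
try:
    metric.update((y_pred, y))            # shape check fails here
except ValueError as e:
    print(e)  # "y and y_pred must have same shape of (batch_size, num_categories, ...) ..."

This matches the traceback above, where the exception comes from accuracy.py inside the evaluator that runs at EPOCH_COMPLETED, not from the trainer's loss computation. The full do_train code from engine/trainer.py follows: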


# encoding: utf-8


import logging

import torch
from ignite.contrib.handlers import ProgressBar
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.handlers import ModelCheckpoint, Timer
from ignite.metrics import Accuracy, Loss, RunningAverage
from ignite.metrics.precision import Precision
from ignite.metrics.recall import Recall

from ignite.metrics.metrics_lambda import MetricsLambda


def do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,
        loss_fn,
):
    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    device = torch.device("cuda")
    epochs = cfg.SOLVER.MAX_EPOCHS

    model = model.to(device)

    logger = logging.getLogger("template_model.train")
    logger.info("Start training")

    precision = Precision(average=True, is_multilabel=True)
    recall = Recall(average=True, is_multilabel=True)
    F1 = precision * recall * 2 / (precision + recall + 1e-20)
    F1 = MetricsLambda(lambda t: torch.mean(t).item(), F1)

    trainer = create_supervised_trainer(model, optimizer, loss_fn, device=device)
    evaluator = create_supervised_evaluator(model, metrics={'accuracy': Accuracy(is_multilabel=True),
                                                            'precision': precision,
                                                            'recall': recall,
                                                            'f1': F1,
                                                            'ce_loss': Loss(loss_fn)}, device=device)
    checkpointer = ModelCheckpoint(output_dir, 'music', n_saved=10, require_empty=False)
    timer = Timer(average=True)

    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'model': model,
                                                                     'optimizer': optimizer})

    timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)

    RunningAverage(output_transform=lambda x: x).attach(trainer, 'avg_loss')

    pbar = ProgressBar(persist=True, bar_format="")
    pbar.attach(trainer)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        iter = (engine.state.iteration - 1) % len(train_loader) + 1

        if iter % log_period == 0:
            logger.info("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
                        .format(engine.state.epoch, iter, len(train_loader), engine.state.metrics['avg_loss']))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        evaluator.run(train_loader)

        metrics = evaluator.state.metrics

        avg_accuracy = metrics['accuracy']
        precision = metrics['precision']
        recall = metrics['recall']
        f1 = metrics['f1']

        avg_loss = metrics['ce_loss']

        logger.info(
            "Training Results - Epoch: {} Avg accuracy: {:.3f}, precision: {:.3f}, recall: {:.3f}, f1 score: {:.3f},  Avg Loss: {:.3f}"
                .format(engine.state.epoch, avg_accuracy, precision, recall, f1, avg_loss))

    if val_loader is not None:
        @trainer.on(Events.EPOCH_COMPLETED)
        def log_validation_results(engine):
            evaluator.run(val_loader)
            metrics = evaluator.state.metrics

            avg_accuracy = metrics['accuracy']
            precision = metrics['precision']
            recall = metrics['recall']
            f1 = metrics['f1']

            avg_loss = metrics['ce_loss']
            logger.info(
                "Validation Results - Epoch: {} Avg accuracy: {:.3f}, precision: {:.3f}, recall: {:.3f}, f1 score: {:.3f}, Avg Loss: {:.3f}"
                    .format(engine.state.epoch, avg_accuracy, precision, recall, f1, avg_loss)
            )

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        logger.info('Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
                    .format(engine.state.epoch, timer.value() * timer.step_count,
                            train_loader.batch_size / timer.value()))
        timer.reset()

    trainer.run(train_loader, max_epochs=epochs)

