
I am training an image classification model on 21,000 images. I built the data pipeline with TensorFlow's tf.data API, but training is still far too slow even with it. I have also installed the GPU version of TensorFlow. Please help. At first I thought the Keras ImageDataGenerator was slowing down my training time, but now that I have switched to a tf.data pipeline it still does not seem to use my GPU. My entire code is below.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import ResNet50, EfficientNetB3, InceptionV3, DenseNet121
from tensorflow.keras.optimizers import Adam

# ignoring warnings
import warnings
warnings.simplefilter("ignore")
import os,cv2


base_dir = "D:/cassava-leaf-disease-classification/"
train_csv = pd.read_csv("D:/cassava-leaf-disease-classification/train.csv")
# print(train_csv.head())
df_sample = pd.read_csv("D:/cassava-leaf-disease-classification/sample_submission.csv")
train_images = "D:/cassava-leaf-disease-classification/train_images/"+train_csv['image_id']
# print(train_images)

# print(os.listdir(train_images))
train_labels = pd.read_csv(os.path.join(base_dir, "train.csv"))

# print(train_labels)
BATCH_SIZE = 16
EPOCHS = 25
STEPS_PER_EPOCH = int(len(train_labels) * 0.8 / BATCH_SIZE)
TARGET_SIZE = 300
# train_labels['label'] = train_labels.label.astype('str')
labels = train_labels.iloc[:,-1].values
# print(labels)

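# Decoder factory: returns a function that reads an image file, decodes it,
# scales pixel values to [0, 1], and resizes it to target_size.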
def build_decoder(with_labels=True, target_size=(TARGET_SIZE, TARGET_SIZE), ext='jpg'):
    def img_decode(img_path):
        file_bytes = tf.io.read_file(img_path)
        if ext == 'png':
            img = tf.image.decode_png(file_bytes, channels=3)
        elif ext in ['jpg', 'jpeg']:
            img = tf.image.decode_jpeg(file_bytes, channels=3)

        else:
            raise ValueError("Image extension not supported")

        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.resize(img, target_size)

        return img

    def decode_with_labels(img_path, label):
        return img_decode(img_path), label

    if with_labels:
        return decode_with_labels

    else:
        return img_decode


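# Augmenter factory: returns a function that applies random flips and
# light brightness/contrast/saturation jitter to a training image.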
def build_augmenter(with_labels=True):
    def augment(img):
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        img = tf.image.random_brightness(img, 0.1)
        img = tf.image.random_contrast(img, 0.9, 1.1)
        img = tf.image.random_saturation(img, 0.9, 1.1)
        return img

    def augment_with_labels(img, label):
        return augment(img), label

    if with_labels:
        return augment_with_labels

    else:
        return augment


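# Builds the tf.data pipeline: decode -> (augment) -> (repeat) -> (shuffle)
# -> batch -> prefetch.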
def build_dataset(paths, labels=None, bsize=32, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024,
                  cache_dir=""):
    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)

    if decode_fn is None:
        decode_fn = build_decoder(labels is not None)

    if augment_fn is None:
        augment_fn = build_augmenter(labels is not None)

    AUTO = tf.data.experimental.AUTOTUNE
    slices = paths if labels is None else (paths, labels)

    dset = tf.data.Dataset.from_tensor_slices(slices)
    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    # dset = dset.cache(cache_dir) if cache else dset
    dset = dset.map(augment_fn, num_parallel_calls=AUTO) if augment else dset
    dset = dset.repeat() if repeat else dset
    dset = dset.shuffle(shuffle) if shuffle else dset
    dset = dset.batch(bsize).prefetch(AUTO)

    return dset

# Train test split
train_img, valid_img, train_labels, valid_labels = train_test_split(
    train_images, labels, train_size=0.8, random_state=0)
# print(train, valid)


# Tensorflow datasets
train_df = build_dataset(
    train_img, train_labels, bsize=BATCH_SIZE,
    cache=True)

valid_df = build_dataset(
    valid_img, valid_labels, bsize=BATCH_SIZE,
    repeat=False, shuffle=False, augment=False,
    cache=True)


def create_model():
    model = models.Sequential()
    model.add(EfficientNetB3(include_top=False, weights='imagenet',
                             input_shape=(TARGET_SIZE,TARGET_SIZE,3)))
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(5,activation='softmax'))
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

model = create_model()
model.summary()

model_save = ModelCheckpoint('C:/Users/rosha/PycharmProjects/CLDD/saved_Models/EffNetB3_300_16_best_weights.h5',
                             save_best_only=True,
                             save_weights_only=True,
                             monitor='val_accuracy',
                             mode='max',
                             verbose=1
                             )

early_stop = EarlyStopping(monitor='val_accuracy',
                           min_delta=0.001,
                           patience=5,
                           mode='max',
                           verbose=1)

reduce_lr = ReduceLROnPlateau(monitor='val_accuracy',
                            factor=0.3,
                            patience=2,
                            min_delta=0.001,
                            mode='max',
                            verbose=1)

history = model.fit(
    train_df,
    validation_data=valid_df,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    callbacks=[model_save, early_stop, reduce_lr],
    verbose=1,
)


plt.rcParams.update({'font.size': 16})
hist = pd.DataFrame(history.history)
fig, (ax1, ax2) = plt.subplots(figsize=(12, 12), nrows=2, ncols=1)
hist['loss'].plot(ax=ax1, c='k', label='training loss')
hist['val_loss'].plot(ax=ax1, c='r', linestyle='--', label='validation loss')
ax1.legend()
hist['accuracy'].plot(ax=ax2, c='k', label='training accuracy')
hist['val_accuracy'].plot(ax=ax2, c='r', linestyle='--', label='validation accuracy')
ax2.legend()
plt.show()

model.save('./EffNetB3_300_16.h5')

1 Answer


So here is a short checklist I like to go through:

  1. Run the following code to check whether TensorFlow finds the GPU:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
  2. If the output is "Num GPUs Available: 0", you should check whether you actually installed tensorflow-gpu; you may also need to check that the supporting libraries are GPU builds as well (the sketch after this list combines these checks).

  3. If your libraries are correct, you will need to check that your CUDA driver installation is correct. This step is somewhat OS-dependent, but there are plenty of online tutorials for each OS. My favourite for TF is the official guide: https://www.tensorflow.org/install/gpu
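
To tie these checks together, here is a minimal diagnostic sketch (assuming a TF 2.x eager runtime; the exact device strings in the output can vary by version):

import tensorflow as tf

# Was this TensorFlow build compiled with CUDA support at all?
# If this prints False, the installed wheel is CPU-only and no
# driver fix will help; reinstall the GPU build first.
print("Built with CUDA:", tf.test.is_built_with_cuda())

# Which GPUs can the runtime actually see?
print("Visible GPUs:", tf.config.list_physical_devices('GPU'))

# Log device placement: a small test op should report a
# ".../device:GPU:0" device if the GPU is really being used.
tf.debugging.set_log_device_placement(True)
a = tf.random.uniform((1000, 1000))
b = tf.matmul(a, a)
print(b.device)

If the build reports CUDA support but no GPU is visible, the culprit is usually the CUDA/cuDNN driver stack rather than TensorFlow itself.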

Answered 2021-02-15T14:54:28.913