I have a working CNN model that classifies images from a custom dataset loaded via a csv file. The dataset is shuffled and then split into training, validation and test datasets. Now I want to extend the image input with four additional inputs containing information/metadata about the images.

I already know that I should split my CNN model into two branches, one for the images and one for the additional inputs. My question is: how do I have to modify my data input so that the model processes the images and the additional inputs correctly?

I am very new to creating neural networks in TensorFlow. My entire code is basically based on this website. However, none of the topics there solve the problem in my code.

Here is my code (the additional metadata fields are called usage, completion, height, construction):

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.callbacks import History    # import from tensorflow.keras, not keras, to avoid mixing the two packages
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns
import io

# READ IMAGES, METADATA AND LABELS
df = pd.read_csv('dataset.csv')
df = df.sample(frac=1)
file_paths = df['file_name'].values
labels = df['label'].values
usages = df['usage'].values
completions = df['completion'].values
heights = df['height'].values
constructions = df['construction'].values

# SPLITTING THE DATASET INTO 80 % TRAINING DATA, 10 % VALIDATION DATA, 10 % TEST DATA
dataset_size = len(df.index)
train_size = int(0.8 * dataset_size)
val_size = int(0.1 * dataset_size)
test_size = int(0.1 * dataset_size)
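
# (not shown above - the three datasets are built from the arrays with a plain
# take/skip split; a simplified sketch, since df is already shuffled)
ds_full = tf.data.Dataset.from_tensor_slices(
    (file_paths, labels, usages, completions, heights, constructions))
ds_train = ds_full.take(train_size)
ds_val = ds_full.skip(train_size).take(val_size)
ds_test = ds_full.skip(train_size + val_size)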

img_height = 350
img_width = 350
batch_size = 16
autotune = tf.data.AUTOTUNE    # the tf.data.experimental alias is deprecated

# FUNCTION TO READ AND NORMALIZE THE IMAGES
def read_image(image_file, label, usg, com, hei, con):
    image = tf.io.read_file(image_file)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, (img_height, img_width))    # tf.image.resize takes (height, width)
    return tf.cast(image, tf.float32) / 255.0, label, \
           tf.cast(usg, tf.float32), tf.cast(com, tf.float32), \
           tf.cast(hei, tf.float32), tf.cast(con, tf.float32)

# FUNCTION FOR DATA AUGMENTATION
def augment(image, label, usg, com, hei, con):
    if tf.random.uniform((), minval=0, maxval=1) < 0.1:
        image = tf.tile(tf.image.rgb_to_grayscale(image), [1, 1, 3])
    image = tf.image.random_brightness(image, max_delta=0.25)
    image = tf.image.random_contrast(image, lower=0.75, upper=1.25)
    image = tf.image.random_saturation(image, lower=0.75, upper=1.25)
    image = tf.image.random_flip_left_right(image)
    return image, label, usg, com, hei, con

# SETUP FOR TRAINING, VALIDATION & TEST DATASET
ds_train = ds_train.map(read_image, num_parallel_calls=autotune)
ds_train = ds_train.cache()
ds_train = ds_train.map(augment, num_parallel_calls=autotune)
ds_train = ds_train.batch(batch_size)
ds_train = ds_train.prefetch(autotune)

ds_val = ds_val.map(read_image, num_parallel_calls=autotune)
ds_val = ds_val.batch(batch_size)
ds_val = ds_val.prefetch(autotune)

ds_test = ds_test.map(read_image, num_parallel_calls=autotune)
ds_test = ds_test.batch(batch_size)
ds_test = ds_test.prefetch(autotune)


## HOW TO SPLIT UP THE DATASET FOR THE MODEL FROM HERE? ##
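
# My guess (untested sketch): pack the four metadata scalars into a single
# 4-vector and restructure every element into ((image, metadata), label) so it
# matches the two model inputs defined below - is this the right approach?
def pack_inputs(image, label, usg, com, hei, con):
    metadata = tf.stack([usg, com, hei, con], axis=-1)    # (batch, 4) after batching
    return (image, metadata), label

ds_train = ds_train.map(pack_inputs, num_parallel_calls=autotune)
ds_val = ds_val.map(pack_inputs, num_parallel_calls=autotune)
ds_test = ds_test.map(pack_inputs, num_parallel_calls=autotune)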


# DEFINING FUNCTIONAL MODEL
input_img = keras.Input(shape=(img_width, img_height, 3))
input_dat = keras.Input(shape=(4,))                              # how is this shape supposed to be?
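# note: shape=(4,) expects one 4-vector of metadata per sample - exactly what the pack_inputs sketch above produces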

x = layers.Conv2D(16, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(input_img)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
out1 = layers.Flatten()(x)

out2 = layers.Dense(128, activation='relu')(input_dat)

merge = layers.concatenate([out1, out2])
x = layers.Dense(256, activation='relu')(merge)
x = layers.Dropout(0.35)(x)
output = layers.Dense(8, activation='softmax')(x)    # softmax (not sigmoid), so the 8 class probabilities sum to 1 for SparseCategoricalCrossentropy
model = keras.Model(inputs=[input_img, input_dat], outputs=output)

history = History() 

no_overfit = keras.callbacks.EarlyStopping(monitor='val_loss',              # stop training when overfitting occurs
                                           min_delta=0.015, patience=1,
                                           verbose=2, mode='auto')


# TRAINING STEP
model.compile(
    optimizer=keras.optimizers.Adam(3e-5),
    loss=[keras.losses.SparseCategoricalCrossentropy()],
    metrics=["accuracy"])


model.fit(ds_train, epochs=30, callbacks=[no_overfit, history],
          verbose=1, validation_data=ds_val)

So far I have only added the additional inputs to the dataset tensors and changed the model structure. How do I split my dataset between the two model branches, input_img and input_dat, so that each branch receives the correct input?

I also have a custom test step that plots a confusion matrix. How would this have to be modified? Here is the working code for image input only (my untested guess at the new loop follows after the code):

y_true = []
y_pred = []

for x, y in ds_test:
    y_true.append(y)
    predicts = model.predict(x)                                             # compute model predictions for test step
    y_pred.append(np.argmax(predicts, axis=-1))                                              
true = tf.concat([item for item in y_true], axis=0)
pred = tf.concat([item for item in y_pred], axis=0)

cm = confusion_matrix(true, pred)                                           # confusion matrix from sklearn
testacc = np.trace(cm) / float(np.sum(cm))                                  # calculating test accuracy
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
fig, ax = plt.subplots(figsize=(10, 10))
color = sns.light_palette("seagreen", as_cmap=False)
sns.heatmap(cm, annot=True, square=True, cmap=color, fmt=".3f",
            linewidths=0.6, linecolor='k', cbar_kws={"shrink": 0.8})
plt.yticks(rotation=0)
plt.xlabel('\nPredicted Labels', fontsize=18)
plt.ylabel('True Labels\n', fontsize=18)
plt.title('Multiclass Model - Confusion Matrix (Test Step)\n', fontsize=24)
plt.text(10, 1.1, 'Accuracy = {:0.4f}'.format(testacc), fontsize=20)
ax.axhline(y=8, color='k', linewidth=1.5)                                   # depending on amount of classes
ax.axvline(x=8, color='k', linewidth=1.5)
plt.show()
print('\naccuracy: {:0.4f}'.format(testacc))
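
If the elements are restructured into ((image, metadata), label) as sketched above, I think the loop itself barely changes, since x is then already the input tuple the model expects (untested):

for x, y in ds_test:                                                        # x is now the (image, metadata) tuple
    y_true.append(y)
    predicts = model.predict(x)                                             # predict accepts the input tuple directly
    y_pred.append(np.argmax(predicts, axis=-1))

The confusion matrix and plotting code would stay unchanged.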

Any help is greatly appreciated!!
