我正在尝试将 CNN + 面部地标结合起来进行分类任务 -
流程就像 -
- 使用 pandas 读取文件名
- 从标签中减去 1,使它们在 keras 的范围内为 0 到 6
- 使用 renet50 作为基础模型 (layers.trainable = True)
- 使用 mlxtend 的 extract_face_landmarks 识别人脸和地标 5 将基于 restnet50 的预处理图像和地标保存在各自的数组中,并将它们存储为 npy 文件
下面是代码 -
data = pd.read_csv(os.path.join(src_dir, 'list_partition_demo.txt'), names= ['file_name', 'label'], delimiter= " ")
data['use'] = data['file_name'].str.split('_').str[0]
data['label'] = data['label'] -1
for category in data.use.unique():
print(category)
if not os.path.exists(category):
try:
os.makedirs('train_test' + '/' + category)
except OSError as e:
if e.errno == errno.EEXIST and os.path.isdir('train_test'):
pass
else:
raise
category_data = data[data['use'] == category]
labels = category_data['label'].values
print(labels)
images = []
labels_list = []
landmarks = []
hog_features = []
hog_images = []
for i in range(len(category_data)):
image = cv2.imread(os.path.join(src_dir, 'Original Demo', category_data.iloc[i][0]))
#Resizing image differently for cnn and landmarks as (mlxtend(dlib wrapper) is not recoganizing small scaleed images)
image_cnn = cv2.resize(image, (image_height_cnn, image_width_cnn))
image_landmarks = cv2.resize(image, (image_height_landmarks, image_width_landmarks))
#extract landmark
face_landmarks = extract_face_landmarks(image_landmarks)
# Check if face, landmarks are extracted and then create final Database for training -
if np.count_nonzero(face_landmarks) > 1:
landmarks.append(face_landmarks)
# Preprocessing as per resnet50
#img = np.expand_dims(image_cnn, axis=0)
img = tf.keras.applications.resnet50.preprocess_input(image_cnn)
images.append(image_cnn)
# appending labels
labels_list.append(category_data.iloc[i][1])
else:
pass
print(np.shape(images))
print(np.shape(labels_list))
np.save('train_test' + '/' + category + '/images_test.npy', images)
np.save('train_test' + '/' + category + '/labels_test.npy', labels_list)
np.save('train_test' + '/' + category + '/landmarks_test.npy', landmarks)
cnn_model = tf.keras.applications.ResNet50(include_top= False, weights= 'imagenet',
input_shape= (image_height_cnn, image_width_cnn,3))
for layer in cnn_model.layers:
layer.trainable = True
inputs_x = tf.keras.Input(shape= (image_height_cnn, image_width_cnn,3))
x = cnn_model(inputs_x, training = True)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(100, activation = 'relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(128, activation= 'relu')(x)
flat_cnn = tf.keras.layers.Flatten()(x)
cnn = tf.keras.Model(inputs_x, flat_cnn)
cnn.summary()
# Landmarks Model
inputs_y = tf.keras.layers.Input(shape = (68,2))
y = tf.keras.layers.Dense(256, activation = 'relu')(inputs_y)
y = tf.keras.layers.BatchNormalization()(y)
y = tf.keras.layers.Dense(256, activation = 'relu')(y)
y = tf.keras.layers.BatchNormalization()(y)
y = tf.keras.layers.Dense(128, activation = 'relu')(y)
flat_landmarks = tf.keras.layers.Flatten()(y)
flat_landmarks = tf.keras.layers.Dense(6, activation= 'softmax')(flat_landmarks)
landmarks = tf.keras.Model(inputs_y, flat_landmarks)
landmarks.summary()
merged_cnn_land = tf.keras.layers.concatenate([cnn.output, landmarks.output])
hidden = tf.keras.layers.Dense(128, activation = 'relu')(merged_cnn_land)
hidden = tf.keras.layers.BatchNormalization()(hidden)
hidden = tf.keras.layers.Dropout(0.4)(hidden)
hidden = tf.keras.layers.Dense(128, activation = 'relu')(hidden)
hidden = tf.keras.layers.BatchNormalization()(hidden)
hidden = tf.keras.layers.Dropout(0.4)(hidden)
hidden = tf.keras.layers.Dense(64, activation = 'relu')(hidden)
hidden = tf.keras.layers.BatchNormalization()(hidden)
hidden = tf.keras.layers.Dense(28, activation = 'relu')(hidden)
output = tf.keras.layers.Dense(6, activation = 'softmax')(hidden)
final_model = tf.keras.Model(inputs= [cnn.input, landmarks.input], outputs = output)
model_checkpoint=tf.keras.callbacks.ModelCheckpoint('transfer_learning_model.h5',
save_best_only=True,
monitor='val_accuracy',
mode='max',
verbose=1)
early_stop = tf.keras.callbacks.EarlyStopping(patience=5)
reduce_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(patience = 5)
final_model.compile(optimizer = keras.optimizers.Nadam(learning_rate=0.001),
loss = 'sparse_categorical_crossentropy',
metrics=['accuracy'])
history = final_model.fit(x= [
np.load(train_path + '/images.npy'),
np.load(train_path + '/landmarks.npy')
],
y= np.load(train_path + '/labels.npy'),
validation_data= ( [
np.load(test_path + '/images.npy'),
np.load(test_path + '/landmarks.npy')
],
np.load(test_path + '/labels.npy')
),
callbacks=[early_stop, reduce_on_plateau, model_checkpoint],
epochs = 100)
现在当我运行它时,损失并没有减少,它进一步表明
需要帮助以了解可能出现的问题。我认为与输入有关,但我无法指出它。