0

我正在尝试将 CNN + 面部地标结合起来进行分类任务 -

图像名称位于文本文件中,例如 - 文件输入

流程就像 -

  1. 使用 pandas 读取文件名
  2. 从标签中减去 1,使它们在 keras 的范围内为 0 到 6
  3. 使用 renet50 作为基础模型 (layers.trainable = True)
  4. 使用 mlxtend 的 extract_face_landmarks 识别人脸和地标 5 将基于 restnet50 的预处理图像和地标保存在各自的数组中,并将它们存储为 npy 文件

下面是代码 -

data = pd.read_csv(os.path.join(src_dir, 'list_partition_demo.txt'), names= ['file_name', 'label'], delimiter= " ")

data['use'] = data['file_name'].str.split('_').str[0]
data['label'] = data['label'] -1
for category in data.use.unique():
print(category)
if not os.path.exists(category):
    try:
        os.makedirs('train_test' + '/' + category)
    except OSError as e:    
        if e.errno == errno.EEXIST and os.path.isdir('train_test'):
            pass
        else:
            raise 
category_data = data[data['use'] == category]
labels = category_data['label'].values 

print(labels)

images = [] 
labels_list = [] 
landmarks = []
hog_features = []
hog_images = [] 

for i in range(len(category_data)):
    image = cv2.imread(os.path.join(src_dir, 'Original Demo', category_data.iloc[i][0])) 
    

    #Resizing image differently for cnn and landmarks as (mlxtend(dlib wrapper) is not recoganizing small scaleed images)
    image_cnn = cv2.resize(image, (image_height_cnn, image_width_cnn))
    image_landmarks = cv2.resize(image, (image_height_landmarks, image_width_landmarks))

    #extract landmark
    face_landmarks = extract_face_landmarks(image_landmarks)

    # Check if face, landmarks are extracted and then create final Database for training - 

    if np.count_nonzero(face_landmarks) > 1:
        landmarks.append(face_landmarks)

        # Preprocessing as per resnet50
        #img = np.expand_dims(image_cnn, axis=0)
        img = tf.keras.applications.resnet50.preprocess_input(image_cnn)
        images.append(image_cnn)

        
        # appending labels
        labels_list.append(category_data.iloc[i][1])
    
    else:
        pass

print(np.shape(images))
print(np.shape(labels_list))
  


np.save('train_test' + '/' + category + '/images_test.npy', images)
np.save('train_test' + '/' + category + '/labels_test.npy', labels_list)
np.save('train_test' + '/' + category + '/landmarks_test.npy', landmarks)


cnn_model = tf.keras.applications.ResNet50(include_top= False, weights= 'imagenet',
                                       input_shape= (image_height_cnn, image_width_cnn,3))

for layer in cnn_model.layers:
    layer.trainable = True

    inputs_x = tf.keras.Input(shape= (image_height_cnn, image_width_cnn,3))
x = cnn_model(inputs_x, training = True)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(100, activation = 'relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(128, activation= 'relu')(x)
flat_cnn = tf.keras.layers.Flatten()(x)

    cnn = tf.keras.Model(inputs_x, flat_cnn)
cnn.summary()

# Landmarks Model

inputs_y = tf.keras.layers.Input(shape = (68,2))
y = tf.keras.layers.Dense(256,  activation = 'relu')(inputs_y)
y = tf.keras.layers.BatchNormalization()(y)

y = tf.keras.layers.Dense(256,  activation = 'relu')(y)
y = tf.keras.layers.BatchNormalization()(y)
y = tf.keras.layers.Dense(128,  activation = 'relu')(y)

flat_landmarks = tf.keras.layers.Flatten()(y)
flat_landmarks = tf.keras.layers.Dense(6, activation= 'softmax')(flat_landmarks)

landmarks = tf.keras.Model(inputs_y, flat_landmarks)
landmarks.summary()

    merged_cnn_land = tf.keras.layers.concatenate([cnn.output, landmarks.output])
hidden = tf.keras.layers.Dense(128, activation = 'relu')(merged_cnn_land)
hidden = tf.keras.layers.BatchNormalization()(hidden)
hidden = tf.keras.layers.Dropout(0.4)(hidden)

hidden = tf.keras.layers.Dense(128, activation = 'relu')(hidden)
hidden = tf.keras.layers.BatchNormalization()(hidden)
hidden = tf.keras.layers.Dropout(0.4)(hidden)

hidden = tf.keras.layers.Dense(64, activation = 'relu')(hidden)
hidden = tf.keras.layers.BatchNormalization()(hidden)

hidden = tf.keras.layers.Dense(28, activation = 'relu')(hidden)


output = tf.keras.layers.Dense(6, activation = 'softmax')(hidden)
final_model = tf.keras.Model(inputs= [cnn.input, landmarks.input], outputs = output)

    model_checkpoint=tf.keras.callbacks.ModelCheckpoint('transfer_learning_model.h5', 
                                                    save_best_only=True, 
                                                    monitor='val_accuracy', 
                                                    mode='max', 
                                                    verbose=1)
early_stop = tf.keras.callbacks.EarlyStopping(patience=5)
reduce_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(patience = 5)

    final_model.compile(optimizer = keras.optimizers.Nadam(learning_rate=0.001),
              loss = 'sparse_categorical_crossentropy',
              metrics=['accuracy'])

    history = final_model.fit(x= [
                              np.load(train_path + '/images.npy'),
                              np.load(train_path + '/landmarks.npy')
                              ], 
                          y= np.load(train_path + '/labels.npy'),

                validation_data= ( [
                                    np.load(test_path + '/images.npy'),
                                    np.load(test_path + '/landmarks.npy')
                                    ],
                                    np.load(test_path + '/labels.npy')
                                    ),
                                   callbacks=[early_stop, reduce_on_plateau, model_checkpoint],
                                   epochs = 100)

现在当我运行它时,损失并没有减少,它进一步表明

超出最终模型

需要帮助以了解可能出现的问题。我认为与输入有关,但我无法指出它。

4

0 回答 0