我正在尝试基于 ResNet50 架构构建一个产品识别工具,模型定义如下:
def get_siamese_model(input_shape):
    """Build a siamese similarity network on top of a frozen ResNet50 encoder.

    Both inputs are encoded by the same network (shared weights): an
    ImageNet-initialised ResNet50 without its classification head and with
    global average pooling, followed by a trainable 4096-unit sigmoid dense
    layer. The element-wise absolute difference of the two encodings feeds a
    single sigmoid unit that outputs the similarity score.

    Args:
        input_shape: Shape of one input image, without the batch dimension.

    Returns:
        An uncompiled Keras ``Model`` that takes ``[left_image, right_image]``
        and outputs one score per pair.
    """
    # Define the tensors for the two input images
    left_input = Input(input_shape)
    right_input = Input(input_shape)

    # Pretrained backbone. With include_top=False and pooling='avg' it already
    # ends in one feature vector per image, so there is no head layer to remove.
    backbone = ResNet50(include_top=False, pooling='avg', weights='imagenet')

    # Freeze the backbone explicitly. Mutating `model.layers` (e.g. with
    # `.pop()`) is not a supported way to edit a Sequential model: depending on
    # the Keras version it is either a no-op on a copy or removes the backbone
    # from the list that a later "freeze every layer" loop iterates over,
    # which leaves the pretrained weights trainable.
    backbone.trainable = False

    # NOTE(review): ImageNet weights presumably expect images that went through
    # the ResNet50 `preprocess_input` transform - confirm the data pipeline
    # applies it before feeding this model.

    # Shared encoder: frozen backbone + trainable embedding layer
    model = Sequential()
    model.add(backbone)
    model.add(Dense(4096,
                    activation='sigmoid',
                    kernel_regularizer=l2(1e-3),
                    kernel_initializer=initialize_weights,
                    bias_initializer=initialize_bias))

    # Generate the encodings (feature vectors) for the two images
    encoded_l = model(left_input)
    encoded_r = model(right_input)

    # Add a customized layer to compute the absolute difference between the encodings
    L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])

    # Add a dense layer with a sigmoid unit to generate the similarity score
    prediction = Dense(1, activation='sigmoid', bias_initializer=initialize_bias)(L1_distance)

    # Connect the inputs with the outputs
    siamese_net = Model(inputs=[left_input, right_input], outputs=prediction)
    return siamese_net
下面是用于生成训练批次的函数:
def gen_random_batch(in_groups, batch_halfsize = 8):
    """Sample a balanced batch of matching and non-matching image pairs.

    The first ``batch_halfsize`` pairs take both images from the same group
    (label 1); the next ``batch_halfsize`` pairs take the second image from a
    different, randomly chosen group (label 0).

    Args:
        in_groups: Sequence of per-group arrays; each is indexed along its
            first axis and must expose ``.shape[0]``.
        batch_halfsize: Number of pairs drawn for each of the two labels.

    Returns:
        Tuple ``(images_a, images_b, labels)`` of arrays stacked along axis 0,
        each holding ``2 * batch_halfsize`` entries.
    """
    group_ids = list(range(len(in_groups)))

    def pick_image(group_id):
        # One uniformly random sample from the given group.
        members = in_groups[group_id]
        return members[np.random.choice(range(members.shape[0]))]

    left_images, right_images, labels = [], [], []
    for label in (1, 0):
        left_groups = np.random.choice(group_ids, size=batch_halfsize)
        left_images.extend([pick_image(g) for g in left_groups])
        if label == 1:
            # Matching pairs: the partner comes from the same group.
            right_groups = left_groups
        else:
            # Non-matching pairs: the partner comes from any other group.
            right_groups = [
                np.random.choice([other for other in group_ids if other != g])
                for g in left_groups
            ]
        labels.extend([label] * batch_halfsize)
        right_images.extend([pick_image(g) for g in right_groups])

    return np.stack(left_images, 0), np.stack(right_images, 0), np.stack(labels, 0)
def siam_gen(in_groups, batch_size = 32):
    """Yield an endless stream of training batches for the siamese network.

    Args:
        in_groups: Per-group image arrays to sample pairs from; passed
            straight to ``gen_random_batch``.
        batch_size: Target number of pairs per batch. Half are matching and
            half non-matching, so an odd value yields ``batch_size - 1`` pairs.

    Yields:
        ``([images_a, images_b], labels)`` tuples as produced by
        ``gen_random_batch``.
    """
    half_size = batch_size // 2
    while True:
        # Sample from the groups handed to this generator rather than from the
        # module-level `train_groups`, so the argument is actually honoured.
        pv_a, pv_b, pv_sim = gen_random_batch(in_groups, half_size)
        yield [pv_a, pv_b], pv_sim
权重和偏置的初始化设置:
def initialize_weights(shape, dtype=None):
    """Kernel initializer drawing from a normal distribution.

    Args:
        shape: Shape of the weight tensor to create.
        dtype: Optional dtype forwarded to the backend.

    Returns:
        A backend tensor of the given shape sampled with mean 0.0 and
        standard deviation 0.01.
    """
    weights = K.random_normal(shape, mean=0.0, stddev=0.01, dtype=dtype)
    return weights
def initialize_bias(shape, dtype=None):
    """Bias initializer drawing from a normal distribution.

    Args:
        shape: Shape of the bias tensor to create.
        dtype: Optional dtype forwarded to the backend.

    Returns:
        A backend tensor of the given shape sampled with mean 0.5 and
        standard deviation 0.01.
    """
    biases = K.random_normal(shape, mean=0.5, stddev=0.01, dtype=dtype)
    return biases
模型的编译和训练:
# Build the siamese network; the per-image input shape is taken from x_train.
model = get_siamese_model(x_train.shape[1:])

# Binary cross-entropy on the pair label, optimised with Adam.
optimizer = Adam(lr = 0.0001)
model.compile(
    optimizer=optimizer,
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

# Draw one fixed validation set from the test groups (half size 1024).
valid_a, valid_b, valid_sim = gen_random_batch(test_groups, 1024)

# Train on freshly sampled batches: 5 epochs of 100 generator steps each.
loss_history = model.fit_generator(
    siam_gen(train_groups),
    steps_per_epoch=100,
    epochs=5,
    validation_data=([valid_a, valid_b], valid_sim),
    verbose=True,
)
我想知道为什么这个网络无法识别非常相似的图片。我还按照这篇论文(http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf )中描述的架构构建了另一个网络,这个网络在识别产品方面确实做得很好:
def get_siamese_model(input_shape):
    """Build the convolutional siamese network that is trained from scratch.

    A single encoder (shared weights) maps each input image to a 4096-unit
    sigmoid embedding through four convolution stages, with max-pooling after
    the first three. The element-wise absolute difference of the two
    embeddings feeds a single sigmoid unit that outputs the similarity score.

    Args:
        input_shape: Shape of one input image, without the batch dimension.

    Returns:
        An uncompiled Keras ``Model`` that takes ``[left_image, right_image]``
        and outputs one score per pair.
    """
    # Input tensors for the two images of a pair.
    left_input = Input(shape=input_shape)
    right_input = Input(shape=input_shape)

    # Shared encoder. The first convolution does not set a bias_initializer,
    # unlike the later ones.
    model = Sequential([
        Conv2D(filters=64, kernel_size=(10, 10), activation='relu',
               input_shape=input_shape,
               kernel_initializer=initialize_weights,
               kernel_regularizer=l2(2e-4)),
        MaxPooling2D(),
        Conv2D(filters=128, kernel_size=(7, 7), activation='relu',
               kernel_initializer=initialize_weights,
               bias_initializer=initialize_bias,
               kernel_regularizer=l2(2e-4)),
        MaxPooling2D(),
        Conv2D(filters=128, kernel_size=(4, 4), activation='relu',
               kernel_initializer=initialize_weights,
               bias_initializer=initialize_bias,
               kernel_regularizer=l2(2e-4)),
        MaxPooling2D(),
        Conv2D(filters=256, kernel_size=(4, 4), activation='relu',
               kernel_initializer=initialize_weights,
               bias_initializer=initialize_bias,
               kernel_regularizer=l2(2e-4)),
        Flatten(),
        Dense(units=4096, activation='sigmoid',
              kernel_regularizer=l2(1e-3),
              kernel_initializer=initialize_weights,
              bias_initializer=initialize_bias),
    ])

    # Encode both images with the same weights.
    encoded_l = model(left_input)
    encoded_r = model(right_input)

    # Element-wise absolute difference between the two encodings.
    L1_distance = Lambda(lambda pair: K.abs(pair[0] - pair[1]))([encoded_l, encoded_r])

    # Single sigmoid unit turning the distance vector into a similarity score.
    score_layer = Dense(units=1, activation='sigmoid', bias_initializer=initialize_bias)
    prediction = score_layer(L1_distance)

    return Model(inputs=[left_input, right_input], outputs=prediction)
我想知道为什么使用预训练层的网络,效果反而比从头开始训练的网络更差。