我在 tensorflow 中使用 CNN 构建了模型,准确率超过 90%。它确实有效,但我不知道如何使用这个模型来检测带有我训练过的边界框的对象。我的模型包括许多类,并且曾经与类名称相关联的标签。我读过一些关于 ssd 的方法,它可以做到这一点,但我真的不明白它是如何工作的。下面是我的 CNN:
def cnn_model_fn(features,labels,mode):
#Input layer
input_layer = tf.reshape(features["x"],[-1,28,28,1])
#Convolutional layer 1
conv1 = tf.layers.conv2d(
inputs=input_layer,
filters=32,
kernel_size=[5,5],
padding="same",
activation=tf.nn.relu)
#Pooling Layer 1
pool1 = tf.layers.max_pooling2d(inputs=conv1,pool_size=[2,2],strides=2)
#Convolutional layer 2
conv2 = tf.layers.conv2d(
inputs=pool1,
filters=64,
kernel_size=[5,5],
padding="same",
activation=tf.nn.relu)
#Pooling layer 2
pool2 = tf.layers.max_pooling2d(inputs=conv2,pool_size=[2,2],strides=2)
#Debse layer
pool2_flat = tf.reshape(pool2,[-1,7*7*64])
dense = tf.layers.dense(inputs=pool2_flat,units=1024,activation=tf.nn.relu)
#Dropout
dropout = tf.layers.dropout(inputs=dense,rate=0.4,training=mode == tf.estimator.ModeKeys.TRAIN)
#Logits layer
logits = tf.layers.dense(inputs=dropout,units=10)
predictions = {
"classes":tf.argmax(input=logits,axis=1),
"probabilities":tf.nn.softmax(logits,name="softmax_tensor")
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode,predictions=predictions)
#Calculate Loss
onehot_labels = tf.one_hot(indices=tf.cast(labels,tf.int32),depth=10)
loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,logits=logits)
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(
loss=loss,
global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode,loss=loss,train_op=train_op)
eval_metric_ops = {
"accuracy":tf.metrics.accuracy(labels=labels,predictions=predictions["classes"])
}
return tf.estimator.EstimatorSpec(mode=mode,loss=loss,eval_metric_ops=eval_metric_ops)
我用 main 运行我的应用程序:
def main(unused_argv):
# Load training and eval data
train_data_dir = "W:/Projects/AutoDrive/Training"
test_data_dir = "W:/Projects/AutoDrive/Testing"
images,labels = load_data(train_data_dir)
test_images,test_labels = load_data(test_data_dir)
print("Labels: {0} \nImages: {1}".format(len(set(labels)),len(images)))
for image in images[:5]:
print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))
images = [skimage.transform.resize(image,(28,28,1)) for image in images]
for image in images[:5]:
print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))
images = np.asarray(images,dtype=np.float32)
labels = np.asarray(labels,dtype=np.int32)
# Create the Estimator
TSRecognition_classifier = tf.estimator.Estimator(
model_fn=cnn_model_fn, model_dir="/tmp/TSRecognition_convnet_model")
# Set up logging for predictions
# Log the values in the "Softmax" tensor with label "probabilities"
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
tensors=tensors_to_log, every_n_iter=50)
# Train the model
train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": images},
y=labels,
batch_size=100,
num_epochs=None,
shuffle=True)
TSRecognition_classifier.train(
input_fn=train_input_fn,
steps=20000,
hooks=[logging_hook])
# Evaluate the model and print results
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": test_images},
y=test_labels,
num_epochs=1,
shuffle=False)
eval_results = TSRecognition_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)
如果您想查看,这是完整的代码:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
import os
import skimage.data
import skimage.transform
import matplotlib
import matplotlib.pyplot as plt
tf.logging.set_verbosity(tf.logging.INFO)
def load_data(data_dir):
"""Load Data and return two lists"""
directories = [d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir,d))]
list_labels = []
list_images = []
for d in directories:
label_dir = os.path.join(data_dir,d)
file_names = [os.path.join(label_dir,f) for f in os.listdir(label_dir) if f.endswith(".ppm")]
for f in file_names:
list_images.append(skimage.data.imread(f))
list_labels.append(int(d))
return list_images,list_labels
def display_images_and_labels(images,labels):
unique_labels = set(labels)
plt.figure(figsize=(15,15))
i = 1
for label in unique_labels:
image = images[labels.index(label)]
plt.subplot(8,8,i)
plt.axis('off')
plt.title("Label {0} ({1})".format(label,labels.count(label)))
i += 1
_ = plt.imshow(image)
plt.show()
def cnn_model_fn(features,labels,mode):
#Input layer
input_layer = tf.reshape(features["x"],[-1,28,28,1])
#Convolutional layer 1
conv1 = tf.layers.conv2d(
inputs=input_layer,
filters=32,
kernel_size=[5,5],
padding="same",
activation=tf.nn.relu)
#Pooling Layer 1
pool1 = tf.layers.max_pooling2d(inputs=conv1,pool_size=[2,2],strides=2)
#Convolutional layer 2
conv2 = tf.layers.conv2d(
inputs=pool1,
filters=64,
kernel_size=[5,5],
padding="same",
activation=tf.nn.relu)
#Pooling layer 2
pool2 = tf.layers.max_pooling2d(inputs=conv2,pool_size=[2,2],strides=2)
#Debse layer
pool2_flat = tf.reshape(pool2,[-1,7*7*64])
dense = tf.layers.dense(inputs=pool2_flat,units=1024,activation=tf.nn.relu)
#Dropout
dropout = tf.layers.dropout(inputs=dense,rate=0.4,training=mode == tf.estimator.ModeKeys.TRAIN)
#Logits layer
logits = tf.layers.dense(inputs=dropout,units=10)
predictions = {
"classes":tf.argmax(input=logits,axis=1),
"probabilities":tf.nn.softmax(logits,name="softmax_tensor")
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode,predictions=predictions)
#Calculate Loss
onehot_labels = tf.one_hot(indices=tf.cast(labels,tf.int32),depth=10)
loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,logits=logits)
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(
loss=loss,
global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode,loss=loss,train_op=train_op)
eval_metric_ops = {"accuracy":tf.metrics.accuracy(labels=labels,predictions=predictions["classes"])
}
return tf.estimator.EstimatorSpec(mode=mode,loss=loss,eval_metric_ops=eval_metric_ops)
def main(unused_argv):
# Load training and eval data
train_data_dir = "W:/Projects/AutoDrive/Training"
test_data_dir = "W:/Projects/AutoDrive/Testing"
images,labels = load_data(train_data_dir)
test_images,test_labels = load_data(test_data_dir)
print("Labels: {0} \nImages: {1}".format(len(set(labels)),len(images)))
for image in images[:5]:
print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))
images = [skimage.transform.resize(image,(28,28,1)) for image in images]
for image in images[:5]:
print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))
images = np.asarray(images,dtype=np.float32)
labels = np.asarray(labels,dtype=np.int32)
# Create the Estimator
TSRecognition_classifier = tf.estimator.Estimator(
model_fn=cnn_model_fn, model_dir="/tmp/TSRecognition_convnet_model")
# Set up logging for predictions
# Log the values in the "Softmax" tensor with label "probabilities"
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
tensors=tensors_to_log, every_n_iter=50)
# Train the model
train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": images},
y=labels,
batch_size=100,
num_epochs=None,
shuffle=True)
TSRecognition_classifier.train(
input_fn=train_input_fn,
steps=20000,
hooks=[logging_hook])
# Evaluate the model and print results
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": test_images},
y=test_labels,
num_epochs=1,
shuffle=False)
eval_results = TSRecognition_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)
if __name__ == "__main__":
tf.app.run()
另外,我看过视频,我认为它可以帮助我。但他们只是帮助我训练单个对象。有什么想法可以帮助我吗?