tensorflow - 如何在 TensorFlow 中使用经过训练的 CNN 模型检测对象并给出边界框

Question

我在 TensorFlow 中使用 CNN 构建了一个模型，准确率超过 90%。它确实有效，但我不知道如何使用这个模型来检测我训练过的对象并给出边界框。我的模型包含许多类别，每个标签都与一个类名称相关联。我读过一些关于 SSD 的方法，它可以做到这一点，但我并不真正理解它是如何工作的。下面是我的 CNN：

def cnn_model_fn(features, labels, mode):
    """Model function for a two-convolution CNN classifier (tf.estimator).

    Args:
        features: dict holding the input batch under key "x"; it is reshaped
            to [-1, 28, 28, 1].
        labels: integer class indices; cast to int32 and one-hot encoded.
            Not used in PREDICT mode.
        mode: a tf.estimator.ModeKeys value selecting which spec to build.

    Returns:
        tf.estimator.EstimatorSpec with predictions (PREDICT), loss and
        train_op (TRAIN), or loss and an accuracy metric (otherwise).
    """
    # Shared by the logits layer and the one-hot encoding so they cannot drift.
    # NOTE(review): must equal the number of distinct labels in the data - confirm.
    num_classes = 10

    # Input layer
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

    # Convolutional layer 1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer 1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional layer 2
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer 2
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Dense layer
    # Two stride-2 pools shrink 28 -> 14 -> 7; conv2 produces 64 channels.
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)

    # Dropout is only active while training.
    dropout = tf.layers.dropout(
        inputs=dense,
        rate=0.4,
        training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits layer
    logits = tf.layers.dense(inputs=dropout, units=num_classes)

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        # Named so that a LoggingTensorHook can look the tensor up by name.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss (needed for both TRAIN and EVAL)
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=num_classes)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Remaining mode: report accuracy alongside the loss.
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"]),
    }

    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

我用 main 运行我的应用程序：

def main(unused_argv):
    """Train the CNN estimator on the training set and evaluate it on the test set.

    Args:
        unused_argv: ignored by this function.
    """
    # Load training and eval data
    train_data_dir = "W:/Projects/AutoDrive/Training"
    test_data_dir = "W:/Projects/AutoDrive/Testing"

    images, labels = load_data(train_data_dir)
    test_images, test_labels = load_data(test_data_dir)

    print("Labels: {0} \nImages: {1}".format(len(set(labels)), len(images)))

    for image in images[:5]:
        print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))

    # The model reshapes every input to 28x28x1, so the training AND the test
    # images must both be resized before they are fed to the estimator.
    images = [skimage.transform.resize(image, (28, 28, 1)) for image in images]
    test_images = [skimage.transform.resize(image, (28, 28, 1)) for image in test_images]

    for image in images[:5]:
        print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))

    # numpy_input_fn is given numpy arrays rather than Python lists.
    images = np.asarray(images, dtype=np.float32)
    labels = np.asarray(labels, dtype=np.int32)
    test_images = np.asarray(test_images, dtype=np.float32)
    test_labels = np.asarray(test_labels, dtype=np.int32)

    # Create the Estimator
    TSRecognition_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="/tmp/TSRecognition_convnet_model")

    # Set up logging for predictions
    # Log the values in the "softmax_tensor" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": images},
        y=labels,
        batch_size=100,
        num_epochs=None,
        shuffle=True)

    TSRecognition_classifier.train(
        input_fn=train_input_fn,
        steps=20000,
        hooks=[logging_hook])

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_images},
        y=test_labels,
        num_epochs=1,
        shuffle=False)
    eval_results = TSRecognition_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

如果您想查看，这是完整的代码：

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
import os
import skimage.data
import skimage.transform
import matplotlib
import matplotlib.pyplot as plt

# Set TensorFlow's logging threshold to INFO.
tf.logging.set_verbosity(tf.logging.INFO)

def load_data(data_dir):
    """Read every ``.ppm`` image found one level below ``data_dir``.

    Each sub-directory of ``data_dir`` is treated as one class; its name is
    converted with ``int()`` and used as the label of every image inside it.

    Returns:
        A pair ``(images, labels)`` of parallel lists.
    """
    collected_images = []
    collected_labels = []

    for entry in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, entry)
        if not os.path.isdir(class_dir):
            continue  # only sub-directories represent classes

        ppm_paths = []
        for file_name in os.listdir(class_dir):
            if file_name.endswith(".ppm"):
                ppm_paths.append(os.path.join(class_dir, file_name))

        for ppm_path in ppm_paths:
            collected_images.append(skimage.data.imread(ppm_path))
            collected_labels.append(int(entry))

    return collected_images, collected_labels

def display_images_and_labels(images, labels):
    """Plot the first image of every distinct label in a single figure.

    Args:
        images: sequence of images, indexable in parallel with ``labels``.
        labels: list of labels (``list.index`` and ``list.count`` are used).
    """
    unique_labels = set(labels)
    plt.figure(figsize=(15, 15))
    for position, label in enumerate(unique_labels, start=1):
        # Pick the first image that carries this label.
        image = images[labels.index(label)]
        plt.subplot(8, 8, position)  # grid of 8 rows x 8 columns
        plt.axis('off')
        plt.title("Label {0} ({1})".format(label, labels.count(label)))
        plt.imshow(image)
    # Show once, after every subplot has been drawn; calling show() inside the
    # loop displayed the figure after each label instead of the full grid.
    plt.show()

def cnn_model_fn(features, labels, mode):
    """Model function for a two-convolution CNN classifier (tf.estimator).

    Args:
        features: dict holding the input batch under key "x"; it is reshaped
            to [-1, 28, 28, 1].
        labels: integer class indices; cast to int32 and one-hot encoded.
            Not used in PREDICT mode.
        mode: a tf.estimator.ModeKeys value selecting which spec to build.

    Returns:
        tf.estimator.EstimatorSpec with predictions (PREDICT), loss and
        train_op (TRAIN), or loss and an accuracy metric (otherwise).
    """
    # Shared by the logits layer and the one-hot encoding so they cannot drift.
    # NOTE(review): must equal the number of distinct labels in the data - confirm.
    num_classes = 10

    # Input layer
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

    # Convolutional layer 1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer 1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional layer 2
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer 2
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Dense layer
    # Two stride-2 pools shrink 28 -> 14 -> 7; conv2 produces 64 channels.
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)

    # Dropout is only active while training.
    dropout = tf.layers.dropout(
        inputs=dense,
        rate=0.4,
        training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits layer
    logits = tf.layers.dense(inputs=dropout, units=num_classes)

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        # Named so that a LoggingTensorHook can look the tensor up by name.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss (needed for both TRAIN and EVAL)
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=num_classes)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Remaining mode: report accuracy alongside the loss.
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"]),
    }

    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)


def main(unused_argv):
    """Train the CNN estimator on the training set and evaluate it on the test set.

    Args:
        unused_argv: ignored by this function.
    """
    # Load training and eval data
    train_data_dir = "W:/Projects/AutoDrive/Training"
    test_data_dir = "W:/Projects/AutoDrive/Testing"

    images, labels = load_data(train_data_dir)
    test_images, test_labels = load_data(test_data_dir)

    print("Labels: {0} \nImages: {1}".format(len(set(labels)), len(images)))

    for image in images[:5]:
        print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))

    # The model reshapes every input to 28x28x1, so the training AND the test
    # images must both be resized before they are fed to the estimator.
    images = [skimage.transform.resize(image, (28, 28, 1)) for image in images]
    test_images = [skimage.transform.resize(image, (28, 28, 1)) for image in test_images]

    for image in images[:5]:
        print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))

    # numpy_input_fn is given numpy arrays rather than Python lists.
    images = np.asarray(images, dtype=np.float32)
    labels = np.asarray(labels, dtype=np.int32)
    test_images = np.asarray(test_images, dtype=np.float32)
    test_labels = np.asarray(test_labels, dtype=np.int32)

    # Create the Estimator
    TSRecognition_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="/tmp/TSRecognition_convnet_model")

    # Set up logging for predictions
    # Log the values in the "softmax_tensor" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": images},
        y=labels,
        batch_size=100,
        num_epochs=None,
        shuffle=True)

    TSRecognition_classifier.train(
        input_fn=train_input_fn,
        steps=20000,
        hooks=[logging_hook])

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_images},
        y=test_labels,
        num_epochs=1,
        shuffle=False)
    eval_results = TSRecognition_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    # NOTE(review): tf.app.run() is expected to parse flags and then call
    # this module's main(argv) - confirm against the TF 1.x documentation.
    tf.app.run()

另外，我看过一些视频，我认为它们可以帮助我，但它们只是帮助我训练单个对象。有什么想法可以帮助我吗？

score 0 · Accepted Answer

有一些 CNN 可以输出边界框，还有一些 CNN 只对输入图像进行分类。你的是第二种。如果你想在 TensorFlow 中得到边界框，可以使用对象检测 API，它允许你构建多类 SSD 和 fast-rcnn：https://github.com/tensorflow/models/tree/master/research/object_detection

tensorflow - 如何在 TensorFlow 中使用经过训练的 CNN 模型检测对象并给出边界框

1 回答 1

Related

Reference