
I'm very new to TensorFlow. I've absorbed what I can from the tutorials, but I'm stuck when it comes to making something of my own. I learn best by doing rather than reading, so I'd like something I can tinker with to understand TensorFlow better.

I'm getting this error: "Shape of labels (received (32, 32)) should equal the shape of logits except for the last dimension (received (1, 32, 32))." I've fiddled with the code to no avail.

The program is supposed to read an image and spit out a "mask" image, which will later be used to cut out the background. White pixels mean "keep this" and black pixels mean "don't keep this".

pm.main is a file containing a couple of functions for loading images and one for building the model. I'm trying to pull these things out of train.py (used for training) and eval.py (used for running things) to keep my code cleaner.

“pm.main”:

import tensorflow as tf
import matplotlib.image as mpimg
from PIL import Image as img
import numpy as np
import os

def rgb2gray(rgb):
    #return rgb
    gray = np.dot(rgb[...,:3], [0.3, 0.3, 0.3]).astype('int32')
    return gray

def load_images(dataDirectory):
    images = []
    for i in range(1, 2):
        filePrefix = f'{i:03}'
        fileName = '%s.png' % filePrefix
        maskFileName = '%sm.png' % filePrefix
        image = mpimg.imread(os.path.join(dataDirectory, fileName))
        maskImg = img.open(os.path.join(dataDirectory, maskFileName))
        maskImg.thumbnail((32, 32), img.ANTIALIAS)
        mask = np.array(maskImg)
        fixedMask = rgb2gray(mask)
        images.append([image, fixedMask])
    return images

class x():
    pass

def cnn_model_fn(input):
    input_layer = tf.reshape(input, [-1, 256, 256, 3])

    def addNewLayer(prevLayer):
        conv = tf.layers.conv2d(inputs=prevLayer, filters=32, kernel_size=[3,3], padding='same', activation=tf.nn.relu)
        pool = tf.layers.max_pooling2d(inputs=conv, pool_size=[2, 2], strides=2)
        return pool

    layer1 = addNewLayer(input_layer)
    layer2 = addNewLayer(layer1)
    layer3 = addNewLayer(layer2)
    # layer3 = 64 x 64 x 32

    #return layer3
    #flat = tf.reshape(layer3, [-1, 32 * 32 * 32])
    #dense = tf.layers.dense(inputs=flat, units=64 * 64, activation=tf.nn.relu)
    #dense = tf.layers.dense(inputs=flat, units=64, activation=tf.nn.relu)
    dense = tf.layers.dense(inputs=layer3, units=1, activation=tf.nn.relu)

    result = x()
    result.logits = dense
    #result.logits = flat
    #flat = tf.reshape(layer3, [-1, 32, 32, 1])
    #result.logits = flat
    #print('layer3: %s' % layer3.shape)

    print('logits: %s' % result.logits.shape)
    return result

And "train.py":

import tensorflow as tf
from pm import main as pm
from PIL import Image as img
import matplotlib.pyplot as pyplot
import numpy as np
import os

currentDirectory = os.path.dirname(os.path.realpath(__file__))
dataDirectory = os.path.join(currentDirectory, 'data')

images = pm.load_images(dataDirectory)

imagePlaceholder = tf.placeholder(tf.int32, shape=[256, 256, 3])
#maskPlaceholder = tf.placeholder(tf.int32, shape=[1, 256, 256, 1])
maskPlaceholder = tf.placeholder(tf.int32, shape=[32, 32, 1])

pair = images[0]
image = pair[0] # inputs
mask = pair[1] # labels

model = pm.cnn_model_fn(image)
logits = model.logits

print(mask.shape)
print(logits.shape)

saver = tf.train.Saver()

sess = tf.Session()
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
#saver.restore(sess, 'network.ckpt')


loss_op = tf.losses.sparse_softmax_cross_entropy(labels=mask, logits=logits)

result = sess.run(loss_op, feed_dict={ imagePlaceholder: image, maskPlaceholder: logits })
print(result)

resultData = result.reshape(64, 64).astype('uint8') * 255
print(resultData)

imageData = img.fromarray(resultData)
imageData.save('output.png')

saver.save(sess, 'network.ckpt')

Any insight would be greatly appreciated.


1 Answer


You didn't provide the error stack trace, but here is my best guess.

The error most likely comes from the cross-entropy loss, tf.losses.sparse_softmax_cross_entropy. If you look at its specification, you'll see that this function expects labels to have one fewer dimension (rank) than logits, and that all dimensions except the last must match (so that it knows which label corresponds to which set of logits).
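
For reference, here is a minimal sketch of that shape contract using the same TF 1.x API as in the question (the placeholder shapes below are made up purely for illustration):

import tensorflow as tf

# labels: integer class indices, one fewer dimension than logits.
# logits: same leading dimensions, plus a trailing num_classes dimension.
labels = tf.placeholder(tf.int32, shape=[4, 32, 32])        # e.g. 4 masks of 32x32 class ids
logits = tf.placeholder(tf.float32, shape=[4, 32, 32, 2])   # 2 classes per pixel
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)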

In your case, the labels appear to have shape (1, 32, 32). Assuming you simply want to get rid of the 1, you can do labels = tf.squeeze(labels).
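
Applied just before computing the loss, that would look roughly like this (a minimal sketch, assuming the only mismatch is that extra singleton dimension):

# Drop size-1 dimensions so the labels' shape lines up with the
# logits' leading dimensions before building the loss.
labels = tf.squeeze(labels)    # e.g. (1, 32, 32) -> (32, 32)
loss_op = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)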

Answered on 2018-06-11 at 23:12.