
import tensorflow as tf
import numpy as np
from PIL import Image
from os import listdir

nodes_l1 = 500
nodes_l2 = 100
nodes_l3 = 500
num_batches = 20
num_epochs = 50

# Array of file dirs
human_file_array = listdir('human/')
human_file_array = [['human/'+human_file_array[i],[1,0]] for i in range(len(human_file_array))]
cucumber_file_array = listdir('cucumber/')
cucumber_file_array = [['cucumber/'+cucumber_file_array[i],[0,1]] for i in range(len(cucumber_file_array))]
file_array_shuffled = human_file_array + cucumber_file_array

htest_file_array = listdir('human_test/')
htest_file_array = [['human_test/'+htest_file_array[i],[1,0]] for i in range(len(htest_file_array))]
ctest_file_array = listdir('cucumber_test/')
ctest_file_array = [['cucumber_test/'+ctest_file_array[i],[0,1]] for i in range(len(ctest_file_array))]
test_file_array = ctest_file_array + htest_file_array

input_data = tf.placeholder('float', [None, 250*250*3]
output_data = tf.placeholder('float')

hl1_vars = {
    'weight': tf.Variable(tf.random_normal([250*250*3, nodes_l1])),
    'bias': tf.Variable(tf.random_normal([nodes_l1]))

hl2_vars = {
    'weight': tf.Variable(tf.random_normal([nodes_l1, nodes_l2])),
    'bias': tf.Variable(tf.random_normal([nodes_l2]))

hl3_vars = {
    'weight': tf.Variable(tf.random_normal([nodes_l2, nodes_l3])),
    'bias': tf.Variable(tf.random_normal([nodes_l3]))

output_layer_vars = {
    'weight': tf.Variable(tf.random_normal([nodes_l3, 2])),
    'bias': tf.Variable(tf.random_normal([2]))

layer1 = tf.add(tf.matmul(input_data, hl1_vars['weight']),hl1_vars['bias'])
layer1 = tf.nn.softmax(layer1)

layer2 = tf.add(tf.matmul(layer1, hl2_vars['weight']), hl2_vars['bias'])
layer2 = tf.nn.softmax(layer2)

layer3 = tf.add(tf.matmul(layer2, hl3_vars['weight']), hl3_vars['bias'])
layer3 = tf.nn.softmax(layer3)

output = tf.add(tf.matmul(layer3, output_layer_vars['weight']), output_layer_vars['bias'])
output = tf.nn.softmax(output)

def convert_image(path):
    with Image.open(path) as img:
        img = img.resize((250,250))
        img = img.convert('RGB')
        return img

def train_network():
    #prediction = output
    cost = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(output, output_data)) # output is the prediction, output_data is key
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    with tf.Session() as sess:
        saver = tf.train.Saver()

        for epoch in range(num_epochs):
            epoch_error = 0
            batch_size = int((len(file_array_shuffled)/num_batches))
            for i in range(num_batches):
                path_var = []
                key_var = []
                img_var = []
                #Still Filename Batch!!
                batch_file_array = file_array_shuffled[batch_size*i:(batch_size*i)+batch_size] #batch1['file&val array']['val']
                for batch_val in batch_file_array:
                #FROM HERE ON path_var AND key_var HAVE MATCHING INDEXES DO NOT RANDOMIZE!!!

                #This section here is complicated!
                for path in path_var:
                    img = convert_image(path)
                    img_var.append(np.reshape(np.array(img), 250*250*3))
                #print np.shape(img_var),np.shape(key_var) #img_var is array of size (batch#, 64*64*3) key_var is the key [human, cucumber]

                #End of complicationimage conversion
                _,c = sess.run([optimizer, cost], feed_dict={input_data:img_var, output_data:key_var})
                epoch_error += c
                #print "Batch",i+1,"done out of",num_batches
            print "Epoch",epoch+1,"completed out of",num_epochs,"\tError",epoch_error
            save_path = saver.save(sess, "model.ckpt")


def use_network():
    #prediction = output
    with tf.Session() as sess:

        saver = tf.train.Saver()
        saver.restore(sess, "model.ckpt")

        for test_file in test_file_array:
            #print test_file
            img = np.reshape(np.array(convert_image(test_file[0])), 250*250*3)
            result = output.eval(feed_dict={input_data:[img]})
            print result,tf.argmax(result,1).eval(),test_file[1]



由于我还是新手使用 tensorflow,我认为尝试创建一个可以识别人类和黄瓜之间差异的程序是一个好主意。我从 Image-Net 中提取图像,并将人类图片放入human/并将黄瓜照片放入cucumber/


  1. 制作一组文件路径和键,然后对其进行洗牌。

  2. 批量创建文件路径。

  3. 批处理中的文件路径被转换为图像,调整为 250x250,并添加到图像批处理数组中。(此时键和图像仍然对齐)。

  4. 图像批次和关键批次送入阵列。

  5. 在所有 epoch 结束时,它针对每个图像中的 10 个测试网络。

当我运行 use_network() 时,我在控制台中得到以下输出:

[[ 0.53653401  0.46346596]] [0] [0, 1]
[[ 0.53653401  0.46346596]] [0] [0, 1]
[[ 0.53653401  0.46346596]] [0] [0, 1]
[[ 0.53653401  0.46346596]] [0] [1, 0]
[[ 0.53653401  0.46346596]] [0] [1, 0]
[[ 0.53653401  0.46346596]] [0] [0, 1]
[[ 0.53653401  0.46346596]] [0] [1, 0]
[[ 0.53653401  0.46346596]] [0] [1, 0]
[[ 0.53653401  0.46346596]] [0] [1, 0]
[[ 0.53653401  0.46346596]] [0] [0, 1]
[[ 0.53653401  0.46346596]] [0] [1, 0]
[[ 0.53653401  0.46346596]] [0] [0, 1]
[[ 0.61422414  0.38577583]] [0] [1, 0]
[[ 0.53653401  0.46346596]] [0] [0, 1]
[[ 0.53653401  0.46346596]] [0] [1, 0]
[[ 0.53653401  0.46346596]] [0] [1, 0]
[[ 0.53653401  0.46346596]] [0] [0, 1]
[[ 0.53653401  0.46346596]] [0] [0, 1]
[[ 0.53653401  0.46346596]] [0] [0, 1]
[[ 0.53653401  0.46346596]] [0] [1, 0]

第一个数组是输出节点,第二个数组是输出的 tf.argmax(),第三个是预期的。


Epoch 1 completed out of 50     Error 3762.83390808
Epoch 2 completed out of 50     Error 3758.51748657
Epoch 3 completed out of 50     Error 3753.70425415
Epoch 4 completed out of 50     Error 3748.32539368
Epoch 5 completed out of 50     Error 3742.45524597
Epoch 6 completed out of 50     Error 3736.21272278
Epoch 7 completed out of 50     Error 3729.56756592
Epoch 45 completed out of 50    Error 3677.34605408
Epoch 46 completed out of 50    Error 3677.34388733
Epoch 47 completed out of 50    Error 3677.34150696
Epoch 48 completed out of 50    Error 3677.3391571
Epoch 49 completed out of 50    Error 3677.33673096
Epoch 50 completed out of 50    Error 3677.33418274


  1. 使图像更小,例如 32x32,和/或黑白。看看较小的图像是否会导致预测发生变化。

  2. 改变 reduce_sum 和 reduce_mean 之间的成本方程,以及 sigmoid_cross_entropy 到 softmax_cross_entropy 之间的内部方程。


  1. 只是糟糕的代码

  2. 输入数据太大,没有足够的节点/层来处理。

  3. 图像和关联的密钥在某处被打乱。


1 回答 1




其次,即使您打算使用密集层,也不应该将 softmax 函数用作隐藏层之间的激活(除了一些例外,例如在注意力模型中,但这是一个更高级的概念。)Softmax 强制每个激活的总和为在您可能不想要的层中。我会将隐藏层之间的激活更改为 relu 或至少 tanh。


于 2016-11-06T17:42:18.880 回答