Python 版本 = 3.6.3 TensorFlow 版本 = 1.3.0
我曾在 .Keras
但现在正试图直接在TensorFlow
. 我正在尝试实现Keras
's的等效项,fit_generator
因此我不必在开始时将所有训练数据加载到内存中,而是可以根据需要将其输入网络进行训练。下面的代码代表我尝试开始这样的事情,但如果我做错了这一切,我很想知道我应该在文档中的哪个位置查找以及我应该使用哪些关键字来搜索它。
我的系统目前基于一个生成器,该生成器读取 sqlite 数据库文件以提取np.arrays
然后将它们转换为我想要的数据形状(具有一个向前预测的时间序列)。我现在正在尝试迁移该系统以使用 TensorflowDataset
并在应用时遇到困难tf.py_func
。这是我现在正在尝试的工作方式
import tensorflow as tf
import os
from tensorflow.contrib.data import Dataset, Iterator
import sqlite3
import pandas as pd
import numpy as np
LOOKBACK_ROWS = 600
DATA_DIR = '/mnt/derived_data/processedData'
files = os.listdir(DATA_DIR)
def data_from_files(f):
with sqlite3.connect(DATA_DIR + f) as conn:
results = conn.execute("SELECT col1, col2, FROM tbl")
col_names = [d[0] for d in results.description]
arr = np.array(results.fetchall())
num_obs = arr.shape[0] - LOOKBACK_ROWS + 1
X = np.zeros((num_obs, LOOKBACK_ROWS, 1), dtype = np.float32)
Y = np.zeros((num_obs, 1), dtype = np.float32)
for i in range(num_obs):
idx = i + LOOKBACK_ROWS - 1
X[i , :, 0] = arr[(idx - LOOKBACK_ROWS + 1):(idx + 1), 0]
Y[i, 0] = arr[idx, 1]
return tf.convert_to_tensor(X, name = 'X'), tf.convert_to_tensor(Y, name = 'Y')
filenames = tf.constant(files)
dataset = Dataset.from_tensor_slices((filenames))
dataset = dataset.map(lambda filename: tuple(tf.py_func(
data_from_files,
[filename],
[tf.float32, tf.float32])))
iterator = Iterator.from_structure(dataset.output_types, dataset.output_shapes)
next_element = iterator.get_next()
dataset_init_op = iterator.make_initializer(dataset)
with tf.Session() as sess:
sess.run(dataset_init_op)
while True:
try:
elem = sess.run(next_element)
print('Success')
except tf.errors.OutOfRangeError:
print('End of dataset.')
break
初始化运行良好,但是当我启动会话并运行时,出现以下错误:
2017-10-16 16:58:45.227612: I tensorflow/core/common_runtime/gpu/gpu_device.cc:976] DMA: 0
2017-10-16 16:58:45.227615: I tensorflow/core/common_runtime/gpu/gpu_device.cc:986] 0: Y
2017-10-16 16:58:45.227620: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:65:00.0)
2017-10-16 16:58:45.276138: W tensorflow/core/framework/op_kernel.cc:1192] Invalid argument: TypeError: must be str, not bytes
2017-10-16 16:58:45.276306: W tensorflow/core/framework/op_kernel.cc:1192] Invalid argument: TypeError: must be str, not bytes
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
Traceback (most recent call last):
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1327, in _do_call
return fn(*args)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1306, in _run_fn
status, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/contextlib.py", line 88, in __exit__
next(self.gen)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: TypeError: must be str, not bytes
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>, <unknown>], output_types=[DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/usr/code/nn/data_folder/pipeline.py", line 51, in <module>
elem = sess.run(next_element)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1321, in _do_run
options, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: TypeError: must be str, not bytes
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>, <unknown>], output_types=[DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
>>> python.el: native completion setup loaded
>>>
问题
(1)这似乎完全是一个用例,py_func
但我错了吗?如果没有,谁能指出一些比 Tensorflow 文档更深入的资源?(我确实注意到了 git 上的一个潜在相关问题:https ://github.com/tensorflow/tensorflow/issues/12396 ,但是用 a 包装所有内容的修复对tuple
我没有帮助)。
(2) 我应该遵循的一般流程是什么,特别是在我想从一堆文件名开始并为每个文件名输出多个训练的情况下Example
?
谢谢你。
下面我重写了我的脚本,使其成为一个独立的可运行示例。我相信问题仍然与上面的代码中的相同,但我也正在重复错误以确认。
包含来自@mrry 答案的更改的自包含可运行代码示例:
import tensorflow as tf
import os
import numpy as np
LOOKBACK_ROWS = 600
arr = np.random.random_sample((2000, 2))
np.save("npfile.npy", arr)
def data_from_files(f):
arr = np.load(f)
num_obs = arr.shape[0] - LOOKBACK_ROWS + 1
X = np.zeros((num_obs, LOOKBACK_ROWS, 1), dtype = np.float32)
Y = np.zeros((num_obs, 1), dtype = np.float32)
for i in range(num_obs):
idx = i + LOOKBACK_ROWS - 1
X[i , :, 0] = arr[(idx - LOOKBACK_ROWS + 1):(idx + 1), 0]
Y[i, 0] = arr[idx, 1]
return X, Y
files = ["npfile.npy"]
filenames = tf.constant(files)
# NOTE: In TensorFlow 1.4, `tf.contrib.data` is now `tf.data`.
dataset = tf.contrib.data.Dataset.from_tensor_slices(filenames)
# NOTE: In TensorFlow 1.4, the `tuple` is no longer needed.
dataset = dataset.map(lambda filename: tuple(tf.py_func(
data_from_files,
[filename],
[tf.float32, tf.float32])))
# NOTE: If you only have one `Dataset`, you do not need to use
# `Iterator.from_structure()`.
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()
with tf.Session() as sess:
sess.run(iterator.initializer)
while True:
try:
elem = sess.run(next_element)
print('Success')
except tf.errors.OutOfRangeError:
print('End of dataset.')
break
错误:
2017-10-16 18:30:44.143668: I tensorflow/core/common_runtime/gpu/gpu_device.cc:976] DMA: 0
2017-10-16 18:30:44.143672: I tensorflow/core/common_runtime/gpu/gpu_device.cc:986] 0: Y
2017-10-16 18:30:44.143679: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:65:00.0)
2017-10-16 18:30:44.190852: W tensorflow/core/framework/op_kernel.cc:1192] Unknown: AttributeError: 'bytes' object has no attribute 'read'
2017-10-16 18:30:44.190959: W tensorflow/core/framework/op_kernel.cc:1192] Unknown: AttributeError: 'bytes' object has no attribute 'read'
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
Traceback (most recent call last):
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1327, in _do_call
return fn(*args)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1306, in _run_fn
status, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/contextlib.py", line 88, in __exit__
next(self.gen)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.UnknownError: AttributeError: 'bytes' object has no attribute 'read'
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>, <unknown>], output_types=[DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "demo.py", line 48, in <module>
elem = sess.run(next_element)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1321, in _do_run
options, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.UnknownError: AttributeError: 'bytes' object has no attribute 'read'
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>, <unknown>], output_types=[DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]