我正在from_indexable
为tf.data.Dataset
https://github.com/tensorflow/tensorflow/issues/14448工作
for的优点from_indexable
是可以并行化,而python生成器不能并行化。
该函数from_indexable
生成一个tf.data.range
,将可索引包装在一个通用tf.py_func
并调用映射。
对于那些现在想要 a 的人from_indexable
,这里是 lib 代码
import tensorflow as tf
import numpy as np
from tensorflow.python.framework import tensor_shape
from tensorflow.python.util import nest
def py_func_decorator(output_types=None, output_shapes=None, stateful=True, name=None):
def decorator(func):
def call(*args):
nonlocal output_shapes
flat_output_types = nest.flatten(output_types)
flat_values = tf.py_func(
func,
inp=args,
Tout=flat_output_types,
stateful=stateful, name=name
)
if output_shapes is not None:
# I am not sure if this is nessesary
output_shapes = nest.map_structure_up_to(
output_types, tensor_shape.as_shape, output_shapes)
flattened_shapes = nest.flatten_up_to(output_types, output_shapes)
for ret_t, shape in zip(flat_values, flattened_shapes):
ret_t.set_shape(shape)
return nest.pack_sequence_as(output_types, flat_values)
return call
return decorator
def from_indexable(iterator, output_types, output_shapes=None, num_parallel_calls=None, stateful=True, name=None):
ds = tf.data.Dataset.range(len(iterator))
@py_func_decorator(output_types, output_shapes, stateful=stateful, name=name)
def index_to_entry(index):
return iterator[index]
return ds.map(index_to_entry, num_parallel_calls=num_parallel_calls)
这里有一个例子(注意:from_indexable
有一个 num_parallel_calls argument
)
class PyDataSet:
def __len__(self):
return 20
def __getitem__(self, item):
return np.random.normal(size=(item+1, 10))
ds = from_indexable(PyDataSet(), output_types=tf.float64, output_shapes=[None, 10])
it = ds.make_one_shot_iterator()
entry = it.get_next()
with tf.Session() as sess:
print(sess.run(entry).shape)
print(sess.run(entry).shape)
2018 年 6 月 10 日更新:由于https://github.com/tensorflow/tensorflow/pull/15121被合并,代码from_indexable
简化为:
import tensorflow as tf
def py_func_decorator(output_types=None, output_shapes=None, stateful=True, name=None):
def decorator(func):
def call(*args, **kwargs):
return tf.contrib.framework.py_func(
func=func,
args=args, kwargs=kwargs,
output_types=output_types, output_shapes=output_shapes,
stateful=stateful, name=name
)
return call
return decorator
def from_indexable(iterator, output_types, output_shapes=None, num_parallel_calls=None, stateful=True, name=None):
ds = tf.data.Dataset.range(len(iterator))
@py_func_decorator(output_types, output_shapes, stateful=stateful, name=name)
def index_to_entry(index):
return iterator[index]
return ds.map(index_to_entry, num_parallel_calls=num_parallel_calls)