接着这个问题以及这里的讨论——我正在尝试使用 Dataset API 读取一个由可变长度张量组成的数据集,并把这些张量切成长度相等的片段(segment)。代码大致如下:
# NOTE(review): this snippet appears to have lost its indentation and several
# lines (the `try:` statements matching the `except` clauses, the closing
# parentheses of the `.map(lambda ...)` call, the generator argument of
# `Dataset.from_generator`, and the body of the final loop). The comments
# below describe only what the visible lines show.
# `tf`, `partial`, `filenames`, `record_type` and `num_segments` are not
# defined in this snippet; they are expected to come from elsewhere.

# Short alias for the contrib Dataset class used throughout the snippet.
Dataset = tf.contrib.data.Dataset
# Width of each slice taken along the second axis in the map lambda below.
segment_len = 6
# Number of parsed records grouped together by `.batch()` below.
batch_size = 16
with tf.Graph().as_default() as g:
# get the tfrecords dataset
# Each record is parsed with `record_type.parse_single_example` (bound to
# graph `g` via `partial`) and the parsed records are then batched.
dataset = tf.contrib.data.TFRecordDataset(filenames).map(
partial(record_type.parse_single_example, graph=g)).batch(batch_size)
# zip it with the number of segments we need to slice each tensor
# `num_segments` is converted to an int64 constant and sliced element-wise,
# so every element of `dataset` is paired with one entry of `num_segments`.
dataset2 = Dataset.zip((dataset, Dataset.from_tensor_slices(
tf.constant(num_segments, dtype=tf.int64))))
it2 = dataset2.make_initializable_iterator()
# Python generator intended to be wrapped by `Dataset.from_generator` below.
# It relies on `sess`, which is only created by the `tf.Session` block at the
# end of the snippet.
def _dataset_generator():
with g.as_default():
while True:
# Pull one ((im, length), count) element from the zipped dataset.
(im, length), count = sess.run(it2.get_next())
# Build a per-element dataset: the (im, length) pair repeated `count` times,
# zipped with the indices 0..count-1.
dataset3 = Dataset.zip((
# repeat each tensor then use map to take a strided slice
Dataset.from_tensors((im, length)).repeat(count),
Dataset.range(count))).map(lambda x, c: (
# Two windows of width `segment_len` along the second axis of x[0]; the
# second window starts one position after the first.
x[0][:, c: c + segment_len],
x[0][:, c + 1: (c + 1) + segment_len],
it = dataset3.make_initializable_iterator()
it_init = it.initializer
# NOTE(review): this yields the iterator's initializer op rather than a pair
# of float32 values; the error quoted after the snippet reports an
# `Operation` input, so this is presumably what triggers it - confirm.
yield it_init
while True:
# Yield the evaluated slices until the per-element iterator is exhausted.
yield sess.run(it.get_next())
# NOTE(review): two bare `except` clauses with no visible `try:` or handler
# bodies - presumably one ended the inner loop and one the outer loop; the
# original lines are missing here.
except tf.errors.OutOfRangeError:
except tf.errors.OutOfRangeError:
# Dataset.from_generator need tensorflow > 1.3 !
# Declares two float32 outputs for the generator; the generator argument
# itself is not visible in this snippet.
das_dataset = Dataset.from_generator(
(tf.float32, tf.float32),
# (tf.TensorShape([]), tf.TensorShape([]))
das_dataset_it = das_dataset.make_one_shot_iterator()
# Session bound to graph `g`; this is the `sess` the generator above uses.
with tf.Session(graph=g) as sess:
# NOTE(review): the loop body is missing from the snippet.
while True:
tensorflow.python.framework.errors_impl.InvalidArgumentError: TypeError: If shallow structure is a sequence, input must also be a sequence. Input has type: <class 'tensorflow.python.framework.ops.Operation'>.
[[Node: PyFunc = PyFunc[Tin=[DT_INT64], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_1"](arg0)]]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>, <unknown>], output_types=[DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](OneShotIterator)]]
我猜这是因为我试图 yield 迭代器的初始化操作(initializer),但我的问题本质上是:我想做的这一切,能否完全用 Dataset API 来实现?